From f2676b151d6feb16d6f259ea8bac8ef16cc08f99 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Thu, 19 May 2022 13:55:18 +0000 Subject: [PATCH 001/908] [mlir][tensor] Add canonicalization for tensor.cast from extract_slice Propagate static size information into extract_slice producer if possible. Differential Revision: https://reviews.llvm.org/D125972 --- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 49 ++++++++++++++++++- .../Dialect/Linalg/tile-and-fuse-tensors.mlir | 17 ++----- mlir/test/Dialect/Tensor/canonicalize.mlir | 24 +++++++++ 3 files changed, 76 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 932973a13c2175..fff1563051786e 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -229,11 +229,58 @@ struct ChainedTensorCast : public OpRewritePattern { } }; +/// Fold tensor.cast into tesor.extract_slice producer. +/// Example: +/// ``` +/// %0 = tensor.extract_slice %arg0[%o, 0] [%s, 512] [1, 1] : +/// tensor<128x512xf32> to tensor +/// %1 = tensor.cast %0 : tensor to tensor<16x512xf32> +/// ``` +/// -> +/// ``` +/// %1 = tensor.extract_slice %arg0[%o, 0] [16, 512] [1, 1] : +/// tensor<128x512xf32> to tensor<16x512xf32> +/// ``` +struct TensorCastExtractSlice : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(CastOp tensorCast, + PatternRewriter &rewriter) const final { + auto extractOperand = + tensorCast.getOperand().getDefiningOp(); + + if (!extractOperand || !canFoldIntoProducerOp(tensorCast) || + tensorCast.getType().getShape() == + tensorCast.source().getType().cast().getShape()) + return failure(); + + SmallVector sizes = extractOperand.getMixedSizes(); + auto dimMask = computeRankReductionMask( + extractFromI64ArrayAttr(extractOperand.static_sizes()), + extractOperand.getType().getShape()); + size_t dimIndex = 0; + for (size_t i = 0, e = sizes.size(); i < e; i++) { 
+ if (dimMask && dimMask->count(i)) + continue; + int64_t dim = tensorCast.getType().getShape()[dimIndex++]; + if (ShapedType::isDynamic(dim)) + continue; + sizes[i] = rewriter.getIndexAttr(dim); + } + + rewriter.replaceOpWithNewOp( + tensorCast, tensorCast.getType().cast(), + extractOperand.source(), extractOperand.getMixedOffsets(), sizes, + extractOperand.getMixedStrides()); + return success(); + } +}; + } // namespace void CastOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir index 6f7c5caf8ccb0a..6c0bbc3c1cf958 100644 --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -30,9 +30,6 @@ func.func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: return %3 : tensor } -// CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> - // CHECK: func @matmul_tensors( // CHECK-SAME: %[[A:[0-9a-z]*]]: tensor // CHECK-SAME: %[[B:[0-9a-z]*]]: tensor @@ -40,26 +37,20 @@ func.func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[dA0:.*]] = tensor.dim %[[A]], %[[C0]] : tensor // CHECK-DAG: %[[dA1:.*]] = tensor.dim %[[A]], %[[C1]] : tensor // CHECK-DAG: %[[dB0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor // CHECK-DAG: %[[dB1:.*]] = tensor.dim %[[B]], %[[C1]] : tensor // CHECK: scf.for %[[I:[0-9a-z]*]] -// CHECK: %[[sizeA0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dA0]]] -// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor to tensor -// CHECK: %[[castA:.*]] = tensor.cast %[[stA]] 
: tensor to tensor<2x?xf32> +// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [2, %[[dA1]]] [1, 1] : tensor to tensor<2x?xf32> // CHECK: scf.for %[[J:[0-9a-z]*]] // CHECK-NEXT: scf.for %[[K:[0-9a-z]*]] {{.*}} iter_args(%[[RES:[0-9a-z]*]] // CHECK-DAG: %[[stB1:.*]] = tensor.extract_slice %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1] : tensor to tensor<4x3xf32> // CHECK-DAG: %[[stF:.*]] = tensor.extract_slice %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1] : tensor to tensor<2x3xf32> // // slices of the producing matmul. -// CHECK: %[[sizeB1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dB1]]] -// CHECK: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1] : tensor to tensor -// CHECK: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [%[[sizeA0]], %[[sizeB1]]] [1, 1] : tensor to tensor -// CHECK-DAG: %[[castC:.+]] = tensor.cast %[[stC]] : tensor to tensor<2x4xf32> -// CHECK-DAG: %[[castB:.+]] = tensor.cast %[[stB2]] : tensor to tensor -// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[castA]], %[[castB]] : tensor<2x?xf32>, tensor) outs(%[[castC]] : tensor<2x4xf32>) -> tensor<2x4xf32> +// CHECK-DAG: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], 4] [1, 1] : tensor to tensor +// CHECK-DAG: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor to tensor<2x4xf32> +// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32> // CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32> // CHECK-NEXT: tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]] diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index a0bbcc61e4cf71..ee5653af3e62ff 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -1401,3 +1401,27 @@ 
func.func @insert_slice_cast(%arg0 : tensor<1x?xf32>, %arg1 : tensor, % // CHECK: return %[[RES]] : tensor return %1 : tensor } + +// ----- + +// CHECK-LABEL: func @cast_extract_slice +func.func @cast_extract_slice(%arg0 : tensor<128x512xf32>, %s : index, %o : index) + -> tensor<16x512xf32> { +// CHECK: %[[E:.*]] = tensor.extract_slice %{{.*}}[%{{.*}}, 0] [16, 512] [1, 1] : tensor<128x512xf32> to tensor<16x512xf32> + %0 = tensor.extract_slice %arg0[%o, 0] [%s, 512] [1, 1] : tensor<128x512xf32> to tensor + %1 = tensor.cast %0 : tensor to tensor<16x512xf32> +// CHECK: return %[[E]] : tensor<16x512xf32> + return %1 : tensor<16x512xf32> +} + +// ----- + +// CHECK-LABEL: func @cast_extract_slice_rank_reduce +func.func @cast_extract_slice_rank_reduce(%arg0 : tensor<128x512xf32>, %s : index, %o : index) + -> tensor<16xf32> { +// CHECK: %[[E:.*]] = tensor.extract_slice %{{.*}}[%{{.*}}, 0] [16, 1] [1, 1] : tensor<128x512xf32> to tensor<16xf32> + %0 = tensor.extract_slice %arg0[%o, 0] [%s, 1] [1, 1] : tensor<128x512xf32> to tensor + %1 = tensor.cast %0 : tensor to tensor<16xf32> +// CHECK: return %[[E]] : tensor<16xf32> + return %1 : tensor<16xf32> +} From 4c1b65e7bc923b1ed0473cad51b5070471a201a5 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Thu, 19 May 2022 17:38:04 +0000 Subject: [PATCH 002/908] [mlir][vector] Fix crash in DropInnerMostUnitDims pattern Fix number of dimensions when incrementally replacing dimensions in affine map. 
Differential Revision: https://reviews.llvm.org/D125984 --- .../Vector/Transforms/VectorTransforms.cpp | 5 ++--- ...vector-transfer-collapse-inner-most-dims.mlir | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index 50b89c1a9374f6..f84fc2da08f23d 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -2398,13 +2398,12 @@ class DropInnerMostUnitDims : public OpRewritePattern { {}, srcType.getMemorySpaceAsInt()); } else { AffineMap map = srcType.getLayout().getAffineMap(); - int numResultDims = map.getNumDims() - dimsToDrop; int numSymbols = map.getNumSymbols(); for (size_t i = 0; i < dimsToDrop; ++i) { int dim = srcType.getRank() - i - 1; map = map.replace(rewriter.getAffineDimExpr(dim), - rewriter.getAffineConstantExpr(0), numResultDims, - numSymbols); + rewriter.getAffineConstantExpr(0), + map.getNumDims() - 1, numSymbols); } resultMemrefType = MemRefType::get( srcType.getShape().drop_back(dimsToDrop), srcType.getElementType(), diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir index cd96da3a87923b..288c7e67f756b0 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir @@ -47,3 +47,19 @@ func.func @contiguous_inner_most_dim_bounds(%A: memref<1000x1xf32>, %i:index, %i // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]] // CHECK-SAME: {in_bounds = [true]} // CHECK-SAME: vector<4xf32> + +// ----- + +func.func @contiguous_inner_most_dim_bounds_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 
1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>> + %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>, vector<4x1x1xf32> + return %1 : vector<4x1x1xf32> +} +// CHECK: func @contiguous_inner_most_dim_bounds_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32> +// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]] +// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]] +// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]] +// CHECK-SAME: {in_bounds = [true]} +// CHECK-SAME: vector<4xf32> From e8e7581fb10d974c6e5cf475f40164035288442f Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 17 Apr 2022 16:38:01 -0700 Subject: [PATCH 003/908] [llvm-jitlink] Print session report even if entry-point lookup errors out. --- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 80 ++++++++++++++---------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 0b8df595a2e646..d202e8bf316fff 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -1941,6 +1941,36 @@ static Expected getOrcRuntimeEntryPoint(Session &S) { return S.ES.lookup(S.JDSearchOrder, S.ES.intern(RuntimeEntryPoint)); } +static Expected getEntryPoint(Session &S) { + JITEvaluatedSymbol EntryPoint; + + // Find the entry-point function unconditionally, since we want to force + // it to be materialized to collect stats. + if (auto EP = getMainEntryPoint(S)) + EntryPoint = *EP; + else + return EP.takeError(); + LLVM_DEBUG({ + dbgs() << "Using entry point \"" << EntryPointName + << "\": " << formatv("{0:x16}", EntryPoint.getAddress()) << "\n"; + }); + + // If we're running with the ORC runtime then replace the entry-point + // with the __orc_rt_run_program symbol. 
+ if (!OrcRuntime.empty()) { + if (auto EP = getOrcRuntimeEntryPoint(S)) + EntryPoint = *EP; + else + return EP.takeError(); + LLVM_DEBUG({ + dbgs() << "(called via __orc_rt_run_program_wrapper at " + << formatv("{0:x16}", EntryPoint.getAddress()) << ")\n"; + }); + } + + return EntryPoint; +} + static Expected runWithRuntime(Session &S, ExecutorAddr EntryPointAddr) { StringRef DemangledEntryPoint = EntryPointName; const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple(); @@ -2002,48 +2032,31 @@ int main(int argc, char *argv[]) { if (ShowInitialExecutionSessionState) S->ES.dump(outs()); - JITEvaluatedSymbol EntryPoint = nullptr; + Expected EntryPoint(nullptr); { + ExpectedAsOutParameter _(&EntryPoint); TimeRegion TR(Timers ? &Timers->LinkTimer : nullptr); - // Find the entry-point function unconditionally, since we want to force - // it to be materialized to collect stats. - if (auto EP = getMainEntryPoint(*S)) - EntryPoint = *EP; - else { - reportLLVMJITLinkError(EP.takeError()); - exit(1); - } - LLVM_DEBUG({ - dbgs() << "Using entry point \"" << EntryPointName - << "\": " << formatv("{0:x16}", EntryPoint.getAddress()) << "\n"; - }); - - // If we're running with the ORC runtime then replace the entry-point - // with the __orc_rt_run_program symbol. - if (!OrcRuntime.empty()) { - if (auto EP = getOrcRuntimeEntryPoint(*S)) - EntryPoint = *EP; - else { - reportLLVMJITLinkError(EP.takeError()); - exit(1); - } - LLVM_DEBUG({ - dbgs() << "(called via __orc_rt_run_program_wrapper at " - << formatv("{0:x16}", EntryPoint.getAddress()) << ")\n"; - }); - } + EntryPoint = getEntryPoint(*S); } + // Print any reports regardless of whether we succeeded or failed. 
if (ShowEntryExecutionSessionState) S->ES.dump(outs()); if (ShowAddrs) S->dumpSessionInfo(outs()); - ExitOnErr(runChecks(*S)); - dumpSessionStats(*S); + if (!EntryPoint) { + if (Timers) + Timers->JITLinkTG.printAll(errs()); + reportLLVMJITLinkError(EntryPoint.takeError()); + exit(1); + } + + ExitOnErr(runChecks(*S)); + if (NoExec) return 0; @@ -2053,16 +2066,19 @@ int main(int argc, char *argv[]) { TimeRegion TR(Timers ? &Timers->RunTimer : nullptr); if (!OrcRuntime.empty()) Result = - ExitOnErr(runWithRuntime(*S, ExecutorAddr(EntryPoint.getAddress()))); + ExitOnErr(runWithRuntime(*S, ExecutorAddr(EntryPoint->getAddress()))); else Result = ExitOnErr( - runWithoutRuntime(*S, ExecutorAddr(EntryPoint.getAddress()))); + runWithoutRuntime(*S, ExecutorAddr(EntryPoint->getAddress()))); } // Destroy the session. ExitOnErr(S->ES.endSession()); S.reset(); + if (Timers) + Timers->JITLinkTG.printAll(errs()); + // If the executing code set a test result override then use that. if (UseTestResultOverride) Result = TestResultOverride; From 4bb18a89c418082c3b1e9d16fde40fbc915d17e5 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 18 May 2022 18:39:33 -0700 Subject: [PATCH 004/908] [ORC] Add missing std::moves, pass SymbolLookupSet by value. Avoids some unnecessary SymbolStringPtr copies. --- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 2 +- llvm/lib/ExecutionEngine/Orc/Core.cpp | 4 ++-- llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index d525914200a132..b3c85b53eda3a5 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -1523,7 +1523,7 @@ class ExecutionSession { /// after resolution, the function will return a success value, but the /// error will be reported via reportErrors. 
Expected lookup(const JITDylibSearchOrder &SearchOrder, - const SymbolLookupSet &Symbols, + SymbolLookupSet Symbols, LookupKind K = LookupKind::Static, SymbolState RequiredState = SymbolState::Ready, RegisterDependenciesFunction RegisterDependencies = diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 7514b242d0c008..7467de3c15934f 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -2070,7 +2070,7 @@ void ExecutionSession::lookup( Expected ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, - const SymbolLookupSet &Symbols, LookupKind K, + SymbolLookupSet Symbols, LookupKind K, SymbolState RequiredState, RegisterDependenciesFunction RegisterDependencies) { #if LLVM_ENABLE_THREADS @@ -2102,7 +2102,7 @@ ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, #endif // Perform the asynchronous lookup. - lookup(K, SearchOrder, Symbols, RequiredState, NotifyComplete, + lookup(K, SearchOrder, std::move(Symbols), RequiredState, NotifyComplete, RegisterDependencies); #if LLVM_ENABLE_THREADS diff --git a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp index 44cb78c773c9fb..3452267e4df4c5 100644 --- a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp @@ -24,7 +24,7 @@ void lookupAndRecordAddrs( Symbols.add(KV.first, LookupFlags); ES.lookup( - K, SearchOrder, Symbols, SymbolState::Ready, + K, SearchOrder, std::move(Symbols), SymbolState::Ready, [Pairs = std::move(Pairs), OnRec = std::move(OnRecorded)](Expected Result) mutable { if (!Result) @@ -47,7 +47,7 @@ Error lookupAndRecordAddrs( std::promise ResultP; auto ResultF = ResultP.get_future(); lookupAndRecordAddrs([&](Error Err) { ResultP.set_value(std::move(Err)); }, - ES, K, SearchOrder, Pairs, LookupFlags); + ES, K, SearchOrder, std::move(Pairs), LookupFlags); return ResultF.get(); } 
From d27e4bcebdb275a169b859f2df7890f0813ef2f8 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 18 May 2022 18:58:14 -0700 Subject: [PATCH 005/908] [ORC] Add a FIXME. --- llvm/lib/ExecutionEngine/Orc/Core.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 7467de3c15934f..e13028196c84c3 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -485,6 +485,9 @@ Expected buildSimpleReexportsAliasMap(JITDylib &SourceJD, class InProgressLookupState { public: + // FIXME: Reduce the number of SymbolStringPtrs here. See + // https://github.com/llvm/llvm-project/issues/55576. + InProgressLookupState(LookupKind K, JITDylibSearchOrder SearchOrder, SymbolLookupSet LookupSet, SymbolState RequiredState) : K(K), SearchOrder(std::move(SearchOrder)), From 2ca81cd91486280b232a6be45858f757a8ea0870 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 18 May 2022 19:19:57 -0700 Subject: [PATCH 006/908] [ORC] Avoid more SymbolStringPtr copies. 
--- llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp index c1ad569dd65d8b..394a555e453b8b 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp @@ -63,7 +63,6 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES, auto Name = Sym.getName(); if (!Name) return Name.takeError(); - auto InternedName = ES.intern(*Name); auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym); if (!SymFlags) return SymFlags.takeError(); @@ -72,7 +71,7 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES, if (Name->startswith("l")) *SymFlags &= ~JITSymbolFlags::Exported; - I.SymbolFlags[InternedName] = std::move(*SymFlags); + I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags); } for (auto &Sec : Obj.sections()) { @@ -121,7 +120,7 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES, auto Name = Sym.getName(); if (!Name) return Name.takeError(); - auto InternedName = ES.intern(*Name); + auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym); if (!SymFlags) return SymFlags.takeError(); @@ -130,7 +129,7 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES, if (Sym.getBinding() == ELF::STB_GNU_UNIQUE) *SymFlags |= JITSymbolFlags::Weak; - I.SymbolFlags[InternedName] = std::move(*SymFlags); + I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags); } SymbolStringPtr InitSymbol; @@ -175,12 +174,12 @@ getGenericObjectFileSymbolInfo(ExecutionSession &ES, auto Name = Sym.getName(); if (!Name) return Name.takeError(); - auto InternedName = ES.intern(*Name); + auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym); if (!SymFlags) return SymFlags.takeError(); - I.SymbolFlags[InternedName] = std::move(*SymFlags); + I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags); } return I; From 066243057fc2ae45ae6bbc2f4874ca1f84c3a3ff Mon Sep 17 00:00:00 2001 
From: Keith Smiley Date: Tue, 3 May 2022 18:43:46 -0700 Subject: [PATCH 007/908] [Object] Fix updating darwin archives When creating an archive, llvm-ar looks at the host to determine the archive format to use, on Apple platforms this means it uses the K_DARWIN format. K_DARWIN is _virtually_ equivalent to K_BSD, except for some very slight differences around padding, timestamps in deterministic mode, and 64 bit formats. When updating an archive using llvm-ar, or llvm-objcopy, Archive would try to determine the kind, but it was not possible to get K_DARWIN in the initialization of the archive, because they're virtually indistinguishable from K_BSD, especially since the slight differences only apply in very specific cases. This leads to linker failures when the alignment workaround is not applied to an archive copied with llvm-objcopy. This change teaches Archive to infer the K_DARWIN type in the cases where it's possible and the first object in the archive is a macho object. This avoids using the host triple to determine this to not affect cross compiling. Ideally we would eliminate the separate K_DARWIN type entirely since it's not a truly separate archive type, but then we'd have to force the macho workarounds on the BSD format generally. This might be acceptable but then it would be unclear how to handle this case without forcing the K_DARWIN64 format on all BSD users: ``` if (LastOffset >= Sym64Threshold) { if (Kind == object::Archive::K_DARWIN) Kind = object::Archive::K_DARWIN64; else Kind = object::Archive::K_GNU64; } ``` The logic used to determine if the object is macho is derived from the logic llvm-ar uses. 
Previous context: - 111cd669e90e5b2132187d36f8b141b11a671a8b - 23a76be5adcaa768ba538f8a4514a7afccf61988 Differential Revision: https://reviews.llvm.org/D124895 --- llvm/include/llvm/Object/Archive.h | 1 + llvm/include/llvm/Object/ArchiveWriter.h | 5 ++ llvm/lib/ObjCopy/Archive.cpp | 5 ++ llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp | 5 +- llvm/lib/Object/Archive.cpp | 9 +++ llvm/lib/Object/ArchiveWriter.cpp | 37 +++++++++++ llvm/test/tools/llvm-ar/macho-edit.test | 16 +++++ .../llvm-objcopy/MachO/archive-format.test | 12 ++++ .../llvm-objcopy/MachO/universal-object.test | 4 +- llvm/tools/llvm-ar/llvm-ar.cpp | 63 +++++-------------- 10 files changed, 106 insertions(+), 51 deletions(-) create mode 100644 llvm/test/tools/llvm-ar/macho-edit.test create mode 100644 llvm/test/tools/llvm-objcopy/MachO/archive-format.test diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h index 1f4a274e67fdf2..a36c9bd6163b49 100644 --- a/llvm/include/llvm/Object/Archive.h +++ b/llvm/include/llvm/Object/Archive.h @@ -340,6 +340,7 @@ class Archive : public Binary { Kind kind() const { return (Kind)Format; } bool isThin() const { return IsThin; } + static object::Archive::Kind getDefaultKindForHost(); child_iterator child_begin(Error &Err, bool SkipInternal = true) const; child_iterator child_end() const; diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h index 7eaf13e8fb2294..6acab45215da1c 100644 --- a/llvm/include/llvm/Object/ArchiveWriter.h +++ b/llvm/include/llvm/Object/ArchiveWriter.h @@ -26,6 +26,11 @@ struct NewArchiveMember { NewArchiveMember() = default; NewArchiveMember(MemoryBufferRef BufRef); + // Detect the archive format from the object or bitcode file. This helps + // assume the archive format when creating or editing archives in the case + // one isn't explicitly set. 
+ object::Archive::Kind detectKindFromObject() const; + static Expected getOldMember(const object::Archive::Child &OldMember, bool Deterministic); diff --git a/llvm/lib/ObjCopy/Archive.cpp b/llvm/lib/ObjCopy/Archive.cpp index ef893ccb409cba..742ca0b890cf26 100644 --- a/llvm/lib/ObjCopy/Archive.cpp +++ b/llvm/lib/ObjCopy/Archive.cpp @@ -11,6 +11,7 @@ #include "llvm/ObjCopy/MultiFormatConfig.h" #include "llvm/ObjCopy/ObjCopy.h" #include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" @@ -61,6 +62,10 @@ static Error deepWriteArchive(StringRef ArcName, ArrayRef NewMembers, bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic, bool Thin) { + if (Kind == object::Archive::K_BSD && !NewMembers.empty() && + NewMembers.front().detectKindFromObject() == object::Archive::K_DARWIN) + Kind = object::Archive::K_DARWIN; + if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind, Deterministic, Thin)) return createFileError(ArcName, std::move(E)); diff --git a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp index 94e89224b3c8fd..8ba0c77389324d 100644 --- a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp @@ -484,9 +484,12 @@ Error objcopy::macho::executeObjcopyOnMachOUniversalBinary( createNewArchiveMembers(Config, **ArOrErr); if (!NewArchiveMembersOrErr) return NewArchiveMembersOrErr.takeError(); + auto Kind = (*ArOrErr)->kind(); + if (Kind == object::Archive::K_BSD) + Kind = object::Archive::K_DARWIN; Expected> OutputBufferOrErr = writeArchiveToBuffer(*NewArchiveMembersOrErr, - (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(), + (*ArOrErr)->hasSymbolTable(), Kind, Config.getCommonConfig().DeterministicArchives, (*ArOrErr)->isThin()); if (!OutputBufferOrErr) diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp index 3960971d882cd0..0b29428f0c00d2 100644 --- 
a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -925,6 +926,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) Err = Error::success(); } +object::Archive::Kind Archive::getDefaultKindForHost() { + Triple HostTriple(sys::getProcessTriple()); + return HostTriple.isOSDarwin() + ? object::Archive::K_DARWIN + : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG + : object::Archive::K_GNU); +} + Archive::child_iterator Archive::child_begin(Error &Err, bool SkipInternal) const { if (isEmpty()) diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index 0ac11224a48ca4..dbf5052cdac060 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -18,8 +18,11 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Error.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Object/XCOFFObjectFile.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Errc.h" @@ -44,6 +47,40 @@ NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), MemberName(BufRef.getBufferIdentifier()) {} +object::Archive::Kind NewArchiveMember::detectKindFromObject() const { + auto MemBufferRef = this->Buf->getMemBufferRef(); + Expected> OptionalObject = + object::ObjectFile::createObjectFile(MemBufferRef); + + if (OptionalObject) + return isa(**OptionalObject) + ? object::Archive::K_DARWIN + : (isa(**OptionalObject) + ? 
object::Archive::K_AIXBIG + : object::Archive::K_GNU); + + // Squelch the error in case we had a non-object file. + consumeError(OptionalObject.takeError()); + + // If we're adding a bitcode file to the archive, detect the Archive kind + // based on the target triple. + LLVMContext Context; + if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) { + if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile( + MemBufferRef, file_magic::bitcode, &Context)) { + auto &IRObject = cast(**ObjOrErr); + return Triple(IRObject.getTargetTriple()).isOSDarwin() + ? object::Archive::K_DARWIN + : object::Archive::K_GNU; + } else { + // Squelch the error in case this was not a SymbolicFile. + consumeError(ObjOrErr.takeError()); + } + } + + return object::Archive::getDefaultKindForHost(); +} + Expected NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, bool Deterministic) { diff --git a/llvm/test/tools/llvm-ar/macho-edit.test b/llvm/test/tools/llvm-ar/macho-edit.test new file mode 100644 index 00000000000000..9bac39053d6759 --- /dev/null +++ b/llvm/test/tools/llvm-ar/macho-edit.test @@ -0,0 +1,16 @@ +## Make sure the darwin format specifics are preserved when updating archives. + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %p/Inputs/macho.yaml > %t/dup.o + +## Create the archive with a duplicate object to ensure that darwin specific +## incrementing timestamps are used. +# RUN: llvm-ar --format=darwin crD %t/lib.a %t/dup.o %t/dup.o +# RUN: cp %t/lib.a %t/lib.copy.a + +## Replace an object file in the archive to force a re-write. 
+# RUN: llvm-ar crD %t/lib.a %t/dup.o +# RUN: obj2yaml %t/lib.a | FileCheck --implicit-check-not=LastModified %s + +# CHECK: LastModified: '1' +# CHECK: LastModified: '2' diff --git a/llvm/test/tools/llvm-objcopy/MachO/archive-format.test b/llvm/test/tools/llvm-objcopy/MachO/archive-format.test new file mode 100644 index 00000000000000..dbbb2c5bf6ceb6 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/archive-format.test @@ -0,0 +1,12 @@ +# REQUIRES: x86-registered-target + +## Make sure the darwin format specifics are preserved when updating archives. + +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/macho.64.s -o %t/dup.o +# RUN: llvm-ar --format=darwin crD %t/lib.a %t/dup.o %t/dup.o +# RUN: llvm-objcopy %t/lib.a %t/lib.copy.a +# RUN: obj2yaml %t/lib.copy.a | FileCheck --implicit-check-not=LastModified %s + +# CHECK: LastModified: '1' +# CHECK: LastModified: '2' diff --git a/llvm/test/tools/llvm-objcopy/MachO/universal-object.test b/llvm/test/tools/llvm-objcopy/MachO/universal-object.test index a6146fd56483a0..b23fecee6c640c 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/universal-object.test +++ b/llvm/test/tools/llvm-objcopy/MachO/universal-object.test @@ -12,9 +12,9 @@ # RUN: cmp %t.i386 %t.i386.copy # RUN: cmp %t.x86_64 %t.x86_64.copy -## Case 2: copy a universal object file containing an archive. +## Case 2: copy a universal object file containing an archive with darwin specific timestamps. 
# RUN: rm -f %t.archive.i386 -# RUN: llvm-ar cr %t.archive.i386 %t.i386 +# RUN: llvm-ar --format=darwin cr %t.archive.i386 %t.i386 %t.i386 # RUN: llvm-lipo %t.archive.i386 %t.x86_64 -create -output %t.universal.containing.archive # RUN: llvm-objcopy %t.universal.containing.archive %t.universal.containing.archive.copy # RUN: llvm-lipo %t.universal.containing.archive.copy -archs | FileCheck --check-prefix=VERIFY_ARCHS %s diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp index 214b6e679698ca..a93943295cf938 100644 --- a/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/llvm/tools/llvm-ar/llvm-ar.cpp @@ -880,48 +880,6 @@ computeNewArchiveMembers(ArchiveOperation Operation, return Ret; } -static object::Archive::Kind getDefaultForHost() { - Triple HostTriple(sys::getProcessTriple()); - return HostTriple.isOSDarwin() - ? object::Archive::K_DARWIN - : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG - : object::Archive::K_GNU); -} - -static object::Archive::Kind getKindFromMember(const NewArchiveMember &Member) { - auto MemBufferRef = Member.Buf->getMemBufferRef(); - Expected> OptionalObject = - object::ObjectFile::createObjectFile(MemBufferRef); - - if (OptionalObject) - return isa(**OptionalObject) - ? object::Archive::K_DARWIN - : (isa(**OptionalObject) - ? object::Archive::K_AIXBIG - : object::Archive::K_GNU); - - // squelch the error in case we had a non-object file - consumeError(OptionalObject.takeError()); - - // If we're adding a bitcode file to the archive, detect the Archive kind - // based on the target triple. - LLVMContext Context; - if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) { - if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile( - MemBufferRef, file_magic::bitcode, &Context)) { - auto &IRObject = cast(**ObjOrErr); - return Triple(IRObject.getTargetTriple()).isOSDarwin() - ? object::Archive::K_DARWIN - : object::Archive::K_GNU; - } else { - // Squelch the error in case this was not a SymbolicFile. 
- consumeError(ObjOrErr.takeError()); - } - } - - return getDefaultForHost(); -} - static void performWriteOperation(ArchiveOperation Operation, object::Archive *OldArchive, std::unique_ptr OldArchiveBuf, @@ -943,14 +901,23 @@ static void performWriteOperation(ArchiveOperation Operation, case Default: if (Thin) Kind = object::Archive::K_GNU; - else if (OldArchive) + else if (OldArchive) { Kind = OldArchive->kind(); - else if (NewMembersP) - Kind = !NewMembersP->empty() ? getKindFromMember(NewMembersP->front()) - : getDefaultForHost(); + if (Kind == object::Archive::K_BSD) { + auto InferredKind = object::Archive::K_BSD; + if (NewMembersP && !NewMembersP->empty()) + InferredKind = NewMembersP->front().detectKindFromObject(); + else if (!NewMembers.empty()) + InferredKind = NewMembers.front().detectKindFromObject(); + if (InferredKind == object::Archive::K_DARWIN) + Kind = object::Archive::K_DARWIN; + } + } else if (NewMembersP) + Kind = !NewMembersP->empty() ? NewMembersP->front().detectKindFromObject() + : object::Archive::getDefaultKindForHost(); else - Kind = !NewMembers.empty() ? getKindFromMember(NewMembers.front()) - : getDefaultForHost(); + Kind = !NewMembers.empty() ? NewMembers.front().detectKindFromObject() + : object::Archive::getDefaultKindForHost(); break; case GNU: Kind = object::Archive::K_GNU; From 6746e6a37284e6d9aa83f1f2d0b76013898256cb Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Wed, 23 Mar 2022 16:29:07 -0700 Subject: [PATCH 008/908] [docs][tools] Remove old llvm-bcanalyzer options These no longer exist. A few have been added since but I'm not enough of an expert to provide a useful blurb on them outside of what you see with `--help`. 
Differential Revision: https://reviews.llvm.org/D122361 --- llvm/docs/CommandGuide/llvm-bcanalyzer.rst | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-bcanalyzer.rst b/llvm/docs/CommandGuide/llvm-bcanalyzer.rst index 36ad3998ea4b10..8f15e03db3b02c 100644 --- a/llvm/docs/CommandGuide/llvm-bcanalyzer.rst +++ b/llvm/docs/CommandGuide/llvm-bcanalyzer.rst @@ -28,25 +28,13 @@ OPTIONS .. program:: llvm-bcanalyzer -.. option:: -nodetails - - Causes :program:`llvm-bcanalyzer` to abbreviate its output by writing out only - a module level summary. The details for individual functions are not - displayed. - -.. option:: -dump +.. option:: --dump Causes :program:`llvm-bcanalyzer` to dump the bitcode in a human readable format. This format is significantly different from LLVM assembly and provides details about the encoding of the bitcode file. -.. option:: -verify - - Causes :program:`llvm-bcanalyzer` to verify the module produced by reading the - bitcode. This ensures that the statistics generated are based on a consistent - module. - -.. option:: -help +.. option:: --help Print a summary of command line options. From 2569f79a4fe404da9d905188333484d5d66caf7e Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Mon, 14 Mar 2022 21:50:46 -0700 Subject: [PATCH 009/908] [llvm-dis] Improve missing file error message Previously the error message didn't include the failing path, which made it hard to tell what went wrong. 
Differential Revision: https://reviews.llvm.org/D121665 --- llvm/test/tools/llvm-dis/errors.test | 3 +++ llvm/tools/llvm-dis/llvm-dis.cpp | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-dis/errors.test diff --git a/llvm/test/tools/llvm-dis/errors.test b/llvm/test/tools/llvm-dis/errors.test new file mode 100644 index 00000000000000..47bd655263e0ef --- /dev/null +++ b/llvm/test/tools/llvm-dis/errors.test @@ -0,0 +1,3 @@ +# RUN: not llvm-dis missing-file-path 2>&1 | FileCheck %s --check-prefix=MISSING + +# MISSING: error: missing-file-path: No such file or directory diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index 3d40da21f472c1..f0e1655b19be09 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -180,8 +180,13 @@ int main(int argc, char **argv) { } for (std::string InputFilename : InputFilenames) { - std::unique_ptr MB = ExitOnErr( - errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename))); + ErrorOr> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(InputFilename); + if (std::error_code EC = BufferOrErr.getError()) { + WithColor::error() << InputFilename << ": " << EC.message() << '\n'; + return 1; + } + std::unique_ptr MB = std::move(BufferOrErr.get()); BitcodeFileContents IF = ExitOnErr(llvm::getBitcodeFileContents(*MB)); From 44718c5ef2c557a035793fff6aec96452fba4cc8 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 12 May 2022 19:30:55 -0700 Subject: [PATCH 010/908] [WebAssembly] Use CHECK-NEXT for irreducible-cfg.mir Reviewed By: dschuff Differential Revision: https://reviews.llvm.org/D125514 --- .../CodeGen/WebAssembly/irreducible-cfg.mir | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir index af2f9b81376475..122775977a70e4 100644 --- a/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir +++ 
b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir @@ -32,52 +32,52 @@ body: | %0:i32 = CONST_I32 100, implicit-def $arguments BR_IF %bb.2, %0:i32, implicit-def $arguments ; CHECK: bb.0.pred0: - ; CHECK: BR_IF %bb.2, %0, implicit-def $arguments + ; CHECK: BR_IF %bb.2, %0, implicit-def $arguments bb.1.pred1: ; predecessors: %bb.1 successors: %bb.2, %bb.3 BR_IF %bb.3, %0:i32, implicit-def $arguments ; CHECK: bb.1.pred1: - ; CHECK: BR_IF %bb.7, %0, implicit-def $arguments + ; CHECK: BR_IF %bb.7, %0, implicit-def $arguments ; This falls through to bb.2, so we don't need an additional BR here - ; CHECK-NOT: BR + ; CHECK-NOT: BR ; Routing block for entry0, when predecessor is outside the loop ; This routing block is shared between the two predecessors: pred0 and pred1. ; CHECK: bb.2: - ; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments - ; CHECK: BR %bb.6, implicit-def $arguments + ; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments + ; CHECK-NEXT: BR %bb.6, implicit-def $arguments bb.2.entry0: ; predecessors: %bb.0, %bb.1, %bb.1 successors: %bb.3 BR %bb.3, implicit-def $arguments ; CHECK: bb.3.entry0: - ; CHECK: BR %bb.4, implicit-def $arguments + ; CHECK: BR %bb.4, implicit-def $arguments ; Routing block for entry1, when predecessor is inside the loop ; CHECK: bb.4: - ; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments - ; CHECK: BR %bb.6, implicit-def $arguments + ; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments + ; CHECK-NEXT: BR %bb.6, implicit-def $arguments bb.3.entry1: ; predecessors: %bb.1, %bb.2 successors: %bb.2 BR %bb.2, implicit-def $arguments ; CHECK: bb.5.entry1: - ; CHECK: BR %bb.8, implicit-def $arguments + ; CHECK: BR %bb.8, implicit-def $arguments ; Dispatch block ; CHECK: bb.6: - ; CHECK: BR_TABLE_I32 %1, %bb.3, %bb.5, %bb.5, implicit-def $arguments + ; CHECK: BR_TABLE_I32 %1, %bb.3, %bb.5, %bb.5, implicit-def $arguments ; Routing block for entry1, when predecessor is outside the loop ; CHECK: bb.7: - ; CHECK: %1:i32 = CONST_I32 1, 
implicit-def $arguments - ; CHECK: BR %bb.6, implicit-def $arguments + ; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments + ; CHECK-NEXT: BR %bb.6, implicit-def $arguments ; Routing block for entry0, when predecessor is inside the loop ; CHECK: bb.8: - ; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments - ; CHECK: BR %bb.6, implicit-def $arguments + ; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments + ; CHECK-NEXT: BR %bb.6, implicit-def $arguments From cde083e010952722babb3e08b457e473e86f412d Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 12 May 2022 18:46:53 -0700 Subject: [PATCH 011/908] [WebAssembly] Fix register use-def in FixIrreducibleControlFlow FixIrreducibleControlFlow pass adds dispatch blocks with a `br_table` that has multiple predecessors and successors, because it serves as something like a traffic hub for BBs. As a result of this, there can be register uses that are not dominated by a def in every path from the entry block. For example, suppose register %a is defined in BB1 and used in BB2, and there is a single path from BB1 and BB2: ``` BB1 -> ... -> BB2 ``` After FixIrreducibleControlFlow runs, there can be a dispatch block between these two BBs: ``` BB1 -> ... -> Dispatch -> ... -> BB2 ``` And this dispatch block has multiple predecessors, now there is a path to BB2 that does not first visit BB1, and in that path %a is not dominated by a def anymore. To fix this problem, we have been adding `IMPLICIT_DEF`s to all registers in PrepareForLiveInternals pass, and then remove unnecessary ones in OptimizeLiveIntervals pass after computing `LiveIntervals`. But FixIrreducibleControlFlow pass itself ends up violating register use-def relationship, resulting in invalid code. This was OK so far because MIR verifier apparently didn't check this in validation. But @arsenm fixed this and it caught this bug in validation (https://github.com/llvm/llvm-project/issues/55249). 
This CL moves the `IMPLICIT_DEF` adding routine from PrepareForLiveIntervals to FixIrreducibleControlFlow. We only run it when FixIrreducibleControlFlow changes the code. And then PrepareForLiveIntervals doesn't do anything other than setting the `TracksLiveness` property, which is a prerequisite for running the `LiveIntervals` analysis, which is required by the next pass OptimizeLiveIntervals. But in our backend we don't seem to do anything that invalidates this up until OptimizeLiveIntervals, and I'm not sure why we are calling `invalidateLiveness` in the ReplacePhysRegs pass, because what that pass does is to replace physical registers with virtual ones 1-to-1. I deleted the `invalidateLiveness` call there and we don't need to set that flag explicitly, which obviates all the need for PrepareForLiveIntervals. (By the way, this 'Liveness' here is different from the `LiveIntervals` analysis. Setting this only means BBs' live-in info is correct, all uses are dominated by defs, and the `kill` flag is conservatively correct, which means if there is a `kill` flag set it should be the last use. See https://github.com/llvm/llvm-project/blob/2a0837aab1489c88efb03784e34c4dc9f2e28302/llvm/include/llvm/CodeGen/MachineFunction.h#L125-L134 for details.) So this CL removes the PrepareForLiveIntervals pass altogether. Something similar to this was attempted by D56091 long ago but that fell short of actually removing the pass, and I couldn't land it because FixIrreducibleControlFlow violated the use-def relationship, which this CL fixes. This doesn't change output in any meaningful way. All test changes except `irreducible-cfg.mir` are register numbering changes. Also this will likely reduce compilation time, because we have been adding `IMPLICIT_DEF` for all registers every time `-O2` is given, but now we do that only when there is irreducible control flow, which is rare. Fixes https://github.com/llvm/llvm-project/issues/55249. 
Reviewed By: dschuff, kripken Differential Revision: https://reviews.llvm.org/D125515 --- llvm/lib/Target/WebAssembly/CMakeLists.txt | 1 - llvm/lib/Target/WebAssembly/WebAssembly.h | 2 - .../WebAssemblyFixIrreducibleControlFlow.cpp | 49 ++++++- .../WebAssemblyOptimizeLiveIntervals.cpp | 7 +- .../WebAssemblyPrepareForLiveIntervals.cpp | 126 ------------------ .../WebAssemblyReplacePhysRegs.cpp | 3 - .../WebAssembly/WebAssemblyTargetMachine.cpp | 4 - .../CodeGen/WebAssembly/irreducible-cfg.mir | 2 + llvm/test/CodeGen/WebAssembly/reg-stackify.ll | 8 +- .../umulo-128-legalisation-lowering.ll | 8 +- llvm/test/CodeGen/WebAssembly/umulo-i64.ll | 2 +- .../llvm/lib/Target/WebAssembly/BUILD.gn | 1 - 12 files changed, 65 insertions(+), 148 deletions(-) delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt index e44ff54d04f96d..8653c1b4e1ed5b 100644 --- a/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -43,7 +43,6 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyOptimizeLiveIntervals.cpp WebAssemblyOptimizeReturned.cpp WebAssemblyPeephole.cpp - WebAssemblyPrepareForLiveIntervals.cpp WebAssemblyRegisterInfo.cpp WebAssemblyRegColoring.cpp WebAssemblyRegNumbering.cpp diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h index d7cd6e6f6f0004..aee8f160f38d34 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -40,7 +40,6 @@ FunctionPass *createWebAssemblySetP2AlignOperands(); // Late passes. 
FunctionPass *createWebAssemblyReplacePhysRegs(); FunctionPass *createWebAssemblyNullifyDebugValueLists(); -FunctionPass *createWebAssemblyPrepareForLiveIntervals(); FunctionPass *createWebAssemblyOptimizeLiveIntervals(); FunctionPass *createWebAssemblyMemIntrinsicResults(); FunctionPass *createWebAssemblyRegStackify(); @@ -66,7 +65,6 @@ void initializeWebAssemblyArgumentMovePass(PassRegistry &); void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &); void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &); void initializeWebAssemblyNullifyDebugValueListsPass(PassRegistry &); -void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &); void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &); void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &); void initializeWebAssemblyRegStackifyPass(PassRegistry &); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 5d09b3fa764548..80792298c4bb9b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -492,6 +492,46 @@ FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { return new WebAssemblyFixIrreducibleControlFlow(); } +// Test whether the given register has an ARGUMENT def. +static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { + for (const auto &Def : MRI.def_instructions(Reg)) + if (WebAssembly::isArgument(Def.getOpcode())) + return true; + return false; +} + +// Add a register definition with IMPLICIT_DEFs for every register to cover for +// register uses that don't have defs in every possible path. +// TODO: This is fairly heavy-handed; find a better approach. 
+static void addImplicitDefs(MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock &Entry = *MF.begin(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { + Register Reg = Register::index2VirtReg(I); + + // Skip unused registers. + if (MRI.use_nodbg_empty(Reg)) + continue; + + // Skip registers that have an ARGUMENT definition. + if (hasArgumentDef(Reg, MRI)) + continue; + + BuildMI(Entry, Entry.begin(), DebugLoc(), + TII.get(WebAssembly::IMPLICIT_DEF), Reg); + } + + // Move ARGUMENT_* instructions to the top of the entry block, so that their + // liveness reflects the fact that these really are live-in values. + for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) { + if (WebAssembly::isArgument(MI.getOpcode())) { + MI.removeFromParent(); + Entry.insert(Entry.begin(), &MI); + } + } +} + bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" @@ -506,8 +546,15 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) { // We rewrote part of the function; recompute relevant things. - MF.getRegInfo().invalidateLiveness(); MF.RenumberBlocks(); + // Now we've inserted dispatch blocks, some register uses can have incoming + // paths without a def. For example, before this pass register %a was + // defined in BB1 and used in BB2, and there was only one path from BB1 and + // BB2. But if this pass inserts a dispatch block having multiple + // predecessors between the two BBs, now there are paths to BB2 without + // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding + // IMPLICIT_DEFs to all regs is one simple way to fix it. 
+ addImplicitDefs(MF); return true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index 6a6cac6d956fb8..d542ddb45c2e20 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -49,6 +49,11 @@ class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::TracksLiveness); + } + bool runOnMachineFunction(MachineFunction &MF) override; public: @@ -102,7 +107,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( SplitLIs.clear(); } - // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF + // In FixIrreducibleControlFlow, we conservatively inserted IMPLICIT_DEF // instructions to satisfy LiveIntervals' requirement that all uses be // dominated by defs. Now that LiveIntervals has computed which of these // defs are actually needed and which are dead, remove the dead ones. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp deleted file mode 100644 index 5682cadc1a64f2..00000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ /dev/null @@ -1,126 +0,0 @@ -//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Fix up code to meet LiveInterval's requirements. 
-/// -/// Some CodeGen passes don't preserve LiveInterval's requirements, because -/// they run after register allocation and it isn't important. However, -/// WebAssembly runs LiveIntervals in a late pass. This pass transforms code -/// to meet LiveIntervals' requirements; primarily, it ensures that all -/// virtual register uses have definitions (IMPLICIT_DEF definitions if -/// nothing else). -/// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" -#include "Utils/WebAssemblyUtilities.h" -#include "WebAssembly.h" -#include "WebAssemblyMachineFunctionInfo.h" -#include "WebAssemblySubtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "wasm-prepare-for-live-intervals" - -namespace { -class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {} - -private: - StringRef getPassName() const override { - return "WebAssembly Prepare For LiveIntervals"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; -} // end anonymous namespace - -char WebAssemblyPrepareForLiveIntervals::ID = 0; -INITIALIZE_PASS(WebAssemblyPrepareForLiveIntervals, DEBUG_TYPE, - "Fix up code for LiveIntervals", false, false) - -FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() { - return new WebAssemblyPrepareForLiveIntervals(); -} - -// Test whether the given register has an ARGUMENT def. 
-static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { - for (const auto &Def : MRI.def_instructions(Reg)) - if (WebAssembly::isArgument(Def.getOpcode())) - return true; - return false; -} - -bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction( - MachineFunction &MF) { - LLVM_DEBUG({ - dbgs() << "********** Prepare For LiveIntervals **********\n" - << "********** Function: " << MF.getName() << '\n'; - }); - - bool Changed = false; - MachineRegisterInfo &MRI = MF.getRegInfo(); - const auto &TII = *MF.getSubtarget().getInstrInfo(); - MachineBasicBlock &Entry = *MF.begin(); - - assert(!mustPreserveAnalysisID(LiveIntervalsID) && - "LiveIntervals shouldn't be active yet!"); - - // We don't preserve SSA form. - MRI.leaveSSA(); - - // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF - // instructions. LiveIntervals requires that all paths to virtual register - // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to - // conservatively satisfy this. - // - // TODO: This is fairly heavy-handed; find a better approach. - // - for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - Register Reg = Register::index2VirtReg(I); - - // Skip unused registers. - if (MRI.use_nodbg_empty(Reg)) - continue; - - // Skip registers that have an ARGUMENT definition. - if (hasArgumentDef(Reg, MRI)) - continue; - - BuildMI(Entry, Entry.begin(), DebugLoc(), - TII.get(WebAssembly::IMPLICIT_DEF), Reg); - Changed = true; - } - - // Move ARGUMENT_* instructions to the top of the entry block, so that their - // liveness reflects the fact that these really are live-in values. - for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) { - if (WebAssembly::isArgument(MI.getOpcode())) { - MI.removeFromParent(); - Entry.insert(Entry.begin(), &MI); - } - } - - // Ok, we're now ready to run the LiveIntervals analysis again. 
- MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); - - return Changed; -} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 71f0bd28e1bebc..1e2bee7a5c73b4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -72,9 +72,6 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { assert(!mustPreserveAnalysisID(LiveIntervalsID) && "LiveIntervals shouldn't be active yet!"); - // We don't preserve SSA or liveness. - MRI.leaveSSA(); - MRI.invalidateLiveness(); for (unsigned PReg = WebAssembly::NoRegister + 1; PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 30db4beff6f661..03124781ea4e60 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -63,7 +63,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() { initializeWebAssemblyArgumentMovePass(PR); initializeWebAssemblySetP2AlignOperandsPass(PR); initializeWebAssemblyReplacePhysRegsPass(PR); - initializeWebAssemblyPrepareForLiveIntervalsPass(PR); initializeWebAssemblyOptimizeLiveIntervalsPass(PR); initializeWebAssemblyMemIntrinsicResultsPass(PR); initializeWebAssemblyRegStackifyPass(PR); @@ -523,9 +522,6 @@ void WebAssemblyPassConfig::addPreEmitPass() { // Preparations and optimizations related to register stackification. if (getOptLevel() != CodeGenOpt::None) { - // LiveIntervals isn't commonly run this late. Re-establish preconditions. - addPass(createWebAssemblyPrepareForLiveIntervals()); - // Depend on LiveIntervals and perform some optimizations on it. 
addPass(createWebAssemblyOptimizeLiveIntervals()); diff --git a/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir index 122775977a70e4..4058de2b7a6ab4 100644 --- a/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir +++ b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir @@ -32,6 +32,8 @@ body: | %0:i32 = CONST_I32 100, implicit-def $arguments BR_IF %bb.2, %0:i32, implicit-def $arguments ; CHECK: bb.0.pred0: + ; CHECK: %1:i32 = IMPLICIT_DEF + ; CHECK-NEXT: %0:i32 = IMPLICIT_DEF ; CHECK: BR_IF %bb.2, %0, implicit-def $arguments bb.1.pred1: diff --git a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll index baeee63e0c7c19..e06a1dcc007134 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll +++ b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll @@ -331,9 +331,9 @@ entry: ; NOREGS-NEXT: local.get 1{{$}} ; NOREGS-NEXT: local.get 0{{$}} ; NOREGS-NEXT: i32.mul -; NOREGS-NEXT: local.tee 1{{$}} +; NOREGS-NEXT: local.tee 0{{$}} ; NOREGS-NEXT: call use_a{{$}} -; NOREGS-NEXT: local.get 1{{$}} +; NOREGS-NEXT: local.get 0{{$}} ; NOREGS-NEXT: call use_b{{$}} ; NOREGS-NEXT: return{{$}} declare void @use_a(i32) @@ -358,8 +358,8 @@ define void @simple_multiple_use(i32 %x, i32 %y) { ; NOREGS-NEXT: local.get 1{{$}} ; NOREGS-NEXT: local.get 0{{$}} ; NOREGS-NEXT: i32.mul -; NOREGS-NEXT: local.tee 1{{$}} -; NOREGS-NEXT: local.get 1{{$}} +; NOREGS-NEXT: local.tee 0{{$}} +; NOREGS-NEXT: local.get 0{{$}} ; NOREGS-NEXT: call use_2{{$}} ; NOREGS-NEXT: return{{$}} declare void @use_2(i32, i32) diff --git a/llvm/test/CodeGen/WebAssembly/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/WebAssembly/umulo-128-legalisation-lowering.ll index 9900960496254b..093912abe75502 100644 --- a/llvm/test/CodeGen/WebAssembly/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/WebAssembly/umulo-128-legalisation-lowering.ll @@ -39,14 +39,14 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) 
unnamed_addr #0 { ; WASM32: i32.const $push5=, 40 ; WASM32: i32.add $push6=, $pop52, $pop5 ; WASM32: i64.load $push33=, 0($pop6) -; WASM32: local.tee $push32=, 1, $pop33 +; WASM32: local.tee $push32=, 3, $pop33 ; WASM32: local.get $push53=, 5 ; WASM32: i64.load $push3=, 0($pop53) ; WASM32: local.get $push54=, 5 ; WASM32: i64.load $push2=, 16($pop54) ; WASM32: i64.add $push4=, $pop3, $pop2 ; WASM32: i64.add $push31=, $pop32, $pop4 -; WASM32: local.tee $push30=, 3, $pop31 +; WASM32: local.tee $push30=, 1, $pop31 ; WASM32: i64.store 8($pop55), $pop30 ; WASM32: local.get $push62=, 0 ; WASM32: local.get $push56=, 2 @@ -66,8 +66,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; WASM32: i64.const $push26=, 0 ; WASM32: i64.ne $push14=, $pop13, $pop26 ; WASM32: i32.or $push15=, $pop12, $pop14 -; WASM32: local.get $push61=, 3 -; WASM32: local.get $push60=, 1 +; WASM32: local.get $push61=, 1 +; WASM32: local.get $push60=, 3 ; WASM32: i64.lt_u $push16=, $pop61, $pop60 ; WASM32: i32.or $push17=, $pop15, $pop16 ; WASM32: i32.store8 16($pop62), $pop17 diff --git a/llvm/test/CodeGen/WebAssembly/umulo-i64.ll b/llvm/test/CodeGen/WebAssembly/umulo-i64.ll index 30990845aa0d91..efac514e9b5675 100644 --- a/llvm/test/CodeGen/WebAssembly/umulo-i64.ll +++ b/llvm/test/CodeGen/WebAssembly/umulo-i64.ll @@ -21,7 +21,7 @@ attributes #1 = { nounwind readnone speculatable } ; CHECK-LABEL: wut: ; CHECK: call __multi3, $2, $0, $pop0, $1, $pop7 -; CHECK: i64.load $0=, 8($2) +; CHECK: i64.load $1=, 8($2) define i1 @wut(i64, i64) { start: %2 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %0, i64 %1) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn index f993cf480ec398..07dcd6f89fa561 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -60,7 +60,6 @@ static_library("LLVMWebAssemblyCodeGen") { 
"WebAssemblyOptimizeLiveIntervals.cpp", "WebAssemblyOptimizeReturned.cpp", "WebAssemblyPeephole.cpp", - "WebAssemblyPrepareForLiveIntervals.cpp", "WebAssemblyRegColoring.cpp", "WebAssemblyRegNumbering.cpp", "WebAssemblyRegStackify.cpp", From eda4ef3add4d25345e0b29580776f1576040c525 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 19 May 2022 14:18:51 -0400 Subject: [PATCH 012/908] [Libomptarget] Add `leaf` attribute to `vprintf` declaration Summary: This patch adds the `leaf` attribute to the `vprintf` declaration in the OpenMP runtime. This attribute allows us to determine that the `vprintf` function will not call any functions within the translation unit, allowing us to deduce `norecurse` attributes on the caller. --- openmp/libomptarget/DeviceRTL/src/Debug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp index 45e08fa5b16bfb..c6cf5071b95d1d 100644 --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -36,7 +36,7 @@ int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t); #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) -int32_t vprintf(const char *, void *); +int32_t __attribute__((leaf)) vprintf(const char *, void *); namespace impl { int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { return vprintf(Format, Arguments); From 0f37ba7b236252b29c33335fb00836d81671ea65 Mon Sep 17 00:00:00 2001 From: William Huang Date: Tue, 3 May 2022 22:19:01 +0000 Subject: [PATCH 013/908] [ValueTracking] Baseline tests for Power-of-2 value tracking on PHI nodes Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D124885 --- .../ValueTracking/known-power-of-two-urem.ll | 388 ++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll 
diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll new file mode 100644 index 00000000000000..880e14dae8778b --- /dev/null +++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll @@ -0,0 +1,388 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +; Check if a value can be deduced as a power of 2, allowing urem optimization. +; i.e. A urem B = A & (B - 1) if B is a power of 2. + +define i64 @known_power_of_two_urem_phi(i64 %size, i1 %cmp, i1 %cmp1) { +; CHECK-LABEL: @known_power_of_two_urem_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[COND_TRUE_TRUE:%.*]], label [[COND_TRUE_FALSE:%.*]] +; CHECK: cond.true.true: +; CHECK-NEXT: br label [[COND_TRUE_END:%.*]] +; CHECK: cond.true.false: +; CHECK-NEXT: br label [[COND_TRUE_END]] +; CHECK: cond.true.end: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 2, [[COND_TRUE_TRUE]] ], [ 4, [[COND_TRUE_FALSE]] ] +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 4096, [[ENTRY:%.*]] ], [ [[PHI]], [[COND_TRUE_END]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI1]] +; CHECK-NEXT: ret i64 [[UREM]] +; +entry: + br i1 %cmp, label %cond.true, label %cond.end + +cond.true: + br i1 %cmp1, label %cond.true.true, label %cond.true.false + +cond.true.true: + br label %cond.true.end + +cond.true.false: + br label %cond.true.end + +cond.true.end: + %phi = phi i64 [ 2, %cond.true.true ], [ 4, %cond.true.false ] + br label %cond.end + +cond.end: + %phi1 = phi i64 [ 4096, %entry ], [ %phi, %cond.true.end ] + %urem = urem i64 %size, %phi1 + ret i64 %urem +} + +define i64 @known_power_of_two_urem_nested_expr(i64 %size, i1 %cmp, i1 %cmp1, i64 %0) { +; CHECK-LABEL: @known_power_of_two_urem_nested_expr( +; 
CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 1, [[TMP0:%.*]] +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP1:%.*]], i64 2, i64 8 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[COND_FALSE]] ], [ [[TMP1]], [[COND_TRUE]] ], [ [[PHI]], [[COND_END]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[UREM]], 10 +; CHECK-NEXT: br i1 [[CMP2]], label [[COND_END]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret i64 [[UREM]] +; +entry: + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + %1 = shl nuw i64 1, %0 + br label %cond.end + +cond.false: + %select = select i1 %cmp1, i64 2, i64 8 + br label %cond.end + +cond.end: + %phi = phi i64 [ %select, %cond.false ], [ %1, %cond.true ], [ %phi, %cond.end ] + %2 = phi i64 [ %size, %cond.false ], [ %size, %cond.true ], [ %0, %cond.end ] + %urem = urem i64 %size, %phi + %cmp2 = icmp ult i64 %urem, 10 + br i1 %cmp2, label %cond.end, label %end + +end: + ret i64 %urem +} + +define i64 @known_power_of_two_urem_negative(i64 %size, i1 %cmp, i64 %0) { +; urem is not replaced if not all operands are power of 2. 
+; +; CHECK-LABEL: @known_power_of_two_urem_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 4, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[COND_TRUE]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: ret i64 [[UREM]] +; +entry: + br i1 %cmp, label %cond.true, label %cond.end + +cond.true: + br label %cond.end + +cond.end: + %phi = phi i64 [ 4, %entry ], [ %0, %cond.true ] + %urem = urem i64 %size, %phi + ret i64 %urem +} + +define i64 @known_power_of_two_urem_loop_mul(i64 %size, i64 %a) { +; CHECK-LABEL: @known_power_of_two_urem_loop_mul( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = shl nuw i64 1, [[A:%.*]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = shl nuw i64 [[PHI]], 2 +; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 25000000 +; CHECK-NEXT: br i1 [[ICMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + %start = shl nuw i64 1, %a + br label %for.body + +for.body: + %phi = phi i64 [ %start, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nuw i64 %sum, %urem + %i = mul nuw i64 %phi, 4 + %icmp = icmp ult i64 %i, 100000000 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} + +define i64 @known_power_of_two_urem_loop_mul_negative(i64 %size, i64 %a) { +; Cannot deduce induction variable is a power of 2 if it is multiplied by 3. 
+; +; CHECK-LABEL: @known_power_of_two_urem_loop_mul_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = shl nuw i64 1, [[A:%.*]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = mul nuw i64 [[PHI]], 3 +; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[I]], 100000000 +; CHECK-NEXT: br i1 [[ICMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + %start = shl nuw i64 1, %a + br label %for.body + +for.body: + %phi = phi i64 [ %start, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nuw i64 %sum, %urem + %i = mul nuw i64 %phi, 3 + %icmp = icmp ult i64 %i, 100000000 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} + +define i64 @known_power_of_two_urem_loop_shl(i64 %size, i64 %a) { +; CHECK-LABEL: @known_power_of_two_urem_loop_shl( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = shl nuw i64 1, [[A:%.*]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = shl nuw i64 [[PHI]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 50000000 +; CHECK-NEXT: br i1 [[ICMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + %start = shl nuw i64 1, %a + br label %for.body + 
+for.body: + %phi = phi i64 [ %start, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nuw i64 %sum, %urem + %i = shl nuw i64 %phi, 1 + %icmp = icmp ult i64 %i, 100000000 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} + +define i64 @known_power_of_two_urem_loop_lshr(i64 %size, i64 %a) { +; CHECK-LABEL: @known_power_of_two_urem_loop_lshr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = shl nuw i64 1, [[A:%.*]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], 1 +; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 2 +; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + %start = shl nuw i64 1, %a + br label %for.body + +for.body: + %phi = phi i64 [ %start, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nuw i64 %sum, %urem + %i = lshr i64 %phi, 1 + %icmp = icmp ugt i64 %i, 0 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} + + +define i64 @known_power_of_two_urem_loop_ashr(i64 %size, i64 %a) { +; CHECK-LABEL: @known_power_of_two_urem_loop_ashr( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 4096, [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; 
CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], [[A:%.*]] +; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0 +; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + br label %for.body + +for.body: + %phi = phi i64 [ 4096, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nsw i64 %sum, %urem + %i = ashr i64 %phi, %a + %icmp = icmp ugt i64 %i, 0 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} + +define i64 @known_power_of_two_urem_loop_ashr_negative(i64 %size, i64 %a) { +; Cannot deduce induction variable is a power of 2 for ashr if its starting +; value is equal to sign bit +; +; CHECK-LABEL: @known_power_of_two_urem_loop_ashr_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ -9223372036854775808, [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = ashr i64 [[PHI]], [[A:%.*]] +; CHECK-NEXT: br i1 true, label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + br label %for.body + +for.body: + %phi = phi i64 [ u0x8000000000000000, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nsw i64 %sum, %urem + %i = ashr i64 %phi, %a + %icmp = icmp ugt i64 %i, 0 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} + +define i64 @known_power_of_two_urem_loop_ashr_negative_2(i64 %size, i64 %a) { +; Cannot deduce induction variable is a power 
of 2 for ashr if its starting +; value may equal to sign bit. +; +; CHECK-LABEL: @known_power_of_two_urem_loop_ashr_negative_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = shl nuw i64 1, [[A:%.*]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = ashr i64 [[PHI]], 2 +; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 4 +; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + %start = shl nuw i64 1, %a + br label %for.body + +for.body: + %phi = phi i64 [ %start, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nsw i64 %sum, %urem + %i = ashr i64 %phi, 2 + %icmp = icmp ugt i64 %i, 0 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} + +define i64 @known_power_of_two_urem_loop_negative(i64 %size, i64 %a) { +; Cannot deduce induction variable is a power of 2 if the recurrence is not one +; of the valid arithmetic operations. 
+; +; CHECK-LABEL: @known_power_of_two_urem_loop_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = shl nuw i64 1, [[A:%.*]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[I]] = add nuw i64 [[PHI]], 1 +; CHECK-NEXT: br i1 true, label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret i64 [[SUM]] +; +entry: + %start = shl nuw i64 1, %a + br label %for.body + +for.body: + %phi = phi i64 [ %start, %entry ], [ %i, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %urem = urem i64 %size, %phi + %add = add nuw i64 %sum, %urem + %i = add nuw i64 %phi, 1 + %icmp = icmp ugt i64 %i, 0 + br i1 %icmp, label %for.body, label %for.end + +for.end: + %r = phi i64 [ %sum, %for.body ] + ret i64 %r +} From a1cf20f012baef53ad734d4489d3a967d7ee4f80 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Thu, 19 May 2022 11:29:37 -0700 Subject: [PATCH 014/908] [llvm-dis][test] Fix error case on Windows The `N` case in the error differs across platforms. 
--- llvm/test/tools/llvm-dis/errors.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/llvm-dis/errors.test b/llvm/test/tools/llvm-dis/errors.test index 47bd655263e0ef..27c37b2186a32c 100644 --- a/llvm/test/tools/llvm-dis/errors.test +++ b/llvm/test/tools/llvm-dis/errors.test @@ -1,3 +1,3 @@ -# RUN: not llvm-dis missing-file-path 2>&1 | FileCheck %s --check-prefix=MISSING +# RUN: not llvm-dis missing-file-path 2>&1 | FileCheck %s --check-prefix=MISSING --ignore-case -# MISSING: error: missing-file-path: No such file or directory +# MISSING: error: missing-file-path: no such file or directory From d5c130f17e503e128b8a413c2ce0e522987d2a16 Mon Sep 17 00:00:00 2001 From: William Huang Date: Tue, 10 May 2022 19:47:10 +0000 Subject: [PATCH 015/908] [ValueTracking] Added support to deduce PHI Nodes values being a power of 2 Add Value Tracking support to deduce induction variable being a power of 2, allowing urem optimizations Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D125332 --- llvm/lib/Analysis/ValueTracking.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index a32633589c94a6..60dbfaa64460c8 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2128,6 +2128,25 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, } } + // A PHI node is power of two if all incoming values are power of two. + if (const PHINode *PN = dyn_cast(V)) { + Query RecQ = Q; + + // Recursively check all incoming values. Limit recursion to 2 levels, so + // that search complexity is limited to number of operands^2. + unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); + return llvm::all_of(PN->operands(), [&](const Use &U) { + // Value is power of 2 if it is coming from PHI node itself by induction. 
+ if (U.get() == PN) + return true; + + // Change the context instruction to the incoming block where it is + // evaluated. + RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); + return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); + }); + } + // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). From ac2f0a6f1d4b3f5ff61dac33d9aba395d79b6a3d Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Thu, 19 May 2022 12:25:01 +0200 Subject: [PATCH 016/908] [Office Hours] add initial guidance for hosts This includes adding guidance to announce an office hours session on the Discord channel and/or IRC, as discussed at the office hours round table at EuroLLVM 2022, see https://discourse.llvm.org/t/office-hours-eurollvm-round-table-summary/62480. Fixes #55423 --- llvm/docs/GettingInvolved.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index bdab9c42fbb3fd..33537159038aba 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -281,6 +281,32 @@ don't find anyone present, chances are they happen to be off that day. - `GoogleMeet `__ - English, Russian, German (not fluently) + +Guidance for office hours hosts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* If you're interested in becoming an office hours host, please add your + information to the list above. +* When starting an office hours session, consider typing something like "*Hi, + I'm available for chats in the next half hour at* video chat URL. *I'm + looking forward to having conversations on the video chat or here.*" on the + LLVM chat channels that you are already on. These could include: + + * the `#office-hours Discord channel + `__. 
+ * :ref:`IRC` + + Doing this can help: + * overcome potential anxiety to call in for the first time, + * people who prefer to first exchange a few messages through text chat + before dialing in, and + * remind the wider community that office hours do exist. +* If you decide to no longer host office hours, please do remove your entry + from the list above. + + +.. _IRC: + IRC --- From 6107cdc9295d339304ce3e88e0b038db661d2bcb Mon Sep 17 00:00:00 2001 From: William Huang Date: Thu, 12 May 2022 00:32:03 +0000 Subject: [PATCH 017/908] [InstCombine] NEW Baseline tests for InstCombine optimization to merge GEP instructions with constant indices Split the merge constant-indexed GEP optimization into two smaller transformations: 1. Merging GEP of GEP if both are constant-indexed. 2. Swapping constant indexed GEP in a chain of (non-constant) GEP to the end, so that 1 can be applied repeatedly. There is existing code to partially handle transformation 1, but it only deals with limited cases. Unit tests are broken down into two parts for the 2 transformations. 
Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D125438 --- .../gep-canonicalize-constant-indices.ll | 127 ++++++++ .../InstCombine/gep-merge-constant-indices.ll | 285 ++++++++++++++++++ 2 files changed, 412 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll create mode 100644 llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll new file mode 100644 index 00000000000000..65ef7bda280356 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s + +; Constant-indexed GEP instructions in a chain of GEP instructions should be +; swapped to the end whenever such transformation is valid. This allows them to +; be merged. + +declare void @use(i1) + + +; The constant-indexed GEP instruction should be swapped to the end, even +; without merging. +; result = (((i32*) p + a) + b) + 1 +define ptr @basic(ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @basic( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP3]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %a + %3 = getelementptr inbounds i32, ptr %2, i64 %b + ret ptr %3 +} + +; GEP with the last index being a constant should also be swapped. 
+define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @partialConstant1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1 + %2 = getelementptr inbounds i32, ptr %p, i64 %b + ret ptr %2 +} + +; Negative test. GEP should not be swapped if the last index is not a constant. +define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @partialConstant2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a + %2 = getelementptr inbounds i32, ptr %p, i64 %b + ret ptr %2 +} + +; Constant-indexed GEP are merged after swapping. +; result = ((i32*) p + a) + 3 +define ptr @merge(ptr %p, i64 %a) { +; CHECK-LABEL: @merge( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 +; CHECK-NEXT: ret ptr [[TMP3]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %a + %3 = getelementptr inbounds i32, ptr %2, i64 2 + ret ptr %3 +} + +; Multiple constant-indexed GEP. Note that the first two cannot be merged at +; first, but after the second and third are merged, the result can be merged +; with the first one on the next pass. 
+; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 9 +define ptr @nested(ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @nested( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1 +; CHECK-NEXT: ret ptr [[TMP6]] +; + %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1 + %2 = getelementptr inbounds i8, ptr %1, i64 %a + %3 = mul i64 %a, %b + %4 = getelementptr inbounds <5 x i32>, ptr %2, i64 4 + %5 = getelementptr inbounds i16, ptr %4, i64 %3 + %6 = getelementptr inbounds <4 x i32>, ptr %5, i64 1 + ret ptr %6 +} + +; It is valid to swap if the source operand of the first GEP has multiple uses. +define ptr @multipleUses1(ptr %p) { +; CHECK-LABEL: @multipleUses1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] +; CHECK-NEXT: ret ptr [[TMP3]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = ptrtoint ptr %p to i64 + %3 = getelementptr inbounds i32, ptr %1, i64 %2 + ret ptr %3 +} + +; It is valid to swap if the second GEP has multiple uses. 
+define ptr @multipleUses2(ptr %p, i64 %a) { +; CHECK-LABEL: @multipleUses2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: call void @use(ptr nonnull [[TMP2]]) +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %a + call void @use(ptr %2) + ret ptr %2 +} + +; Negative test. It is not valid to swap if the first GEP has multiple uses. +define ptr @multipleUses3(ptr %p) { +; CHECK-LABEL: @multipleUses3( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] +; CHECK-NEXT: ret ptr [[TMP3]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = ptrtoint ptr %1 to i64 + %3 = getelementptr inbounds i32, ptr %1, i64 %2 + ret ptr %3 +} diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll new file mode 100644 index 00000000000000..af79faf265febd --- /dev/null +++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll @@ -0,0 +1,285 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s + +; Test merging GEP of GEP with constant indices. 
+ +target datalayout = "i24:8:8" + +%struct.A = type { [123 x i8], i32 } +%struct.B = type { i8, [3 x i16], %struct.A, float } +%struct.C = type { i8, i32, i32 } + +; result = (i32*) p + 3 +define ptr @mergeBasic(ptr %p) { +; CHECK-LABEL: @mergeBasic( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3 +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 2 + ret ptr %2 +} + +; First GEP is merged into the second. +; result = (i8*) p + 10 +define ptr @mergeDifferentTypes(ptr %p) { +; CHECK-LABEL: @mergeDifferentTypes( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10 +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds i8, ptr %p, i64 2 + %2 = getelementptr inbounds i64, ptr %1, i64 1 + ret ptr %2 +} + +; Second GEP is merged into the first. +; result = (i8*) p + 10 +define ptr @mergeReverse(ptr %p) { +; CHECK-LABEL: @mergeReverse( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i64, ptr %p, i64 1 + %2 = getelementptr inbounds i8, ptr %1, i64 2 + ret ptr %2 +} + +; Offsets of first and last GEP cancel out. 
+; result = p +define ptr @zeroSum(ptr %p) { +; CHECK-LABEL: @zeroSum( +; CHECK-NEXT: ret ptr [[P:%.*]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds i8, ptr %1, i64 -4 + ret ptr %2 +} + +; result = (i8*) (([20 x i8]*) p + 1) + 17 +define ptr @array1(ptr %p) { +; CHECK-LABEL: @array1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17 +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds [20 x i8], ptr %p, i64 1, i64 1 + %2 = getelementptr inbounds i64, ptr %1, i64 2 + ret ptr %2 +} + +; result = (i8*) p + 14 +define ptr @array2(ptr %p, i64 %a) { +; CHECK-LABEL: @array2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [7 x i32], ptr [[P:%.*]], i64 0, i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds [7 x i32], ptr %p, i64 0, i64 3 + %2 = getelementptr inbounds i8, ptr %1, i64 2 + ret ptr %2 +} + +; result = (i8*) (([3 x i8]*) p + 6) + 2 +define ptr @array3(ptr %p) { +; CHECK-LABEL: @array3( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP1]], i64 1, i64 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i64, ptr %p, i64 2 + %2 = getelementptr inbounds [3 x i8], ptr %1, i64 1, i64 1 + ret ptr %2 +} + +; result = (struct.C*) p + 3 +define ptr @struct1(ptr %p) { +; CHECK-LABEL: @struct1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i64, ptr %p, i64 3 + %2 = getelementptr inbounds %struct.C, ptr %1, i64 1 + ret ptr %2 +} + +; result = &((struct.A*) p - 1).member1 +define ptr @struct2(ptr %p) { +; CHECK-LABEL: @struct2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_A:%.*]], ptr [[P:%.*]], i64 -1, i32 1 +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds %struct.A, ptr %p, i64 0, i32 1 + %2 = getelementptr inbounds i8, ptr %1, i64 -128 + ret ptr %2 +} + +; result = (i8*) p - 4 +define ptr @struct3(ptr %p, i64 %a) { +; CHECK-LABEL: @struct3( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -4 +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds i8, ptr %p, i64 -128 + %2 = getelementptr inbounds %struct.A, ptr %1, i64 0, i32 1 + ret ptr %2 +} +; result = ((struct.C*) p + 1).member2 +define ptr @struct4(ptr %p, i64 %a) { +; CHECK-LABEL: @struct4( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i64, ptr %p, i64 1 + %2 = getelementptr inbounds %struct.C, ptr %1, i64 1 + ret ptr %2 +} + +; result = (i8*) &((struct.B) p)[0].member2.member0 + 7 +define ptr @structStruct(ptr %p) { +; CHECK-LABEL: @structStruct( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7 +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds %struct.B, ptr %p, i64 0, i32 2, i32 0, i64 3 + %2 = getelementptr inbounds %struct.A, ptr %1, i64 0, i32 0, i64 4 + ret ptr %2 +} + +; First GEP offset is not divisible by last GEP's source element size, but first +; GEP points to an array such that the last GEP offset is divisible by the +; array's element size, so the first GEP can be rewritten with an extra index. 
+; result = (i16*) &((struct.B*) p)[0].member1 + 2 +define ptr @appendIndex(ptr %p) { +; CHECK-LABEL: @appendIndex( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 1, i64 2 +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = getelementptr inbounds %struct.B, ptr %p, i64 0, i32 1 + %2 = getelementptr inbounds i32, ptr %1, i64 1 + ret ptr %2 +} + +; result = (i8*) &((struct.A*) &((struct.B*) p)[0].member2).member0 + 2 +define ptr @appendIndexReverse(ptr %p) { +; CHECK-LABEL: @appendIndexReverse( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP1]], i64 0, i32 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i64, ptr %p, i64 1 + %2 = getelementptr inbounds %struct.B, ptr %1, i64 0, i32 1 + ret ptr %2 +} + +; Constant-indexed GEP can be merged if the sum of offsets aliases a member's +; address of one of the GEP instructions. +; result = &((struct.C*) p + 2).member1 +define ptr @structMemberAliasing(ptr %p, i64 %a) { +; CHECK-LABEL: @structMemberAliasing( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1, i32 2 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i64, ptr %p, i64 1 + %2 = getelementptr inbounds %struct.C, ptr %1, i64 1, i32 2 + ret ptr %2 +} + +; Negative test. Offset of either GEP is not divisible by the other's size. +; Here i24 is 8-bit aligned. 
+define ptr @notDivisible(ptr %p) { +; CHECK-LABEL: @notDivisible( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i24, ptr %p, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 1 + ret ptr %2 +} + +; Two GEP instructions can be merged if one is constant-indexed and the other +; is a sequential type with a constant last index, and the constant offset is +; divisible by the sequential type size. +; result = (i32*) (([4 x i32]*) p + a) + 3 +define ptr @partialConstant1(ptr %p, i64 %a) { +; CHECK-LABEL: @partialConstant1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 2 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds [4 x i32], ptr %1, i64 %a, i64 2 + ret ptr %2 +} + +; Negative test. Similar to above, but two GEP should not be merged if the +; constant offset is not divisible. +define ptr @partialConstant2(ptr %p, i64 %a) { +; CHECK-LABEL: @partialConstant2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds [4 x i64], ptr %1, i64 %a, i64 2 + ret ptr %2 +} + +; Negative test. Similar to above, but two GEP should not be merged if there is +; another use of the first GEP by the second GEP. 
+define ptr @partialConstant3(ptr %p) { +; CHECK-LABEL: @partialConstant3( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[TMP2]], i64 2 +; CHECK-NEXT: ret ptr [[TMP3]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = ptrtoint ptr %1 to i64 + %3 = getelementptr inbounds [4 x i64], ptr %1, i64 %2, i64 2 + ret ptr %3 +} + +; Two GEP instructions can be merged if one is constant-indexed and the other +; is an aggregate type with a constant last index, and the resulting pointer +; address by adding the constant offset aliases the address of another member. +; result = &((struct.C*) p + a).member2 +define ptr @partialConstantMemberAliasing1(ptr %p, i64 %a) { +; CHECK-LABEL: @partialConstantMemberAliasing1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds %struct.C, ptr %1, i64 %a, i32 1 + ret ptr %2 +} + +; Negative test. Similar to above, but the new address does not alias the +; address of another member. +define ptr @partialConstantMemberAliasing2(ptr %p, i64 %a) { +; CHECK-LABEL: @partialConstantMemberAliasing2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr %p, i64 1 + %2 = getelementptr inbounds %struct.C, ptr %1, i64 %a, i32 1 + ret ptr %2 +} + +; Negative test. Similar to above, but the new address falls outside the address +; range of the object currently pointed by the non-constant GEP. 
+define ptr @partialConstantMemberAliasing3(ptr %p, i64 %a) { +; CHECK-LABEL: @partialConstantMemberAliasing3( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 2 +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds %struct.C, ptr %1, i64 %a, i32 2 + ret ptr %2 +} From 80aab0312acea2231b9aec4ba2ceb5951ae8016a Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 19 May 2022 19:53:21 +0100 Subject: [PATCH 018/908] [ARM] Cost modelling for scalar fptoi_sat Similar to D124357, this adds some cost modelling for fptoi_sat for Arm targets. Where VFP2 is available (and FP64/FP16 for the relevant types), the operations are legal as the Arm instructions naturally saturate. Otherwise they will need an extra smin/smax clamp, similar to AArch64. Differential Revision: https://reviews.llvm.org/D125665 --- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 33 +++++ llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll | 122 +++++++++--------- 2 files changed, 94 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 9d376ade084d35..874216303b7235 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1765,6 +1765,39 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return LT.first * ST->getMVEVectorCostFactor(CostKind); break; } + case Intrinsic::fptosi_sat: + case Intrinsic::fptoui_sat: { + if (ICA.getArgTypes().empty()) + break; + bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat; + auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]); + EVT MTy = TLI->getValueType(DL, ICA.getReturnType()); + // Check for the legal types, with the correct subtarget features.
+ if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) || + (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) || + (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32)) + return LT.first; + + // Otherwise we use a legal convert followed by a min+max + if (((ST->hasVFP2Base() && LT.second == MVT::f32) || + (ST->hasFP64() && LT.second == MVT::f64) || + (ST->hasFullFP16() && LT.second == MVT::f16)) && + LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) { + Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(), + LT.second.getScalarSizeInBits()); + InstructionCost Cost = 1; + IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin + : Intrinsic::umin, + LegalTy, {LegalTy, LegalTy}); + Cost += getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax + : Intrinsic::umax, + LegalTy, {LegalTy, LegalTy}); + Cost += getIntrinsicInstrCost(Attrs2, CostKind); + return LT.first * Cost; + } + break; + } } return BaseT::getIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll index 72d4c2819d059c..8a7784c35599fc 100644 --- a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP +; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 < %s | FileCheck %s --check-prefix=CHECK-MVEFP define void @casts() { ; CHECK-MVE-LABEL: 'casts' @@ -108,25 +108,25 @@ define void @casts() { ; ; CHECK-MVEFP-LABEL: 'casts' ; CHECK-MVEFP-NEXT: 
Cost Model: Found an estimated cost of 5 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef) -; 
CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s16 = call i16 
@llvm.fptosi.sat.i16.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) @@ -137,16 +137,16 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) -; 
CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x 
double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) @@ -157,16 +157,16 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u1 
= call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 196 for 
instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -177,16 +177,16 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1104 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 906 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) -; 
CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 794 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 828 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 794 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 828 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 794 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 828 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1336 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1296 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 762 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v8f64u8 = call <8 x i8> 
@llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1192 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 290 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -197,16 +197,16 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4488 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4400 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) -; 
CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2986 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2762 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2828 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2762 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2828 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2760 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2824 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2752 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4848 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4768 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2698 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2474 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2540 
for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2474 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2540 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2472 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2536 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2464 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4560 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4480 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) @@ -378,13 +378,13 @@ define void @fp16() { ; ; CHECK-MVEFP-LABEL: 'fp16' ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) -; CHECK-MVEFP-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) From f613e6d19d293d4eff9885b1fb429162bc6700cd Mon Sep 17 00:00:00 2001 From: Jonathan Peyton Date: Thu, 19 May 2022 13:57:02 -0500 Subject: [PATCH 019/908] [OpenMP][libomp] Fix accidental removal of else for core attributes --- openmp/runtime/src/kmp_settings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/src/kmp_settings.cpp 
b/openmp/runtime/src/kmp_settings.cpp index c003294876bb5a..49b23a866a5831 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -5022,7 +5022,7 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value, attr.set_core_type(KMP_HW_CORE_TYPE_CORE); } else if (__kmp_str_match("intel_atom", -1, attr_ptr + 1)) { attr.set_core_type(KMP_HW_CORE_TYPE_ATOM); - } + } else #endif if (__kmp_str_match("eff", 3, attr_ptr + 1)) { const char *number = attr_ptr + 1; From 304a5a7a14dd26ae93194b336fef9a1213fa44b5 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 19 May 2022 15:05:30 -0400 Subject: [PATCH 020/908] Revert "[ValueTracking] Added support to deduce PHI Nodes values being a power of 2" This reverts commit d5c130f17e503e128b8a413c2ce0e522987d2a16. Breaks tests, see https://reviews.llvm.org/D125332#3525819 --- llvm/lib/Analysis/ValueTracking.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 60dbfaa64460c8..a32633589c94a6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2128,25 +2128,6 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, } } - // A PHI node is power of two if all incoming values are power of two. - if (const PHINode *PN = dyn_cast(V)) { - Query RecQ = Q; - - // Recursively check all incoming values. Limit recursion to 2 levels, so - // that search complexity is limited to number of operands^2. - unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); - return llvm::all_of(PN->operands(), [&](const Use &U) { - // Value is power of 2 if it is coming from PHI node itself by induction. - if (U.get() == PN) - return true; - - // Change the context instruction to the incoming block where it is - // evaluated. 
- RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); - return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); - }); - } - // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). From 505ddb6b745038a00b27999e98f2f2c8ff5e43e9 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Wed, 11 May 2022 17:12:03 -0700 Subject: [PATCH 021/908] [lld][test] Delete empty Unit test directory This became empty when we removed the legacy macho lld. This results in a warning when running `check-lld`. We can revert this in the future if we want unit tests. Differential Revision: https://reviews.llvm.org/D125436 --- lld/test/CMakeLists.txt | 6 ------ lld/test/Unit/lit.cfg.py | 37 -------------------------------- lld/test/Unit/lit.site.cfg.py.in | 17 --------------- 3 files changed, 60 deletions(-) delete mode 100644 lld/test/Unit/lit.cfg.py delete mode 100644 lld/test/Unit/lit.site.cfg.py.in diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt index 9c4dd2b277349c..e3562444d7bcda 100644 --- a/lld/test/CMakeLists.txt +++ b/lld/test/CMakeLists.txt @@ -14,12 +14,6 @@ configure_lit_site_cfg( MAIN_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py ) -configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.cfg.py - ) set(LLD_TEST_DEPS lld) if (NOT LLD_BUILT_STANDALONE) diff --git a/lld/test/Unit/lit.cfg.py b/lld/test/Unit/lit.cfg.py deleted file mode 100644 index dac9f5dc6c33ff..00000000000000 --- a/lld/test/Unit/lit.cfg.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- Python -*- - -# Configuration file for the 'lit' test runner. - -import os - -import lit.formats - -# name: The name of this test suite. -config.name = 'lld-Unit' - -# suffixes: A list of file extensions to treat as test files. 
-config.suffixes = [] - -# test_source_root: The root path where unit test binaries are located. -# test_exec_root: The root path where tests should be run. -config.test_source_root = os.path.join(config.lld_obj_root, 'unittests') -config.test_exec_root = config.test_source_root - - -# Tweak the PATH to include the tools dir. -path = os.path.pathsep.join((config.lld_tools_dir, config.llvm_tools_dir, config.environment['PATH'])) -config.environment['PATH'] = path - -path = os.path.pathsep.join((config.lld_libs_dir, config.llvm_libs_dir, - config.environment.get('LD_LIBRARY_PATH',''))) -config.environment['LD_LIBRARY_PATH'] = path - -# Propagate LLVM_SRC_ROOT into the environment. -config.environment['LLVM_SRC_ROOT'] = config.llvm_src_root - -# Propagate PYTHON_EXECUTABLE into the environment -config.environment['PYTHON_EXECUTABLE'] = sys.executable - - -# testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, 'Tests') diff --git a/lld/test/Unit/lit.site.cfg.py.in b/lld/test/Unit/lit.site.cfg.py.in deleted file mode 100644 index 70ff63a4d9223c..00000000000000 --- a/lld/test/Unit/lit.site.cfg.py.in +++ /dev/null @@ -1,17 +0,0 @@ -@LIT_SITE_CFG_IN_HEADER@ - -config.llvm_src_root = "@LLVM_SOURCE_DIR@" -config.llvm_obj_root = "@LLVM_BINARY_DIR@" -config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") -config.llvm_libs_dir = lit_config.substitute("@LLVM_LIBS_DIR@") -config.llvm_build_mode = lit_config.substitute("@LLVM_BUILD_MODE@") -config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" -config.lld_obj_root = "@LLD_BINARY_DIR@" -config.lld_src_root = "@LLD_SOURCE_DIR@" -config.lld_libs_dir = lit_config.substitute("@CURRENT_LIBS_DIR@") -config.lld_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@") -config.target_triple = "@LLVM_TARGET_TRIPLE@" -config.python_executable = "@Python3_EXECUTABLE@" - -# Let the main config do the real work. 
-lit_config.load_config(config, "@LLD_SOURCE_DIR@/test/Unit/lit.cfg.py") From e0b98902a2dfe729e239c07abec4f67ad8d10708 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 19 May 2022 15:18:56 -0400 Subject: [PATCH 022/908] [gn build] (manually) port 505ddb6b7450 (remove Unit/lit.site.cfg.py) --- llvm/utils/gn/secondary/lld/test/BUILD.gn | 82 ++++++++----------- .../gn/secondary/llvm/utils/llvm-lit/BUILD.gn | 1 - 2 files changed, 32 insertions(+), 51 deletions(-) diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn index bee3aab5ef05ef..569880a3c3d8da 100644 --- a/llvm/utils/gn/secondary/lld/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn @@ -6,45 +6,35 @@ import("//llvm/utils/gn/build/libs/zlib/enable.gni") import("//llvm/utils/gn/build/write_cmake_config.gni") import("lld_lit_site_cfg_files.gni") -# The bits common to writing lit.site.cfg.py.in and Unit/lit.site.cfg.py.in. -template("write_lit_cfg") { - write_cmake_config(target_name) { - input = invoker.input - output = invoker.output - values = [ - "LIT_SITE_CFG_IN_HEADER=## Autogenerated from $input, do not edit", - "LLD_BINARY_DIR=" + - rebase_path(get_label_info("//lld", "target_out_dir")), - "CURRENT_LIBS_DIR=", # FIXME: for shared builds only (?) - "CURRENT_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), - "LLD_SOURCE_DIR=" + rebase_path("//lld"), - "LLVM_BINARY_DIR=" + - rebase_path(get_label_info("//llvm", "target_out_dir")), - "LLVM_LIBS_DIR=", # needed only for shared builds - "LLVM_LIT_TOOLS_DIR=", # Intentionally empty, matches cmake build. - "LLVM_SOURCE_DIR=" + rebase_path("//llvm"), - "LLVM_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), - "Python3_EXECUTABLE=$python_path", - "LLVM_TARGET_TRIPLE=$llvm_target_triple", - ] - values += invoker.extra_values - } -} - -write_lit_cfg("lit_site_cfg") { +write_cmake_config("lit_site_cfg") { # Fully-qualified instead of relative for LIT_SITE_CFG_IN_HEADER. 
input = "//lld/test/lit.site.cfg.py.in" output = lld_lit_site_cfg_file dir = get_path_info(output, "dir") - extra_values = [ + values = [ + "LIT_SITE_CFG_IN_HEADER=## Autogenerated from $input, do not edit", + "LLD_BINARY_DIR=" + + rebase_path(get_label_info("//lld", "target_out_dir")), + "CURRENT_LIBS_DIR=", # FIXME: for shared builds only (?) + "CURRENT_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), + "LLD_SOURCE_DIR=" + rebase_path("//lld"), + "LLVM_BINARY_DIR=" + + rebase_path(get_label_info("//llvm", "target_out_dir")), + "LLVM_LIBS_DIR=", # needed only for shared builds + "LLVM_LIT_TOOLS_DIR=", # Intentionally empty, matches cmake build. + "LLVM_SOURCE_DIR=" + rebase_path("//llvm"), + "LLVM_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), + "Python3_EXECUTABLE=$python_path", + "LLVM_TARGET_TRIPLE=$llvm_target_triple", + "LLD_DEFAULT_LD_LLD_IS_MINGW=0", "LLVM_BUILD_EXAMPLES=0", "LLVM_BYE_LINK_INTO_TOOLS=0", ] if (host_os == "win") { - extra_values += [ + values += [ "LLVM_LIT_ERRC_MESSAGES=no such file or directory;is a directory;" + "invalid argument;permission denied", "LLVM_ENABLE_PLUGINS=0", @@ -53,7 +43,7 @@ write_lit_cfg("lit_site_cfg") { "SHLIBDIR=" + rebase_path("$root_out_dir/bin", dir), ] } else { - extra_values += [ + values += [ "LLVM_LIT_ERRC_MESSAGES=", "LLVM_ENABLE_PLUGINS=1", "SHLIBDIR=" + rebase_path("$root_out_dir/lib", dir), @@ -61,59 +51,51 @@ write_lit_cfg("lit_site_cfg") { } if (host_os == "mac") { - extra_values += [ "SHLIBEXT=.dylib" ] + values += [ "SHLIBEXT=.dylib" ] } else if (host_os == "win") { - extra_values += [ "SHLIBEXT=.dll" ] + values += [ "SHLIBEXT=.dll" ] } else { - extra_values += [ "SHLIBEXT=.so" ] + values += [ "SHLIBEXT=.so" ] } if (llvm_enable_dia_sdk) { - extra_values += [ "LLVM_ENABLE_DIA_SDK=1" ] + values += [ "LLVM_ENABLE_DIA_SDK=1" ] } else { - extra_values += [ "LLVM_ENABLE_DIA_SDK=0" ] # Must be 0. + values += [ "LLVM_ENABLE_DIA_SDK=0" ] # Must be 0. 
} if (llvm_enable_libxar) { - extra_values += [ "LLVM_HAVE_LIBXAR=1" ] + values += [ "LLVM_HAVE_LIBXAR=1" ] } else { - extra_values += [ "LLVM_HAVE_LIBXAR=0" ] # Must be 0. + values += [ "LLVM_HAVE_LIBXAR=0" ] # Must be 0. } if (llvm_enable_libxml2) { - extra_values += [ "LLVM_ENABLE_LIBXML2=1" ] + values += [ "LLVM_ENABLE_LIBXML2=1" ] } else { - extra_values += [ "LLVM_ENABLE_LIBXML2=0" ] # Must be 0. + values += [ "LLVM_ENABLE_LIBXML2=0" ] # Must be 0. } if (llvm_enable_zlib) { - extra_values += [ "LLVM_ENABLE_ZLIB=1" ] + values += [ "LLVM_ENABLE_ZLIB=1" ] } else { - extra_values += [ "LLVM_ENABLE_ZLIB=0" ] # Must be 0. + values += [ "LLVM_ENABLE_ZLIB=0" ] # Must be 0. } if (current_cpu == "x64" || current_cpu == "arm64" || current_cpu == "ppc64") { - extra_values += [ "CMAKE_SIZEOF_VOID_P=8" ] + values += [ "CMAKE_SIZEOF_VOID_P=8" ] } else { - extra_values += [ "CMAKE_SIZEOF_VOID_P=4" ] + values += [ "CMAKE_SIZEOF_VOID_P=4" ] } } -write_lit_cfg("lit_unit_site_cfg") { - # Fully-qualified instead of relative for LIT_SITE_CFG_IN_HEADER. - input = "//lld/test/Unit/lit.site.cfg.py.in" - output = lld_lit_unit_site_cfg_file - extra_values = [ "LLVM_BUILD_MODE=." ] -} - # This target should contain all dependencies of check-lld. # //:default depends on it, so that ninja's default target builds all # prerequisites for check-lld but doesn't run check-lld itself. 
group("test") { deps = [ ":lit_site_cfg", - ":lit_unit_site_cfg", "//lld/tools/lld:symlinks", "//llvm/tools/dsymutil", "//llvm/tools/llc", diff --git a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn index 2e43ec0ef63d7f..b317b969312b18 100644 --- a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn @@ -30,7 +30,6 @@ write_cmake_config("llvm-lit") { "//clang/test:lit_site_cfg", "//clang/test:lit_unit_site_cfg", "//lld/test:lit_site_cfg", - "//lld/test:lit_unit_site_cfg", "//lldb/test:lit_api_site_cfg", "//lldb/test:lit_shell_site_cfg", "//lldb/test:lit_site_cfg", From c90235f0ef0bcdfab5087da496ea44de652a7363 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 19 May 2022 20:36:46 +0100 Subject: [PATCH 023/908] [LV] Drop wrap flags for reductions using VP def-use chain. Update clearReductionWrapFlags to use the VPlan def-use chain from the reduction phi recipe to drop reduction wrap flags. This addresses an existing FIXME and fixes a crash when instructions in the reduction chain are not used and have been removed before VPlan codegeneration. Fixes #55540. --- .../Transforms/Vectorize/LoopVectorize.cpp | 43 ++++++++++--------- .../reduction-with-invariant-store.ll | 35 +++++++++++++++ 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0669f9ad4dfc98..5e13c6e1fd1f65 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -581,7 +581,7 @@ class InnerLoopVectorizer { void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State); /// Clear NSW/NUW flags from reduction instructions if necessary. 
- void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc, + void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, VPTransformState &State); /// Fixup the LCSSA phi nodes in the unique exit block. This simply @@ -3884,7 +3884,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); // Wrap flags are in general invalid after vectorization, clear them. - clearReductionWrapFlags(RdxDesc, State); + clearReductionWrapFlags(PhiR, State); // Before each round, move the insertion point right between // the PHIs and the values we are going to write. @@ -4060,34 +4060,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } -void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc, +void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, VPTransformState &State) { + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); RecurKind RK = RdxDesc.getRecurrenceKind(); if (RK != RecurKind::Add && RK != RecurKind::Mul) return; - Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr(); - assert(LoopExitInstr && "null loop exit instruction"); - SmallVector Worklist; - SmallPtrSet Visited; - Worklist.push_back(LoopExitInstr); - Visited.insert(LoopExitInstr); + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(PhiR); + Visited.insert(PhiR); while (!Worklist.empty()) { - Instruction *Cur = Worklist.pop_back_val(); - if (isa(Cur)) - for (unsigned Part = 0; Part < UF; ++Part) { - // FIXME: Should not rely on getVPValue at this point. 
- Value *V = State.get(State.Plan->getVPValue(Cur, true), Part); - cast(V)->dropPoisonGeneratingFlags(); + VPValue *Cur = Worklist.pop_back_val(); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *V = State.get(Cur, Part); + if (!isa(V)) + break; + cast(V)->dropPoisonGeneratingFlags(); } - for (User *U : Cur->users()) { - Instruction *UI = cast(U); - if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) && - Visited.insert(UI).second) - Worklist.push_back(UI); - } + for (VPUser *U : Cur->users()) { + auto *UserRecipe = dyn_cast(U); + if (!UserRecipe) + continue; + for (VPValue *V : UserRecipe->definedValues()) + if (Visited.insert(V).second) + Worklist.push_back(V); + } } } diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index 862a3845b4a1bd..9a8e57d396a5a1 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -478,3 +478,38 @@ exit: store i32 %sum.lcssa, i32* %gep.dst.1, align 4 ret void } + +; Test for PR55540. 
+define void @test_drop_poison_generating_dead_recipe(i64* %dst) { +; CHECK-LABEL: @test_drop_poison_generating_dead_recipe( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ] +; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360 +; CHECK-NEXT: br i1 [[TMP1]], label %middle.block, label %vector.body +; CHECK: middle.block: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]]) +; CHECK-NEXT: store i64 [[TMP2]], i64* [[DST:%.*]], align 8 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 363, 360 +; CHECK-NEXT: br i1 [[CMP_N]], label %exit, label %scalar.ph +; CHECK: scalar.ph: +; +entry: + br label %body + +body: + %red = phi i64 [ 0, %entry ], [ %red.next, %body ] + %iv = phi i32 [ 2, %entry ], [ %iv.next, %body ] + %add.1 = add nuw i64 %red, -23523 + store i64 %add.1, i64* %dst, align 8 + %red.next = add nuw i64 %red, -31364 + store i64 %red.next, i64* %dst, align 8 + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp ugt i32 %iv, 363 + br i1 %ec, label %exit, label %body + +exit: + ret void +} From 7aa9c39381989134e5d36ec1ea859dc6dc44d8eb Mon Sep 17 00:00:00 2001 From: Jennifer Yu Date: Tue, 17 May 2022 14:17:32 -0700 Subject: [PATCH 024/908] [Clang][[OpenMP5.1] Initial parser/sema for default(private) clause This implements the default(private) clause as defined in OMP5.1 Differential Revision: https://reviews.llvm.org/D125912 --- clang/docs/LibASTMatchersReference.html | 24 +++- clang/include/clang/ASTMatchers/ASTMatchers.h | 27 +++- clang/lib/ASTMatchers/Dynamic/Registry.cpp | 1 + clang/lib/Parse/ParseOpenMP.cpp | 14 +- clang/lib/Sema/SemaOpenMP.cpp | 75 ++++++++-- .../OpenMP/default_firstprivate_ast_print.cpp | 102 ++++++++++++++ 
.../test/OpenMP/default_private_ast_print.cpp | 99 +++++++++++++ ...stribute_parallel_for_default_messages.cpp | 19 ++- ...ute_parallel_for_simd_default_messages.cpp | 19 ++- .../test/OpenMP/parallel_default_messages.cpp | 10 +- .../OpenMP/parallel_for_default_messages.cpp | 11 +- .../parallel_for_simd_default_messages.cpp | 11 +- clang/test/OpenMP/parallel_master_codegen.cpp | 130 +++++++++++------- .../parallel_master_default_messages.cpp | 20 +-- .../parallel_sections_default_messages.cpp | 37 +++-- .../target_parallel_default_messages.cpp | 11 +- .../target_parallel_for_default_messages.cpp | 11 +- ...get_parallel_for_simd_default_messages.cpp | 11 +- .../OpenMP/target_teams_default_messages.cpp | 11 +- ...rget_teams_distribute_default_messages.cpp | 11 +- ...stribute_parallel_for_default_messages.cpp | 11 +- ...ute_parallel_for_simd_default_messages.cpp | 11 +- clang/test/OpenMP/task_default_messages.cpp | 11 +- clang/test/OpenMP/teams_default_messages.cpp | 12 +- .../teams_distribute_default_messages.cpp | 12 +- ...stribute_parallel_for_default_messages.cpp | 12 +- ...ute_parallel_for_simd_default_messages.cpp | 11 +- ...teams_distribute_simd_default_messages.cpp | 16 ++- .../ASTMatchers/ASTMatchersNarrowingTest.cpp | 101 ++++++++++++-- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 1 + 30 files changed, 693 insertions(+), 159 deletions(-) create mode 100644 clang/test/OpenMP/default_firstprivate_ast_print.cpp create mode 100644 clang/test/OpenMP/default_private_ast_print.cpp diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index b6dc7777f26ec0..c206c63e84acb8 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -1231,11 +1231,12 @@

Node Matchers

#pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(private) #pragma omp parallel default(firstprivate) #pragma omp parallel -``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, and -``default(firstprivate)`` +``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, +``default(private)`` and ``default(firstprivate)`` @@ -4715,6 +4716,22 @@

Narrowing Matchers

+Matcher<OMPDefaultClause>isPrivateKind +
Matches if the OpenMP ``default`` clause has ``private`` kind
+specified.
+
+Given
+
+  #pragma omp parallel
+  #pragma omp parallel default(none)
+  #pragma omp parallel default(shared)
+  #pragma omp parallel default(private)
+  #pragma omp parallel default(firstprivate)
+
+``ompDefaultClause(isPrivateKind())`` matches only
+``default(private)``.
+
+ Matcher<OMPDefaultClause>isFirstPrivateKind
Matches if the OpenMP ``default`` clause has ``firstprivate`` kind
 specified.
@@ -4724,6 +4741,7 @@ 

Narrowing Matchers

#pragma omp parallel #pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(private) #pragma omp parallel default(firstprivate) ``ompDefaultClause(isFirstPrivateKind())`` matches only @@ -4739,6 +4757,7 @@

Narrowing Matchers

#pragma omp parallel #pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(private) #pragma omp parallel default(firstprivate) ``ompDefaultClause(isNoneKind())`` matches only ``default(none)``. @@ -4753,6 +4772,7 @@

Narrowing Matchers

#pragma omp parallel #pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(private) #pragma omp parallel default(firstprivate) ``ompDefaultClause(isSharedKind())`` matches only ``default(shared)``. diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index d2bffe8fef4917..0b51c2b463bd08 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -8303,12 +8303,13 @@ AST_MATCHER_P(OMPExecutableDirective, hasAnyClause, /// \code /// #pragma omp parallel default(none) /// #pragma omp parallel default(shared) +/// #pragma omp parallel default(private) /// #pragma omp parallel default(firstprivate) /// #pragma omp parallel /// \endcode /// -/// ``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, and -/// ``default(firstprivate)`` +/// ``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, +/// `` default(private)`` and ``default(firstprivate)`` extern const internal::VariadicDynCastAllOfMatcher ompDefaultClause; @@ -8320,6 +8321,7 @@ extern const internal::VariadicDynCastAllOfMatcher /// #pragma omp parallel /// #pragma omp parallel default(none) /// #pragma omp parallel default(shared) +/// #pragma omp parallel default(private) /// #pragma omp parallel default(firstprivate) /// \endcode /// @@ -8336,6 +8338,7 @@ AST_MATCHER(OMPDefaultClause, isNoneKind) { /// #pragma omp parallel /// #pragma omp parallel default(none) /// #pragma omp parallel default(shared) +/// #pragma omp parallel default(private) /// #pragma omp parallel default(firstprivate) /// \endcode /// @@ -8344,6 +8347,25 @@ AST_MATCHER(OMPDefaultClause, isSharedKind) { return Node.getDefaultKind() == llvm::omp::OMP_DEFAULT_shared; } +/// Matches if the OpenMP ``default`` clause has ``private`` kind +/// specified. 
+/// +/// Given +/// +/// \code +/// #pragma omp parallel +/// #pragma omp parallel default(none) +/// #pragma omp parallel default(shared) +/// #pragma omp parallel default(private) +/// #pragma omp parallel default(firstprivate) +/// \endcode +/// +/// ``ompDefaultClause(isPrivateKind())`` matches only +/// ``default(private)``. +AST_MATCHER(OMPDefaultClause, isPrivateKind) { + return Node.getDefaultKind() == llvm::omp::OMP_DEFAULT_private; +} + /// Matches if the OpenMP ``default`` clause has ``firstprivate`` kind /// specified. /// @@ -8353,6 +8375,7 @@ AST_MATCHER(OMPDefaultClause, isSharedKind) { /// #pragma omp parallel /// #pragma omp parallel default(none) /// #pragma omp parallel default(shared) +/// #pragma omp parallel default(private) /// #pragma omp parallel default(firstprivate) /// \endcode /// diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp index a6f93c8941c7c8..72629d0aa91e5c 100644 --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp @@ -425,6 +425,7 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(isExpr); REGISTER_MATCHER(isExternC); REGISTER_MATCHER(isFinal); + REGISTER_MATCHER(isPrivateKind); REGISTER_MATCHER(isFirstPrivateKind); REGISTER_MATCHER(isImplicit); REGISTER_MATCHER(isInStdNamespace); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 36105981ec259a..e3594e17610850 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1768,7 +1768,7 @@ void Parser::ParseOpenMPEndAssumesDirective(SourceLocation Loc) { /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'. 
/// /// default-clause: -/// 'default' '(' 'none' | 'shared' | 'firstprivate' ') +/// 'default' '(' 'none' | 'shared' | 'private' | 'firstprivate' ') /// /// proc_bind-clause: /// 'proc_bind' '(' 'master' | 'close' | 'spread' ') @@ -3586,7 +3586,7 @@ OMPClause *Parser::ParseOpenMPInteropClause(OpenMPClauseKind Kind, /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'. /// /// default-clause: -/// 'default' '(' 'none' | 'shared' | 'firstprivate' ')' +/// 'default' '(' 'none' | 'shared' | 'private' | 'firstprivate' ')' /// /// proc_bind-clause: /// 'proc_bind' '(' 'master' | 'close' | 'spread' ')' @@ -3604,10 +3604,14 @@ OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind, if (!Val || ParseOnly) return nullptr; if (getLangOpts().OpenMP < 51 && Kind == OMPC_default && - static_cast(Val.getValue().Type) == - OMP_DEFAULT_firstprivate) { + (static_cast(Val.getValue().Type) == OMP_DEFAULT_private || + static_cast(Val.getValue().Type) == + OMP_DEFAULT_firstprivate)) { Diag(Val.getValue().LOpen, diag::err_omp_invalid_dsa) - << getOpenMPClauseName(OMPC_firstprivate) + << getOpenMPClauseName(static_cast(Val.getValue().Type) == + OMP_DEFAULT_private + ? OMPC_private + : OMPC_firstprivate) << getOpenMPClauseName(OMPC_default) << "5.1"; return nullptr; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 9f3574e8d0e75e..e4305ba8c1718a 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -59,7 +59,8 @@ enum DefaultDataSharingAttributes { DSA_unspecified = 0, /// Data sharing attribute not specified. DSA_none = 1 << 0, /// Default data sharing attribute 'none'. DSA_shared = 1 << 1, /// Default data sharing attribute 'shared'. - DSA_firstprivate = 1 << 2, /// Default data sharing attribute 'firstprivate'. + DSA_private = 1 << 2, /// Default data sharing attribute 'private'. + DSA_firstprivate = 1 << 3, /// Default data sharing attribute 'firstprivate'. 
}; /// Stack for tracking declarations used in OpenMP directives and @@ -695,6 +696,11 @@ class DSAStackTy { getTopOfStack().DefaultAttr = DSA_shared; getTopOfStack().DefaultAttrLoc = Loc; } + /// Set default data sharing attribute to private. + void setDefaultDSAPrivate(SourceLocation Loc) { + getTopOfStack().DefaultAttr = DSA_private; + getTopOfStack().DefaultAttrLoc = Loc; + } /// Set default data sharing attribute to firstprivate. void setDefaultDSAFirstPrivate(SourceLocation Loc) { getTopOfStack().DefaultAttr = DSA_firstprivate; @@ -1233,7 +1239,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, case DSA_none: return DVar; case DSA_firstprivate: - if (VD->getStorageDuration() == SD_Static && + if (VD && VD->getStorageDuration() == SD_Static && VD->getDeclContext()->isFileContext()) { DVar.CKind = OMPC_unknown; } else { @@ -1241,6 +1247,18 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, } DVar.ImplicitDSALoc = Iter->DefaultAttrLoc; return DVar; + case DSA_private: + // each variable with static storage duration that is declared + // in a namespace or global scope and referenced in the construct, + // and that does not have a predetermined data-sharing attribute + if (VD && VD->getStorageDuration() == SD_Static && + VD->getDeclContext()->isFileContext()) { + DVar.CKind = OMPC_unknown; + } else { + DVar.CKind = OMPC_private; + } + DVar.ImplicitDSALoc = Iter->DefaultAttrLoc; + return DVar; case DSA_unspecified: // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced // in a Construct, implicitly determined, p.2] @@ -2142,7 +2160,8 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, !cast(D)->getInit()->isGLValue()) && // If the variable is implicitly firstprivate and scalar - capture by // copy - !(DSAStack->getDefaultDSA() == DSA_firstprivate && + !((DSAStack->getDefaultDSA() == DSA_firstprivate || + DSAStack->getDefaultDSA() == DSA_private) && !DSAStack->hasExplicitDSA( D, 
[](OpenMPClauseKind K, bool) { return K != OMPC_unknown; }, Level) && @@ -2290,11 +2309,13 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, // Global shared must not be captured. if (VD && !VD->hasLocalStorage() && DVarPrivate.CKind == OMPC_unknown && ((DSAStack->getDefaultDSA() != DSA_none && + DSAStack->getDefaultDSA() != DSA_private && DSAStack->getDefaultDSA() != DSA_firstprivate) || DVarTop.CKind == OMPC_shared)) return nullptr; if (DVarPrivate.CKind != OMPC_unknown || (VD && (DSAStack->getDefaultDSA() == DSA_none || + DSAStack->getDefaultDSA() == DSA_private || DSAStack->getDefaultDSA() == DSA_firstprivate))) return VD ? VD : cast(DVarPrivate.PrivateCopy->getDecl()); } @@ -2464,7 +2485,11 @@ bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, unsigned NumLevels = getOpenMPCaptureLevels(DSAStack->getDirective(Level)); if (Level == 0) - return (NumLevels == CaptureLevel + 1) && TopDVar.CKind != OMPC_shared; + // non-file scope static variale with default(firstprivate) + // should be gloabal captured. + return (NumLevels == CaptureLevel + 1 && + (TopDVar.CKind != OMPC_shared || + DSAStack->getDefaultDSA() == DSA_firstprivate)); do { --Level; DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level); @@ -3444,6 +3469,7 @@ class DSAAttrChecker final : public StmtVisitor { CapturedStmt *CS = nullptr; const static unsigned DefaultmapKindNum = OMPC_DEFAULTMAP_pointer + 1; llvm::SmallVector ImplicitFirstprivate; + llvm::SmallVector ImplicitPrivate; llvm::SmallVector ImplicitMap[DefaultmapKindNum][OMPC_MAP_delete]; llvm::SmallVector ImplicitMapModifier[DefaultmapKindNum]; @@ -3539,18 +3565,21 @@ class DSAAttrChecker final : public StmtVisitor { // by being listed in a data-sharing attribute clause. 
if (DVar.CKind == OMPC_unknown && (Stack->getDefaultDSA() == DSA_none || + Stack->getDefaultDSA() == DSA_private || Stack->getDefaultDSA() == DSA_firstprivate) && isImplicitOrExplicitTaskingRegion(DKind) && VarsWithInheritedDSA.count(VD) == 0) { bool InheritedDSA = Stack->getDefaultDSA() == DSA_none; - if (!InheritedDSA && Stack->getDefaultDSA() == DSA_firstprivate) { + if (!InheritedDSA && (Stack->getDefaultDSA() == DSA_firstprivate || + Stack->getDefaultDSA() == DSA_private)) { DSAStackTy::DSAVarData DVar = Stack->getImplicitDSA(VD, /*FromParent=*/false); InheritedDSA = DVar.CKind == OMPC_unknown; } if (InheritedDSA) VarsWithInheritedDSA[VD] = E; - return; + if (Stack->getDefaultDSA() == DSA_none) + return; } // OpenMP 5.0 [2.19.7.2, defaultmap clause, Description] @@ -3558,7 +3587,7 @@ class DSAAttrChecker final : public StmtVisitor { // construct that does not have a predetermined data-sharing attribute // and does not appear in a to or link clause on a declare target // directive must be listed in a data-mapping attribute clause, a - // data-haring attribute clause (including a data-sharing attribute + // data-sharing attribute clause (including a data-sharing attribute // clause on a combined construct where target. is one of the // constituent constructs), or an is_device_ptr clause. OpenMPDefaultmapClauseKind ClauseKind = @@ -3669,10 +3698,16 @@ class DSAAttrChecker final : public StmtVisitor { // Define implicit data-sharing attributes for task. 
DVar = Stack->getImplicitDSA(VD, /*FromParent=*/false); if (((isOpenMPTaskingDirective(DKind) && DVar.CKind != OMPC_shared) || - (Stack->getDefaultDSA() == DSA_firstprivate && - DVar.CKind == OMPC_firstprivate && !DVar.RefExpr)) && + (((Stack->getDefaultDSA() == DSA_firstprivate && + DVar.CKind == OMPC_firstprivate) || + (Stack->getDefaultDSA() == DSA_private && + DVar.CKind == OMPC_private)) && + !DVar.RefExpr)) && !Stack->isLoopControlVariable(VD).first) { - ImplicitFirstprivate.push_back(E); + if (Stack->getDefaultDSA() == DSA_private) + ImplicitPrivate.push_back(E); + else + ImplicitFirstprivate.push_back(E); return; } @@ -3888,6 +3923,7 @@ class DSAAttrChecker final : public StmtVisitor { ArrayRef getImplicitFirstprivate() const { return ImplicitFirstprivate; } + ArrayRef getImplicitPrivate() const { return ImplicitPrivate; } ArrayRef getImplicitMap(OpenMPDefaultmapClauseKind DK, OpenMPMapClauseKind MK) const { return ImplicitMap[DK][MK]; @@ -5882,6 +5918,9 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( SmallVector ImplicitFirstprivates( DSAChecker.getImplicitFirstprivate().begin(), DSAChecker.getImplicitFirstprivate().end()); + SmallVector ImplicitPrivates( + DSAChecker.getImplicitPrivate().begin(), + DSAChecker.getImplicitPrivate().end()); const unsigned DefaultmapKindNum = OMPC_DEFAULTMAP_pointer + 1; SmallVector ImplicitMaps[DefaultmapKindNum][OMPC_MAP_delete]; SmallVector @@ -5934,6 +5973,17 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( ErrorFound = true; } } + if (!ImplicitPrivates.empty()) { + if (OMPClause *Implicit = + ActOnOpenMPPrivateClause(ImplicitPrivates, SourceLocation(), + SourceLocation(), SourceLocation())) { + ClausesWithImplicit.push_back(Implicit); + ErrorFound = cast(Implicit)->varlist_size() != + ImplicitPrivates.size(); + } else { + ErrorFound = true; + } + } // OpenMP 5.0 [2.19.7] // If a list item appears in a reduction, lastprivate or linear // clause on a combined target construct then it is treated as @@ -6352,6 +6402,7 
@@ StmtResult Sema::ActOnOpenMPExecutableDirective( // Check variables in the clauses if default(none) or // default(firstprivate) was specified. if (DSAStack->getDefaultDSA() == DSA_none || + DSAStack->getDefaultDSA() == DSA_private || DSAStack->getDefaultDSA() == DSA_firstprivate) { DSAAttrChecker DSAChecker(DSAStack, *this, nullptr); for (OMPClause *C : Clauses) { @@ -6471,6 +6522,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( continue; ErrorFound = true; if (DSAStack->getDefaultDSA() == DSA_none || + DSAStack->getDefaultDSA() == DSA_private || DSAStack->getDefaultDSA() == DSA_firstprivate) { Diag(P.second->getExprLoc(), diag::err_omp_no_dsa_for_variable) << P.first << P.second->getSourceRange(); @@ -16027,6 +16079,9 @@ OMPClause *Sema::ActOnOpenMPDefaultClause(DefaultKind Kind, case OMP_DEFAULT_firstprivate: DSAStack->setDefaultDSAFirstPrivate(KindKwLoc); break; + case OMP_DEFAULT_private: + DSAStack->setDefaultDSAPrivate(KindKwLoc); + break; default: llvm_unreachable("DSA unexpected in OpenMP default clause"); } diff --git a/clang/test/OpenMP/default_firstprivate_ast_print.cpp b/clang/test/OpenMP/default_firstprivate_ast_print.cpp new file mode 100644 index 00000000000000..22af85db4fc214 --- /dev/null +++ b/clang/test/OpenMP/default_firstprivate_ast_print.cpp @@ -0,0 +1,102 @@ +// expected-no-diagnostics + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -ast-print %s | FileCheck %s --check-prefix=PRINT + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -ast-dump %s | FileCheck %s --check-prefix=DUMP + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: 
-Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -emit-pch -o %t %s + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP + +#ifndef HEADER +#define HEADER + +struct SomeKernel { + int targetDev; + float devPtr; + SomeKernel(); + ~SomeKernel(); + + template + void apply() { +#pragma omp parallel default(firstprivate) + { + targetDev++; + } + // PRINT: #pragma omp parallel default(firstprivate) + // PRINT-NEXT: { + // PRINT-NEXT: this->targetDev++; + // CHECK-NEXT: } + // DUMP: -OMPParallelDirective + // DUMP->NEXT: -OMPDefaultClause + } + // PRINT: template<> void apply<32U>() + // PRINT: #pragma omp parallel default(firstprivate) + // PRINT-NEXT: { + // PRINT-NEXT: this->targetDev++; + // CHECK-NEXT: } + // DUMP: -OMPParallelDirective + // DUMP-NEXT: -OMPDefaultClause +}; + +void use_template() { + SomeKernel aKern; + aKern.apply<32>(); +} + +void foo() { + int a; +#pragma omp parallel default(firstprivate) + a++; + // PRINT: #pragma omp parallel default(firstprivate) + // PRINT-NEXT: a++; + // DUMP: -OMPParallelDirective + // DUMP-NEXT: -OMPDefaultClause + // DUMP-NEXT: -OMPFirstprivateClause {{.*}} + // DUMP-NEXT: -DeclRefExpr {{.*}} 'a' +} + +struct St { + int a, b; + static int y; + St() : a(0), b(0) {} + ~St() {} +}; +int St::y = 0; +void bar() { + St a = St(); + static int yy = 0; +#pragma omp parallel default(firstprivate) + { + a.a += 1; + a.b += 1; + a.y++; + yy++; + St::y++; + } + // PRINT: #pragma omp parallel default(firstprivate) + // DUMP: -OMPParallelDirective + // 
DUMP-NEXT: -OMPDefaultClause + // DUMP-NEXT: -OMPFirstprivateClause {{.*}} + // DUMP-NEXT: -DeclRefExpr {{.*}} 'a' + // DUMP-NEXT: -DeclRefExpr {{.*}} 'yy' + // DUMP-NEXT: -DeclRefExpr {{.*}} 'y' +} +#endif // HEADER diff --git a/clang/test/OpenMP/default_private_ast_print.cpp b/clang/test/OpenMP/default_private_ast_print.cpp new file mode 100644 index 00000000000000..c0edb4821178d8 --- /dev/null +++ b/clang/test/OpenMP/default_private_ast_print.cpp @@ -0,0 +1,99 @@ +// expected-no-diagnostics + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -ast-print %s | FileCheck %s --check-prefix=PRINT + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -ast-dump %s | FileCheck %s --check-prefix=DUMP + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -emit-pch -o %t %s + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +//RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 \ +//RUN: -x c++ -std=c++14 -fexceptions -fcxx-exceptions \ +//RUN: -Wno-source-uses-openmp -Wno-openmp-clauses \ +//RUN: -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP + +#ifndef HEADER +#define HEADER + +struct SomeKernel { + int targetDev; + float devPtr; + SomeKernel(); + ~SomeKernel(); + + template + void apply() { +#pragma omp parallel default(private) + { + targetDev++; + } + // PRINT: #pragma omp parallel 
default(private) + // PRINT-NEXT: { + // PRINT-NEXT: this->targetDev++; + // CHECK-NEXT: } + // DUMP: -OMPParallelDirective + // DUMP->NEXT: -OMPDefaultClause + } + // PRINT: template<> void apply<32U>() + // PRINT: #pragma omp parallel default(private) + // PRINT-NEXT: { + // PRINT-NEXT: this->targetDev++; + // CHECK-NEXT: } + // DUMP: -OMPParallelDirective + // DUMP-NEXT: -OMPDefaultClause +}; + +void use_template() { + SomeKernel aKern; + aKern.apply<32>(); +} + +void foo() { + int a; +#pragma omp parallel default(private) + a++; + // PRINT: #pragma omp parallel default(private) + // PRINT-NEXT: a++; + // DUMP: -OMPParallelDirective + // DUMP-NEXT: -OMPDefaultClause + // DUMP-NEXT: -OMPPrivateClause {{.*}} + // DUMP-NEXT: -DeclRefExpr {{.*}} 'a' +} + +struct St { + int a, b; + int y; + St() : a(0), b(0) {} + ~St() {} +}; +void bar() { + St a = St(); + static int yy = 0; +#pragma omp parallel default(private) + { + a.a += 1; + a.b += 1; + a.y++; + yy++; + } + // PRINT: #pragma omp parallel default(private) + // DUMP: -OMPParallelDirective + // DUMP-NEXT: -OMPDefaultClause + // DUMP-NEXT: -OMPPrivateClause {{.*}} + // DUMP-NEXT: -DeclRefExpr {{.*}} 'a' + // DUMP-NEXT: -DeclRefExpr {{.*}} 'yy' +} +#endif // HEADER diff --git a/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp index 67e4615ae8c01d..1eb2ce81460e18 100644 --- a/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp @@ -23,12 +23,12 @@ T tmain(T argc) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} 
expected-error {{expected ')'}} expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -43,7 +43,7 @@ T tmain(T argc) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -71,12 +71,12 @@ int main(int argc, char **argv) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -91,7 +91,7 @@ int main(int argc, char **argv) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for default(x) // 
expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -115,6 +115,13 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + for (i = 0; i < argc; ++i) { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return (tmain(argc) + tmain(argv[0][0])); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp index 9aab00f16c48fd..c7417fb45e8c25 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp @@ -23,12 +23,12 @@ T tmain(T argc) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} 
expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -43,7 +43,7 @@ T tmain(T argc) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -71,12 +71,12 @@ int main(int argc, char **argv) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -91,7 +91,7 @@ int main(int argc, char **argv) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd 
default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target @@ -107,6 +107,13 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + for (i = 0; i < argc; ++i) { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif #pragma omp parallel default(none) // expected-note 2 {{explicit data sharing attribute requested here}} diff --git a/clang/test/OpenMP/parallel_default_messages.cpp b/clang/test/OpenMP/parallel_default_messages.cpp index e9d2946b7686a5..7db15d72673176 100644 --- a/clang/test/OpenMP/parallel_default_messages.cpp +++ b/clang/test/OpenMP/parallel_default_messages.cpp @@ -18,11 +18,10 @@ int main(int argc, char **argv) { const int c = 0; #pragma omp parallel default // expected-error {{expected '(' after 'default'}} -#pragma omp parallel default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} -#pragma omp parallel default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp parallel default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this 
'('}} +#pragma omp parallel default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} #pragma omp parallel default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} #pragma omp parallel default(shared), default(shared) // expected-error {{directive '#pragma omp parallel' cannot contain more than one 'default' clause}} -#pragma omp parallel default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp parallel default(none) // expected-note {{explicit data sharing attribute requested here}} @@ -41,6 +40,11 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp parallel default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; diff --git a/clang/test/OpenMP/parallel_for_default_messages.cpp b/clang/test/OpenMP/parallel_for_default_messages.cpp index c64b76948c0186..6afa1d7817e74e 100644 --- a/clang/test/OpenMP/parallel_for_default_messages.cpp +++ b/clang/test/OpenMP/parallel_for_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp parallel for default // expected-error {{expected '(' after 'default'}} for (i = 0; i < argc; ++i) foo(); -#pragma omp parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp parallel for default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} 
expected-error {{expected ')'}} expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); -#pragma omp parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp parallel for default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp parallel for default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp parallel for default(shared), default(shared) // expected-error {{directive '#pragma omp parallel for' cannot contain more than one 'default' clause}} for (i = 0; i < argc; ++i) foo(); -#pragma omp parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp parallel for default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); @@ -49,6 +49,11 @@ int main(int argc, char **argv) { ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} } +#pragma omp parallel for default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + for (i = 0; i < argc; ++i) { + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + } #endif return 0; diff --git a/clang/test/OpenMP/parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/parallel_for_simd_default_messages.cpp index 6368d280de5db9..089572298ec7dc 100644 --- a/clang/test/OpenMP/parallel_for_simd_default_messages.cpp +++ 
b/clang/test/OpenMP/parallel_for_simd_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp parallel for simd default // expected-error {{expected '(' after 'default'}} for (i = 0; i < argc; ++i) foo(); -#pragma omp parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp parallel for simd default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); -#pragma omp parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp parallel for simd default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp parallel for simd default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp parallel for simd' cannot contain more than one 'default' clause}} for (i = 0; i < argc; ++i) foo(); -#pragma omp parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp parallel for simd default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); @@ -49,6 +49,11 @@ int main(int argc, char **argv) { x++; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} y++; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } 
+#pragma omp parallel for default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + for (i = 0; i < argc; ++i) { + x++; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + y++; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp index 511c52d1e1e4f4..6105f657b30a60 100644 --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -593,13 +593,18 @@ void parallel_master_allocate() { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 // CHECK17-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[Y_CASTED1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) // CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* @_ZZ36parallel_master_default_firstprivatevE1y, align 4 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[Y_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[Y_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i64)* @.omp_outlined. 
to void (i32*, i32*, ...)*), %struct.St* [[A]], i64 [[TMP1]]) -// CHECK17-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR3:[0-9]+]] +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZN2St1yE, align 4 +// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[Y_CASTED1]] to i32* +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[Y_CASTED1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.St* [[A]], i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4:[0-9]+]] // CHECK17-NEXT: ret void // // @@ -614,56 +619,64 @@ void parallel_master_allocate() { // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]], i64 noundef [[Y1:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK17-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[Y_ADDR2:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[A4:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store %struct.St* [[A]], %struct.St** [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[Y]], i64* [[Y_ADDR]], align 8 +// CHECK17-NEXT: store i64 [[Y1]], i64* [[Y_ADDR2]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.St*, %struct.St** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[Y_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK17-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[Y_ADDR2]] to i32* +// CHECK17-NEXT: [[TMP1:%.*]] = bitcast %struct.St* [[A4]] to i8* +// CHECK17-NEXT: [[TMP2:%.*]] = bitcast %struct.St* [[TMP0]] to i8* +// CHECK17-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK17-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK17: omp_if.then: -// CHECK17-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[A1]], align 4 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP0]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[B]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A4]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[A5]], align 4 +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A4]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[B]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[B]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK17-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* @_ZN2St1yE, align 
4 -// CHECK17-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[INC3]], i32* @_ZN2St1yE, align 4 -// CHECK17-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* @_ZN2St1yE, align 4 +// CHECK17-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: store i32 [[INC7]], i32* @_ZN2St1yE, align 4 +// CHECK17-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: br label [[OMP_IF_END]] // CHECK17: omp_if.end: +// CHECK17-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A4]]) #[[ATTR4]] // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@_ZN2StD1Ev -// CHECK17-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 { +// CHECK17-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR5:[0-9]+]] comdat align 2 { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK17-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[THIS1:%.*]] = load %struct.St*, %struct.St** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void @_ZN2StD2Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]] +// CHECK17-NEXT: call void @_ZN2StD2Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR4]] // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@_ZN2StC2Ev -// CHECK17-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK17-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK17-NEXT: store %struct.St* [[THIS]], %struct.St** 
[[THIS_ADDR]], align 8 @@ -676,7 +689,7 @@ void parallel_master_allocate() { // // // CHECK17-LABEL: define {{[^@]+}}@_ZN2StD2Ev -// CHECK17-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK17-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK17-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 @@ -689,13 +702,18 @@ void parallel_master_allocate() { // CHECK18-NEXT: entry: // CHECK18-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 // CHECK18-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK18-NEXT: [[Y_CASTED1:%.*]] = alloca i64, align 8 // CHECK18-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) // CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* @_ZZ36parallel_master_default_firstprivatevE1y, align 4 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[Y_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[Y_CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.St* [[A]], i64 [[TMP1]]) -// CHECK18-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR3:[0-9]+]] +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZN2St1yE, align 4 +// CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[Y_CASTED1]] to i32* +// CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 +// CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[Y_CASTED1]], align 8 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.St* [[A]], i64 [[TMP1]], i64 [[TMP3]]) +// CHECK18-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4:[0-9]+]] // CHECK18-NEXT: ret void // // @@ -710,56 +728,64 @@ void parallel_master_allocate() { // // // CHECK18-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]], i64 noundef [[Y1:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK18-NEXT: entry: // CHECK18-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK18-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK18-NEXT: [[A_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK18-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK18-NEXT: [[Y_ADDR2:%.*]] = alloca i64, align 8 +// CHECK18-NEXT: [[A4:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 // CHECK18-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store %struct.St* [[A]], %struct.St** [[A_ADDR]], align 8 // CHECK18-NEXT: store i64 [[Y]], i64* [[Y_ADDR]], align 8 +// CHECK18-NEXT: store i64 [[Y1]], i64* [[Y_ADDR2]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.St*, %struct.St** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[Y_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[TMP1]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK18-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[Y_ADDR2]] to i32* +// CHECK18-NEXT: [[TMP1:%.*]] = bitcast %struct.St* [[A4]] to i8* +// CHECK18-NEXT: [[TMP2:%.*]] = bitcast %struct.St* [[TMP0]] to i8* +// CHECK18-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK18-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK18-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK18-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK18-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK18: omp_if.then: -// CHECK18-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP0]], i32 0, i32 0 -// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[A1]], align 4 -// CHECK18-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP0]], i32 0, i32 1 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 -// CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[B]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK18-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK18-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A4]], i32 0, i32 0 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[A5]], align 4 +// CHECK18-NEXT: [[B:%.*]] = 
getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A4]], i32 0, i32 1 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[B]], align 4 +// CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[B]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK18-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK18-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* @_ZN2St1yE, align 4 -// CHECK18-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[INC3]], i32* @_ZN2St1yE, align 4 -// CHECK18-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* @_ZN2St1yE, align 4 +// CHECK18-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK18-NEXT: store i32 [[INC7]], i32* @_ZN2St1yE, align 4 +// CHECK18-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK18-NEXT: br label [[OMP_IF_END]] // CHECK18: omp_if.end: +// CHECK18-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A4]]) #[[ATTR4]] // CHECK18-NEXT: ret void // // // CHECK18-LABEL: define {{[^@]+}}@_ZN2StD1Ev -// CHECK18-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 { +// CHECK18-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR5:[0-9]+]] comdat align 2 { // CHECK18-NEXT: entry: // CHECK18-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK18-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[THIS1:%.*]] = load %struct.St*, %struct.St** [[THIS_ADDR]], align 8 -// CHECK18-NEXT: call void @_ZN2StD2Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]] +// CHECK18-NEXT: call void @_ZN2StD2Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) 
#[[ATTR4]] // CHECK18-NEXT: ret void // // // CHECK18-LABEL: define {{[^@]+}}@_ZN2StC2Ev -// CHECK18-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK18-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { // CHECK18-NEXT: entry: // CHECK18-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK18-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 @@ -772,7 +798,7 @@ void parallel_master_allocate() { // // // CHECK18-LABEL: define {{[^@]+}}@_ZN2StD2Ev -// CHECK18-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK18-SAME: (%struct.St* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 { // CHECK18-NEXT: entry: // CHECK18-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK18-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 diff --git a/clang/test/OpenMP/parallel_master_default_messages.cpp b/clang/test/OpenMP/parallel_master_default_messages.cpp index 39f78ea53ae160..992b589c46a768 100644 --- a/clang/test/OpenMP/parallel_master_default_messages.cpp +++ b/clang/test/OpenMP/parallel_master_default_messages.cpp @@ -16,17 +16,16 @@ static int x = 0; int main(int argc, char **argv) { #pragma omp parallel master default // expected-error {{expected '(' after 'default'}} { -#pragma omp parallel master default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} - { -#pragma omp parallel master default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp parallel master default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error 
{{expected ')'}} expected-note {{to match this '('}} { +#pragma omp parallel master default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} + { #pragma omp parallel master default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} - { + { #pragma omp parallel master default(shared), default(shared) // expected-error {{directive '#pragma omp parallel master' cannot contain more than one 'default' clause}} - { -#pragma omp parallel master default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} - { - foo(); + { +#pragma omp parallel master default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} + {foo(); } } } @@ -53,6 +52,11 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp parallel master default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; diff --git a/clang/test/OpenMP/parallel_sections_default_messages.cpp b/clang/test/OpenMP/parallel_sections_default_messages.cpp index cfa95445fb5361..a319269780247b 100644 --- a/clang/test/OpenMP/parallel_sections_default_messages.cpp +++ b/clang/test/OpenMP/parallel_sections_default_messages.cpp @@ -1,23 +1,30 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s \ +// RUN: -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd 
-ferror-limit 100 -o - %s \ +// RUN: -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s \ +// RUN: -fopenmp-version=51 -DOMP51 -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s \ +// RUN: -fopenmp-version=51 -DOMP51 -Wuninitialized void foo(); int main(int argc, char **argv) { #pragma omp parallel sections default // expected-error {{expected '(' after 'default'}} { -#pragma omp parallel sections default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} - { -#pragma omp parallel sections default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp parallel sections default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} { +#pragma omp parallel sections default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} + { #pragma omp parallel sections default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} - { + { #pragma omp parallel sections default(shared), default(shared) // expected-error {{directive '#pragma omp parallel sections' cannot contain more than one 'default' clause}} - { -#pragma omp parallel sections default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} - { - foo(); + { +#pragma omp parallel sections default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} + {foo(); } } } @@ -37,5 +44,15 @@ int main(int argc, char **argv) { ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}} } } + +#ifdef OMP51 +#pragma omp parallel sections default(none) // expected-note {{explicit 
data sharing attribute requested here}} + { +#pragma omp parallel sections default(private) + { + ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}} + } + } +#endif // OMP51 return 0; } diff --git a/clang/test/OpenMP/target_parallel_default_messages.cpp b/clang/test/OpenMP/target_parallel_default_messages.cpp index c8f68659438fe7..254499aff8408e 100644 --- a/clang/test/OpenMP/target_parallel_default_messages.cpp +++ b/clang/test/OpenMP/target_parallel_default_messages.cpp @@ -16,15 +16,15 @@ static int x = 0; int main(int argc, char **argv) { #pragma omp target parallel default // expected-error {{expected '(' after 'default'}} foo(); -#pragma omp target parallel default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp target parallel default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} foo(); -#pragma omp target parallel default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target parallel default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp target parallel default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} foo(); #pragma omp target parallel default (shared), default(shared) // expected-error {{directive '#pragma omp target parallel' cannot contain more than one 'default' clause}} foo(); -#pragma omp target parallel default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target parallel default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp target parallel 
default(none) // expected-note {{explicit data sharing attribute requested here}} @@ -44,6 +44,11 @@ int main(int argc, char **argv) { ++x; ++y; } +#pragma omp target parallel default(private) // expected-error {{data-sharing attribute 'private' in 'default' clause requires OpenMP version 5.1 or above}} + { + ++x; + ++y; + } #endif return 0; diff --git a/clang/test/OpenMP/target_parallel_for_default_messages.cpp b/clang/test/OpenMP/target_parallel_for_default_messages.cpp index 4a3aae68e08659..681d96bd10b2c9 100644 --- a/clang/test/OpenMP/target_parallel_for_default_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp target parallel for default // expected-error {{expected '(' after 'default'}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp target parallel for default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target parallel for default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp target parallel for default(shared), default(shared) // expected-error {{directive '#pragma omp target parallel for' cannot contain more than one 'default' 
clause}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target parallel for default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); @@ -49,6 +49,11 @@ int main(int argc, char **argv) { ++x; ++y; } +#pragma omp target parallel for default(private) // expected-error {{data-sharing attribute 'private' in 'default' clause requires OpenMP version 5.1 or above}} + for (i = 0; i < argc; ++i) { + ++x; + ++y; + } #endif return 0; diff --git a/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp index 48489309ef037f..66df9b5cf048f1 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd default // expected-error {{expected '(' after 'default'}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp target parallel for simd default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target parallel for simd default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd 
default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp target parallel for simd' cannot contain more than one 'default' clause}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target parallel for simd default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (i = 0; i < argc; ++i) foo(); @@ -49,6 +49,11 @@ int main(int argc, char **argv) { ++x; ++y; } +#pragma omp target parallel for simd default(private) // expected-error {{data-sharing attribute 'private' in 'default' clause requires OpenMP version 5.1 or above}} + for (int i = 0; i < argc; i++) { + ++x; + ++y; + } #endif return 0; diff --git a/clang/test/OpenMP/target_teams_default_messages.cpp b/clang/test/OpenMP/target_teams_default_messages.cpp index 85c417f8f98531..629df8fb9240bc 100644 --- a/clang/test/OpenMP/target_teams_default_messages.cpp +++ b/clang/test/OpenMP/target_teams_default_messages.cpp @@ -16,15 +16,15 @@ static int x = 0; int main(int argc, char **argv) { #pragma omp target teams default // expected-error {{expected '(' after 'default'}} foo(); -#pragma omp target teams default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp target teams default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} foo(); -#pragma omp target teams default() // expected-error {{expected 'none', 'shared' or 
'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp target teams default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} foo(); #pragma omp target teams default (shared), default(shared) // expected-error {{directive '#pragma omp target teams' cannot contain more than one 'default' clause}} foo(); -#pragma omp target teams default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp target teams default(none) // expected-note {{explicit data sharing attribute requested here}} @@ -40,6 +40,11 @@ int main(int argc, char **argv) { ++x; ++y; } +#pragma omp target teams default(private) // expected-error {{data-sharing attribute 'private' in 'default' clause requires OpenMP version 5.1 or above}} + { + ++x; + ++y; + } #endif return 0; diff --git a/clang/test/OpenMP/target_teams_distribute_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_default_messages.cpp index a490ad61385ff4..90615c3cfb8c03 100644 --- a/clang/test/OpenMP/target_teams_distribute_default_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_default_messages.cpp @@ -16,15 +16,15 @@ static int x = 0; int main(int argc, char **argv) { #pragma omp target teams distribute default // expected-error {{expected '(' after 'default'}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp target teams distribute default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 
'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams distribute default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute' cannot contain more than one 'default' clause}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams distribute default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute default(none) // expected-note {{explicit data sharing attribute requested here}} @@ -36,6 +36,11 @@ int main(int argc, char **argv) { ++x; ++y; } +#pragma omp target teams distribute default(private) // expected-error {{data-sharing attribute 'private' in 'default' clause requires OpenMP version 5.1 or above}} + for (int i = 0; i < 200; i++) { + ++x; + ++y; + } #endif return 0; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp index 2fe7931369618c..d00668e0b8ce54 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp @@ -16,15 +16,15 @@ static int x = 0; int main(int argc, char **argv) 
{ #pragma omp target teams distribute parallel for default // expected-error {{expected '(' after 'default'}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp target teams distribute parallel for default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams distribute parallel for default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute parallel for default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute parallel for default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute parallel for' cannot contain more than one 'default' clause}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams distribute parallel for default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute parallel for default(none) // expected-note {{explicit data sharing attribute requested here}} @@ -36,6 +36,11 @@ int main(int argc, char **argv) { ++x; ++y; } +#pragma omp target teams distribute parallel for 
default(private) // expected-error {{data-sharing attribute 'private' in 'default' clause requires OpenMP version 5.1 or above}} + for (int i = 0; i < 200; i++) { + ++x; + ++y; + } #endif return 0; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp index e5ff8562225011..5eb702dc555300 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp @@ -17,10 +17,10 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for simd default // expected-error {{expected '(' after 'default'}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp target teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute parallel for simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -29,7 +29,7 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for simd default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute 
parallel for simd' cannot contain more than one 'default' clause}} for (int i=0; i<200; i++) foo(); -#pragma omp target teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp target teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target teams distribute parallel for simd default(none) // expected-note {{explicit data sharing attribute requested here}} @@ -41,6 +41,11 @@ int main(int argc, char **argv) { ++x; ++y; } +#pragma omp target teams distribute parallel for simd default(private) // expected-error {{data-sharing attribute 'private' in 'default' clause requires OpenMP version 5.1 or above}} + for (int i = 0; i < argc; ++i) { + ++x; + ++y; + } #endif return 0; diff --git a/clang/test/OpenMP/task_default_messages.cpp b/clang/test/OpenMP/task_default_messages.cpp index 8b6809ee05d560..820e593a110adf 100644 --- a/clang/test/OpenMP/task_default_messages.cpp +++ b/clang/test/OpenMP/task_default_messages.cpp @@ -15,11 +15,11 @@ static int x = 0; int main(int argc, char **argv) { #pragma omp task default // expected-error {{expected '(' after 'default'}} -#pragma omp task default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} -#pragma omp task default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp task default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp task default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} #pragma omp task default(none // expected-error 
{{expected ')'}} expected-note {{to match this '('}} #pragma omp task default(shared), default(shared) // expected-error {{directive '#pragma omp task' cannot contain more than one 'default' clause}} -#pragma omp task default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp task default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp task default(none) // expected-note {{explicit data sharing attribute requested here}} @@ -35,6 +35,11 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp task default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; } diff --git a/clang/test/OpenMP/teams_default_messages.cpp b/clang/test/OpenMP/teams_default_messages.cpp index b117ef4948a0f3..bd56ea88ca7459 100644 --- a/clang/test/OpenMP/teams_default_messages.cpp +++ b/clang/test/OpenMP/teams_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp teams default // expected-error {{expected '(' after 'default'}} foo(); #pragma omp target -#pragma omp teams default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp teams default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} foo(); #pragma omp target -#pragma omp teams default() // 
expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp target #pragma omp teams default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp teams default (shared), default(shared) // expected-error {{directive '#pragma omp teams' cannot contain more than one 'default' clause}} foo(); #pragma omp target -#pragma omp teams default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} foo(); #pragma omp target @@ -49,6 +49,12 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp target +#pragma omp teams default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; } diff --git a/clang/test/OpenMP/teams_distribute_default_messages.cpp b/clang/test/OpenMP/teams_distribute_default_messages.cpp index 1d5fd40c53a6b0..c485093f76b9f4 100644 --- a/clang/test/OpenMP/teams_distribute_default_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp teams distribute default // expected-error {{expected '(' after 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute default( // 
expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp teams distribute default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target #pragma omp teams distribute default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute' cannot contain more than one 'default' clause}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target @@ -44,6 +44,12 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp target +#pragma omp teams distribute default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + for (int i = 0; i < 200; i++) { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // 
expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp index 3a414543be806c..170087f0643886 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for default // expected-error {{expected '(' after 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp teams distribute parallel for default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute parallel for default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target #pragma omp teams distribute parallel for default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute parallel for' cannot contain more than one 'default' clause}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute parallel for default(x) // 
expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute parallel for default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target @@ -44,6 +44,12 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp target +#pragma omp teams distribute parallel for default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + for (int i = 0; i < 200; i++) { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp index ce7f35b4795926..6240f95173c42e 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp @@ -18,10 +18,10 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd default // expected-error {{expected '(' after 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) 
foo(); #pragma omp target -#pragma omp teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target #pragma omp teams distribute parallel for simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -30,7 +30,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'default' clause}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target @@ -43,6 +43,11 @@ int main(int argc, char **argv) { ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} } +#pragma omp teams distribute parallel for default(private) // expected-note 2 {{explicit data sharing attribute requested here}} + for (int i = 0; i < 200; i++) { + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + } #endif return 0; diff --git a/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp index 11f5d1cd1fc8fd..ffc103671e69fb 
100644 --- a/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp @@ -14,10 +14,10 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd default // expected-error {{expected '(' after 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp teams distribute simd default( // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute simd default() // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target #pragma omp teams distribute simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -26,7 +26,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute simd' cannot contain more than one 'default' clause}} for (int i=0; i<200; i++) foo(); #pragma omp target -#pragma omp teams distribute simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} +#pragma omp teams distribute simd default(x) // expected-error {{expected 'none', 'shared', 'private' or 'firstprivate' in OpenMP clause 'default'}} for (int i=0; i<200; i++) foo(); #pragma omp target @@ -43,5 +43,15 @@ int main(int argc, char **argv) { for (int i = 0; i < 200; i++) ++y; // 
expected-error {{variable 'y' must have explicitly specified data sharing attributes}} +#pragma omp target +#pragma omp teams distribute simd default(private) // expected-note {{explicit data sharing attribute requested here}} + for (int i = 0; i < 200; i++) + ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}} + +#pragma omp target +#pragma omp teams distribute simd default(private) // expected-note {{explicit data sharing attribute requested here}} + for (int i = 0; i < 200; i++) + ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}} + return 0; } diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp index d1c9790401f026..7811402199ccb6 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp @@ -4122,12 +4122,19 @@ void x(int x) { })"; EXPECT_TRUE(notMatchesWithOpenMP51(Source4, Matcher)); - const std::string Source5 = R"( + StringRef Source5 = R"( +void x(int x) { +#pragma omp parallel default(private) +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP51(Source5, Matcher)); + + const std::string Source6 = R"( void x(int x) { #pragma omp parallel num_threads(x) ; })"; - EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher)); + EXPECT_TRUE(notMatchesWithOpenMP(Source6, Matcher)); } TEST_P(ASTMatchersTest, OMPDefaultClause_IsSharedKind) { @@ -4168,12 +4175,19 @@ void x(int x) { })"; EXPECT_TRUE(notMatchesWithOpenMP51(Source4, Matcher)); - const std::string Source5 = R"( + StringRef Source5 = R"( +void x(int x) { +#pragma omp parallel default(private) +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP51(Source5, Matcher)); + + const std::string Source6 = R"( void x(int x) { #pragma omp parallel num_threads(x) ; })"; - EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher)); + EXPECT_TRUE(notMatchesWithOpenMP(Source6, Matcher)); } TEST(OMPDefaultClause, 
isFirstPrivateKind) { @@ -4216,10 +4230,70 @@ void x(int x) { const std::string Source5 = R"( void x(int x) { +#pragma omp parallel default(private) +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP51(Source5, Matcher)); + + const std::string Source6 = R"( +void x(int x) { #pragma omp parallel num_threads(x) ; })"; - EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher)); + EXPECT_TRUE(notMatchesWithOpenMP(Source6, Matcher)); +} + +TEST(OMPDefaultClause, istPrivateKind) { + auto Matcher = + ompExecutableDirective(hasAnyClause(ompDefaultClause(isPrivateKind()))); + + const std::string Source0 = R"( +void x() { +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP(Source0, Matcher)); + + const std::string Source1 = R"( +void x() { +#pragma omp parallel +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP(Source1, Matcher)); + + const std::string Source2 = R"( +void x() { +#pragma omp parallel default(shared) +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP(Source2, Matcher)); + + const std::string Source3 = R"( +void x() { +#pragma omp parallel default(none) +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP(Source3, Matcher)); + + const std::string Source4 = R"( +void x(int x) { +#pragma omp parallel default(firstprivate) +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP51(Source4, Matcher)); + + const std::string Source5 = R"( +void x(int x) { +#pragma omp parallel default(private) +; +})"; + EXPECT_TRUE(matchesWithOpenMP51(Source5, Matcher)); + + const std::string Source6 = R"( +void x(int x) { +#pragma omp parallel num_threads(x) +; +})"; + EXPECT_TRUE(notMatchesWithOpenMP(Source6, Matcher)); } TEST_P(ASTMatchersTest, OMPExecutableDirective_IsAllowedToContainClauseKind) { @@ -4261,24 +4335,31 @@ void x() { EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher)); StringRef Source5 = R"( +void x() { +#pragma omp parallel default(private) +; +})"; + EXPECT_TRUE(matchesWithOpenMP51(Source5, Matcher)); + + StringRef Source6 = R"( void x(int x) { #pragma omp parallel num_threads(x) ; })"; - 
EXPECT_TRUE(matchesWithOpenMP(Source5, Matcher)); + EXPECT_TRUE(matchesWithOpenMP(Source6, Matcher)); - StringRef Source6 = R"( + StringRef Source7 = R"( void x() { #pragma omp taskyield })"; - EXPECT_TRUE(notMatchesWithOpenMP(Source6, Matcher)); + EXPECT_TRUE(notMatchesWithOpenMP(Source7, Matcher)); - StringRef Source7 = R"( + StringRef Source8 = R"( void x() { #pragma omp task ; })"; - EXPECT_TRUE(matchesWithOpenMP(Source7, Matcher)); + EXPECT_TRUE(matchesWithOpenMP(Source8, Matcher)); } TEST_P(ASTMatchersTest, HasAnyBase_DirectBase) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 2f18e8c615c06e..0106fc444fac75 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -1032,6 +1032,7 @@ __OMP_CANCEL_KIND(taskgroup, 4) __OMP_DEFAULT_KIND(none) __OMP_DEFAULT_KIND(shared) +__OMP_DEFAULT_KIND(private) __OMP_DEFAULT_KIND(firstprivate) __OMP_DEFAULT_KIND(unknown) From 33b7df8c1fb6506c5f87a06ab4b6e352fb9b639d Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Thu, 19 May 2022 12:51:37 -0700 Subject: [PATCH 025/908] [mlir] Remove unused properties from the standalone example's lit configuration Since these are unused, I've removed them from the configuration, so that it can be easier to read and follow. 
Reviewed By: stellaraccident Differential Revision: https://reviews.llvm.org/D125132 --- mlir/examples/standalone/test/lit.cfg.py | 4 +-- .../standalone/test/lit.site.cfg.py.in | 28 +------------------ .../standalone/test/python/smoketest.py | 2 +- 3 files changed, 3 insertions(+), 31 deletions(-) diff --git a/mlir/examples/standalone/test/lit.cfg.py b/mlir/examples/standalone/test/lit.cfg.py index e7ae44c8758434..a6a3d2444d0cef 100644 --- a/mlir/examples/standalone/test/lit.cfg.py +++ b/mlir/examples/standalone/test/lit.cfg.py @@ -30,7 +30,6 @@ config.test_exec_root = os.path.join(config.standalone_obj_root, 'test') config.substitutions.append(('%PATH%', config.environment['PATH'])) -config.substitutions.append(('%shlibext', config.llvm_shlib_ext)) llvm_config.with_system_environment( ['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP']) @@ -54,11 +53,10 @@ 'standalone-capi-test', 'standalone-opt', 'standalone-translate', - ToolSubst('%PYTHON', config.python_executable, unresolved='ignore'), ] llvm_config.add_tool_substitutions(tools, tool_dirs) llvm_config.with_environment('PYTHONPATH', [ - os.path.join(config.mlir_binary_dir, 'python_packages', 'standalone'), + os.path.join(config.mlir_obj_dir, 'python_packages', 'standalone'), ], append_path=True) diff --git a/mlir/examples/standalone/test/lit.site.cfg.py.in b/mlir/examples/standalone/test/lit.site.cfg.py.in index 840e61210cdbc2..747f8d3d182980 100644 --- a/mlir/examples/standalone/test/lit.site.cfg.py.in +++ b/mlir/examples/standalone/test/lit.site.cfg.py.in @@ -1,36 +1,10 @@ @LIT_SITE_CFG_IN_HEADER@ -import sys - -config.host_triple = "@LLVM_HOST_TRIPLE@" -config.target_triple = "@LLVM_TARGET_TRIPLE@" -config.llvm_src_root = "@LLVM_SOURCE_DIR@" -config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") -config.llvm_lib_dir = lit_config.substitute("@LLVM_LIBS_DIR@") -config.llvm_shlib_dir = lit_config.substitute("@SHLIBDIR@") -config.llvm_shlib_ext = "@SHLIBEXT@" 
-config.llvm_exe_ext = "@EXEEXT@" config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" -config.mlir_binary_dir = "@MLIR_BINARY_DIR@" +config.mlir_obj_dir = "@MLIR_BINARY_DIR@" config.python_executable = "@Python3_EXECUTABLE@" config.enable_bindings_python = @MLIR_ENABLE_BINDINGS_PYTHON@ -config.gold_executable = "@GOLD_EXECUTABLE@" -config.ld64_executable = "@LD64_EXECUTABLE@" -config.enable_shared = @ENABLE_SHARED@ -config.enable_assertions = @ENABLE_ASSERTIONS@ -config.targets_to_build = "@TARGETS_TO_BUILD@" -config.native_target = "@LLVM_NATIVE_ARCH@" -config.llvm_bindings = "@LLVM_BINDINGS@".split(' ') -config.host_os = "@HOST_OS@" -config.host_cc = "@HOST_CC@" -config.host_cxx = "@HOST_CXX@" -config.enable_libcxx = "@LLVM_ENABLE_LIBCXX@" -# Note: ldflags can contain double-quoted paths, so must use single quotes here. -config.host_ldflags = '@HOST_LDFLAGS@' -config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" -config.llvm_host_triple = '@LLVM_HOST_TRIPLE@' -config.host_arch = "@HOST_ARCH@" config.standalone_src_root = "@CMAKE_SOURCE_DIR@" config.standalone_obj_root = "@CMAKE_BINARY_DIR@" diff --git a/mlir/examples/standalone/test/python/smoketest.py b/mlir/examples/standalone/test/python/smoketest.py index 67214da0d43a8f..0d8f41c27e8efa 100644 --- a/mlir/examples/standalone/test/python/smoketest.py +++ b/mlir/examples/standalone/test/python/smoketest.py @@ -1,4 +1,4 @@ -# RUN: %PYTHON %s | FileCheck %s +# RUN: %python %s | FileCheck %s from mlir_standalone.ir import * from mlir_standalone.dialects import ( From 86b55edab6879f9e4c80a374629f548c6351e2c0 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 19 May 2022 13:43:43 +0100 Subject: [PATCH 026/908] [AMDGPU] Mark s_getreg as having side effects instead of reading memory s_getreg does not interact with anything else that is modelled as a memory access either in IR or MachineIR. 
Differential Revision: https://reviews.llvm.org/D125968 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 ++-- llvm/lib/Target/AMDGPU/SOPInstructions.td | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f5ccdeaefb36d8..2777bffad0e2de 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1379,11 +1379,11 @@ def int_amdgcn_s_setprio : Intrinsic<[], [llvm_i16_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; +// This is IntrHasSideEffects so it can be used to read cycle counters. def int_amdgcn_s_getreg : GCCBuiltin<"__builtin_amdgcn_s_getreg">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, - IntrWillReturn, ImmArg>] + [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg>] >; // Note this can be used to set FP environment properties that are diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 565dc665f56fc5..62eaad56412521 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -873,9 +873,7 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo < "$sdst, $simm16" >; -let mayLoad = 1 in { -// s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow -// its use in the readcyclecounter selection. +// This is hasSideEffects to allow its use in readcyclecounter selection. // FIXME: Need to truncate immediate to 16-bits. 
def S_GETREG_B32 : SOPK_Pseudo < "s_getreg_b32", @@ -885,7 +883,6 @@ def S_GETREG_B32 : SOPK_Pseudo < let SOPKZext = 1; let hasSideEffects = 1; } -} // End mayLoad = 1 let Defs = [MODE], Uses = [MODE] in { From 9ece0518471a28348324d05111f22f8143475715 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 19 May 2022 16:52:41 +0100 Subject: [PATCH 027/908] [AMDGPU] Mark s_get_waveid_in_workgroup as not reading memory It is already marked as having side effects, at least in MIR. It does not interact with anything else that is modelled as a memory access either in IR or MachineIR. Differential Revision: https://reviews.llvm.org/D125985 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/SMInstructions.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 2777bffad0e2de..59444c2cb423be 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1860,7 +1860,7 @@ def int_amdgcn_mov_dpp8 : def int_amdgcn_s_get_waveid_in_workgroup : GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">, Intrinsic<[llvm_i32_ty], [], - [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn]>; + [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; class AMDGPUGlobalAtomicRtn : Intrinsic < [vt], diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 1765355510eb57..d21f3d9a6fccc7 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -235,7 +235,7 @@ class SM_WaveId_Pseudo : SM_Pseudo< " $sdst", [(set i32:$sdst, (node))]> { let hasSideEffects = 1; let mayStore = 0; - let mayLoad = 1; + let mayLoad = 0; let has_sbase = 0; } From 3b13f8805c0225046558b7c6657860f39f4e78f9 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 19 May 2022 12:23:52 -0700 Subject: [PATCH 028/908] [mlir][sparse] fix unsigned comparison bug in assert 
Reviewed By: bixia, wrengr Differential Revision: https://reviews.llvm.org/D126007 --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 24d77a6c3ec3fd..21ad81525a1e1f 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -885,7 +885,7 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { const uint64_t pstop = static_cast(pointers_d[parentPos + 1]); // Loop-invariant code for looking up the `d`-level coordinates/indices. const std::vector &indices_d = src.indices[d]; - assert(pstop - 1 < indices_d.size() && "Index position is out of bounds"); + assert(pstop <= indices_d.size() && "Index position is out of bounds"); uint64_t &cursor_reord_d = this->cursor[this->reord[d]]; for (uint64_t pos = pstart; pos < pstop; pos++) { cursor_reord_d = static_cast(indices_d[pos]); From c153c61fadf9499062981658113335bcd43c33fe Mon Sep 17 00:00:00 2001 From: Nicolas Capens Date: Thu, 19 May 2022 13:46:49 -0700 Subject: [PATCH 029/908] Handle instrumentation of scalar single-precision (_ss) intrinsics Instrumentation of scalar double-precision intrinsics such as x86_sse41_round_sd was already handled by https://reviews.llvm.org/D82398, but not their single-precision counterparts. 
https://issuetracker.google.com/172238865 Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D124871 --- .../Instrumentation/MemorySanitizer.cpp | 35 ++++++++++++------ .../MemorySanitizer/sse-intrinsics-x86.ll | 10 +++-- .../MemorySanitizer/sse41-intrinsics-x86.ll | 37 +++++++++---------- 3 files changed, 48 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 79adb5639dfbf9..bc1a340fdc7e3d 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3200,27 +3200,37 @@ struct MemorySanitizerVisitor : public InstVisitor { SOC.Done(&I); } - // Instrument _mm_*_sd intrinsics - void handleUnarySdIntrinsic(IntrinsicInst &I) { + // Instrument _mm_*_sd|ss intrinsics + void handleUnarySdSsIntrinsic(IntrinsicInst &I) { IRBuilder<> IRB(&I); + unsigned Width = + cast(I.getArgOperand(0)->getType())->getNumElements(); Value *First = getShadow(&I, 0); Value *Second = getShadow(&I, 1); - // High word of first operand, low word of second - Value *Shadow = - IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef({2, 1})); + // First element of second operand, remaining elements of first operand + SmallVector Mask; + Mask.push_back(Width); + for (unsigned i = 1; i < Width; i++) + Mask.push_back(i); + Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask); setShadow(&I, Shadow); setOriginForNaryOp(I); } - void handleBinarySdIntrinsic(IntrinsicInst &I) { + void handleBinarySdSsIntrinsic(IntrinsicInst &I) { IRBuilder<> IRB(&I); + unsigned Width = + cast(I.getArgOperand(0)->getType())->getNumElements(); Value *First = getShadow(&I, 0); Value *Second = getShadow(&I, 1); Value *OrShadow = IRB.CreateOr(First, Second); - // High word of first operand, low word of both OR'd together - Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, - llvm::makeArrayRef({2, 1})); + // 
First element of both OR'd together, remaining elements of first operand + SmallVector Mask; + Mask.push_back(Width); + for (unsigned i = 1; i < Width; i++) + Mask.push_back(i); + Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask); setShadow(&I, Shadow); setOriginForNaryOp(I); @@ -3495,11 +3505,14 @@ struct MemorySanitizerVisitor : public InstVisitor { break; case Intrinsic::x86_sse41_round_sd: - handleUnarySdIntrinsic(I); + case Intrinsic::x86_sse41_round_ss: + handleUnarySdSsIntrinsic(I); break; case Intrinsic::x86_sse2_max_sd: + case Intrinsic::x86_sse_max_ss: case Intrinsic::x86_sse2_min_sd: - handleBinarySdIntrinsic(I); + case Intrinsic::x86_sse_min_ss: + handleBinarySdSsIntrinsic(I); break; case Intrinsic::fshl: diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll index 2cfaaccd32eb5b..8972027e40c23f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll @@ -249,9 +249,10 @@ define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) #0 { ; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 ; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) -; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* 
bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] @@ -281,9 +282,10 @@ define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) #0 { ; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 ; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) -; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll index 0fd304e3587443..46724491940e21 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll @@ -432,30 +432,29 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double> } -define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) #0 { -; CHECK-LABEL: @test_x86_sse41_round_ss( -; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* 
@__msan_param_tls to <4 x i32>*), align 8 -; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, <4 x float>* %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_round_ss_load( +; CHECK-DAG: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] -; CHECK-NEXT: unreachable -; CHECK: 5: -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] ; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i32 7) -; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK: 4: +; CHECK-NEXT: [[A1B:%.*]] = load <4 x float>, <4 x float>* [[A1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <4 x float>* [[A1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = 
inttoptr i64 [[TMP6]] to <4 x i32>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[_MSLD]], <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1B]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; - %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + %a1b = load <4 x float>, <4 x float>* %a1 + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1b, i32 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone From 4202d69d9efe450f6f9f09ef58fcc008b707d24e Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Thu, 19 May 2022 20:23:04 +0000 Subject: [PATCH 030/908] [Flang][OpenMP] Upstream the lowering of the parallel do combined construct When parallel is used in a combined construct, then use a separate function to create the parallel operation. It handles the parallel specific clauses and leaves the rest for handling at the inner operations. 
Reviewed By: peixin, shraiysh Differential Revision: https://reviews.llvm.org/D125465 Co-authored-by: Sourabh Singh Tomar Co-authored-by: Eric Schweitz Co-authored-by: Valentin Clement Co-authored-by: Nimish Mishra --- flang/lib/Lower/OpenMP.cpp | 117 ++++++++++++++---- .../Fir/convert-to-llvm-openmp-and-fir.fir | 33 +++++ .../test/Lower/OpenMP/omp-parallel-wsloop.f90 | 96 ++++++++++++++ flang/test/Lower/OpenMP/parallel-sections.f90 | 4 +- 4 files changed, 221 insertions(+), 29 deletions(-) create mode 100644 flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 571cf5c85a87c8..35e0efb7ecca77 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter, standaloneConstruct.u); } +static omp::ClauseProcBindKindAttr genProcBindKindAttr( + fir::FirOpBuilder &firOpBuilder, + const Fortran::parser::OmpClause::ProcBind *procBindClause) { + omp::ClauseProcBindKind pbKind; + switch (procBindClause->v.v) { + case Fortran::parser::OmpProcBindClause::Type::Master: + pbKind = omp::ClauseProcBindKind::Master; + break; + case Fortran::parser::OmpProcBindClause::Type::Close: + pbKind = omp::ClauseProcBindKind::Close; + break; + case Fortran::parser::OmpProcBindClause::Type::Spread: + pbKind = omp::ClauseProcBindKind::Spread; + break; + case Fortran::parser::OmpProcBindClause::Type::Primary: + pbKind = omp::ClauseProcBindKind::Primary; + break; + } + return omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind); +} + +/* When parallel is used in a combined construct, then use this function to + * create the parallel operation. It handles the parallel specific clauses + * and leaves the rest for handling at the inner operations. 
+ * TODO: Refactor clause handling + */ +template +static void +createCombinedParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Directive &directive) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::Location currentLocation = converter.getCurrentLocation(); + Fortran::lower::StatementContext stmtCtx; + llvm::ArrayRef argTy; + mlir::Value ifClauseOperand, numThreadsClauseOperand; + SmallVector allocatorOperands, allocateOperands; + mlir::omp::ClauseProcBindKindAttr procBindKindAttr; + const auto &opClauseList = + std::get(directive.t); + // TODO: Handle the following clauses + // 1. default + // 2. copyin + // Note: rest of the clauses are handled when the inner operation is created + for (const Fortran::parser::OmpClause &clause : opClauseList.v) { + if (const auto &ifClause = + std::get_if(&clause.u)) { + auto &expr = std::get(ifClause->v.t); + mlir::Value ifVal = fir::getBase( + converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx)); + ifClauseOperand = firOpBuilder.createConvert( + currentLocation, firOpBuilder.getI1Type(), ifVal); + } else if (const auto &numThreadsClause = + std::get_if( + &clause.u)) { + numThreadsClauseOperand = fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx)); + } else if (const auto &procBindClause = + std::get_if( + &clause.u)) { + procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); + } + } + // Create and insert the operation. 
+ auto parallelOp = firOpBuilder.create( + currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand, + allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(), + /*reductions=*/nullptr, procBindKindAttr); + + createBodyOfOp(parallelOp, converter, currentLocation, + &opClauseList, /*iv=*/{}, + /*isCombined=*/true); +} + static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, @@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, } else if (const auto &procBindClause = std::get_if( &clause.u)) { - omp::ClauseProcBindKind pbKind; - switch (procBindClause->v.v) { - case Fortran::parser::OmpProcBindClause::Type::Master: - pbKind = omp::ClauseProcBindKind::Master; - break; - case Fortran::parser::OmpProcBindClause::Type::Close: - pbKind = omp::ClauseProcBindKind::Close; - break; - case Fortran::parser::OmpProcBindClause::Type::Spread: - pbKind = omp::ClauseProcBindKind::Spread; - break; - case Fortran::parser::OmpProcBindClause::Type::Primary: - pbKind = omp::ClauseProcBindKind::Primary; - break; - } - procBindKindAttr = - omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind); + procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); } else if (const auto &allocateClause = std::get_if( &clause.u)) { @@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, noWaitClauseOperand, orderedClauseOperand, orderClauseOperand; const auto &wsLoopOpClauseList = std::get( std::get(loopConstruct.t).t); - if (llvm::omp::OMPD_do != + + const auto ompDirective = std::get( std::get(loopConstruct.t).t) - .v) { - TODO(converter.getCurrentLocation(), "Combined worksharing loop construct"); + .v; + if (llvm::omp::OMPD_parallel_do == ompDirective) { + createCombinedParallelOp( + converter, eval, + std::get(loopConstruct.t)); + } else if (llvm::omp::OMPD_do != ompDirective) { + TODO(converter.getCurrentLocation(), "Construct enclosing do loop"); } 
int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList); @@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Parallel Sections Construct if (dir == llvm::omp::Directive::OMPD_parallel_sections) { - auto parallelOp = firOpBuilder.create( - currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr, - allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(), - /*reductions=*/nullptr, /*proc_bind_val*/ nullptr); - createBodyOfOp(parallelOp, converter, currentLocation); + createCombinedParallelOp( + converter, eval, + std::get( + sectionsConstruct.t)); auto sectionsOp = firOpBuilder.create( currentLocation, /*reduction_vars*/ ValueRange(), - /*reductions=*/nullptr, /*allocate_vars*/ ValueRange(), - /*allocators_vars*/ ValueRange(), /*nowait=*/nullptr); + /*reductions=*/nullptr, allocateOperands, allocatorOperands, + /*nowait=*/nullptr); createBodyOfOp(sectionsOp, converter, currentLocation); // Sections Construct diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 6d2cf700d81eab..601cf70815cf0d 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref> {fir.bindc_name = "arr"}) { + %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"} + omp.parallel { + %c1 = arith.constant 1 : i32 + %c50 = arith.constant 50 : i32 + omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) { + %1 = fir.convert %indx : (i32) -> i64 + %c1_i64 = arith.constant 1 : i64 + %2 = arith.subi %1, %c1_i64 : i64 + %3 = fir.coordinate_of %arr, %2 : (!fir.box>, i64) -> !fir.ref + fir.store %indx to %3 : !fir.ref + omp.yield + } + omp.terminator + } + return +} + +// Check only for the structure of the OpenMP portion and the feasibility of the conversion +// CHECK-LABEL: @_QPsb +// CHECK-SAME: 
%{{.*}}: !llvm.ptr> {fir.bindc_name = "arr"} +// CHECK: omp.parallel { +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32 +// CHECK: omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) { +// CHECK: llvm.store %[[INDX]], %{{.*}} : !llvm.ptr +// CHECK: omp.yield +// CHECK: omp.terminator +// CHECK: llvm.return diff --git a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 new file mode 100644 index 00000000000000..80bb1ca19daebe --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 @@ -0,0 +1,96 @@ +! This test checks lowering of OpenMP DO Directive (Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s + +! CHECK-LABEL: func @_QPsimple_parallel_do() +subroutine simple_parallel_do + integer :: i + ! CHECK: omp.parallel + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + ! CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine + +! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses +! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"} +subroutine parallel_do_with_parallel_clauses(cond, nt) + logical :: cond + integer :: nt + integer :: i + ! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref> + ! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1 + ! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref + ! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close) + ! 
CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + ! CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine + +! CHECK-LABEL: func @_QPparallel_do_with_clauses +! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"} +subroutine parallel_do_with_clauses(nt) + integer :: nt + integer :: i + ! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref + ! CHECK: omp.parallel num_threads(%[[NT]] : i32) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + ! CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine + +! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses +! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref {fir.bindc_name = "nt"} +subroutine parallel_do_with_privatisation_clauses(cond,nt) + logical :: cond + integer :: nt + integer :: i + ! CHECK: omp.parallel + ! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} + ! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} + ! 
CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref + ! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) + do i=1, 9 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref, i32) -> i1 + ! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref> + ! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1 + ! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref, i1) -> i1 + ! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref, i32) -> i1 + print*, i, cond, nt + end do + ! CHECK: omp.yield + ! 
CHECK: omp.terminator + !$OMP END PARALLEL DO +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-sections.f90 b/flang/test/Lower/OpenMP/parallel-sections.f90 index c88d60cc8f88b5..e9759072c52347 100644 --- a/flang/test/Lower/OpenMP/parallel-sections.f90 +++ b/flang/test/Lower/OpenMP/parallel-sections.f90 @@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y) integer, intent(inout) :: x, y !FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32 !LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32 - !OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref) { - !OMPDialect: omp.sections { + !OMPDialect: omp.parallel { + !OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref) { !$omp parallel sections allocate(omp_high_bw_mem_alloc: x) !OMPDialect: omp.section { !$omp section From 221b7a45833dcee19964924ec34fd2173c2b68a4 Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Thu, 19 May 2022 14:17:36 -0700 Subject: [PATCH 031/908] [bazel] Add lib/Basic/BuiltinTargetFeatures.h to clang:basic `hdrs`. This header is included by clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp so it needs to be exposed here to make it visible. 
--- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index eb900106c21e6d..b9e8df560d294e 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -403,6 +403,7 @@ cc_library( ]), hdrs = glob([ "include/clang/Basic/*.h", + "lib/Basic/BuiltinTargetFeatures.h", ]), copts = [ "-DHAVE_VCS_VERSION_INC", From 6990e7477d24ff585ae86549f5280f0be65422a6 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Thu, 19 May 2022 20:25:47 +0000 Subject: [PATCH 032/908] [ConstantRange] Improve the implementation of binaryOr This diff adjusts binaryOr to take advantage of the analysis based on KnownBits. Differential revision: https://reviews.llvm.org/D125933 Test plan: 1/ ninja check-llvm 2/ ninja check-llvm-unit --- llvm/lib/IR/ConstantRange.cpp | 15 +++++----- llvm/unittests/IR/ConstantRangeTest.cpp | 38 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index c3915cee00476d..be6386c571b596 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -1410,14 +1410,13 @@ ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); - // Use APInt's implementation of OR for single element ranges. - if (isSingleElement() && Other.isSingleElement()) - return {*getSingleElement() | *Other.getSingleElement()}; - - // TODO: replace this with something less conservative - - APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); - return getNonEmpty(std::move(umax), APInt::getZero(getBitWidth())); + ConstantRange KnownBitsRange = + fromKnownBits(toKnownBits() | Other.toKnownBits(), false); + // Upper wrapped range. 
+ ConstantRange UMaxUMinRange = + getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), + APInt::getZero(getBitWidth())); + return KnownBitsRange.intersectWith(UMaxUMinRange); } ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const { diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index ffd0f004c7575d..8a543b1f7df41f 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -2560,6 +2560,44 @@ TEST_F(ConstantRangeTest, binaryAnd) { CheckSingleElementsOnly); } +TEST_F(ConstantRangeTest, binaryOr) { + // Single element ranges. + ConstantRange R16(APInt(8, 16)); + ConstantRange R20(APInt(8, 20)); + EXPECT_EQ(*R16.binaryOr(R16).getSingleElement(), APInt(8, 16)); + EXPECT_EQ(*R16.binaryOr(R20).getSingleElement(), APInt(8, 16 | 20)); + + ConstantRange R16_32(APInt(8, 16), APInt(8, 32)); + // 'Or' with a high bits mask. + // KnownBits estimate is important, otherwise the maximum included element + // would be 2^8 - 1. + ConstantRange R32(APInt(8, 32)); + ConstantRange R48_64(APInt(8, 48), APInt(8, 64)); + EXPECT_EQ(R16_32.binaryOr(R32), R48_64); + EXPECT_EQ(R32.binaryOr(R16_32), R48_64); + // 'Or' with a low bits mask. + ConstantRange R4(APInt(8, 4)); + ConstantRange R0_16(APInt(8, 0), APInt(8, 16)); + ConstantRange R4_16(APInt(8, 4), APInt(8, 16)); + EXPECT_EQ(R0_16.binaryOr(R4), R4_16); + EXPECT_EQ(R4.binaryOr(R0_16), R4_16); + + // Ranges with more than one element. Handled conservatively for now. + // UMaxUMin estimate is important, otherwise the lower bound would be zero. 
+ ConstantRange R0_64(APInt(8, 0), APInt(8, 64)); + ConstantRange R5_32(APInt(8, 5), APInt(8, 32)); + ConstantRange R5_64(APInt(8, 5), APInt(8, 64)); + EXPECT_EQ(R0_64.binaryOr(R5_32), R5_64); + EXPECT_EQ(R5_32.binaryOr(R0_64), R5_64); + + TestBinaryOpExhaustive( + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.binaryOr(CR2); + }, + [](const APInt &N1, const APInt &N2) { return N1 | N2; }, PreferSmallest, + CheckSingleElementsOnly); +} + TEST_F(ConstantRangeTest, binaryXor) { // Single element ranges. ConstantRange R16(APInt(8, 16)); From dfe513ae1bb6e788ead93b850d80d77d54cf29d3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 19 May 2022 14:32:15 -0700 Subject: [PATCH 033/908] Revert "[RISCV] Use selectShiftMaskXLen ComplexPattern for isel of rotates." This reverts commit 86f7d7074a0129955aa2f5c82fe8c383eb17a35a. The test cases added for this exposed an pre-existing bug that is failing the expensive checks bot. Reverting so I can revert that patch. --- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 8 ++++---- llvm/test/CodeGen/RISCV/rotl-rotr.ll | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index a1e795819b1a84..b0e82e45e9eced 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -817,8 +817,8 @@ def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { -def : PatGprGpr, ROL>; -def : PatGprGpr, ROR>; +def : PatGprGpr; +def : PatGprGpr; def : PatGprImm; // There's no encoding for roli in the the 'B' extension as it can be @@ -828,8 +828,8 @@ def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt), } // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { -def : PatGprGpr, ROLW>; -def : PatGprGpr, RORW>; +def : PatGprGpr; +def : PatGprGpr; def 
: PatGprImm; def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index 2026a756542ade..a7f7c38409a5f2 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -578,6 +578,7 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; ; RV32ZBB-LABEL: rotl_32_mask_multiple: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi a2, a2, 31 ; RV32ZBB-NEXT: rol a0, a0, a2 ; RV32ZBB-NEXT: rol a1, a1, a2 ; RV32ZBB-NEXT: add a0, a0, a1 @@ -585,6 +586,7 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; ; RV64ZBB-LABEL: rotl_32_mask_multiple: ; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a2, a2, 31 ; RV64ZBB-NEXT: rolw a0, a0, a2 ; RV64ZBB-NEXT: rolw a1, a1, a2 ; RV64ZBB-NEXT: addw a0, a0, a1 @@ -702,6 +704,7 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; ; RV64ZBB-LABEL: rotl_64_mask_multiple: ; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a2, a2, 63 ; RV64ZBB-NEXT: rol a0, a0, a2 ; RV64ZBB-NEXT: rol a1, a1, a2 ; RV64ZBB-NEXT: add a0, a0, a1 @@ -741,6 +744,7 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; ; RV32ZBB-LABEL: rotr_32_mask_multiple: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi a2, a2, 31 ; RV32ZBB-NEXT: ror a0, a0, a2 ; RV32ZBB-NEXT: ror a1, a1, a2 ; RV32ZBB-NEXT: add a0, a0, a1 @@ -748,6 +752,7 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; ; RV64ZBB-LABEL: rotr_32_mask_multiple: ; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a2, a2, 31 ; RV64ZBB-NEXT: rorw a0, a0, a2 ; RV64ZBB-NEXT: rorw a1, a1, a2 ; RV64ZBB-NEXT: addw a0, a0, a1 @@ -863,6 +868,7 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; ; RV64ZBB-LABEL: rotr_64_mask_multiple: ; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a2, a2, 63 ; RV64ZBB-NEXT: ror a0, a0, a2 ; RV64ZBB-NEXT: 
ror a1, a1, a2 ; RV64ZBB-NEXT: add a0, a0, a1 From 35564fff67bbb94f84e10afd20a653dfc4b787ef Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 19 May 2022 14:33:02 -0700 Subject: [PATCH 034/908] Revert "[RISCV] Add test cases showing failure to remove mask on rotate amounts." This reverts commit e2f410feeab27a8bb2c015fc02bb8527702e401f. This exposes a pre-existing bug in type legalization that is failing expensive checks. --- llvm/test/CodeGen/RISCV/rotl-rotr.ll | 330 --------------------------- 1 file changed, 330 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index a7f7c38409a5f2..fd5437e3cb1474 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -550,333 +550,3 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { %d = or i64 %b, %c ret i64 %d } - -define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { -; RV32I-LABEL: rotl_32_mask_multiple: -; RV32I: # %bb.0: -; RV32I-NEXT: sll a3, a0, a2 -; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: srl a0, a0, a4 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: sll a2, a1, a2 -; RV32I-NEXT: srl a1, a1, a4 -; RV32I-NEXT: or a1, a2, a1 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: rotl_32_mask_multiple: -; RV64I: # %bb.0: -; RV64I-NEXT: sllw a3, a0, a2 -; RV64I-NEXT: negw a4, a2 -; RV64I-NEXT: srlw a0, a0, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: sllw a2, a1, a2 -; RV64I-NEXT: srlw a1, a1, a4 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: addw a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: rotl_32_mask_multiple: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a2, a2, 31 -; RV32ZBB-NEXT: rol a0, a0, a2 -; RV32ZBB-NEXT: rol a1, a1, a2 -; RV32ZBB-NEXT: add a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: rotl_32_mask_multiple: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andi a2, a2, 31 -; RV64ZBB-NEXT: rolw a0, a0, a2 -; RV64ZBB-NEXT: rolw a1, a1, a2 -; RV64ZBB-NEXT: 
addw a0, a0, a1 -; RV64ZBB-NEXT: ret - %maskedamt = and i32 %amt, 31 - %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) - %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt) - %3 = add i32 %1, %2 - ret i32 %3 -} -declare i32 @llvm.fshl.i32(i32, i32, i32) - -define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { -; RV32I-LABEL: rotl_64_mask_multiple: -; RV32I: # %bb.0: -; RV32I-NEXT: slli a5, a4, 26 -; RV32I-NEXT: srli a5, a5, 31 -; RV32I-NEXT: mv a6, a1 -; RV32I-NEXT: bnez a5, .LBB9_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a6, a0 -; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: bnez a5, .LBB9_4 -; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB9_4: -; RV32I-NEXT: sll a7, a6, a4 -; RV32I-NEXT: srli t0, a0, 1 -; RV32I-NEXT: not a1, a4 -; RV32I-NEXT: srl t0, t0, a1 -; RV32I-NEXT: sll t1, a0, a4 -; RV32I-NEXT: srli a0, a6, 1 -; RV32I-NEXT: srl t2, a0, a1 -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: bnez a5, .LBB9_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: .LBB9_6: -; RV32I-NEXT: or a6, a7, t0 -; RV32I-NEXT: or a7, t1, t2 -; RV32I-NEXT: sll t0, a0, a4 -; RV32I-NEXT: bnez a5, .LBB9_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: .LBB9_8: -; RV32I-NEXT: srli a3, a2, 1 -; RV32I-NEXT: srl a3, a3, a1 -; RV32I-NEXT: or a3, t0, a3 -; RV32I-NEXT: sll a2, a2, a4 -; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: add a1, a7, a0 -; RV32I-NEXT: add a0, a6, a3 -; RV32I-NEXT: sltu a2, a0, a6 -; RV32I-NEXT: add a1, a1, a2 -; RV32I-NEXT: ret -; -; RV64I-LABEL: rotl_64_mask_multiple: -; RV64I: # %bb.0: -; RV64I-NEXT: sll a3, a0, a2 -; RV64I-NEXT: neg a4, a2 -; RV64I-NEXT: srl a0, a0, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: sll a2, a1, a2 -; RV64I-NEXT: srl a1, a1, a4 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: rotl_64_mask_multiple: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: slli a5, 
a4, 26 -; RV32ZBB-NEXT: srli a5, a5, 31 -; RV32ZBB-NEXT: mv a6, a1 -; RV32ZBB-NEXT: bnez a5, .LBB9_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: mv a6, a0 -; RV32ZBB-NEXT: .LBB9_2: -; RV32ZBB-NEXT: bnez a5, .LBB9_4 -; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: mv a0, a1 -; RV32ZBB-NEXT: .LBB9_4: -; RV32ZBB-NEXT: sll a7, a6, a4 -; RV32ZBB-NEXT: srli t0, a0, 1 -; RV32ZBB-NEXT: not a1, a4 -; RV32ZBB-NEXT: srl t0, t0, a1 -; RV32ZBB-NEXT: sll t1, a0, a4 -; RV32ZBB-NEXT: srli a0, a6, 1 -; RV32ZBB-NEXT: srl t2, a0, a1 -; RV32ZBB-NEXT: mv a0, a3 -; RV32ZBB-NEXT: bnez a5, .LBB9_6 -; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: .LBB9_6: -; RV32ZBB-NEXT: or a6, a7, t0 -; RV32ZBB-NEXT: or a7, t1, t2 -; RV32ZBB-NEXT: sll t0, a0, a4 -; RV32ZBB-NEXT: bnez a5, .LBB9_8 -; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: mv a2, a3 -; RV32ZBB-NEXT: .LBB9_8: -; RV32ZBB-NEXT: srli a3, a2, 1 -; RV32ZBB-NEXT: srl a3, a3, a1 -; RV32ZBB-NEXT: or a3, t0, a3 -; RV32ZBB-NEXT: sll a2, a2, a4 -; RV32ZBB-NEXT: srli a0, a0, 1 -; RV32ZBB-NEXT: srl a0, a0, a1 -; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: add a1, a7, a0 -; RV32ZBB-NEXT: add a0, a6, a3 -; RV32ZBB-NEXT: sltu a2, a0, a6 -; RV32ZBB-NEXT: add a1, a1, a2 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: rotl_64_mask_multiple: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andi a2, a2, 63 -; RV64ZBB-NEXT: rol a0, a0, a2 -; RV64ZBB-NEXT: rol a1, a1, a2 -; RV64ZBB-NEXT: add a0, a0, a1 -; RV64ZBB-NEXT: ret - %maskedamt = and i64 %amt, 63 - %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt) - %2 = tail call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 %maskedamt) - %3 = add i64 %1, %2 - ret i64 %3 -} -declare i64 @llvm.fshl.i64(i64, i64, i64) - -define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { -; RV32I-LABEL: rotr_32_mask_multiple: -; RV32I: # %bb.0: -; RV32I-NEXT: srl a3, a0, a2 -; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: sll a0, a0, a4 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: srl a2, 
a1, a2 -; RV32I-NEXT: sll a1, a1, a4 -; RV32I-NEXT: or a1, a2, a1 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: rotr_32_mask_multiple: -; RV64I: # %bb.0: -; RV64I-NEXT: srlw a3, a0, a2 -; RV64I-NEXT: negw a4, a2 -; RV64I-NEXT: sllw a0, a0, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: srlw a2, a1, a2 -; RV64I-NEXT: sllw a1, a1, a4 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: addw a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: rotr_32_mask_multiple: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a2, a2, 31 -; RV32ZBB-NEXT: ror a0, a0, a2 -; RV32ZBB-NEXT: ror a1, a1, a2 -; RV32ZBB-NEXT: add a0, a0, a1 -; RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: rotr_32_mask_multiple: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andi a2, a2, 31 -; RV64ZBB-NEXT: rorw a0, a0, a2 -; RV64ZBB-NEXT: rorw a1, a1, a2 -; RV64ZBB-NEXT: addw a0, a0, a1 -; RV64ZBB-NEXT: ret - %maskedamt = and i32 %amt, 31 - %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt) - %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt) - %3 = add i32 %1, %2 - ret i32 %3 -} -declare i32 @llvm.fshr.i32(i32, i32, i32) - -define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { -; RV32I-LABEL: rotr_64_mask_multiple: -; RV32I: # %bb.0: -; RV32I-NEXT: andi a5, a4, 32 -; RV32I-NEXT: mv a6, a0 -; RV32I-NEXT: beqz a5, .LBB11_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a6, a1 -; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: beqz a5, .LBB11_4 -; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: srl a7, a6, a4 -; RV32I-NEXT: slli t0, a1, 1 -; RV32I-NEXT: not a0, a4 -; RV32I-NEXT: sll t0, t0, a0 -; RV32I-NEXT: srl t1, a1, a4 -; RV32I-NEXT: slli a1, a6, 1 -; RV32I-NEXT: sll t2, a1, a0 -; RV32I-NEXT: mv a6, a2 -; RV32I-NEXT: beqz a5, .LBB11_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: or a1, t0, a7 -; RV32I-NEXT: or a7, t2, t1 -; RV32I-NEXT: srl t0, a6, a4 -; RV32I-NEXT: beqz a5, .LBB11_8 -; RV32I-NEXT: # 
%bb.7: -; RV32I-NEXT: mv a3, a2 -; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: slli a2, a3, 1 -; RV32I-NEXT: sll a2, a2, a0 -; RV32I-NEXT: or a2, a2, t0 -; RV32I-NEXT: srl a3, a3, a4 -; RV32I-NEXT: slli a4, a6, 1 -; RV32I-NEXT: sll a0, a4, a0 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: add a3, a7, a0 -; RV32I-NEXT: add a0, a1, a2 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a1, a3, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: rotr_64_mask_multiple: -; RV64I: # %bb.0: -; RV64I-NEXT: srl a3, a0, a2 -; RV64I-NEXT: neg a4, a2 -; RV64I-NEXT: sll a0, a0, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: srl a2, a1, a2 -; RV64I-NEXT: sll a1, a1, a4 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: ret -; -; RV32ZBB-LABEL: rotr_64_mask_multiple: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a5, a4, 32 -; RV32ZBB-NEXT: mv a6, a0 -; RV32ZBB-NEXT: beqz a5, .LBB11_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: mv a6, a1 -; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: beqz a5, .LBB11_4 -; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: mv a1, a0 -; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: srl a7, a6, a4 -; RV32ZBB-NEXT: slli t0, a1, 1 -; RV32ZBB-NEXT: not a0, a4 -; RV32ZBB-NEXT: sll t0, t0, a0 -; RV32ZBB-NEXT: srl t1, a1, a4 -; RV32ZBB-NEXT: slli a1, a6, 1 -; RV32ZBB-NEXT: sll t2, a1, a0 -; RV32ZBB-NEXT: mv a6, a2 -; RV32ZBB-NEXT: beqz a5, .LBB11_6 -; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv a6, a3 -; RV32ZBB-NEXT: .LBB11_6: -; RV32ZBB-NEXT: or a1, t0, a7 -; RV32ZBB-NEXT: or a7, t2, t1 -; RV32ZBB-NEXT: srl t0, a6, a4 -; RV32ZBB-NEXT: beqz a5, .LBB11_8 -; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: mv a3, a2 -; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: slli a2, a3, 1 -; RV32ZBB-NEXT: sll a2, a2, a0 -; RV32ZBB-NEXT: or a2, a2, t0 -; RV32ZBB-NEXT: srl a3, a3, a4 -; RV32ZBB-NEXT: slli a4, a6, 1 -; RV32ZBB-NEXT: sll a0, a4, a0 -; RV32ZBB-NEXT: or a0, a0, a3 -; RV32ZBB-NEXT: add a3, a7, a0 -; RV32ZBB-NEXT: add a0, a1, a2 -; RV32ZBB-NEXT: sltu a1, a0, a1 -; RV32ZBB-NEXT: add a1, a3, a1 -; 
RV32ZBB-NEXT: ret -; -; RV64ZBB-LABEL: rotr_64_mask_multiple: -; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andi a2, a2, 63 -; RV64ZBB-NEXT: ror a0, a0, a2 -; RV64ZBB-NEXT: ror a1, a1, a2 -; RV64ZBB-NEXT: add a0, a0, a1 -; RV64ZBB-NEXT: ret - %maskedamt = and i64 %amt, 63 - %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt) - %2 = tail call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 %maskedamt) - %3 = add i64 %1, %2 - ret i64 %3 -} -declare i64 @llvm.fshr.i64(i64, i64, i64) From 027499a82434ea7a784b2e5a0227540f00fbc307 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 19 May 2022 14:47:04 -0700 Subject: [PATCH 035/908] [lldb/test] Fix PExpect.launch issue when disabling color support This patch should fix a bug in PExpect.launch that happened when color support is not enabled. In that case, we need to add the `--no-use-colors` flag to lldb's launch argument list. However, previously, each character to the string was appended separately to the `args` list. This patch solves that by adding the whole string to the list. This should fix the TestIOHandlerResize failure on GreenDragon. 
Differential Revision: https://reviews.llvm.org/D126021 Signed-off-by: Med Ismail Bennani --- lldb/packages/Python/lldbsuite/test/lldbpexpect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py index 1083705a201198..22a30c5d22c45d 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py +++ b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py @@ -34,7 +34,7 @@ def launch(self, executable=None, extra_args=None, timeout=60, args += run_under args += [lldbtest_config.lldbExec, '--no-lldbinit'] if not use_colors: - args += '--no-use-colors' + args.append('--no-use-colors') for cmd in self.setUpCommands(): if "use-color false" in cmd and use_colors: continue From b1183305f882f15a368cc33ed9b2f77aedb5b9a6 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Thu, 19 May 2022 21:47:28 +0000 Subject: [PATCH 036/908] [libc] Add strlcat Differential Revision: https://reviews.llvm.org/D125978 --- libc/config/darwin/arm/entrypoints.txt | 1 + libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/config/windows/entrypoints.txt | 1 + libc/spec/bsd_ext.td | 5 +++ libc/src/string/CMakeLists.txt | 15 +++++++-- libc/src/string/string_utils.h | 13 ++++++++ libc/src/string/strlcat.cpp | 27 +++++++++++++++++ libc/src/string/strlcat.h | 20 ++++++++++++ libc/src/string/strlcpy.cpp | 11 +------ libc/test/src/string/CMakeLists.txt | 10 ++++++ libc/test/src/string/strlcat_test.cpp | 37 +++++++++++++++++++++++ 12 files changed, 130 insertions(+), 12 deletions(-) create mode 100644 libc/src/string/strlcat.cpp create mode 100644 libc/src/string/strlcat.h create mode 100644 libc/test/src/string/strlcat_test.cpp diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index 568a8fb119dee3..a8f882f04cc211 100644 --- a/libc/config/darwin/arm/entrypoints.txt +++ 
b/libc/config/darwin/arm/entrypoints.txt @@ -35,6 +35,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strcmp libc.src.string.strcpy libc.src.string.strcspn + libc.src.string.strlcat libc.src.string.strlcpy libc.src.string.strlen libc.src.string.strncat diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 31a596928481bb..01b4fac24af789 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -40,6 +40,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strcmp libc.src.string.strcpy libc.src.string.strcspn + libc.src.string.strlcat libc.src.string.strlcpy libc.src.string.strlen libc.src.string.strncat diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 783dabf9a8f079..a0b7a6b5af225e 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -40,6 +40,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strcmp libc.src.string.strcpy libc.src.string.strcspn + libc.src.string.strlcat libc.src.string.strlcpy libc.src.string.strlen libc.src.string.strncat diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index 0f5b14a6e3c2b8..8d7cf6713c86ec 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -35,6 +35,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strcmp libc.src.string.strcpy libc.src.string.strcspn + libc.src.string.strlcat libc.src.string.strlcpy libc.src.string.strlen libc.src.string.strncat diff --git a/libc/spec/bsd_ext.td b/libc/spec/bsd_ext.td index 09832f71686503..577d978ca7264d 100644 --- a/libc/spec/bsd_ext.td +++ b/libc/spec/bsd_ext.td @@ -5,6 +5,11 @@ def BsdExtensions : StandardSpec<"BSDExtensions"> { [], // Types [], // Enumerations [ + FunctionSpec< + "strlcat", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec] + >, FunctionSpec< "strlcpy", RetValSpec, diff --git a/libc/src/string/CMakeLists.txt 
b/libc/src/string/CMakeLists.txt index 20b1b4c95c01b7..68962e21c15d10 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -6,6 +6,8 @@ add_header_library( string_utils.h DEPENDS libc.src.__support.CPP.bitset + .memory_utils.memcpy_implementation + .memory_utils.memset_implementation ) add_entrypoint_object( @@ -126,6 +128,17 @@ add_entrypoint_object( libc.include.stdlib ) +add_entrypoint_object( + strlcat + SRCS + strlcat.cpp + HDRS + strlcat.h + DEPENDS + .string_utils + libc.include.string +) + add_entrypoint_object( strlcpy SRCS @@ -133,8 +146,6 @@ add_entrypoint_object( HDRS strlcpy.h DEPENDS - .memory_utils.memcpy_implementation - .memory_utils.memset_implementation .string_utils libc.include.string ) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 699f65f0a9b194..3d854d3c881fc4 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -11,6 +11,8 @@ #include "src/__support/CPP/Bitset.h" #include "src/__support/common.h" +#include "src/string/memory_utils/memcpy_implementations.h" +#include "src/string/memory_utils/memset_implementations.h" #include // size_t namespace __llvm_libc { @@ -85,6 +87,17 @@ static inline char *string_token(char *__restrict src, return token; } +static inline size_t strlcpy(char *__restrict dst, const char *__restrict src, + size_t size) { + size_t len = internal::string_length(src); + if (!size) + return len; + size_t n = len < size - 1 ? 
len : size - 1; + inline_memcpy(dst, src, n); + inline_memset(dst + n, 0, size - n); + return len; +} + } // namespace internal } // namespace __llvm_libc diff --git a/libc/src/string/strlcat.cpp b/libc/src/string/strlcat.cpp new file mode 100644 index 00000000000000..a718de7698838e --- /dev/null +++ b/libc/src/string/strlcat.cpp @@ -0,0 +1,27 @@ +//===-- Implementation of strlcat -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strlcat.h" +#include "src/string/string_utils.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(size_t, strlcat, + (char *__restrict dst, const char *__restrict src, + size_t size)) { + char *new_dst = reinterpret_cast(internal::find_first_character( + reinterpret_cast(dst), 0, size)); + if (!new_dst) + return size + internal::string_length(src); + size_t first_len = new_dst - dst; + return first_len + internal::strlcpy(new_dst, src, size - first_len); +} + +} // namespace __llvm_libc diff --git a/libc/src/string/strlcat.h b/libc/src/string/strlcat.h new file mode 100644 index 00000000000000..7b073c67068266 --- /dev/null +++ b/libc/src/string/strlcat.h @@ -0,0 +1,20 @@ +//===-- Implementation header for strlcat -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRLCAT_H +#define LLVM_LIBC_SRC_STRING_STRLCAT_H + +#include + +namespace __llvm_libc { + +size_t strlcat(char *__restrict dst, const char *__restrict src, size_t size); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRLCAT_H diff --git a/libc/src/string/strlcpy.cpp b/libc/src/string/strlcpy.cpp index 3cb3dd151108e0..fa1f10290e8ec0 100644 --- a/libc/src/string/strlcpy.cpp +++ b/libc/src/string/strlcpy.cpp @@ -7,9 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/string/strlcpy.h" -#include "src/string/bzero.h" -#include "src/string/memory_utils/memcpy_implementations.h" -#include "src/string/memory_utils/memset_implementations.h" #include "src/string/string_utils.h" #include "src/__support/common.h" @@ -19,13 +16,7 @@ namespace __llvm_libc { LLVM_LIBC_FUNCTION(size_t, strlcpy, (char *__restrict dst, const char *__restrict src, size_t size)) { - size_t len = internal::string_length(src); - if (!size) - return len; - size_t n = len < size - 1 ? 
len : size - 1; - inline_memcpy(dst, src, n); - inline_memset(dst + n, 0, size - n); - return len; + return internal::strlcpy(dst, src, size); } } // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt index 5bdb47a7e67294..e60f9ab3d3994e 100644 --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -123,6 +123,16 @@ add_libc_unittest( libc.src.string.strdup ) +add_libc_unittest( + strlcat_test + SUITE + libc_string_unittests + SRCS + strlcat_test.cpp + DEPENDS + libc.src.string.strlcat +) + add_libc_unittest( strlcpy_test SUITE diff --git a/libc/test/src/string/strlcat_test.cpp b/libc/test/src/string/strlcat_test.cpp new file mode 100644 index 00000000000000..66db15e5892781 --- /dev/null +++ b/libc/test/src/string/strlcat_test.cpp @@ -0,0 +1,37 @@ +//===-- Unittests for strlcat ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strlcat.h" +#include "utils/UnitTest/Test.h" +#include + +TEST(LlvmLibcStrlcatTest, TooBig) { + const char *str = "cd"; + char buf[4]{"ab"}; + EXPECT_EQ(__llvm_libc::strlcat(buf, str, 3), size_t(4)); + EXPECT_STREQ(buf, "ab"); + EXPECT_EQ(__llvm_libc::strlcat(buf, str, 4), size_t(4)); + EXPECT_STREQ(buf, "abc"); +} + +TEST(LlvmLibcStrlcatTest, Smaller) { + const char *str = "cd"; + char buf[7]{"ab"}; + + EXPECT_EQ(__llvm_libc::strlcat(buf, str, 7), size_t(4)); + EXPECT_STREQ(buf, "abcd"); +} + +TEST(LlvmLibcStrlcatTest, No0) { + const char *str = "cd"; + char buf[7]{"ab"}; + EXPECT_EQ(__llvm_libc::strlcat(buf, str, 1), size_t(3)); + EXPECT_STREQ(buf, "ab"); + EXPECT_EQ(__llvm_libc::strlcat(buf, str, 2), size_t(4)); + EXPECT_STREQ(buf, "ab"); +} From c6c13d4e5fcacd2fc2f32fcecaaf816ccc20c9e5 Mon Sep 17 00:00:00 2001 From: python3kgae Date: Tue, 17 May 2022 16:57:18 -0700 Subject: [PATCH 037/908] [DirectX backend] When cleanup module flags only remove unused flags. Only remove dx.valver from module flags when cleanup module flags in DXILTranslateMetadataPass. 
Reviewed By: beanz Differential Revision: https://reviews.llvm.org/D125842 --- .../Target/DirectX/DXILTranslateMetadata.cpp | 28 +++++++++++++++++-- llvm/test/CodeGen/DirectX/dxil_ver.ll | 6 ++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index 1dbe424058a667..e587dd53c62adb 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -9,8 +9,10 @@ //===----------------------------------------------------------------------===// #include "DirectX.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" @@ -56,10 +58,32 @@ static VersionTuple loadDXILValidatorVersion(MDNode *ValVerMD) { return VersionTuple(Major, Minor); } -static void cleanModule(Module &M) { - M.getOrInsertModuleFlagsMetadata()->eraseFromParent(); +static void cleanModuleFlags(Module &M) { + constexpr StringLiteral DeadKeys[] = {ValVerKey}; + // Collect DeadKeys in ModuleFlags. + StringSet<> DeadKeySet; + for (auto &Key : DeadKeys) { + if (M.getModuleFlag(Key)) + DeadKeySet.insert(Key); + } + if (DeadKeySet.empty()) + return; + + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + NamedMDNode *MDFlags = M.getModuleFlagsMetadata(); + MDFlags->eraseFromParent(); + // Add ModuleFlag which not dead. 
+ for (auto &Flag : ModuleFlags) { + StringRef Key = Flag.Key->getString(); + if (DeadKeySet.contains(Key)) + continue; + M.addModuleFlag(Flag.Behavior, Key, Flag.Val); + } } +static void cleanModule(Module &M) { cleanModuleFlags(M); } + namespace { class DXILTranslateMetadata : public ModulePass { public: diff --git a/llvm/test/CodeGen/DirectX/dxil_ver.ll b/llvm/test/CodeGen/DirectX/dxil_ver.ll index 3c851ad95ffd81..dcc9b6a797f6a7 100644 --- a/llvm/test/CodeGen/DirectX/dxil_ver.ll +++ b/llvm/test/CodeGen/DirectX/dxil_ver.ll @@ -2,8 +2,14 @@ target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-pc-shadermodel6.3-library" +; Make sure dx.valver metadata is generated. ; CHECK:!dx.valver = !{![[valver:[0-9]+]]} +; Make sure module flags still exist and only have 1 operand left. +; CHECK:!llvm.module.flags = !{{{![0-9]}}} +; Make sure validator version is 1.1. ; CHECK:![[valver]] = !{i32 1, i32 1} +; Make sure wchar_size still exist. +; CHECK:!{i32 1, !"wchar_size", i32 4} !llvm.module.flags = !{!0, !1} !llvm.ident = !{!3} From 3fa1b6557d08a148ef853c2a761f1c43e09fef5e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 19 May 2022 13:45:43 -0700 Subject: [PATCH 038/908] [TableGen] Add generation of argument register lists There are cases, like with -fzero-call-used-regs, where we need to know which registers can be used by a certain calling convention. This change generates a list of registers used by each calling convention defined in *CallingConv.td. Calling conventions that use registers conditioned on Swift have those registers placed in a separate list. This allows us to be flexible about whether to use the Swift registers or not. 
Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D125421 --- llvm/utils/TableGen/CallingConvEmitter.cpp | 138 +++++++++++++++++++-- 1 file changed, 127 insertions(+), 11 deletions(-) diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index fe5b13eda122f1..279a01e4ac6aa1 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -20,6 +20,14 @@ using namespace llvm; namespace { class CallingConvEmitter { RecordKeeper &Records; + unsigned Counter; + std::string CurrentAction; + bool SwiftAction; + + std::map> AssignedRegsMap; + std::map> AssignedSwiftRegsMap; + std::map> DelegateToMap; + public: explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {} @@ -28,7 +36,7 @@ class CallingConvEmitter { private: void EmitCallingConv(Record *CC, raw_ostream &O); void EmitAction(Record *Action, unsigned Indent, raw_ostream &O); - unsigned Counter; + void EmitArgRegisterLists(raw_ostream &O); }; } // End anonymous namespace @@ -38,6 +46,7 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. Records.startTimer("Emit prototypes"); + O << "#ifndef GET_CC_REGISTER_LISTS\n\n"; for (Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { unsigned Pad = CC->getName().size(); @@ -58,18 +67,28 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit each non-custom calling convention description in full. 
Records.startTimer("Emit full descriptions"); for (Record *CC : CCs) { - if (!CC->getValueAsBit("Custom")) + if (!CC->getValueAsBit("Custom")) { EmitCallingConv(CC, O); + } } -} + EmitArgRegisterLists(O); + + O << "\n#endif // CC_REGISTER_LIST\n"; +} void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { ListInit *CCActions = CC->getValueAsListInit("Actions"); Counter = 0; + CurrentAction = CC->getName().str(); + // Call upon the creation of a map entry from the void! + // We want an entry in AssignedRegsMap for every action, even if that + // entry is empty. + AssignedRegsMap[CurrentAction] = {}; + O << "\n\n"; - unsigned Pad = CC->getName().size(); + unsigned Pad = CurrentAction.size(); if (CC->getValueAsBit("Entry")) { O << "bool llvm::"; Pad += 12; @@ -77,13 +96,21 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { O << "static bool "; Pad += 13; } - O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n" + O << CurrentAction << "(unsigned ValNo, MVT ValVT,\n" << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n"; // Emit all of the actions, in order. 
for (unsigned i = 0, e = CCActions->size(); i != e; ++i) { + Record *Action = CCActions->getElementAsRecord(i); + SwiftAction = llvm::any_of(Action->getSuperClasses(), + [](const std::pair &Class) { + StringRef Name = + Class.first->getNameInitAsString(); + return Name.startswith("CCIfSwift"); + }); + O << "\n"; - EmitAction(CCActions->getElementAsRecord(i), 2, O); + EmitAction(Action, 2, O); } O << "\n return true; // CC didn't match.\n"; @@ -93,7 +120,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, raw_ostream &O) { std::string IndentStr = std::string(Indent, ' '); - + if (Action->isSubClassOf("CCPredicateAction")) { O << IndentStr << "if ("; @@ -121,18 +148,30 @@ void CallingConvEmitter::EmitAction(Record *Action, O << IndentStr << "if (!" << CC->getName() << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; + DelegateToMap[CurrentAction].insert(CC->getName().str()); } else if (Action->isSubClassOf("CCAssignToReg")) { ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { - O << IndentStr << "if (unsigned Reg = State.AllocateReg("; - O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n"; + std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); + O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name + << ")) {\n"; + if (SwiftAction) + AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); } else { O << IndentStr << "static const MCPhysReg RegList" << ++Counter << "[] = {\n"; O << IndentStr << " "; ListSeparator LS; - for (unsigned i = 0, e = RegList->size(); i != e; ++i) - O << LS << getQualifiedName(RegList->getElementAsRecord(i)); + for (unsigned i = 0, e = RegList->size(); i != e; ++i) { + std::string Name = getQualifiedName(RegList->getElementAsRecord(i)); + if (SwiftAction) + 
AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); + O << LS << Name; + } O << "\n" << IndentStr << "};\n"; O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" << Counter << ")) {\n"; @@ -287,6 +326,83 @@ void CallingConvEmitter::EmitAction(Record *Action, } } +void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { + // Transitively merge all delegated CCs into AssignedRegsMap. + using EntryTy = std::pair>; + bool Redo; + do { + Redo = false; + std::deque Worklist(DelegateToMap.begin(), DelegateToMap.end()); + + while (!Worklist.empty()) { + EntryTy Entry = Worklist.front(); + Worklist.pop_front(); + + const std::string &CCName = Entry.first; + std::set &Registers = Entry.second; + if (!Registers.empty()) + continue; + + for (auto &InnerEntry : Worklist) { + const std::string &InnerCCName = InnerEntry.first; + std::set &InnerRegisters = InnerEntry.second; + + if (InnerRegisters.find(CCName) != InnerRegisters.end()) { + AssignedRegsMap[InnerCCName].insert( + AssignedRegsMap[CCName].begin(), + AssignedRegsMap[CCName].end()); + InnerRegisters.erase(CCName); + } + } + + DelegateToMap.erase(CCName); + Redo = true; + } + } while (Redo); + + if (AssignedRegsMap.empty()) + return; + + O << "\n#else\n\n"; + + for (auto &Entry : AssignedRegsMap) { + const std::string &RegName = Entry.first; + std::set &Registers = Entry.second; + + if (RegName.empty()) + continue; + + O << "const MCRegister " << Entry.first << "_ArgRegs[] = { "; + + if (Registers.empty()) { + O << "0"; + } else { + ListSeparator LS; + for (const std::string &Reg : Registers) + O << LS << Reg; + } + + O << " };\n"; + } + + if (AssignedSwiftRegsMap.empty()) + return; + + O << "\n// Registers used by Swift.\n"; + for (auto &Entry : AssignedSwiftRegsMap) { + const std::string &RegName = Entry.first; + std::set &Registers = Entry.second; + + O << "const MCRegister " << RegName << "_Swift_ArgRegs[] = { "; + + ListSeparator LS; + for (const 
std::string &Reg : Registers) + O << LS << Reg; + + O << " };\n"; + } +} + namespace llvm { void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) { From fa6aed2abd5028134068d9b5cc6bfec1a86f747c Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Thu, 19 May 2022 13:47:36 -0700 Subject: [PATCH 039/908] [mlir][sparse] Using the name "dimSizes" more consistently Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126016 --- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 116 +++++++++--------- 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 21ad81525a1e1f..11a40b7f4d88b9 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -85,21 +85,22 @@ static inline uint64_t checkedMul(uint64_t lhs, uint64_t rhs) { } // TODO: adjust this so it can be used by `openSparseTensorCOO` too. -// That version doesn't have the permutation, and the `sizes` are +// That version doesn't have the permutation, and the `dimSizes` are // a pointer/C-array rather than `std::vector`. // -/// Asserts that the `sizes` (in target-order) under the `perm` (mapping +/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping /// semantic-order to target-order) are a refinement of the desired `shape` /// (in semantic-order). /// /// Precondition: `perm` and `shape` must be valid for `rank`. 
static inline void -assertPermutedSizesMatchShape(const std::vector &sizes, uint64_t rank, - const uint64_t *perm, const uint64_t *shape) { +assertPermutedSizesMatchShape(const std::vector &dimSizes, + uint64_t rank, const uint64_t *perm, + const uint64_t *shape) { assert(perm && shape); - assert(rank == sizes.size() && "Rank mismatch"); + assert(rank == dimSizes.size() && "Rank mismatch"); for (uint64_t r = 0; r < rank; r++) - assert((shape[r] == 0 || shape[r] == sizes[perm[r]]) && + assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) && "Dimension size mismatch"); } @@ -133,8 +134,8 @@ using ElementConsumer = template struct SparseTensorCOO final { public: - SparseTensorCOO(const std::vector &szs, uint64_t capacity) - : sizes(szs) { + SparseTensorCOO(const std::vector &dimSizes, uint64_t capacity) + : dimSizes(dimSizes) { if (capacity) { elements.reserve(capacity); indices.reserve(capacity * getRank()); @@ -147,9 +148,9 @@ struct SparseTensorCOO final { uint64_t *base = indices.data(); uint64_t size = indices.size(); uint64_t rank = getRank(); - assert(rank == ind.size()); + assert(ind.size() == rank && "Element rank mismatch"); for (uint64_t r = 0; r < rank; r++) { - assert(ind[r] < sizes[r]); // within bounds + assert(ind[r] < dimSizes[r] && "Index is too large for the dimension"); indices.push_back(ind[r]); } // This base only changes if indices were reallocated. In that case, we @@ -184,13 +185,13 @@ struct SparseTensorCOO final { }); } - /// Returns rank. - uint64_t getRank() const { return sizes.size(); } + /// Get the rank of the tensor. + uint64_t getRank() const { return dimSizes.size(); } - /// Getter for sizes array. - const std::vector &getSizes() const { return sizes; } + /// Getter for the dimension-sizes array. + const std::vector &getDimSizes() const { return dimSizes; } - /// Getter for elements array. + /// Getter for the elements array. const std::vector> &getElements() const { return elements; } /// Switch into iterator mode. 
@@ -213,23 +214,23 @@ struct SparseTensorCOO final { /// that same ordering for the given indices. The result is a /// fully permuted coordinate scheme. /// - /// Precondition: `sizes` and `perm` must be valid for `rank`. + /// Precondition: `dimSizes` and `perm` must be valid for `rank`. static SparseTensorCOO *newSparseTensorCOO(uint64_t rank, - const uint64_t *sizes, + const uint64_t *dimSizes, const uint64_t *perm, uint64_t capacity = 0) { std::vector permsz(rank); for (uint64_t r = 0; r < rank; r++) { - assert(sizes[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = sizes[r]; + assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); + permsz[perm[r]] = dimSizes[r]; } return new SparseTensorCOO(permsz, capacity); } private: - const std::vector sizes; // per-dimension sizes - std::vector> elements; // all COO elements - std::vector indices; // shared index pool + const std::vector dimSizes; // per-dimension sizes + std::vector> elements; // all COO elements + std::vector indices; // shared index pool bool iteratorLocked = false; unsigned iteratorPos = 0; }; @@ -271,12 +272,12 @@ class SparseTensorStorageBase { public: /// Constructs a new storage object. The `perm` maps the tensor's /// semantic-ordering of dimensions to this object's storage-order. - /// The `szs` and `sparsity` arrays are already in storage-order. + /// The `dimSizes` and `sparsity` arrays are already in storage-order. /// - /// Precondition: `perm` and `sparsity` must be valid for `szs.size()`. - SparseTensorStorageBase(const std::vector &szs, + /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. 
+ SparseTensorStorageBase(const std::vector &dimSizes, const uint64_t *perm, const DimLevelType *sparsity) - : dimSizes(szs), rev(getRank()), + : dimSizes(dimSizes), rev(getRank()), dimTypes(sparsity, sparsity + getRank()) { assert(perm && sparsity); const uint64_t rank = getRank(); @@ -400,10 +401,10 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// valid state after this constructor alone; e.g., `isCompressedDim(d)` /// doesn't entail `!(pointers[d].empty())`. /// - /// Precondition: `perm` and `sparsity` must be valid for `szs.size()`. - SparseTensorStorage(const std::vector &szs, const uint64_t *perm, - const DimLevelType *sparsity) - : SparseTensorStorageBase(szs, perm, sparsity), pointers(getRank()), + /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. + SparseTensorStorage(const std::vector &dimSizes, + const uint64_t *perm, const DimLevelType *sparsity) + : SparseTensorStorageBase(dimSizes, perm, sparsity), pointers(getRank()), indices(getRank()), idx(getRank()) {} public: @@ -411,10 +412,11 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// permutation, and per-dimension dense/sparse annotations, using /// the coordinate scheme tensor for the initial contents if provided. /// - /// Precondition: `perm` and `sparsity` must be valid for `szs.size()`. - SparseTensorStorage(const std::vector &szs, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo) - : SparseTensorStorage(szs, perm, sparsity) { + /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. + SparseTensorStorage(const std::vector &dimSizes, + const uint64_t *perm, const DimLevelType *sparsity, + SparseTensorCOO *coo) + : SparseTensorStorage(dimSizes, perm, sparsity) { // Provide hints on capacity of pointers and indices. 
// TODO: needs much fine-tuning based on actual sparsity; currently // we reserve pointer/index space based on all previous dense @@ -424,7 +426,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { uint64_t sz = 1; for (uint64_t r = 0, rank = getRank(); r < rank; r++) { if (isCompressedDim(r)) { - // TODO: Take a parameter between 1 and `sizes[r]`, and multiply + // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply // `sz` by that before reserving. (For now we just use 1.) pointers[r].reserve(sz + 1); pointers[r].push_back(0); @@ -438,7 +440,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { // Then assign contents from coordinate scheme tensor if provided. if (coo) { // Ensure both preconditions of `fromCOO`. - assert(coo->getSizes() == getDimSizes() && "Tensor size mismatch"); + assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch"); coo->sort(); // Now actually insert the `elements`. const std::vector> &elements = coo->getElements(); @@ -455,10 +457,10 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// the given sparse tensor for the initial contents. /// /// Preconditions: - /// * `perm` and `sparsity` must be valid for `szs.size()`. + /// * `perm` and `sparsity` must be valid for `dimSizes.size()`. /// * The `tensor` must have the same value type `V`. 
- SparseTensorStorage(const std::vector &szs, const uint64_t *perm, - const DimLevelType *sparsity, + SparseTensorStorage(const std::vector &dimSizes, + const uint64_t *perm, const DimLevelType *sparsity, const SparseTensorStorageBase &tensor); ~SparseTensorStorage() final override = default; @@ -562,7 +564,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const DimLevelType *sparsity, SparseTensorCOO *coo) { SparseTensorStorage *n = nullptr; if (coo) { - const auto &coosz = coo->getSizes(); + const auto &coosz = coo->getDimSizes(); assertPermutedSizesMatchShape(coosz, rank, perm, shape); n = new SparseTensorStorage(coosz, perm, sparsity, coo); } else { @@ -615,7 +617,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// sense. For non-dense dimensions, that means appending to the /// `indices[d]` array, checking that `i` is representable in the `I` /// type; however, we do not verify other semantic requirements (e.g., - /// that `i` is in bounds for `sizes[d]`, and not previously occurring + /// that `i` is in bounds for `dimSizes[d]`, and not previously occurring /// in the same segment). For dense dimensions, this method instead /// appends the appropriate number of zeros to the `values` array, /// where `full` is the number of "entries" already written to `values` @@ -639,7 +641,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Writes the given coordinate to `indices[d][pos]`. This method /// checks that `i` is representable in the `I` type; however, it /// does not check that `i` is semantically valid (i.e., in bounds - /// for `sizes[d]` and not elsewhere occurring in the same segment). + /// for `dimSizes[d]` and not elsewhere occurring in the same segment). 
void writeIndex(uint64_t d, uint64_t pos, uint64_t i) { assert(isCompressedDim(d)); // Subscript assignment to `std::vector` requires that the `pos`-th @@ -672,7 +674,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// /// Preconditions: /// (1) the `elements` must be lexicographically sorted. - /// (2) the indices of every element are valid for `sizes` (equal rank + /// (2) the indices of every element are valid for `dimSizes` (equal rank /// and pointwise less-than). void fromCOO(const std::vector> &elements, uint64_t lo, uint64_t hi, uint64_t d) { @@ -804,12 +806,12 @@ class SparseTensorEnumeratorBase { cursor(getRank()) { assert(perm && "Received nullptr for permutation"); assert(rank == getRank() && "Permutation rank mismatch"); - const auto &rev = src.getRev(); // source stg-order -> semantic-order - const auto &sizes = src.getDimSizes(); // in source storage-order - for (uint64_t s = 0; s < rank; s++) { // `s` source storage-order - uint64_t t = perm[rev[s]]; // `t` target-order + const auto &rev = src.getRev(); // source-order -> semantic-order + const auto &dimSizes = src.getDimSizes(); // in source storage-order + for (uint64_t s = 0; s < rank; s++) { // `s` source storage-order + uint64_t t = perm[rev[s]]; // `t` target-order reord[s] = t; - permsz[t] = sizes[s]; + permsz[t] = dimSizes[s]; } } @@ -917,10 +919,10 @@ class SparseTensorNNZ final { /// does not actually populate the statistics, however; for that see /// `initialize`. /// - /// Precondition: `szs` must not contain zeros. - SparseTensorNNZ(const std::vector &szs, + /// Precondition: `dimSizes` must not contain zeros. + SparseTensorNNZ(const std::vector &dimSizes, const std::vector &sparsity) - : dimSizes(szs), dimTypes(sparsity), nnz(getRank()) { + : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) { assert(dimSizes.size() == dimTypes.size() && "Rank mismatch"); bool uncompressed = true; uint64_t sz = 1; // the product of all `dimSizes` strictly less than `r`. 
@@ -1017,9 +1019,9 @@ class SparseTensorNNZ final { template SparseTensorStorage::SparseTensorStorage( - const std::vector &szs, const uint64_t *perm, + const std::vector &dimSizes, const uint64_t *perm, const DimLevelType *sparsity, const SparseTensorStorageBase &tensor) - : SparseTensorStorage(szs, perm, sparsity) { + : SparseTensorStorage(dimSizes, perm, sparsity) { SparseTensorEnumeratorBase *enumerator; tensor.newEnumerator(&enumerator, getRank(), perm); { @@ -1256,7 +1258,7 @@ static void outSparseTensor(void *tensor, void *dest, bool sort) { if (sort) coo->sort(); char *filename = static_cast(dest); - auto &sizes = coo->getSizes(); + auto &dimSizes = coo->getDimSizes(); auto &elements = coo->getElements(); uint64_t rank = coo->getRank(); uint64_t nnz = elements.size(); @@ -1265,8 +1267,8 @@ static void outSparseTensor(void *tensor, void *dest, bool sort) { assert(file.is_open()); file << "; extended FROSTT format\n" << rank << " " << nnz << std::endl; for (uint64_t r = 0; r < rank - 1; r++) - file << sizes[r] << " "; - file << sizes[rank - 1] << std::endl; + file << dimSizes[r] << " "; + file << dimSizes[rank - 1] << std::endl; for (uint64_t i = 0; i < nnz; i++) { auto &idx = elements[i].indices; for (uint64_t r = 0; r < rank; r++) @@ -1340,7 +1342,7 @@ static void fromMLIRSparseTensor(void *tensor, uint64_t *pRank, uint64_t *pNse, uint64_t *shape = new uint64_t[rank]; for (uint64_t i = 0; i < rank; i++) - shape[i] = coo->getSizes()[i]; + shape[i] = coo->getDimSizes()[i]; V *values = new V[nse]; uint64_t *indices = new uint64_t[rank * nse]; From aff9c89fabb3a50e6da31fb739b22ad2ed76bf62 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Thu, 19 May 2022 14:04:35 -0700 Subject: [PATCH 040/908] [mlir][sparse] Simplifying closure By closing over the `rank` itself rather than `this`, we save a method call on each iteration. A minor optimization, but one that adds up. 
Depends On D126016 Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126019 --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 11a40b7f4d88b9..53d26d5af68943 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -173,9 +173,9 @@ struct SparseTensorCOO final { assert(!iteratorLocked && "Attempt to sort() after startIterator()"); // TODO: we may want to cache an `isSorted` bit, to avoid // unnecessary/redundant sorting. + uint64_t rank = getRank(); std::sort(elements.begin(), elements.end(), - [this](const Element &e1, const Element &e2) { - uint64_t rank = getRank(); + [rank](const Element &e1, const Element &e2) { for (uint64_t r = 0; r < rank; r++) { if (e1.indices[r] == e2.indices[r]) continue; From db0ea51c830af095b8e9ff1b9ba23f87b10cf3f5 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 19 May 2022 15:16:08 -0700 Subject: [PATCH 041/908] Revert "[TableGen] Add generation of argument register lists" There are build bot failures. This reverts commit 3fa1b6557d08a148ef853c2a761f1c43e09fef5e. 
--- llvm/utils/TableGen/CallingConvEmitter.cpp | 138 ++------------------- 1 file changed, 11 insertions(+), 127 deletions(-) diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index 279a01e4ac6aa1..fe5b13eda122f1 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -20,14 +20,6 @@ using namespace llvm; namespace { class CallingConvEmitter { RecordKeeper &Records; - unsigned Counter; - std::string CurrentAction; - bool SwiftAction; - - std::map> AssignedRegsMap; - std::map> AssignedSwiftRegsMap; - std::map> DelegateToMap; - public: explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {} @@ -36,7 +28,7 @@ class CallingConvEmitter { private: void EmitCallingConv(Record *CC, raw_ostream &O); void EmitAction(Record *Action, unsigned Indent, raw_ostream &O); - void EmitArgRegisterLists(raw_ostream &O); + unsigned Counter; }; } // End anonymous namespace @@ -46,7 +38,6 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. Records.startTimer("Emit prototypes"); - O << "#ifndef GET_CC_REGISTER_LISTS\n\n"; for (Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { unsigned Pad = CC->getName().size(); @@ -67,28 +58,18 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit each non-custom calling convention description in full. Records.startTimer("Emit full descriptions"); for (Record *CC : CCs) { - if (!CC->getValueAsBit("Custom")) { + if (!CC->getValueAsBit("Custom")) EmitCallingConv(CC, O); - } } - - EmitArgRegisterLists(O); - - O << "\n#endif // CC_REGISTER_LIST\n"; } + void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { ListInit *CCActions = CC->getValueAsListInit("Actions"); Counter = 0; - CurrentAction = CC->getName().str(); - // Call upon the creation of a map entry from the void! 
- // We want an entry in AssignedRegsMap for every action, even if that - // entry is empty. - AssignedRegsMap[CurrentAction] = {}; - O << "\n\n"; - unsigned Pad = CurrentAction.size(); + unsigned Pad = CC->getName().size(); if (CC->getValueAsBit("Entry")) { O << "bool llvm::"; Pad += 12; @@ -96,21 +77,13 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { O << "static bool "; Pad += 13; } - O << CurrentAction << "(unsigned ValNo, MVT ValVT,\n" + O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n" << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n"; // Emit all of the actions, in order. for (unsigned i = 0, e = CCActions->size(); i != e; ++i) { - Record *Action = CCActions->getElementAsRecord(i); - SwiftAction = llvm::any_of(Action->getSuperClasses(), - [](const std::pair &Class) { - StringRef Name = - Class.first->getNameInitAsString(); - return Name.startswith("CCIfSwift"); - }); - O << "\n"; - EmitAction(Action, 2, O); + EmitAction(CCActions->getElementAsRecord(i), 2, O); } O << "\n return true; // CC didn't match.\n"; @@ -120,7 +93,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, raw_ostream &O) { std::string IndentStr = std::string(Indent, ' '); - + if (Action->isSubClassOf("CCPredicateAction")) { O << IndentStr << "if ("; @@ -148,30 +121,18 @@ void CallingConvEmitter::EmitAction(Record *Action, O << IndentStr << "if (!" 
<< CC->getName() << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; - DelegateToMap[CurrentAction].insert(CC->getName().str()); } else if (Action->isSubClassOf("CCAssignToReg")) { ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { - std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); - O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name - << ")) {\n"; - if (SwiftAction) - AssignedSwiftRegsMap[CurrentAction].insert(Name); - else - AssignedRegsMap[CurrentAction].insert(Name); + O << IndentStr << "if (unsigned Reg = State.AllocateReg("; + O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n"; } else { O << IndentStr << "static const MCPhysReg RegList" << ++Counter << "[] = {\n"; O << IndentStr << " "; ListSeparator LS; - for (unsigned i = 0, e = RegList->size(); i != e; ++i) { - std::string Name = getQualifiedName(RegList->getElementAsRecord(i)); - if (SwiftAction) - AssignedSwiftRegsMap[CurrentAction].insert(Name); - else - AssignedRegsMap[CurrentAction].insert(Name); - O << LS << Name; - } + for (unsigned i = 0, e = RegList->size(); i != e; ++i) + O << LS << getQualifiedName(RegList->getElementAsRecord(i)); O << "\n" << IndentStr << "};\n"; O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" << Counter << ")) {\n"; @@ -326,83 +287,6 @@ void CallingConvEmitter::EmitAction(Record *Action, } } -void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { - // Transitively merge all delegated CCs into AssignedRegsMap. 
- using EntryTy = std::pair>; - bool Redo; - do { - Redo = false; - std::deque Worklist(DelegateToMap.begin(), DelegateToMap.end()); - - while (!Worklist.empty()) { - EntryTy Entry = Worklist.front(); - Worklist.pop_front(); - - const std::string &CCName = Entry.first; - std::set &Registers = Entry.second; - if (!Registers.empty()) - continue; - - for (auto &InnerEntry : Worklist) { - const std::string &InnerCCName = InnerEntry.first; - std::set &InnerRegisters = InnerEntry.second; - - if (InnerRegisters.find(CCName) != InnerRegisters.end()) { - AssignedRegsMap[InnerCCName].insert( - AssignedRegsMap[CCName].begin(), - AssignedRegsMap[CCName].end()); - InnerRegisters.erase(CCName); - } - } - - DelegateToMap.erase(CCName); - Redo = true; - } - } while (Redo); - - if (AssignedRegsMap.empty()) - return; - - O << "\n#else\n\n"; - - for (auto &Entry : AssignedRegsMap) { - const std::string &RegName = Entry.first; - std::set &Registers = Entry.second; - - if (RegName.empty()) - continue; - - O << "const MCRegister " << Entry.first << "_ArgRegs[] = { "; - - if (Registers.empty()) { - O << "0"; - } else { - ListSeparator LS; - for (const std::string &Reg : Registers) - O << LS << Reg; - } - - O << " };\n"; - } - - if (AssignedSwiftRegsMap.empty()) - return; - - O << "\n// Registers used by Swift.\n"; - for (auto &Entry : AssignedSwiftRegsMap) { - const std::string &RegName = Entry.first; - std::set &Registers = Entry.second; - - O << "const MCRegister " << RegName << "_Swift_ArgRegs[] = { "; - - ListSeparator LS; - for (const std::string &Reg : Registers) - O << LS << Reg; - - O << " };\n"; - } -} - namespace llvm { void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) { From 88043c1958cb18f970095fa08b34a80c774cc2a9 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 19 May 2022 13:45:43 -0700 Subject: [PATCH 042/908] [TableGen] Add generation of argument register lists There are cases, like with -fzero-call-used-regs, where we need to know which registers can be used 
by a certain calling convention. This change generates a list of registers used by each calling convention defined in *CallingConv.td. Calling conventions that use registers conditioned on Swift have those registers placed in a separate list. This allows us to be flexible about whether to use the Swift registers or not. Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D125421 --- llvm/utils/TableGen/CallingConvEmitter.cpp | 138 +++++++++++++++++++-- 1 file changed, 127 insertions(+), 11 deletions(-) diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index fe5b13eda122f1..8f080cd250abcb 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -20,6 +20,14 @@ using namespace llvm; namespace { class CallingConvEmitter { RecordKeeper &Records; + unsigned Counter; + std::string CurrentAction; + bool SwiftAction; + + std::map> AssignedRegsMap; + std::map> AssignedSwiftRegsMap; + std::map> DelegateToMap; + public: explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {} @@ -28,7 +36,7 @@ class CallingConvEmitter { private: void EmitCallingConv(Record *CC, raw_ostream &O); void EmitAction(Record *Action, unsigned Indent, raw_ostream &O); - unsigned Counter; + void EmitArgRegisterLists(raw_ostream &O); }; } // End anonymous namespace @@ -38,6 +46,7 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. Records.startTimer("Emit prototypes"); + O << "#ifndef GET_CC_REGISTER_LISTS\n\n"; for (Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { unsigned Pad = CC->getName().size(); @@ -58,18 +67,28 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit each non-custom calling convention description in full. 
Records.startTimer("Emit full descriptions"); for (Record *CC : CCs) { - if (!CC->getValueAsBit("Custom")) + if (!CC->getValueAsBit("Custom")) { EmitCallingConv(CC, O); + } } -} + EmitArgRegisterLists(O); + + O << "\n#endif // CC_REGISTER_LIST\n"; +} void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { ListInit *CCActions = CC->getValueAsListInit("Actions"); Counter = 0; + CurrentAction = CC->getName().str(); + // Call upon the creation of a map entry from the void! + // We want an entry in AssignedRegsMap for every action, even if that + // entry is empty. + AssignedRegsMap[CurrentAction] = {}; + O << "\n\n"; - unsigned Pad = CC->getName().size(); + unsigned Pad = CurrentAction.size(); if (CC->getValueAsBit("Entry")) { O << "bool llvm::"; Pad += 12; @@ -77,13 +96,21 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { O << "static bool "; Pad += 13; } - O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n" + O << CurrentAction << "(unsigned ValNo, MVT ValVT,\n" << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n"; // Emit all of the actions, in order. 
for (unsigned i = 0, e = CCActions->size(); i != e; ++i) { + Record *Action = CCActions->getElementAsRecord(i); + SwiftAction = llvm::any_of(Action->getSuperClasses(), + [](const std::pair &Class) { + std::string Name = + Class.first->getNameInitAsString(); + return StringRef(Name).startswith("CCIfSwift"); + }); + O << "\n"; - EmitAction(CCActions->getElementAsRecord(i), 2, O); + EmitAction(Action, 2, O); } O << "\n return true; // CC didn't match.\n"; @@ -93,7 +120,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, raw_ostream &O) { std::string IndentStr = std::string(Indent, ' '); - + if (Action->isSubClassOf("CCPredicateAction")) { O << IndentStr << "if ("; @@ -121,18 +148,30 @@ void CallingConvEmitter::EmitAction(Record *Action, O << IndentStr << "if (!" << CC->getName() << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; + DelegateToMap[CurrentAction].insert(CC->getName().str()); } else if (Action->isSubClassOf("CCAssignToReg")) { ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { - O << IndentStr << "if (unsigned Reg = State.AllocateReg("; - O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n"; + std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); + O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name + << ")) {\n"; + if (SwiftAction) + AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); } else { O << IndentStr << "static const MCPhysReg RegList" << ++Counter << "[] = {\n"; O << IndentStr << " "; ListSeparator LS; - for (unsigned i = 0, e = RegList->size(); i != e; ++i) - O << LS << getQualifiedName(RegList->getElementAsRecord(i)); + for (unsigned i = 0, e = RegList->size(); i != e; ++i) { + std::string Name = getQualifiedName(RegList->getElementAsRecord(i)); + if (SwiftAction) + 
AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); + O << LS << Name; + } O << "\n" << IndentStr << "};\n"; O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" << Counter << ")) {\n"; @@ -287,6 +326,83 @@ void CallingConvEmitter::EmitAction(Record *Action, } } +void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { + // Transitively merge all delegated CCs into AssignedRegsMap. + using EntryTy = std::pair>; + bool Redo; + do { + Redo = false; + std::deque Worklist(DelegateToMap.begin(), DelegateToMap.end()); + + while (!Worklist.empty()) { + EntryTy Entry = Worklist.front(); + Worklist.pop_front(); + + const std::string &CCName = Entry.first; + std::set &Registers = Entry.second; + if (!Registers.empty()) + continue; + + for (auto &InnerEntry : Worklist) { + const std::string &InnerCCName = InnerEntry.first; + std::set &InnerRegisters = InnerEntry.second; + + if (InnerRegisters.find(CCName) != InnerRegisters.end()) { + AssignedRegsMap[InnerCCName].insert( + AssignedRegsMap[CCName].begin(), + AssignedRegsMap[CCName].end()); + InnerRegisters.erase(CCName); + } + } + + DelegateToMap.erase(CCName); + Redo = true; + } + } while (Redo); + + if (AssignedRegsMap.empty()) + return; + + O << "\n#else\n\n"; + + for (auto &Entry : AssignedRegsMap) { + const std::string &RegName = Entry.first; + std::set &Registers = Entry.second; + + if (RegName.empty()) + continue; + + O << "const MCRegister " << Entry.first << "_ArgRegs[] = { "; + + if (Registers.empty()) { + O << "0"; + } else { + ListSeparator LS; + for (const std::string &Reg : Registers) + O << LS << Reg; + } + + O << " };\n"; + } + + if (AssignedSwiftRegsMap.empty()) + return; + + O << "\n// Registers used by Swift.\n"; + for (auto &Entry : AssignedSwiftRegsMap) { + const std::string &RegName = Entry.first; + std::set &Registers = Entry.second; + + O << "const MCRegister " << RegName << "_Swift_ArgRegs[] = { "; + + ListSeparator LS; + for (const 
std::string &Reg : Registers) + O << LS << Reg; + + O << " };\n"; + } +} + namespace llvm { void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) { From 774674ce9abbae6538d404ae1187d7b8a0fd120d Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Thu, 19 May 2022 15:01:23 -0700 Subject: [PATCH 043/908] [mlir][sparse] Factored out a "FATAL" macro for unrecoverable assertion failure Depends On D126019 Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126022 --- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 156 +++++++++--------- 1 file changed, 79 insertions(+), 77 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 53d26d5af68943..d278e4e18e9d27 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -84,6 +84,17 @@ static inline uint64_t checkedMul(uint64_t lhs, uint64_t rhs) { return lhs * rhs; } +// This macro helps minimize repetition of this idiom, as well as ensuring +// we have some additional output indicating where the error is coming from. +// (Since `fprintf` doesn't provide a stacktrace, this helps make it easier +// to track down whether an error is coming from our code vs somewhere else +// in MLIR.) +#define FATAL(...) \ + { \ + fprintf(stderr, "SparseTensorUtils: " __VA_ARGS__); \ + exit(1); \ + } + // TODO: adjust this so it can be used by `openSparseTensorCOO` too. // That version doesn't have the permutation, and the `dimSizes` are // a pointer/C-array rather than `std::vector`. @@ -262,6 +273,11 @@ struct SparseTensorCOO final { template class SparseTensorEnumeratorBase; +// Helper macro for generating error messages when some +// `SparseTensorStorage` is cast to `SparseTensorStorageBase` +// and then the wrong "partial method specialization" is called. 
+#define FATAL_PIV(NAME) FATAL(" type mismatch for: " #NAME); + /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -325,37 +341,53 @@ class SparseTensorStorageBase { #define DECL_NEWENUMERATOR(VNAME, V) \ virtual void newEnumerator(SparseTensorEnumeratorBase **, uint64_t, \ const uint64_t *) const { \ - fatal("newEnumerator" #VNAME); \ + FATAL_PIV("newEnumerator" #VNAME); \ } FOREVERY_V(DECL_NEWENUMERATOR) #undef DECL_NEWENUMERATOR /// Overhead storage. - virtual void getPointers(std::vector **, uint64_t) { fatal("p64"); } - virtual void getPointers(std::vector **, uint64_t) { fatal("p32"); } - virtual void getPointers(std::vector **, uint64_t) { fatal("p16"); } - virtual void getPointers(std::vector **, uint64_t) { fatal("p8"); } - virtual void getIndices(std::vector **, uint64_t) { fatal("i64"); } - virtual void getIndices(std::vector **, uint64_t) { fatal("i32"); } - virtual void getIndices(std::vector **, uint64_t) { fatal("i16"); } - virtual void getIndices(std::vector **, uint64_t) { fatal("i8"); } + virtual void getPointers(std::vector **, uint64_t) { + FATAL_PIV("p64"); + } + virtual void getPointers(std::vector **, uint64_t) { + FATAL_PIV("p32"); + } + virtual void getPointers(std::vector **, uint64_t) { + FATAL_PIV("p16"); + } + virtual void getPointers(std::vector **, uint64_t) { + FATAL_PIV("p8"); + } + virtual void getIndices(std::vector **, uint64_t) { + FATAL_PIV("i64"); + } + virtual void getIndices(std::vector **, uint64_t) { + FATAL_PIV("i32"); + } + virtual void getIndices(std::vector **, uint64_t) { + FATAL_PIV("i16"); + } + virtual void getIndices(std::vector **, uint64_t) { + FATAL_PIV("i8"); + } /// Primary storage. 
#define DECL_GETVALUES(VNAME, V) \ - virtual void getValues(std::vector **) { fatal("getValues" #VNAME); } + virtual void getValues(std::vector **) { FATAL_PIV("getValues" #VNAME); } FOREVERY_V(DECL_GETVALUES) #undef DECL_GETVALUES /// Element-wise insertion in lexicographic index order. #define DECL_LEXINSERT(VNAME, V) \ - virtual void lexInsert(const uint64_t *, V) { fatal("lexInsert" #VNAME); } + virtual void lexInsert(const uint64_t *, V) { FATAL_PIV("lexInsert" #VNAME); } FOREVERY_V(DECL_LEXINSERT) #undef DECL_LEXINSERT /// Expanded insertion. #define DECL_EXPINSERT(VNAME, V) \ virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t) { \ - fatal("expInsert" #VNAME); \ + FATAL_PIV("expInsert" #VNAME); \ } FOREVERY_V(DECL_EXPINSERT) #undef DECL_EXPINSERT @@ -374,16 +406,13 @@ class SparseTensorStorageBase { SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete; private: - static void fatal(const char *tp) { - fprintf(stderr, "unsupported %s\n", tp); - exit(1); - } - const std::vector dimSizes; std::vector rev; const std::vector dimTypes; }; +#undef FATAL_PIV + // Forward. template class SparseTensorEnumerator; @@ -1122,10 +1151,8 @@ static void readMMEHeader(FILE *file, char *filename, char *line, char symmetry[64]; // Read header line. 
if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, - symmetry) != 5) { - fprintf(stderr, "Corrupt header in %s\n", filename); - exit(1); - } + symmetry) != 5) + FATAL("Corrupt header in %s\n", filename); // Set properties *isPattern = (strcmp(toLower(field), "pattern") == 0); *isSymmetric = (strcmp(toLower(symmetry), "symmetric") == 0); @@ -1134,26 +1161,20 @@ static void readMMEHeader(FILE *file, char *filename, char *line, strcmp(toLower(object), "matrix") || strcmp(toLower(format), "coordinate") || (strcmp(toLower(field), "real") && !(*isPattern)) || - (strcmp(toLower(symmetry), "general") && !(*isSymmetric))) { - fprintf(stderr, "Cannot find a general sparse matrix in %s\n", filename); - exit(1); - } + (strcmp(toLower(symmetry), "general") && !(*isSymmetric))) + FATAL("Cannot find a general sparse matrix in %s\n", filename); // Skip comments. while (true) { - if (!fgets(line, kColWidth, file)) { - fprintf(stderr, "Cannot find data in %s\n", filename); - exit(1); - } + if (!fgets(line, kColWidth, file)) + FATAL("Cannot find data in %s\n", filename); if (line[0] != '%') break; } // Next line contains M N NNZ. idata[0] = 2; // rank if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3, - idata + 1) != 3) { - fprintf(stderr, "Cannot find size in %s\n", filename); - exit(1); - } + idata + 1) != 3) + FATAL("Cannot find size in %s\n", filename); } /// Read the "extended" FROSTT header. Although not part of the documented @@ -1164,25 +1185,18 @@ static void readExtFROSTTHeader(FILE *file, char *filename, char *line, uint64_t *idata) { // Skip comments. while (true) { - if (!fgets(line, kColWidth, file)) { - fprintf(stderr, "Cannot find data in %s\n", filename); - exit(1); - } + if (!fgets(line, kColWidth, file)) + FATAL("Cannot find data in %s\n", filename); if (line[0] != '#') break; } // Next line contains RANK and NNZ. 
- if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) { - fprintf(stderr, "Cannot find metadata in %s\n", filename); - exit(1); - } + if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) + FATAL("Cannot find metadata in %s\n", filename); // Followed by a line with the dimension sizes (one per rank). - for (uint64_t r = 0; r < idata[0]; r++) { - if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) { - fprintf(stderr, "Cannot find dimension size %s\n", filename); - exit(1); - } - } + for (uint64_t r = 0; r < idata[0]; r++) + if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) + FATAL("Cannot find dimension size %s\n", filename); fgets(line, kColWidth, file); // end of line } @@ -1193,12 +1207,10 @@ static SparseTensorCOO *openSparseTensorCOO(char *filename, uint64_t rank, const uint64_t *shape, const uint64_t *perm) { // Open the file. + assert(filename && "Received nullptr for filename"); FILE *file = fopen(filename, "r"); - if (!file) { - assert(filename && "Received nullptr for filename"); - fprintf(stderr, "Cannot find file %s\n", filename); - exit(1); - } + if (!file) + FATAL("Cannot find file %s\n", filename); // Perform some file format dependent set up. char line[kColWidth]; uint64_t idata[512]; @@ -1209,8 +1221,7 @@ static SparseTensorCOO *openSparseTensorCOO(char *filename, uint64_t rank, } else if (strstr(filename, ".tns")) { readExtFROSTTHeader(file, filename, line, idata); } else { - fprintf(stderr, "Unknown format %s\n", filename); - exit(1); + FATAL("Unknown format %s\n", filename); } // Prepare sparse tensor object with per-dimension sizes // and the number of nonzeros as initial capacity. @@ -1224,10 +1235,8 @@ static SparseTensorCOO *openSparseTensorCOO(char *filename, uint64_t rank, // Read all nonzero elements. 
std::vector indices(rank); for (uint64_t k = 0; k < nnz; k++) { - if (!fgets(line, kColWidth, file)) { - fprintf(stderr, "Cannot find next line of data in %s\n", filename); - exit(1); - } + if (!fgets(line, kColWidth, file)) + FATAL("Cannot find next line of data in %s\n", filename); char *linePtr = line; for (uint64_t r = 0; r < rank; r++) { uint64_t idx = strtoul(linePtr, &linePtr, 10); @@ -1290,22 +1299,15 @@ toMLIRSparseTensor(uint64_t rank, uint64_t nse, uint64_t *shape, V *values, // Verify that perm is a permutation of 0..(rank-1). std::vector order(perm, perm + rank); std::sort(order.begin(), order.end()); - for (uint64_t i = 0; i < rank; ++i) { - if (i != order[i]) { - fprintf(stderr, "Not a permutation of 0..%" PRIu64 "\n", rank); - exit(1); - } - } + for (uint64_t i = 0; i < rank; ++i) + if (i != order[i]) + FATAL("Not a permutation of 0..%" PRIu64 "\n", rank); // Verify that the sparsity values are supported. - for (uint64_t i = 0; i < rank; ++i) { + for (uint64_t i = 0; i < rank; ++i) if (sparsity[i] != DimLevelType::kDense && - sparsity[i] != DimLevelType::kCompressed) { - fprintf(stderr, "Unsupported sparsity value %d\n", - static_cast(sparsity[i])); - exit(1); - } - } + sparsity[i] != DimLevelType::kCompressed) + FATAL("Unsupported sparsity value %d\n", static_cast(sparsity[i])); #endif // Convert external format to internal COO. @@ -1539,8 +1541,10 @@ _mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT CASE_SECSAME(OverheadType::kU64, PrimaryType::kC32, uint64_t, complex32); // Unsupported case (add above if needed). - fputs("unsupported combination of types\n", stderr); - exit(1); + // TODO: better pretty-printing of enum values! 
+ FATAL("unsupported combination of types: \n", + static_cast(ptrTp), static_cast(indTp), + static_cast(valTp)); } #undef CASE #undef CASE_SECSAME @@ -1704,10 +1708,8 @@ char *getTensorFilename(index_type id) { char var[80]; sprintf(var, "TENSOR%" PRIu64, id); char *env = getenv(var); - if (!env) { - fprintf(stderr, "Environment variable %s is not set\n", var); - exit(1); - } + if (!env) + FATAL("Environment variable %s is not set\n", var); return env; } From c3856cb7398e07e7861bd1fcd128689945836334 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Thu, 19 May 2022 22:57:59 +0000 Subject: [PATCH 044/908] [bazel][libc] Fix bazel build Differential revision: https://reviews.llvm.org/D126028 --- .../llvm-project-overlay/libc/BUILD.bazel | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index df64defead8fef..c23a6fe53b7fd5 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -766,16 +766,6 @@ no_sanitize_features = [ "-ubsan", ] -cc_library( - name = "string_utils", - hdrs = ["src/string/string_utils.h"], - deps = [ - ":__support_common", - ":__support_cpp_bitset", - ":libc_root", - ], -) - cc_library( name = "string_memory_utils", hdrs = [ @@ -797,6 +787,17 @@ cc_library( ], ) +cc_library( + name = "string_utils", + hdrs = ["src/string/string_utils.h"], + deps = [ + ":__support_common", + ":__support_cpp_bitset", + ":libc_root", + ":string_memory_utils" + ], +) + libc_function( name = "memchr", srcs = ["src/string/memchr.cpp"], From 1dfd8e99f91c52879fd361466ad8765293a872be Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 19 May 2022 10:01:44 +0200 Subject: [PATCH 045/908] [Serialization] Delta encode locations in expansion sloc entries This is a 1.9% reduction in PCH size in my measurements. 
In abbreviated records, VBR6 seems to be slightly better than VBR8 for locations that may be delta-encoded (i.e. not the first) Differential Revision: https://reviews.llvm.org/D125952 --- clang/lib/Serialization/ASTReader.cpp | 15 +++++++-------- clang/lib/Serialization/ASTWriter.cpp | 11 ++++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 65ebb58c22ef8e..055e8550bd2728 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1589,14 +1589,13 @@ bool ASTReader::ReadSLocEntry(int ID) { } case SM_SLOC_EXPANSION_ENTRY: { - SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]); - SourceMgr.createExpansionLoc(SpellingLoc, - ReadSourceLocation(*F, Record[2]), - ReadSourceLocation(*F, Record[3]), - Record[5], - Record[4], - ID, - BaseOffset + Record[0]); + LocSeq::State Seq; + SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq); + SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq); + SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq); + SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd, + Record[5], Record[4], ID, + BaseOffset + Record[0]); break; } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 61cf40fdd871c1..f064a93766c056 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1715,8 +1715,8 @@ static unsigned CreateSLocExpansionAbbrev(llvm::BitstreamWriter &Stream) { Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_EXPANSION_ENTRY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Spelling location - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Start location - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // End location + 
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Start location + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // End location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Is token range Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Token length return Stream.EmitAbbrev(std::move(Abbrev)); @@ -2139,12 +2139,13 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, } else { // The source location entry is a macro expansion. const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion(); - AddSourceLocation(Expansion.getSpellingLoc(), Record); - AddSourceLocation(Expansion.getExpansionLocStart(), Record); + LocSeq::State Seq; + AddSourceLocation(Expansion.getSpellingLoc(), Record, Seq); + AddSourceLocation(Expansion.getExpansionLocStart(), Record, Seq); AddSourceLocation(Expansion.isMacroArgExpansion() ? SourceLocation() : Expansion.getExpansionLocEnd(), - Record); + Record, Seq); Record.push_back(Expansion.isExpansionTokenRange()); // Compute the token length for this macro expansion. From a9a19f5965a5cd6f7672e8697f4f98f34ceed469 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Thu, 19 May 2022 16:00:39 -0700 Subject: [PATCH 046/908] [mlir][sparse] Adding x-macros for OverheadType Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126026 --- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index d278e4e18e9d27..339476d4553864 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -269,6 +269,26 @@ struct SparseTensorCOO final { DO(C64, complex64) \ DO(C32, complex32) +// This x-macro calls its argument on every overhead type which has +// fixed-width. 
It excludes `index_type` because that type is often +// handled specially (e.g., by translating it into the architecture-dependent +// equivalent fixed-width overhead type). +#define FOREVERY_FIXED_O(DO) \ + DO(64, uint64_t) \ + DO(32, uint32_t) \ + DO(16, uint16_t) \ + DO(8, uint8_t) + +// This x-macro calls its argument on every overhead type, including +// `index_type`. Our naming convention uses an empty suffix for +// `index_type`, so the missing first argument when we call `DO` +// gets resolved to the empty token which can then be concatenated +// as intended. (This behavior is standard per C99 6.10.3/4 and +// C++11 N3290 16.3/4; whereas in C++03 16.3/10 it was undefined behavior.) +#define FOREVERY_O(DO) \ + FOREVERY_FIXED_O(DO) \ + DO(, index_type) + // Forward. template class SparseTensorEnumeratorBase; @@ -347,30 +367,18 @@ class SparseTensorStorageBase { #undef DECL_NEWENUMERATOR /// Overhead storage. - virtual void getPointers(std::vector **, uint64_t) { - FATAL_PIV("p64"); - } - virtual void getPointers(std::vector **, uint64_t) { - FATAL_PIV("p32"); - } - virtual void getPointers(std::vector **, uint64_t) { - FATAL_PIV("p16"); - } - virtual void getPointers(std::vector **, uint64_t) { - FATAL_PIV("p8"); - } - virtual void getIndices(std::vector **, uint64_t) { - FATAL_PIV("i64"); - } - virtual void getIndices(std::vector **, uint64_t) { - FATAL_PIV("i32"); - } - virtual void getIndices(std::vector **, uint64_t) { - FATAL_PIV("i16"); +#define DECL_GETPOINTERS(PNAME, P) \ + virtual void getPointers(std::vector

**, uint64_t) { \ + FATAL_PIV("getPointers" #PNAME); \ } - virtual void getIndices(std::vector **, uint64_t) { - FATAL_PIV("i8"); + FOREVERY_FIXED_O(DECL_GETPOINTERS) +#undef DECL_GETPOINTERS +#define DECL_GETINDICES(INAME, I) \ + virtual void getIndices(std::vector **, uint64_t) { \ + FATAL_PIV("getIndices" #INAME); \ } + FOREVERY_FIXED_O(DECL_GETINDICES) +#undef DECL_GETINDICES /// Primary storage. #define DECL_GETVALUES(VNAME, V) \ @@ -1576,18 +1584,16 @@ FOREVERY_V(IMPL_SPARSEVALUES) ref->strides[0] = 1; \ } /// Methods that provide direct access to pointers. -IMPL_GETOVERHEAD(sparsePointers, index_type, getPointers) -IMPL_GETOVERHEAD(sparsePointers64, uint64_t, getPointers) -IMPL_GETOVERHEAD(sparsePointers32, uint32_t, getPointers) -IMPL_GETOVERHEAD(sparsePointers16, uint16_t, getPointers) -IMPL_GETOVERHEAD(sparsePointers8, uint8_t, getPointers) +#define IMPL_SPARSEPOINTERS(PNAME, P) \ + IMPL_GETOVERHEAD(sparsePointers##PNAME, P, getPointers) +FOREVERY_O(IMPL_SPARSEPOINTERS) +#undef IMPL_SPARSEPOINTERS /// Methods that provide direct access to indices. -IMPL_GETOVERHEAD(sparseIndices, index_type, getIndices) -IMPL_GETOVERHEAD(sparseIndices64, uint64_t, getIndices) -IMPL_GETOVERHEAD(sparseIndices32, uint32_t, getIndices) -IMPL_GETOVERHEAD(sparseIndices16, uint16_t, getIndices) -IMPL_GETOVERHEAD(sparseIndices8, uint8_t, getIndices) +#define IMPL_SPARSEINDICES(INAME, I) \ + IMPL_GETOVERHEAD(sparseIndices##INAME, I, getIndices) +FOREVERY_O(IMPL_SPARSEINDICES) +#undef IMPL_SPARSEINDICES #undef IMPL_GETOVERHEAD /// Helper to add value to coordinate scheme, one per value type. From 6e00a34cdb49ba1d4b72ec274e52260da9c52380 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 19 May 2022 16:57:40 -0700 Subject: [PATCH 047/908] [AArch64] Add support for -fzero-call-used-regs Support the "-fzero-call-used-regs" option on AArch64. This involves much less specialized code than the X86 version. Most of the checks can be done with TableGen. 
Reviewed By: nickdesaulniers, MaskRay Differential Revision: https://reviews.llvm.org/D124836 --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- .../Target/AArch64/AArch64FrameLowering.cpp | 132 ++++ .../lib/Target/AArch64/AArch64FrameLowering.h | 4 + .../Target/AArch64/AArch64RegisterInfo.cpp | 64 ++ llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 3 + .../lib/Target/AArch64/AArch64RegisterInfo.td | 9 + llvm/lib/Target/X86/X86RegisterInfo.cpp | 2 +- .../CodeGen/AArch64/zero-call-used-regs.ll | 666 ++++++++++++++++++ llvm/utils/TableGen/RegisterInfoEmitter.cpp | 16 + 10 files changed, 897 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/zero-call-used-regs.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e100711516089f..f19734172793ac 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2986,7 +2986,7 @@ def fenable_matrix : Flag<["-"], "fenable-matrix">, Group, def fzero_call_used_regs_EQ : Joined<["-"], "fzero-call-used-regs=">, Group, Flags<[CC1Option]>, - HelpText<"Clear call-used registers upon function return.">, + HelpText<"Clear call-used registers upon function return (AArch64/x86 only)">, Values<"skip,used-gpr-arg,used-gpr,used-arg,used,all-gpr-arg,all-gpr,all-arg,all">, NormalizedValues<["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used", "AllGPRArg", "AllGPR", "AllArg", "All"]>, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e53d436a8ffbd5..efaedb6b893d67 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5993,7 +5993,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // FIXME: There's no reason for this to be restricted to X86. The backend // code needs to be changed to include the appropriate function calls // automatically. 
- if (!Triple.isX86()) + if (!Triple.isX86() && !Triple.isAArch64()) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 5b7aa76bfd42d5..399a4e19ce2f8f 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -675,6 +675,138 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores( emitCalleeSavedRestores(MBB, MBBI, true); } +static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) { + switch (Reg.id()) { + default: + // The called routine is expected to preserve r19-r28 + // r29 and r30 are used as frame pointer and link register resp. + return 0; + + // GPRs +#define CASE(n) \ + case AArch64::W##n: \ + case AArch64::X##n: \ + return AArch64::X##n + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + CASE(17); + CASE(18); +#undef CASE + + // FPRs +#define CASE(n) \ + case AArch64::B##n: \ + case AArch64::H##n: \ + case AArch64::S##n: \ + case AArch64::D##n: \ + case AArch64::Q##n: \ + return HasSVE ? AArch64::Z##n : AArch64::Q##n + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + CASE(17); + CASE(18); + CASE(19); + CASE(20); + CASE(21); + CASE(22); + CASE(23); + CASE(24); + CASE(25); + CASE(26); + CASE(27); + CASE(28); + CASE(29); + CASE(30); + CASE(31); +#undef CASE + } +} + +void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const { + // Insertion point. + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + + // Fake a debug loc. 
+ DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + const MachineFunction &MF = *MBB.getParent(); + const AArch64Subtarget &STI = MF.getSubtarget(); + const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); + + BitVector GPRsToZero(TRI.getNumRegs()); + BitVector FPRsToZero(TRI.getNumRegs()); + bool HasSVE = STI.hasSVE(); + for (MCRegister Reg : RegsToZero.set_bits()) { + if (TRI.isGeneralPurposeRegister(MF, Reg)) { + // For GPRs, we only care to clear out the 64-bit register. + if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE)) + GPRsToZero.set(XReg); + } else if (AArch64::FPR128RegClass.contains(Reg) || + AArch64::FPR64RegClass.contains(Reg) || + AArch64::FPR32RegClass.contains(Reg) || + AArch64::FPR16RegClass.contains(Reg) || + AArch64::FPR8RegClass.contains(Reg)) { + // For FPRs, + if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE)) + FPRsToZero.set(XReg); + } + } + + const AArch64InstrInfo &TII = *STI.getInstrInfo(); + + // Zero out GPRs. + for (MCRegister Reg : GPRsToZero.set_bits()) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), Reg).addImm(0); + + // Zero out FP/vector registers. + for (MCRegister Reg : FPRsToZero.set_bits()) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVID), Reg).addImm(0); + + if (HasSVE) { + for (MCRegister PReg : + {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4, + AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9, + AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14, + AArch64::P15}) { + if (RegsToZero[PReg]) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg); + } + } +} + // Find a scratch register that we can use at the start of the prologue to // re-align the stack pointer. 
We avoid using callee-save registers since they // may appear to be free when this is called from canUseAsPrologue (during diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 6ed01d6088b250..f59860a24d9ba7 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -153,6 +153,10 @@ class AArch64FrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator MBBI) const; void emitCalleeSavedSVERestores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; + + /// Emit target zero call-used regs. + void emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index ffde7f0ac5ef94..c036217078c610 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -33,6 +33,8 @@ using namespace llvm; +#define GET_CC_REGISTER_LISTS +#include "AArch64GenCallingConv.inc" #define GET_REGINFO_TARGET_DESC #include "AArch64GenRegisterInfo.inc" @@ -418,6 +420,68 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } +bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const { + CallingConv::ID CC = MF.getFunction().getCallingConv(); + const AArch64Subtarget &STI = MF.getSubtarget(); + bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv()); + + auto HasReg = [](ArrayRef RegList, MCRegister Reg) { + return llvm::any_of(RegList, + [Reg](const MCRegister R) { return R == Reg; }); + }; + + switch (CC) { + default: + report_fatal_error("Unsupported calling convention."); + case CallingConv::WebKit_JS: + return HasReg(CC_AArch64_WebKit_JS_ArgRegs, Reg); + case CallingConv::GHC: + return HasReg(CC_AArch64_GHC_ArgRegs, Reg); + case 
CallingConv::C: + case CallingConv::Fast: + case CallingConv::PreserveMost: + case CallingConv::CXX_FAST_TLS: + case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: + if (STI.isTargetWindows() && IsVarArg) + return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg); + if (!STI.isTargetDarwin()) { + switch (CC) { + default: + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg); + case CallingConv::Swift: + case CallingConv::SwiftTail: + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) || + HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg); + } + } + if (!IsVarArg) { + switch (CC) { + default: + return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg); + case CallingConv::Swift: + case CallingConv::SwiftTail: + return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) || + HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg); + } + } + if (STI.isTargetILP32()) + return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg); + return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg); + case CallingConv::Win64: + if (IsVarArg) + HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg); + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg); + case CallingConv::CFGuard_Check: + return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg); + case CallingConv::AArch64_VectorCall: + case CallingConv::AArch64_SVE_VectorCall: + return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg); + } +} + Register AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const AArch64FrameLowering *TFI = getFrameLowering(MF); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index 0c871ac089a71c..ac8c8fe270689c 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -120,6 +120,9 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { bool hasBasePointer(const MachineFunction &MF) const; unsigned getBaseRegister() const; + bool isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const 
override; + // Debug information queries. Register getFrameRegister(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 70daf5abf81db7..401d2b72027864 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1385,3 +1385,12 @@ def svcr_op : Operand { return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr; }]; } + +//===----------------------------------------------------------------------===// +// Register categories. +// + +def GeneralPurposeRegisters : RegisterCategory<[GPR64, GPR32]>; + +def FIXED_REGS : RegisterClass<"AArch64", [i64], 64, (add FP, SP, VG, FFR)>; +def FixedRegisters : RegisterCategory<[CCR, FIXED_REGS]>; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index eb22744294659d..172353e7caa81a 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -656,7 +656,7 @@ bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF, [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) return true; - return false; + return X86GenRegisterInfo::isArgumentRegister(MF, Reg); } bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF, diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll new file mode 100644 index 00000000000000..b9425a6b8018d0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll @@ -0,0 +1,666 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,DEFAULT +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE + +@result = dso_local global i32 0, align 4 + +define dso_local i32 @skip(i32 noundef %a, i32 noundef %b, i32 noundef %c) 
local_unnamed_addr #0 "zero-call-used-regs"="skip" { +; CHECK-LABEL: skip: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w1, w0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr-arg" { +; CHECK-LABEL: used_gpr_arg: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w1, w0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used_gpr(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr" { +; CHECK-LABEL: used_gpr: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w1, w0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: mov x8, #0 +; CHECK-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-arg" { +; CHECK-LABEL: used_arg: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w1, w0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used" { +; CHECK-LABEL: used: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w1, w0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: mov x8, #0 +; CHECK-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or 
+} + +define dso_local i32 @all_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr-arg" { +; CHECK-LABEL: all_gpr_arg: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w1, w0 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x3, #0 +; CHECK-NEXT: mov x4, #0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: mov x5, #0 +; CHECK-NEXT: mov x6, #0 +; CHECK-NEXT: mov x7, #0 +; CHECK-NEXT: mov x8, #0 +; CHECK-NEXT: mov x18, #0 +; CHECK-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @all_gpr(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr" { +; CHECK-LABEL: all_gpr: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul w8, w1, w0 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x3, #0 +; CHECK-NEXT: mov x4, #0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: mov x5, #0 +; CHECK-NEXT: mov x6, #0 +; CHECK-NEXT: mov x7, #0 +; CHECK-NEXT: mov x8, #0 +; CHECK-NEXT: mov x9, #0 +; CHECK-NEXT: mov x10, #0 +; CHECK-NEXT: mov x11, #0 +; CHECK-NEXT: mov x12, #0 +; CHECK-NEXT: mov x13, #0 +; CHECK-NEXT: mov x14, #0 +; CHECK-NEXT: mov x15, #0 +; CHECK-NEXT: mov x16, #0 +; CHECK-NEXT: mov x17, #0 +; CHECK-NEXT: mov x18, #0 +; CHECK-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-arg" { +; DEFAULT-LABEL: all_arg: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q0, #0000000000000000 +; 
DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_arg: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z0, #0000000000000000 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, #0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all" { +; DEFAULT-LABEL: all: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x9, #0 +; DEFAULT-NEXT: mov x10, #0 +; DEFAULT-NEXT: mov x11, #0 +; DEFAULT-NEXT: mov x12, #0 +; DEFAULT-NEXT: mov x13, #0 +; DEFAULT-NEXT: mov x14, #0 +; DEFAULT-NEXT: mov x15, #0 +; DEFAULT-NEXT: mov x16, #0 +; DEFAULT-NEXT: mov x17, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q0, #0000000000000000 +; DEFAULT-NEXT: movi q1, 
#0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: movi q8, #0000000000000000 +; DEFAULT-NEXT: movi q9, #0000000000000000 +; DEFAULT-NEXT: movi q10, #0000000000000000 +; DEFAULT-NEXT: movi q11, #0000000000000000 +; DEFAULT-NEXT: movi q12, #0000000000000000 +; DEFAULT-NEXT: movi q13, #0000000000000000 +; DEFAULT-NEXT: movi q14, #0000000000000000 +; DEFAULT-NEXT: movi q15, #0000000000000000 +; DEFAULT-NEXT: movi q16, #0000000000000000 +; DEFAULT-NEXT: movi q17, #0000000000000000 +; DEFAULT-NEXT: movi q18, #0000000000000000 +; DEFAULT-NEXT: movi q19, #0000000000000000 +; DEFAULT-NEXT: movi q20, #0000000000000000 +; DEFAULT-NEXT: movi q21, #0000000000000000 +; DEFAULT-NEXT: movi q22, #0000000000000000 +; DEFAULT-NEXT: movi q23, #0000000000000000 +; DEFAULT-NEXT: movi q24, #0000000000000000 +; DEFAULT-NEXT: movi q25, #0000000000000000 +; DEFAULT-NEXT: movi q26, #0000000000000000 +; DEFAULT-NEXT: movi q27, #0000000000000000 +; DEFAULT-NEXT: movi q28, #0000000000000000 +; DEFAULT-NEXT: movi q29, #0000000000000000 +; DEFAULT-NEXT: movi q30, #0000000000000000 +; DEFAULT-NEXT: movi q31, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x9, #0 +; SVE-NEXT: mov x10, #0 +; SVE-NEXT: mov x11, #0 +; SVE-NEXT: mov x12, #0 +; SVE-NEXT: mov x13, #0 +; SVE-NEXT: mov x14, #0 +; SVE-NEXT: mov x15, #0 +; SVE-NEXT: mov x16, #0 +; SVE-NEXT: mov x17, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z0, #0000000000000000 +; SVE-NEXT: movi z1, 
#0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, #0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: movi z8, #0000000000000000 +; SVE-NEXT: movi z9, #0000000000000000 +; SVE-NEXT: movi z10, #0000000000000000 +; SVE-NEXT: movi z11, #0000000000000000 +; SVE-NEXT: movi z12, #0000000000000000 +; SVE-NEXT: movi z13, #0000000000000000 +; SVE-NEXT: movi z14, #0000000000000000 +; SVE-NEXT: movi z15, #0000000000000000 +; SVE-NEXT: movi z16, #0000000000000000 +; SVE-NEXT: movi z17, #0000000000000000 +; SVE-NEXT: movi z18, #0000000000000000 +; SVE-NEXT: movi z19, #0000000000000000 +; SVE-NEXT: movi z20, #0000000000000000 +; SVE-NEXT: movi z21, #0000000000000000 +; SVE-NEXT: movi z22, #0000000000000000 +; SVE-NEXT: movi z23, #0000000000000000 +; SVE-NEXT: movi z24, #0000000000000000 +; SVE-NEXT: movi z25, #0000000000000000 +; SVE-NEXT: movi z26, #0000000000000000 +; SVE-NEXT: movi z27, #0000000000000000 +; SVE-NEXT: movi z28, #0000000000000000 +; SVE-NEXT: movi z29, #0000000000000000 +; SVE-NEXT: movi z30, #0000000000000000 +; SVE-NEXT: movi z31, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: pfalse p4.b +; SVE-NEXT: pfalse p5.b +; SVE-NEXT: pfalse p6.b +; SVE-NEXT: pfalse p7.b +; SVE-NEXT: pfalse p8.b +; SVE-NEXT: pfalse p9.b +; SVE-NEXT: pfalse p10.b +; SVE-NEXT: pfalse p11.b +; SVE-NEXT: pfalse p12.b +; SVE-NEXT: pfalse p13.b +; SVE-NEXT: pfalse p14.b +; SVE-NEXT: pfalse p15.b +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local double @skip_float(double noundef %a, float noundef %b) local_unnamed_addr #0 "zero-call-used-regs"="skip" { +; CHECK-LABEL: skip_float: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: fmul d0, d1, d0 +; 
CHECK-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @used_gpr_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr-arg" { +; CHECK-LABEL: used_gpr_arg_float: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @used_gpr_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr" { +; CHECK-LABEL: used_gpr_float: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @used_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-arg" { +; DEFAULT-LABEL: used_arg_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_arg_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @used_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used" { +; DEFAULT-LABEL: used_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: movi z1, 
#0000000000000000 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_gpr_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all-gpr-arg" { +; CHECK-LABEL: all_gpr_arg_float: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: mov x0, #0 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: mov x3, #0 +; CHECK-NEXT: mov x4, #0 +; CHECK-NEXT: mov x5, #0 +; CHECK-NEXT: mov x6, #0 +; CHECK-NEXT: mov x7, #0 +; CHECK-NEXT: mov x8, #0 +; CHECK-NEXT: mov x18, #0 +; CHECK-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_gpr_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all-gpr" { +; CHECK-LABEL: all_gpr_float: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: mov x0, #0 +; CHECK-NEXT: mov x1, #0 +; CHECK-NEXT: mov x2, #0 +; CHECK-NEXT: mov x3, #0 +; CHECK-NEXT: mov x4, #0 +; CHECK-NEXT: mov x5, #0 +; CHECK-NEXT: mov x6, #0 +; CHECK-NEXT: mov x7, #0 +; CHECK-NEXT: mov x8, #0 +; CHECK-NEXT: mov x9, #0 +; CHECK-NEXT: mov x10, #0 +; CHECK-NEXT: mov x11, #0 +; CHECK-NEXT: mov x12, #0 +; CHECK-NEXT: mov x13, #0 +; CHECK-NEXT: mov x14, #0 +; CHECK-NEXT: mov x15, #0 +; CHECK-NEXT: mov x16, #0 +; CHECK-NEXT: mov x17, #0 +; CHECK-NEXT: mov x18, #0 +; CHECK-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all-arg" { +; DEFAULT-LABEL: all_arg_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: mov x0, #0 +; 
DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_arg_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: mov x0, #0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, #0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all" { +; DEFAULT-LABEL: all_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: mov x0, #0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x9, #0 +; DEFAULT-NEXT: mov 
x10, #0 +; DEFAULT-NEXT: mov x11, #0 +; DEFAULT-NEXT: mov x12, #0 +; DEFAULT-NEXT: mov x13, #0 +; DEFAULT-NEXT: mov x14, #0 +; DEFAULT-NEXT: mov x15, #0 +; DEFAULT-NEXT: mov x16, #0 +; DEFAULT-NEXT: mov x17, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: movi q8, #0000000000000000 +; DEFAULT-NEXT: movi q9, #0000000000000000 +; DEFAULT-NEXT: movi q10, #0000000000000000 +; DEFAULT-NEXT: movi q11, #0000000000000000 +; DEFAULT-NEXT: movi q12, #0000000000000000 +; DEFAULT-NEXT: movi q13, #0000000000000000 +; DEFAULT-NEXT: movi q14, #0000000000000000 +; DEFAULT-NEXT: movi q15, #0000000000000000 +; DEFAULT-NEXT: movi q16, #0000000000000000 +; DEFAULT-NEXT: movi q17, #0000000000000000 +; DEFAULT-NEXT: movi q18, #0000000000000000 +; DEFAULT-NEXT: movi q19, #0000000000000000 +; DEFAULT-NEXT: movi q20, #0000000000000000 +; DEFAULT-NEXT: movi q21, #0000000000000000 +; DEFAULT-NEXT: movi q22, #0000000000000000 +; DEFAULT-NEXT: movi q23, #0000000000000000 +; DEFAULT-NEXT: movi q24, #0000000000000000 +; DEFAULT-NEXT: movi q25, #0000000000000000 +; DEFAULT-NEXT: movi q26, #0000000000000000 +; DEFAULT-NEXT: movi q27, #0000000000000000 +; DEFAULT-NEXT: movi q28, #0000000000000000 +; DEFAULT-NEXT: movi q29, #0000000000000000 +; DEFAULT-NEXT: movi q30, #0000000000000000 +; DEFAULT-NEXT: movi q31, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: mov x0, #0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x9, #0 +; 
SVE-NEXT: mov x10, #0 +; SVE-NEXT: mov x11, #0 +; SVE-NEXT: mov x12, #0 +; SVE-NEXT: mov x13, #0 +; SVE-NEXT: mov x14, #0 +; SVE-NEXT: mov x15, #0 +; SVE-NEXT: mov x16, #0 +; SVE-NEXT: mov x17, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, #0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: movi z8, #0000000000000000 +; SVE-NEXT: movi z9, #0000000000000000 +; SVE-NEXT: movi z10, #0000000000000000 +; SVE-NEXT: movi z11, #0000000000000000 +; SVE-NEXT: movi z12, #0000000000000000 +; SVE-NEXT: movi z13, #0000000000000000 +; SVE-NEXT: movi z14, #0000000000000000 +; SVE-NEXT: movi z15, #0000000000000000 +; SVE-NEXT: movi z16, #0000000000000000 +; SVE-NEXT: movi z17, #0000000000000000 +; SVE-NEXT: movi z18, #0000000000000000 +; SVE-NEXT: movi z19, #0000000000000000 +; SVE-NEXT: movi z20, #0000000000000000 +; SVE-NEXT: movi z21, #0000000000000000 +; SVE-NEXT: movi z22, #0000000000000000 +; SVE-NEXT: movi z23, #0000000000000000 +; SVE-NEXT: movi z24, #0000000000000000 +; SVE-NEXT: movi z25, #0000000000000000 +; SVE-NEXT: movi z26, #0000000000000000 +; SVE-NEXT: movi z27, #0000000000000000 +; SVE-NEXT: movi z28, #0000000000000000 +; SVE-NEXT: movi z29, #0000000000000000 +; SVE-NEXT: movi z30, #0000000000000000 +; SVE-NEXT: movi z31, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: pfalse p4.b +; SVE-NEXT: pfalse p5.b +; SVE-NEXT: pfalse p6.b +; SVE-NEXT: pfalse p7.b +; SVE-NEXT: pfalse p8.b +; SVE-NEXT: pfalse p9.b +; SVE-NEXT: pfalse p10.b +; SVE-NEXT: pfalse p11.b +; SVE-NEXT: pfalse p12.b +; SVE-NEXT: pfalse p13.b +; SVE-NEXT: pfalse p14.b +; SVE-NEXT: pfalse p15.b +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double 
%mul +} + +; Don't emit zeroing registers in "main" function. +define dso_local i32 @main() local_unnamed_addr #0 { +; CHECK-LABEL: main: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + +entry: + ret i32 0 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" } diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index 8696cbbc1c9f31..0ba6dee9f7d691 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1188,6 +1188,8 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target, << "MCRegister) const override;\n" << " bool isFixedRegister(const MachineFunction &, " << "MCRegister) const override;\n" + << " bool isArgumentRegister(const MachineFunction &, " + << "MCRegister) const override;\n" << " /// Devirtualized TargetFrameLowering.\n" << " static const " << TargetName << "FrameLowering *getFrameLowering(\n" << " const MachineFunction &MF);\n" @@ -1662,6 +1664,20 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, OS << " false;\n"; OS << "}\n\n"; + OS << "bool " << ClassName << "::\n" + << "isArgumentRegister(const MachineFunction &MF, " + << "MCRegister PhysReg) const {\n" + << " return\n"; + for (const CodeGenRegisterCategory &Category : RegCategories) + if (Category.getName() == "ArgumentRegisters") { + for (const CodeGenRegisterClass *RC : Category.getClasses()) + OS << " " << RC->getQualifiedName() + << "RegClass.contains(PhysReg) ||\n"; + break; + } + OS << " false;\n"; + OS << "}\n\n"; + OS << "ArrayRef " << ClassName << "::getRegMaskNames() const {\n"; if (!CSRSets.empty()) { From 0e02bf635821be8deb2e8094706198838e45e968 Mon Sep 17 00:00:00 2001 From: jacquesguan 
Date: Thu, 19 May 2022 09:25:22 +0000 Subject: [PATCH 048/908] [mlir][Arithmetic] fold overlapping negf. This patch folds negf(negf(x)) to x. Differential Revision: https://reviews.llvm.org/D125955 --- mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp | 3 +++ mlir/test/Dialect/Arithmetic/canonicalize.mlir | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 8f26e66394287e..7db71a636c9d7a 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -549,6 +549,9 @@ void arith::XOrIOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// OpFoldResult arith::NegFOp::fold(ArrayRef operands) { + /// negf(negf(x)) -> x + if (auto op = this->getOperand().getDefiningOp()) + return op.getOperand(); return constFoldUnaryOp(operands, [](const APFloat &a) { return -a; }); } diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index 11e20458bf60ed..1a78bf7ea01615 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -1320,6 +1320,15 @@ func.func @test_negf() -> (f32) { return %0: f32 } +// CHECK-LABEL: @test_negf1( +// CHECK-SAME: %[[arg0:.+]]: +// CHECK: return %[[arg0]] +func.func @test_negf1(%f : f32) -> (f32) { + %0 = arith.negf %f : f32 + %1 = arith.negf %0 : f32 + return %1: f32 +} + // ----- // CHECK-LABEL: @test_remui( From d33c36235df1925b894acd1352ba0d541e97afac Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 19 May 2022 19:22:02 -0700 Subject: [PATCH 049/908] [lit] Fix setup of sanitizer environment Not all options were propagated into tests. 
Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D122869 --- clang/test/Unit/lit.cfg.py | 17 +++++++++++++---- clang/test/lit.cfg.py | 4 ---- cross-project-tests/lit.cfg.py | 5 ----- llvm/test/Unit/lit.cfg.py | 17 +++++++++++++---- llvm/test/lit.cfg.py | 2 +- llvm/utils/lit/lit/llvm/config.py | 11 +++++++++++ mlir/test/Unit/lit.cfg.py | 17 +++++++++++++---- polly/test/Unit/lit.cfg | 17 +++++++++++++---- 8 files changed, 64 insertions(+), 26 deletions(-) diff --git a/clang/test/Unit/lit.cfg.py b/clang/test/Unit/lit.cfg.py index a6e906b3a87aa3..1aa3abc13d7d1b 100644 --- a/clang/test/Unit/lit.cfg.py +++ b/clang/test/Unit/lit.cfg.py @@ -30,10 +30,19 @@ if 'TEMP' in os.environ: config.environment['TEMP'] = os.environ['TEMP'] -# Propagate path to symbolizer for ASan/MSan. -for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']: - if symbolizer in os.environ: - config.environment[symbolizer] = os.environ[symbolizer] +# Propagate sanitizer options. +for var in [ + 'ASAN_SYMBOLIZER_PATH', + 'MSAN_SYMBOLIZER_PATH', + 'TSAN_SYMBOLIZER_PATH', + 'UBSAN_SYMBOLIZER_PATH', + 'ASAN_OPTIONS', + 'MSAN_OPTIONS', + 'TSAN_OPTIONS', + 'UBSAN_OPTIONS', +]: + if var in os.environ: + config.environment[var] = os.environ[var] def find_shlibpath_var(): if platform.system() in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']: diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py index 44f5ed1d635255..bf1fa141fc48b3 100644 --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -49,10 +49,6 @@ config.substitutions.append( ('%target_triple', config.target_triple)) -# Propagate path to symbolizer for ASan/MSan. 
-llvm_config.with_system_environment( - ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']) - config.substitutions.append(('%PATH%', config.environment['PATH'])) diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 0eeec7908fe47e..7bda584dc317fa 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -83,11 +83,6 @@ def get_required_attr(config, attr_name): if 'compiler-rt' in config.llvm_enabled_projects: config.available_features.add('compiler-rt') -if config.llvm_use_sanitizer: - # Propagate path to symbolizer for ASan/MSan. - llvm_config.with_system_environment( - ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']) - # Check which debuggers are available: lldb_path = llvm_config.use_llvm_tool('lldb', search_env='LLDB') diff --git a/llvm/test/Unit/lit.cfg.py b/llvm/test/Unit/lit.cfg.py index 3a5c40dc14da63..81e8dc04acea47 100644 --- a/llvm/test/Unit/lit.cfg.py +++ b/llvm/test/Unit/lit.cfg.py @@ -33,10 +33,19 @@ if 'HOME' in os.environ: config.environment['HOME'] = os.environ['HOME'] -# Propagate path to symbolizer for ASan/MSan. -for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']: - if symbolizer in os.environ: - config.environment[symbolizer] = os.environ[symbolizer] +# Propagate sanitizer options. +for var in [ + 'ASAN_SYMBOLIZER_PATH', + 'MSAN_SYMBOLIZER_PATH', + 'TSAN_SYMBOLIZER_PATH', + 'UBSAN_SYMBOLIZER_PATH', + 'ASAN_OPTIONS', + 'MSAN_OPTIONS', + 'TSAN_OPTIONS', + 'UBSAN_OPTIONS', +]: + if var in os.environ: + config.environment[var] = os.environ[var] # Win32 seeks DLLs along %PATH%. if sys.platform in ['win32', 'cygwin'] and os.path.isdir(config.shlibdir): diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 7227c92479342b..b6fb8aa9fab766 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -40,7 +40,7 @@ # Propagate some variables from the host environment. 
llvm_config.with_system_environment( - ['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP', 'ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']) + ['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP']) # Set up OCAMLPATH to include newly built OCaml libraries. diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index a8e73ce43dc908..b6531612814648 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -56,6 +56,17 @@ def __init__(self, lit_config, config): if not self.use_lit_shell: features.add('shell') + self.with_system_environment([ + 'ASAN_SYMBOLIZER_PATH', + 'MSAN_SYMBOLIZER_PATH', + 'TSAN_SYMBOLIZER_PATH', + 'UBSAN_SYMBOLIZER_PATH', + 'ASAN_OPTIONS', + 'MSAN_OPTIONS', + 'TSAN_OPTIONS', + 'UBSAN_OPTIONS', + ]) + # Running on Darwin OS if platform.system() == 'Darwin': # FIXME: lld uses the first, other projects use the second. diff --git a/mlir/test/Unit/lit.cfg.py b/mlir/test/Unit/lit.cfg.py index d645971074f54d..bf77dcbfb16215 100644 --- a/mlir/test/Unit/lit.cfg.py +++ b/mlir/test/Unit/lit.cfg.py @@ -33,7 +33,16 @@ if 'HOME' in os.environ: config.environment['HOME'] = os.environ['HOME'] -# Propagate path to symbolizer for ASan/MSan. -for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']: - if symbolizer in os.environ: - config.environment[symbolizer] = os.environ[symbolizer] +# Propagate sanitizer options. +for var in [ + 'ASAN_SYMBOLIZER_PATH', + 'MSAN_SYMBOLIZER_PATH', + 'TSAN_SYMBOLIZER_PATH', + 'UBSAN_SYMBOLIZER_PATH', + 'ASAN_OPTIONS', + 'MSAN_OPTIONS', + 'TSAN_OPTIONS', + 'UBSAN_OPTIONS', +]: + if var in os.environ: + config.environment[var] = os.environ[var] diff --git a/polly/test/Unit/lit.cfg b/polly/test/Unit/lit.cfg index eca3aaeb330899..4763c455e6b34c 100644 --- a/polly/test/Unit/lit.cfg +++ b/polly/test/Unit/lit.cfg @@ -32,10 +32,19 @@ if 'TMP' in os.environ: if 'TEMP' in os.environ: config.environment['TEMP'] = os.environ['TEMP'] -# Propagate path to symbolizer for ASan/MSan. 
-for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']: - if symbolizer in os.environ: - config.environment[symbolizer] = os.environ[symbolizer] +# Propagate sanitizer options. +for var in [ + 'ASAN_SYMBOLIZER_PATH', + 'MSAN_SYMBOLIZER_PATH', + 'TSAN_SYMBOLIZER_PATH', + 'UBSAN_SYMBOLIZER_PATH', + 'ASAN_OPTIONS', + 'MSAN_OPTIONS', + 'TSAN_OPTIONS', + 'UBSAN_OPTIONS', +]: + if var in os.environ: + config.environment[var] = os.environ[var] if platform.system() == 'Darwin': shlibpath_var = 'DYLD_LIBRARY_PATH' From cf348f6a2cac654bfc97dda964c319066bb9c961 Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Fri, 20 May 2022 09:56:33 +0800 Subject: [PATCH 050/908] [InstCombine] [NFC] Use a pattern matcher for ExtractElementInst Reviewed By: RKSimon, rampitec Differential Revision: https://reviews.llvm.org/D125857 --- .../Transforms/InstCombine/InstCombineCasts.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 764c8df2313d02..fd76ea6bbcc731 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2350,9 +2350,9 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, /// usually not type-specific like scalar integer or scalar floating-point. static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC) { - // TODO: Create and use a pattern matcher for ExtractElementInst. 
- auto *ExtElt = dyn_cast(BitCast.getOperand(0)); - if (!ExtElt || !ExtElt->hasOneUse()) + Value *VecOp, *Index; + if (!match(BitCast.getOperand(0), + m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index))))) return nullptr; // The bitcast must be to a vectorizable type, otherwise we can't make a new @@ -2361,10 +2361,10 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, if (!VectorType::isValidElementType(DestType)) return nullptr; - auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType()); - auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), - NewVecType, "bc"); - return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); + auto *NewVecType = + VectorType::get(DestType, cast(VecOp->getType())); + auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc"); + return ExtractElementInst::Create(NewBC, Index); } /// Change the type of a bitwise logic operation if we can eliminate a bitcast. From ec563c5a905aeb10ac0eac039c85a3bf1fec1509 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Thu, 19 May 2022 17:46:42 -0700 Subject: [PATCH 051/908] [ASan] Add sleep_before_init flag Also do a little bit of refactoring instead of just copy&paste. 
Differential Revision: https://reviews.llvm.org/D126037 --- compiler-rt/lib/asan/asan_flags.inc | 4 ++++ compiler-rt/lib/asan/asan_rtl.cpp | 14 ++++++-------- .../lib/sanitizer_common/sanitizer_common.cpp | 7 +++++++ .../lib/sanitizer_common/sanitizer_common.h | 1 + .../test/asan/TestCases/sleep_after_init.c | 10 ---------- .../test/asan/TestCases/sleep_before_dying.c | 10 ---------- .../test/asan/TestCases/sleep_for_debugger.c | 17 +++++++++++++++++ 7 files changed, 35 insertions(+), 28 deletions(-) delete mode 100644 compiler-rt/test/asan/TestCases/sleep_after_init.c delete mode 100644 compiler-rt/test/asan/TestCases/sleep_before_dying.c create mode 100644 compiler-rt/test/asan/TestCases/sleep_for_debugger.c diff --git a/compiler-rt/lib/asan/asan_flags.inc b/compiler-rt/lib/asan/asan_flags.inc index 314ed19353586e..682666615dbc41 100644 --- a/compiler-rt/lib/asan/asan_flags.inc +++ b/compiler-rt/lib/asan/asan_flags.inc @@ -83,6 +83,10 @@ ASAN_FLAG( int, sleep_after_init, 0, "Number of seconds to sleep after AddressSanitizer is initialized. " "Useful for debugging purposes (e.g. when one needs to attach gdb).") +ASAN_FLAG( + int, sleep_before_init, 0, + "Number of seconds to sleep before AddressSanitizer starts initializing. " + "Useful for debugging purposes (e.g. 
when one needs to attach gdb).") ASAN_FLAG(bool, check_malloc_usable_size, true, "Allows the users to work around the bug in Nvidia drivers prior to " "295.*.") diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp index 3a5261474b2980..29cf526d9eb0bf 100644 --- a/compiler-rt/lib/asan/asan_rtl.cpp +++ b/compiler-rt/lib/asan/asan_rtl.cpp @@ -51,10 +51,9 @@ static void AsanDie() { } if (common_flags()->print_module_map >= 1) DumpProcessMap(); - if (flags()->sleep_before_dying) { - Report("Sleeping for %d second(s)\n", flags()->sleep_before_dying); - SleepForSeconds(flags()->sleep_before_dying); - } + + WaitForDebugger(flags()->sleep_before_dying, "before dying"); + if (flags()->unmap_shadow_on_exit) { if (kMidMemBeg) { UnmapOrDie((void*)kLowShadowBeg, kMidMemBeg - kLowShadowBeg); @@ -386,6 +385,8 @@ static void AsanInitInternal() { // initialization steps look at flags(). InitializeFlags(); + WaitForDebugger(flags()->sleep_before_init, "before init"); + // Stop performing init at this point if we are being loaded via // dlopen() and the platform supports it. 
if (SANITIZER_SUPPORTS_INIT_FOR_DLOPEN && UNLIKELY(HandleDlopenInit())) { @@ -497,10 +498,7 @@ static void AsanInitInternal() { VReport(1, "AddressSanitizer Init done\n"); - if (flags()->sleep_after_init) { - Report("Sleeping for %d second(s)\n", flags()->sleep_after_init); - SleepForSeconds(flags()->sleep_after_init); - } + WaitForDebugger(flags()->sleep_after_init, "after init"); } // Initialize as requested from some part of ASan runtime library (interceptors, diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common.cpp index e30a93da5b598d..0b7401f925f2f8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.cpp @@ -351,6 +351,13 @@ void SleepForSeconds(unsigned seconds) { } void SleepForMillis(unsigned millis) { internal_usleep((u64)millis * 1000); } +void WaitForDebugger(unsigned seconds, const char *label) { + if (seconds) { + Report("Sleeping for %u second(s) %s\n", seconds, label); + SleepForSeconds(seconds); + } +} + } // namespace __sanitizer using namespace __sanitizer; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 17570d60688560..8648a5b174def7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -294,6 +294,7 @@ void InitTlsSize(); uptr GetTlsSize(); // Other +void WaitForDebugger(unsigned seconds, const char *label); void SleepForSeconds(unsigned seconds); void SleepForMillis(unsigned millis); u64 NanoTime(); diff --git a/compiler-rt/test/asan/TestCases/sleep_after_init.c b/compiler-rt/test/asan/TestCases/sleep_after_init.c deleted file mode 100644 index 147af67c72ea76..00000000000000 --- a/compiler-rt/test/asan/TestCases/sleep_after_init.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang_asan -O2 %s -o %t -// RUN: %env_asan_opts=sleep_after_init=1 not %run %t 2>&1 | 
FileCheck %s - -#include -int main() { - // CHECK: Sleeping for 1 second - char *x = (char*)malloc(10 * sizeof(char)); - free(x); - return x[5]; -} diff --git a/compiler-rt/test/asan/TestCases/sleep_before_dying.c b/compiler-rt/test/asan/TestCases/sleep_before_dying.c deleted file mode 100644 index 8a50218b19dd2e..00000000000000 --- a/compiler-rt/test/asan/TestCases/sleep_before_dying.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang_asan -O2 %s -o %t -// RUN: %env_asan_opts=sleep_before_dying=1 not %run %t 2>&1 | FileCheck %s - -#include -int main() { - char *x = (char*)malloc(10 * sizeof(char)); - free(x); - return x[5]; - // CHECK: Sleeping for 1 second -} diff --git a/compiler-rt/test/asan/TestCases/sleep_for_debugger.c b/compiler-rt/test/asan/TestCases/sleep_for_debugger.c new file mode 100644 index 00000000000000..477708d7abf71b --- /dev/null +++ b/compiler-rt/test/asan/TestCases/sleep_for_debugger.c @@ -0,0 +1,17 @@ +// RUN: %clang_asan -O2 %s -o %t +// RUN: %env_asan_opts=verbosity=1:sleep_before_init=1:sleep_after_init=1:sleep_before_dying=1 not %run %t 2>&1 | FileCheck %s + +#include + +int main() { + char *x = (char*)malloc(10 * sizeof(char)); + free(x); + return x[5]; +} + +// CHECK: Sleeping for 1 second(s) before init +// CHECK: AddressSanitizer Init done +// CHECK: Sleeping for 1 second(s) after init +// CHECK: ERROR: AddressSanitizer +// CHECK: ABORTING +// CHECK: Sleeping for 1 second(s) before dying From 3e5b1e9ccfae859bc0248c2568fbda05585ba823 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 19 May 2022 20:59:26 -0700 Subject: [PATCH 052/908] [RISCV] Add test showing codegen for unaligned loads and stores of scalar types --- .../CodeGen/RISCV/unaligned-load-store.ll | 221 ++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/unaligned-load-store.ll diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll new file mode 100644 index 
00000000000000..3bf0d0a0744e48 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -0,0 +1,221 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=BOTH,RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=BOTH,RV64I %s + +; A collection of cases showing codegen for unaligned loads and stores + +define i8 @load_i8(i8* %p) { +; BOTH-LABEL: load_i8: +; BOTH: # %bb.0: +; BOTH-NEXT: lb a0, 0(a0) +; BOTH-NEXT: ret + %res = load i8, i8* %p, align 1 + ret i8 %res +} + +define i16 @load_i16(i16* %p) { +; BOTH-LABEL: load_i16: +; BOTH: # %bb.0: +; BOTH-NEXT: lb a1, 1(a0) +; BOTH-NEXT: lbu a0, 0(a0) +; BOTH-NEXT: slli a1, a1, 8 +; BOTH-NEXT: or a0, a1, a0 +; BOTH-NEXT: ret + %res = load i16, i16* %p, align 1 + ret i16 %res +} + +define i24 @load_i24(i24* %p) { +; BOTH-LABEL: load_i24: +; BOTH: # %bb.0: +; BOTH-NEXT: lbu a1, 1(a0) +; BOTH-NEXT: lbu a2, 0(a0) +; BOTH-NEXT: lb a0, 2(a0) +; BOTH-NEXT: slli a1, a1, 8 +; BOTH-NEXT: or a1, a1, a2 +; BOTH-NEXT: slli a0, a0, 16 +; BOTH-NEXT: or a0, a1, a0 +; BOTH-NEXT: ret + %res = load i24, i24* %p, align 1 + ret i24 %res +} + +define i32 @load_i32(i32* %p) { +; RV32I-LABEL: load_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a1, 1(a0) +; RV32I-NEXT: lbu a2, 0(a0) +; RV32I-NEXT: lbu a3, 3(a0) +; RV32I-NEXT: lbu a0, 2(a0) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a2, a3, 8 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: load_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a1, 1(a0) +; RV64I-NEXT: lbu a2, 0(a0) +; RV64I-NEXT: lb a3, 3(a0) +; RV64I-NEXT: lbu a0, 2(a0) +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a3, 8 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: or a0, a0, a1 
+; RV64I-NEXT: ret + %res = load i32, i32* %p, align 1 + ret i32 %res +} + +define i64 @load_i64(i64* %p) { +; RV32I-LABEL: load_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a1, 1(a0) +; RV32I-NEXT: lbu a2, 0(a0) +; RV32I-NEXT: lbu a3, 3(a0) +; RV32I-NEXT: lbu a4, 2(a0) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a2, a3, 8 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a2, a2, 16 +; RV32I-NEXT: or a2, a2, a1 +; RV32I-NEXT: lbu a1, 5(a0) +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 7(a0) +; RV32I-NEXT: lbu a0, 6(a0) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a3, a4, 8 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: load_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a1, 1(a0) +; RV64I-NEXT: lbu a2, 0(a0) +; RV64I-NEXT: lbu a3, 3(a0) +; RV64I-NEXT: lbu a4, 2(a0) +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a3, 8 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: lbu a2, 5(a0) +; RV64I-NEXT: lbu a3, 4(a0) +; RV64I-NEXT: lbu a4, 7(a0) +; RV64I-NEXT: lbu a0, 6(a0) +; RV64I-NEXT: slli a2, a2, 8 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slli a3, a4, 8 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %res = load i64, i64* %p, align 1 + ret i64 %res +} + +define void @store_i8(i8* %p, i8 %v) { +; BOTH-LABEL: store_i8: +; BOTH: # %bb.0: +; BOTH-NEXT: sb a1, 0(a0) +; BOTH-NEXT: ret + store i8 %v, i8* %p, align 1 + ret void +} + +define void @store_i16(i16* %p, i16 %v) { +; BOTH-LABEL: store_i16: +; BOTH: # %bb.0: +; BOTH-NEXT: sb a1, 0(a0) +; BOTH-NEXT: srli a1, a1, 8 +; BOTH-NEXT: sb a1, 1(a0) +; BOTH-NEXT: ret + store i16 %v, i16* %p, align 1 + ret void +} + +define void @store_i24(i24* 
%p, i24 %v) { +; BOTH-LABEL: store_i24: +; BOTH: # %bb.0: +; BOTH-NEXT: sb a1, 0(a0) +; BOTH-NEXT: srli a2, a1, 8 +; BOTH-NEXT: sb a2, 1(a0) +; BOTH-NEXT: srli a1, a1, 16 +; BOTH-NEXT: sb a1, 2(a0) +; BOTH-NEXT: ret + store i24 %v, i24* %p, align 1 + ret void +} + +define void @store_i32(i32* %p, i32 %v) { +; BOTH-LABEL: store_i32: +; BOTH: # %bb.0: +; BOTH-NEXT: sb a1, 0(a0) +; BOTH-NEXT: srli a2, a1, 24 +; BOTH-NEXT: sb a2, 3(a0) +; BOTH-NEXT: srli a2, a1, 16 +; BOTH-NEXT: sb a2, 2(a0) +; BOTH-NEXT: srli a1, a1, 8 +; BOTH-NEXT: sb a1, 1(a0) +; BOTH-NEXT: ret + store i32 %v, i32* %p, align 1 + ret void +} + +define void @store_i64(i64* %p, i64 %v) { +; RV32I-LABEL: store_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: sb a2, 4(a0) +; RV32I-NEXT: sb a1, 0(a0) +; RV32I-NEXT: srli a3, a2, 24 +; RV32I-NEXT: sb a3, 7(a0) +; RV32I-NEXT: srli a3, a2, 16 +; RV32I-NEXT: sb a3, 6(a0) +; RV32I-NEXT: srli a2, a2, 8 +; RV32I-NEXT: sb a2, 5(a0) +; RV32I-NEXT: srli a2, a1, 24 +; RV32I-NEXT: sb a2, 3(a0) +; RV32I-NEXT: srli a2, a1, 16 +; RV32I-NEXT: sb a2, 2(a0) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 1(a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: store_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: sb a1, 0(a0) +; RV64I-NEXT: srli a2, a1, 56 +; RV64I-NEXT: sb a2, 7(a0) +; RV64I-NEXT: srli a2, a1, 48 +; RV64I-NEXT: sb a2, 6(a0) +; RV64I-NEXT: srli a2, a1, 40 +; RV64I-NEXT: sb a2, 5(a0) +; RV64I-NEXT: srli a2, a1, 32 +; RV64I-NEXT: sb a2, 4(a0) +; RV64I-NEXT: srli a2, a1, 24 +; RV64I-NEXT: sb a2, 3(a0) +; RV64I-NEXT: srli a2, a1, 16 +; RV64I-NEXT: sb a2, 2(a0) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 1(a0) +; RV64I-NEXT: ret + store i64 %v, i64* %p, align 1 + ret void +} + + From ea4864007c72bfe1523013e28ceae4e391b66afc Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 19 May 2022 13:07:20 -0700 Subject: [PATCH 053/908] [lldb] Fix 'ptsname_r' is only available on macOS 10.13.4 or newer A deployment target less than 10.13.4 causes an error saying that 'ptsname_r' is 
only available on macOS 10.13.4 or newer. The current logic only checks if the symbol is available and doesn't account for the deployment target. This patch fixes that by adding an availability check. Differential revision: https://reviews.llvm.org/D125995 --- lldb/source/Host/common/PseudoTerminal.cpp | 38 +++++++++++++++------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/lldb/source/Host/common/PseudoTerminal.cpp b/lldb/source/Host/common/PseudoTerminal.cpp index dce4c5185c7b64..13c82e8b1ea91e 100644 --- a/lldb/source/Host/common/PseudoTerminal.cpp +++ b/lldb/source/Host/common/PseudoTerminal.cpp @@ -22,6 +22,10 @@ #include "lldb/Host/PosixApi.h" +#if defined(__APPLE__) +#include +#endif + #if defined(__ANDROID__) int posix_openpt(int flags); #endif @@ -99,21 +103,33 @@ llvm::Error PseudoTerminal::OpenSecondary(int oflag) { std::error_code(errno, std::generic_category())); } -std::string PseudoTerminal::GetSecondaryName() const { - assert(m_primary_fd >= 0); -#if HAVE_PTSNAME_R - char buf[PATH_MAX]; - buf[0] = '\0'; - int r = ptsname_r(m_primary_fd, buf, sizeof(buf)); - (void)r; - assert(r == 0); - return buf; -#else +static std::string use_ptsname(int fd) { static std::mutex mutex; std::lock_guard guard(mutex); - const char *r = ptsname(m_primary_fd); + const char *r = ptsname(fd); assert(r != nullptr); return r; +} + +std::string PseudoTerminal::GetSecondaryName() const { + assert(m_primary_fd >= 0); +#if HAVE_PTSNAME_R +#if defined(__APPLE__) + if (__builtin_available(macos 10.13.4, iOS 11.3, tvOS 11.3, watchOS 4.4, *)) { +#endif + char buf[PATH_MAX]; + buf[0] = '\0'; + int r = ptsname_r(m_primary_fd, buf, sizeof(buf)); + (void)r; + assert(r == 0); + return buf; +#if defined(__APPLE__) + } else { + return use_ptsname(m_primary_fd); + } +#endif +#else + return use_ptsname(m_primary_fd); #endif } From b9a30b69d814971de5bd90a134b17b5954a8a2b4 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 19 May 2022 13:38:18 -0700 Subject: [PATCH 
054/908] [lldb] Update test_software_breakpoint_set_and_remove_work for AS On Apple Silicon the platform arch is arm64 rather than AArch64. --- lldb/test/API/tools/lldb-server/TestLldbGdbServer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py index c6ff42a9a522f3..8b3c8f0d914ba3 100644 --- a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py +++ b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py @@ -812,7 +812,7 @@ def breakpoint_set_and_remove_work(self, want_hardware): target_arch = self.getArchitecture() # Set the breakpoint. - if (target_arch == "arm") or (target_arch == "aarch64"): + if target_arch in ["arm", "arm64", "aarch64"]: # TODO: Handle case when setting breakpoint in thumb code BREAKPOINT_KIND = 4 else: From 86803008eabb8be14867ce2d41ef22e659a1a70c Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Fri, 20 May 2022 05:55:36 +0100 Subject: [PATCH 055/908] [MIR] Provide location of extra instruction operand when diagnosing it. Also resolves misspelled FileCheck directives caught with D125604. 
Reviewed By: foad Differential Revision: https://reviews.llvm.org/D125965 --- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 2 +- llvm/test/CodeGen/MIR/AMDGPU/extra-imm-operand.mir | 9 +++++---- llvm/test/CodeGen/MIR/AMDGPU/extra-reg-operand.mir | 9 +++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 52ba26098600aa..aed64d08dd92ba 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1100,7 +1100,7 @@ bool MIParser::parse(MachineInstr *&MI) { if (!IsImplicitOp) { if (!MCID.isVariadic() && NumExplicitOps >= MCID.getNumOperands() && !Operand.Operand.isValidExcessOperand()) - return error("too many operands for instruction"); + return error(Operand.Begin, "too many operands for instruction"); ++NumExplicitOps; } diff --git a/llvm/test/CodeGen/MIR/AMDGPU/extra-imm-operand.mir b/llvm/test/CodeGen/MIR/AMDGPU/extra-imm-operand.mir index db484f0798fcf2..65cb32cdd9aebf 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/extra-imm-operand.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/extra-imm-operand.mir @@ -1,12 +1,13 @@ -# RUN: not llc -march=amdgcn -run-pass=none -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not llc -march=amdgcn -run-pass=none -o /dev/null %s 2>&1 | \ +# RUN: FileCheck --strict-whitespace %s --- name: extra_imm_operand body: | bb.0: - ; CHECK: [[@LINE+3]]:18: too many operands for instruction - ; CHECK-NEXT: S_ENDPGM 0, 0 - ; CHECK_NEXT: ^ + ; CHECK: [[@LINE+3]]:17: too many operands for instruction + ; CHECK-NEXT: {{^}} S_ENDPGM 0, 0 + ; CHECK-NEXT: {{^}} ^ S_ENDPGM 0, 0 ... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/extra-reg-operand.mir b/llvm/test/CodeGen/MIR/AMDGPU/extra-reg-operand.mir index 03a6777167fa63..d6b2522542a82d 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/extra-reg-operand.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/extra-reg-operand.mir @@ -1,12 +1,13 @@ -# RUN: not llc -march=amdgcn -run-pass=none -o /dev/null %s 2>&1 | FileCheck %s +# RUN: not llc -march=amdgcn -run-pass=none -o /dev/null %s 2>&1 | \ +# RUN: FileCheck --strict-whitespace %s --- name: extra_reg_operand body: | bb.0: - ; CHECK: [[@LINE+3]]:29: too many operands for instruction - ; S_ENDPGM 0, undef $vgpr0 - ; CHECK_NEXT: ^ + ; CHECK: [[@LINE+3]]:17: too many operands for instruction + ; CHECK-NEXT: {{^}} S_ENDPGM 0, undef $vgpr0 + ; CHECK-NEXT: {{^}} ^ S_ENDPGM 0, undef $vgpr0 ... From da201aa4242ebb06209296962a28f89297d0c9a1 Mon Sep 17 00:00:00 2001 From: eopXD Date: Thu, 19 May 2022 23:23:34 -0700 Subject: [PATCH 056/908] [RISCV][NFC] Remove `*=` operator for LMULType LMULType always manipulate on Log2LMUL, let all manipulations go through LMULType::MulLog2LMUL. 
Reviewed By: khchen Differential Revision: https://reviews.llvm.org/D126042 --- clang/include/clang/Support/RISCVVIntrinsicUtils.h | 1 - clang/lib/Support/RISCVVIntrinsicUtils.cpp | 12 +++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h index f1f06ba60786f9..2de7f855559f0e 100644 --- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h +++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h @@ -156,7 +156,6 @@ struct LMULType { std::string str() const; llvm::Optional getScale(unsigned ElementBitwidth) const; void MulLog2LMUL(int Log2LMUL); - LMULType &operator*=(uint32_t RHS); }; class RVVType; diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp index 9f5c1ffe202218..43189f6cf9f50a 100644 --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -77,12 +77,6 @@ VScaleVal LMULType::getScale(unsigned ElementBitwidth) const { void LMULType::MulLog2LMUL(int log2LMUL) { Log2LMUL += log2LMUL; } -LMULType &LMULType::operator*=(uint32_t RHS) { - assert(isPowerOf2_32(RHS)); - this->Log2LMUL = this->Log2LMUL + Log2_32(RHS); - return *this; -} - RVVType::RVVType(BasicType BT, int Log2LMUL, const PrototypeDescriptor &prototype) : BT(BT), LMUL(LMULType(Log2LMUL)) { @@ -628,17 +622,17 @@ void RVVType::applyModifier(const PrototypeDescriptor &Transformer) { switch (static_cast(Transformer.VTM)) { case VectorTypeModifier::Widening2XVector: ElementBitwidth *= 2; - LMUL *= 2; + LMUL.MulLog2LMUL(1); Scale = LMUL.getScale(ElementBitwidth); break; case VectorTypeModifier::Widening4XVector: ElementBitwidth *= 4; - LMUL *= 4; + LMUL.MulLog2LMUL(2); Scale = LMUL.getScale(ElementBitwidth); break; case VectorTypeModifier::Widening8XVector: ElementBitwidth *= 8; - LMUL *= 8; + LMUL.MulLog2LMUL(3); Scale = LMUL.getScale(ElementBitwidth); break; case 
VectorTypeModifier::MaskVector: From 8fc4fcecb8cedffcbf5fdd564c5a3427274db4e2 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Wed, 27 Apr 2022 07:20:31 +0000 Subject: [PATCH 057/908] [RISCV] Add VL patterns for vector widening floating-point fused multiply-add instructions. This patch adds VL patterns for vector widening floating-point fused multiply-add instructions to support fixed length vector type. Differential Revision: https://reviews.llvm.org/D124505 --- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 162 ++ .../RISCV/rvv/fixed-vectors-vfwmacc.ll | 1312 +++++++++++++++++ 2 files changed, 1474 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 47c3375b6470bc..15ad6424b19dc2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -900,6 +900,162 @@ multiclass VPatNarrowShiftSplat_WX_WI { } } +multiclass VPatWidenFPMulAccVL_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(riscv_fma_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask), + VLOpFrag), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_fma_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag)), + (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask), + VLOpFrag), + 
(!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +multiclass VPatWidenFPNegMulAccVL_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(riscv_fma_vl + (riscv_fneg_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag)), + (wti.Mask true_mask), VLOpFrag), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask), + VLOpFrag), + (vti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_fma_vl + (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag), + (riscv_fneg_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag)), + (vti.Mask true_mask), VLOpFrag), + (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask), + VLOpFrag), + (vti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_fma_vl + (riscv_fneg_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (vti.Mask true_mask), VLOpFrag), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask), + VLOpFrag), + (vti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, 
TAIL_AGNOSTIC)>; + } +} + +multiclass VPatWidenFPMulSacVL_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(riscv_fma_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag)), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + (riscv_fneg_vl wti.RegClass:$rd, (vti.Mask true_mask), + VLOpFrag), + (vti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_fma_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + (riscv_fneg_vl wti.RegClass:$rd, (vti.Mask true_mask), + VLOpFrag), + (vti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +multiclass VPatWidenFPNegMulSacVL_VV_VF { + foreach vtiToWti = AllWidenableFloatVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + def : Pat<(riscv_fma_vl + (riscv_fneg_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag)), + (vti.Mask true_mask), VLOpFrag), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + wti.RegClass:$rd, (wti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_VV_"#vti.LMul.MX) + wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_fma_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (riscv_fneg_vl + 
(wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag)), + (wti.Mask true_mask), VLOpFrag), + wti.RegClass:$rd, (wti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_fma_vl + (riscv_fneg_vl + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (vti.Mask true_mask), VLOpFrag), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), + wti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag), + (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -1408,6 +1564,12 @@ foreach vti = AllFloatVectors in { (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } +// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +defm : VPatWidenFPMulAccVL_VV_VF<"PseudoVFWMACC">; +defm : VPatWidenFPNegMulAccVL_VV_VF<"PseudoVFWNMACC">; +defm : VPatWidenFPMulSacVL_VV_VF<"PseudoVFWMSAC">; +defm : VPatWidenFPNegMulSacVL_VV_VF<"PseudoVFWNMSAC">; + // 14.11. 
Vector Floating-Point MIN/MAX Instructions defm : VPatBinaryFPVL_VV_VF; defm : VPatBinaryFPVL_VV_VF; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll new file mode 100644 index 00000000000000..ce2415220a149e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll @@ -0,0 +1,1312 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s + +declare <1 x float> @llvm.fma.v1f32(<1 x float>, <1 x float>, <1 x float>) + +define <1 x float> @vfwmacc_vv_nxv1f32(<1 x float> %va, <1 x half> %vb, <1 x half> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %vc to <1 x float> + %vf = call <1 x float> @llvm.fma.v1f32(<1 x float> %vd, <1 x float> %ve, <1 x float> %va) + ret <1 x float> %vf +} + +define <1 x float> @vfwmacc_vf_nxv1f32(<1 x float> %va, <1 x half> %vb, half %c) { +; CHECK-LABEL: vfwmacc_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x half> poison, half %c, i32 0 + %splat = shufflevector <1 x half> %head, <1 x half> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %splat to <1 x float> + %vf = call <1 x float> @llvm.fma.v1f32(<1 x float> %vd, <1 x float> %ve, <1 x float> %va) + ret <1 x float> %vf +} + +define <1 x float> @vfwnmacc_vv_nxv1f32(<1 x float> %va, <1 x half> %vb, 
<1 x half> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %vc to <1 x float> + %vf = fneg <1 x float> %va + %vg = fneg <1 x float> %vd + %vh = call <1 x float> @llvm.fma.v1f32(<1 x float> %vg, <1 x float> %ve, <1 x float> %vf) + ret <1 x float> %vh +} + +define <1 x float> @vfwnmacc_vf_nxv1f32(<1 x float> %va, <1 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x half> poison, half %c, i32 0 + %splat = shufflevector <1 x half> %head, <1 x half> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %splat to <1 x float> + %vf = fneg <1 x float> %va + %vg = fneg <1 x float> %vd + %vh = call <1 x float> @llvm.fma.v1f32(<1 x float> %vg, <1 x float> %ve, <1 x float> %vf) + ret <1 x float> %vh +} + +define <1 x float> @vfwnmacc_fv_nxv1f32(<1 x float> %va, <1 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x half> poison, half %c, i32 0 + %splat = shufflevector <1 x half> %head, <1 x half> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %splat to <1 x float> + %vf = fneg <1 x float> %va + %vg = fneg <1 x float> %ve + %vh = call <1 x float> @llvm.fma.v1f32(<1 x float> %vd, <1 x float> %vg, <1 x float> %vf) + ret <1 x float> %vh +} + +define <1 x float> @vfwmsac_vv_nxv1f32(<1 x float> %va, <1 x half> %vb, <1 x half> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, 
v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %vc to <1 x float> + %vf = fneg <1 x float> %va + %vg = call <1 x float> @llvm.fma.v1f32(<1 x float> %vd, <1 x float> %ve, <1 x float> %vf) + ret <1 x float> %vg +} + +define <1 x float> @vfwmsac_vf_nxv1f32(<1 x float> %va, <1 x half> %vb, half %c) { +; CHECK-LABEL: vfwmsac_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x half> poison, half %c, i32 0 + %splat = shufflevector <1 x half> %head, <1 x half> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %splat to <1 x float> + %vf = fneg <1 x float> %va + %vg = call <1 x float> @llvm.fma.v1f32(<1 x float> %vd, <1 x float> %ve, <1 x float> %vf) + ret <1 x float> %vg +} + +define <1 x float> @vfwnmsac_vv_nxv1f32(<1 x float> %va, <1 x half> %vb, <1 x half> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %vc to <1 x float> + %vf = fneg <1 x float> %vd + %vg = call <1 x float> @llvm.fma.v1f32(<1 x float> %vf, <1 x float> %ve, <1 x float> %va) + ret <1 x float> %vg +} + +define <1 x float> @vfwnmsac_vf_nxv1f32(<1 x float> %va, <1 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x half> poison, half %c, i32 0 + %splat = shufflevector <1 x half> %head, <1 x half> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %splat to <1 x float> + %vf = fneg <1 x float> %vd + %vg = call <1 x float> @llvm.fma.v1f32(<1 x float> %vf, <1 x float> %ve, <1 x float> %va) + ret 
<1 x float> %vg +} + +define <1 x float> @vfwnmsac_fv_nxv1f32(<1 x float> %va, <1 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_fv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x half> poison, half %c, i32 0 + %splat = shufflevector <1 x half> %head, <1 x half> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x half> %vb to <1 x float> + %ve = fpext <1 x half> %splat to <1 x float> + %vf = fneg <1 x float> %ve + %vg = call <1 x float> @llvm.fma.v1f32(<1 x float> %vd, <1 x float> %vf, <1 x float> %va) + ret <1 x float> %vg +} + +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) + +define <2 x float> @vfwmacc_vv_nxv2f32(<2 x float> %va, <2 x half> %vb, <2 x half> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %vc to <2 x float> + %vf = call <2 x float> @llvm.fma.v2f32(<2 x float> %vd, <2 x float> %ve, <2 x float> %va) + ret <2 x float> %vf +} + +define <2 x float> @vfwmacc_vf_nxv2f32(<2 x float> %va, <2 x half> %vb, half %c) { +; CHECK-LABEL: vfwmacc_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x half> poison, half %c, i32 0 + %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %splat to <2 x float> + %vf = call <2 x float> @llvm.fma.v2f32(<2 x float> %vd, <2 x float> %ve, <2 x float> %va) + ret <2 x float> %vf +} + +define <2 x float> @vfwnmacc_vv_nxv2f32(<2 x float> %va, <2 x half> %vb, <2 x half> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; 
CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %vc to <2 x float> + %vf = fneg <2 x float> %va + %vg = fneg <2 x float> %vd + %vh = call <2 x float> @llvm.fma.v2f32(<2 x float> %vg, <2 x float> %ve, <2 x float> %vf) + ret <2 x float> %vh +} + +define <2 x float> @vfwnmacc_vf_nxv2f32(<2 x float> %va, <2 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x half> poison, half %c, i32 0 + %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %splat to <2 x float> + %vf = fneg <2 x float> %va + %vg = fneg <2 x float> %vd + %vh = call <2 x float> @llvm.fma.v2f32(<2 x float> %vg, <2 x float> %ve, <2 x float> %vf) + ret <2 x float> %vh +} + +define <2 x float> @vfwnmacc_fv_nxv2f32(<2 x float> %va, <2 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x half> poison, half %c, i32 0 + %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %splat to <2 x float> + %vf = fneg <2 x float> %va + %vg = fneg <2 x float> %ve + %vh = call <2 x float> @llvm.fma.v2f32(<2 x float> %vd, <2 x float> %vg, <2 x float> %vf) + ret <2 x float> %vh +} + +define <2 x float> @vfwmsac_vv_nxv2f32(<2 x float> %va, <2 x half> %vb, <2 x half> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %vc to <2 x float> + %vf = fneg 
<2 x float> %va + %vg = call <2 x float> @llvm.fma.v2f32(<2 x float> %vd, <2 x float> %ve, <2 x float> %vf) + ret <2 x float> %vg +} + +define <2 x float> @vfwmsac_vf_nxv2f32(<2 x float> %va, <2 x half> %vb, half %c) { +; CHECK-LABEL: vfwmsac_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x half> poison, half %c, i32 0 + %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %splat to <2 x float> + %vf = fneg <2 x float> %va + %vg = call <2 x float> @llvm.fma.v2f32(<2 x float> %vd, <2 x float> %ve, <2 x float> %vf) + ret <2 x float> %vg +} + +define <2 x float> @vfwnmsac_vv_nxv2f32(<2 x float> %va, <2 x half> %vb, <2 x half> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %vc to <2 x float> + %vf = fneg <2 x float> %vd + %vg = call <2 x float> @llvm.fma.v2f32(<2 x float> %vf, <2 x float> %ve, <2 x float> %va) + ret <2 x float> %vg +} + +define <2 x float> @vfwnmsac_vf_nxv2f32(<2 x float> %va, <2 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x half> poison, half %c, i32 0 + %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %splat to <2 x float> + %vf = fneg <2 x float> %vd + %vg = call <2 x float> @llvm.fma.v2f32(<2 x float> %vf, <2 x float> %ve, <2 x float> %va) + ret <2 x float> %vg +} + +define <2 x float> @vfwnmsac_fv_nxv2f32(<2 x float> %va, <2 x half> %vb, half %c) { +; CHECK-LABEL: 
vfwnmsac_fv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x half> poison, half %c, i32 0 + %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x half> %vb to <2 x float> + %ve = fpext <2 x half> %splat to <2 x float> + %vf = fneg <2 x float> %ve + %vg = call <2 x float> @llvm.fma.v2f32(<2 x float> %vd, <2 x float> %vf, <2 x float> %va) + ret <2 x float> %vg +} + + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) + +define <4 x float> @vfwmacc_vv_nxv4f32(<4 x float> %va, <4 x half> %vb, <4 x half> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %vc to <4 x float> + %vf = call <4 x float> @llvm.fma.v4f32(<4 x float> %vd, <4 x float> %ve, <4 x float> %va) + ret <4 x float> %vf +} + +define <4 x float> @vfwmacc_vf_nxv4f32(<4 x float> %va, <4 x half> %vb, half %c) { +; CHECK-LABEL: vfwmacc_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x half> poison, half %c, i32 0 + %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %splat to <4 x float> + %vf = call <4 x float> @llvm.fma.v4f32(<4 x float> %vd, <4 x float> %ve, <4 x float> %va) + ret <4 x float> %vf +} + +define <4 x float> @vfwnmacc_vv_nxv4f32(<4 x float> %va, <4 x half> %vb, <4 x half> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> 
%vc to <4 x float> + %vf = fneg <4 x float> %va + %vg = fneg <4 x float> %vd + %vh = call <4 x float> @llvm.fma.v4f32(<4 x float> %vg, <4 x float> %ve, <4 x float> %vf) + ret <4 x float> %vh +} + +define <4 x float> @vfwnmacc_vf_nxv4f32(<4 x float> %va, <4 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x half> poison, half %c, i32 0 + %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %splat to <4 x float> + %vf = fneg <4 x float> %va + %vg = fneg <4 x float> %vd + %vh = call <4 x float> @llvm.fma.v4f32(<4 x float> %vg, <4 x float> %ve, <4 x float> %vf) + ret <4 x float> %vh +} + +define <4 x float> @vfwnmacc_fv_nxv4f32(<4 x float> %va, <4 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x half> poison, half %c, i32 0 + %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %splat to <4 x float> + %vf = fneg <4 x float> %va + %vg = fneg <4 x float> %ve + %vh = call <4 x float> @llvm.fma.v4f32(<4 x float> %vd, <4 x float> %vg, <4 x float> %vf) + ret <4 x float> %vh +} + +define <4 x float> @vfwmsac_vv_nxv4f32(<4 x float> %va, <4 x half> %vb, <4 x half> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %vc to <4 x float> + %vf = fneg <4 x float> %va + %vg = call <4 x float> @llvm.fma.v4f32(<4 x float> %vd, <4 x float> %ve, <4 x float> %vf) + ret <4 x 
float> %vg +} + +define <4 x float> @vfwmsac_vf_nxv4f32(<4 x float> %va, <4 x half> %vb, half %c) { +; CHECK-LABEL: vfwmsac_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x half> poison, half %c, i32 0 + %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %splat to <4 x float> + %vf = fneg <4 x float> %va + %vg = call <4 x float> @llvm.fma.v4f32(<4 x float> %vd, <4 x float> %ve, <4 x float> %vf) + ret <4 x float> %vg +} + +define <4 x float> @vfwnmsac_vv_nxv4f32(<4 x float> %va, <4 x half> %vb, <4 x half> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %vc to <4 x float> + %vf = fneg <4 x float> %vd + %vg = call <4 x float> @llvm.fma.v4f32(<4 x float> %vf, <4 x float> %ve, <4 x float> %va) + ret <4 x float> %vg +} + +define <4 x float> @vfwnmsac_vf_nxv4f32(<4 x float> %va, <4 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x half> poison, half %c, i32 0 + %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %splat to <4 x float> + %vf = fneg <4 x float> %vd + %vg = call <4 x float> @llvm.fma.v4f32(<4 x float> %vf, <4 x float> %ve, <4 x float> %va) + ret <4 x float> %vg +} + +define <4 x float> @vfwnmsac_fv_nxv4f32(<4 x float> %va, <4 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_fv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, 
v9 +; CHECK-NEXT: ret + %head = insertelement <4 x half> poison, half %c, i32 0 + %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x half> %vb to <4 x float> + %ve = fpext <4 x half> %splat to <4 x float> + %vf = fneg <4 x float> %ve + %vg = call <4 x float> @llvm.fma.v4f32(<4 x float> %vd, <4 x float> %vf, <4 x float> %va) + ret <4 x float> %vg +} + +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) + +define <8 x float> @vfwmacc_vv_nxv8f32(<8 x float> %va, <8 x half> %vb, <8 x half> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %vc to <8 x float> + %vf = call <8 x float> @llvm.fma.v8f32(<8 x float> %vd, <8 x float> %ve, <8 x float> %va) + ret <8 x float> %vf +} + +define <8 x float> @vfwmacc_vf_nxv8f32(<8 x float> %va, <8 x half> %vb, half %c) { +; CHECK-LABEL: vfwmacc_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x half> poison, half %c, i32 0 + %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %splat to <8 x float> + %vf = call <8 x float> @llvm.fma.v8f32(<8 x float> %vd, <8 x float> %ve, <8 x float> %va) + ret <8 x float> %vf +} + +define <8 x float> @vfwnmacc_vv_nxv8f32(<8 x float> %va, <8 x half> %vb, <8 x half> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %vc to <8 x float> + %vf = fneg <8 x float> %va + %vg = fneg <8 x float> %vd + %vh = call <8 x float> @llvm.fma.v8f32(<8 x 
float> %vg, <8 x float> %ve, <8 x float> %vf) + ret <8 x float> %vh +} + +define <8 x float> @vfwnmacc_vf_nxv8f32(<8 x float> %va, <8 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x half> poison, half %c, i32 0 + %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %splat to <8 x float> + %vf = fneg <8 x float> %va + %vg = fneg <8 x float> %vd + %vh = call <8 x float> @llvm.fma.v8f32(<8 x float> %vg, <8 x float> %ve, <8 x float> %vf) + ret <8 x float> %vh +} + +define <8 x float> @vfwnmacc_fv_nxv8f32(<8 x float> %va, <8 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x half> poison, half %c, i32 0 + %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %splat to <8 x float> + %vf = fneg <8 x float> %va + %vg = fneg <8 x float> %ve + %vh = call <8 x float> @llvm.fma.v8f32(<8 x float> %vd, <8 x float> %vg, <8 x float> %vf) + ret <8 x float> %vh +} + +define <8 x float> @vfwmsac_vv_nxv8f32(<8 x float> %va, <8 x half> %vb, <8 x half> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %vc to <8 x float> + %vf = fneg <8 x float> %va + %vg = call <8 x float> @llvm.fma.v8f32(<8 x float> %vd, <8 x float> %ve, <8 x float> %vf) + ret <8 x float> %vg +} + +define <8 x float> @vfwmsac_vf_nxv8f32(<8 x float> %va, <8 x half> %vb, half %c) { +; CHECK-LABEL: 
vfwmsac_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x half> poison, half %c, i32 0 + %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %splat to <8 x float> + %vf = fneg <8 x float> %va + %vg = call <8 x float> @llvm.fma.v8f32(<8 x float> %vd, <8 x float> %ve, <8 x float> %vf) + ret <8 x float> %vg +} + +define <8 x float> @vfwnmsac_vv_nxv8f32(<8 x float> %va, <8 x half> %vb, <8 x half> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %vc to <8 x float> + %vf = fneg <8 x float> %vd + %vg = call <8 x float> @llvm.fma.v8f32(<8 x float> %vf, <8 x float> %ve, <8 x float> %va) + ret <8 x float> %vg +} + +define <8 x float> @vfwnmsac_vf_nxv8f32(<8 x float> %va, <8 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x half> poison, half %c, i32 0 + %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %splat to <8 x float> + %vf = fneg <8 x float> %vd + %vg = call <8 x float> @llvm.fma.v8f32(<8 x float> %vf, <8 x float> %ve, <8 x float> %va) + ret <8 x float> %vg +} + +define <8 x float> @vfwnmsac_fv_nxv8f32(<8 x float> %va, <8 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_fv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x half> poison, half %c, i32 0 + %splat = shufflevector <8 x half> 
%head, <8 x half> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x half> %vb to <8 x float> + %ve = fpext <8 x half> %splat to <8 x float> + %vf = fneg <8 x float> %ve + %vg = call <8 x float> @llvm.fma.v8f32(<8 x float> %vd, <8 x float> %vf, <8 x float> %va) + ret <8 x float> %vg +} + +declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) + +define <16 x float> @vfwmacc_vv_nxv16f32(<16 x float> %va, <16 x half> %vb, <16 x half> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %vc to <16 x float> + %vf = call <16 x float> @llvm.fma.v16f32(<16 x float> %vd, <16 x float> %ve, <16 x float> %va) + ret <16 x float> %vf +} + +define <16 x float> @vfwmacc_vf_nxv16f32(<16 x float> %va, <16 x half> %vb, half %c) { +; CHECK-LABEL: vfwmacc_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x half> poison, half %c, i32 0 + %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %splat to <16 x float> + %vf = call <16 x float> @llvm.fma.v16f32(<16 x float> %vd, <16 x float> %ve, <16 x float> %va) + ret <16 x float> %vf +} + +define <16 x float> @vfwnmacc_vv_nxv16f32(<16 x float> %va, <16 x half> %vb, <16 x half> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %vc to <16 x float> + %vf = fneg <16 x float> %va + %vg = fneg <16 x float> %vd + %vh = call <16 x float> @llvm.fma.v16f32(<16 x float> %vg, <16 x float> %ve, <16 x float> %vf) + ret <16 x 
float> %vh +} + +define <16 x float> @vfwnmacc_vf_nxv16f32(<16 x float> %va, <16 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x half> poison, half %c, i32 0 + %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %splat to <16 x float> + %vf = fneg <16 x float> %va + %vg = fneg <16 x float> %vd + %vh = call <16 x float> @llvm.fma.v16f32(<16 x float> %vg, <16 x float> %ve, <16 x float> %vf) + ret <16 x float> %vh +} + +define <16 x float> @vfwnmacc_fv_nxv16f32(<16 x float> %va, <16 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x half> poison, half %c, i32 0 + %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %splat to <16 x float> + %vf = fneg <16 x float> %va + %vg = fneg <16 x float> %ve + %vh = call <16 x float> @llvm.fma.v16f32(<16 x float> %vd, <16 x float> %vg, <16 x float> %vf) + ret <16 x float> %vh +} + +define <16 x float> @vfwmsac_vv_nxv16f32(<16 x float> %va, <16 x half> %vb, <16 x half> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %vc to <16 x float> + %vf = fneg <16 x float> %va + %vg = call <16 x float> @llvm.fma.v16f32(<16 x float> %vd, <16 x float> %ve, <16 x float> %vf) + ret <16 x float> %vg +} + +define <16 x float> @vfwmsac_vf_nxv16f32(<16 x float> %va, <16 x half> %vb, half %c) { +; CHECK-LABEL: 
vfwmsac_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x half> poison, half %c, i32 0 + %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %splat to <16 x float> + %vf = fneg <16 x float> %va + %vg = call <16 x float> @llvm.fma.v16f32(<16 x float> %vd, <16 x float> %ve, <16 x float> %vf) + ret <16 x float> %vg +} + +define <16 x float> @vfwnmsac_vv_nxv16f32(<16 x float> %va, <16 x half> %vb, <16 x half> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %vc to <16 x float> + %vf = fneg <16 x float> %vd + %vg = call <16 x float> @llvm.fma.v16f32(<16 x float> %vf, <16 x float> %ve, <16 x float> %va) + ret <16 x float> %vg +} + +define <16 x float> @vfwnmsac_vf_nxv16f32(<16 x float> %va, <16 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x half> poison, half %c, i32 0 + %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %splat to <16 x float> + %vf = fneg <16 x float> %vd + %vg = call <16 x float> @llvm.fma.v16f32(<16 x float> %vf, <16 x float> %ve, <16 x float> %va) + ret <16 x float> %vg +} + +define <16 x float> @vfwnmsac_fv_nxv16f32(<16 x float> %va, <16 x half> %vb, half %c) { +; CHECK-LABEL: vfwnmsac_fv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x 
half> poison, half %c, i32 0 + %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer + %vd = fpext <16 x half> %vb to <16 x float> + %ve = fpext <16 x half> %splat to <16 x float> + %vf = fneg <16 x float> %ve + %vg = call <16 x float> @llvm.fma.v16f32(<16 x float> %vd, <16 x float> %vf, <16 x float> %va) + ret <16 x float> %vg +} + +declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>) + +define <1 x double> @vfwmacc_vv_nxv1f64(<1 x double> %va, <1 x float> %vb, <1 x float> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %vc to <1 x double> + %vf = call <1 x double> @llvm.fma.v1f64(<1 x double> %vd, <1 x double> %ve, <1 x double> %va) + ret <1 x double> %vf +} + +define <1 x double> @vfwmacc_vf_nxv1f64(<1 x double> %va, <1 x float> %vb, float %c) { +; CHECK-LABEL: vfwmacc_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x float> poison, float %c, i32 0 + %splat = shufflevector <1 x float> %head, <1 x float> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %splat to <1 x double> + %vf = call <1 x double> @llvm.fma.v1f64(<1 x double> %vd, <1 x double> %ve, <1 x double> %va) + ret <1 x double> %vf +} + +define <1 x double> @vfwnmacc_vv_nxv1f64(<1 x double> %va, <1 x float> %vb, <1 x float> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %vc to <1 x double> + %vf = fneg <1 x double> %va + %vg = fneg <1 x double> %vd + %vh = call <1 x double> 
@llvm.fma.v1f64(<1 x double> %vg, <1 x double> %ve, <1 x double> %vf) + ret <1 x double> %vh +} + +define <1 x double> @vfwnmacc_vf_nxv1f64(<1 x double> %va, <1 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x float> poison, float %c, i32 0 + %splat = shufflevector <1 x float> %head, <1 x float> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %splat to <1 x double> + %vf = fneg <1 x double> %va + %vg = fneg <1 x double> %vd + %vh = call <1 x double> @llvm.fma.v1f64(<1 x double> %vg, <1 x double> %ve, <1 x double> %vf) + ret <1 x double> %vh +} + +define <1 x double> @vfwnmacc_fv_nxv1f64(<1 x double> %va, <1 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x float> poison, float %c, i32 0 + %splat = shufflevector <1 x float> %head, <1 x float> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %splat to <1 x double> + %vf = fneg <1 x double> %va + %vg = fneg <1 x double> %ve + %vh = call <1 x double> @llvm.fma.v1f64(<1 x double> %vd, <1 x double> %vg, <1 x double> %vf) + ret <1 x double> %vh +} + +define <1 x double> @vfwmsac_vv_nxv1f64(<1 x double> %va, <1 x float> %vb, <1 x float> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %vc to <1 x double> + %vf = fneg <1 x double> %va + %vg = call <1 x double> @llvm.fma.v1f64(<1 x double> %vd, <1 x double> %ve, <1 x double> %vf) + ret <1 x double> %vg +} + +define <1 x double> 
@vfwmsac_vf_nxv1f64(<1 x double> %va, <1 x float> %vb, float %c) { +; CHECK-LABEL: vfwmsac_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x float> poison, float %c, i32 0 + %splat = shufflevector <1 x float> %head, <1 x float> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %splat to <1 x double> + %vf = fneg <1 x double> %va + %vg = call <1 x double> @llvm.fma.v1f64(<1 x double> %vd, <1 x double> %ve, <1 x double> %vf) + ret <1 x double> %vg +} + +define <1 x double> @vfwnmsac_vv_nxv1f64(<1 x double> %va, <1 x float> %vb, <1 x float> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %vc to <1 x double> + %vf = fneg <1 x double> %vd + %vg = call <1 x double> @llvm.fma.v1f64(<1 x double> %vf, <1 x double> %ve, <1 x double> %va) + ret <1 x double> %vg +} + +define <1 x double> @vfwnmsac_vf_nxv1f64(<1 x double> %va, <1 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x float> poison, float %c, i32 0 + %splat = shufflevector <1 x float> %head, <1 x float> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %splat to <1 x double> + %vf = fneg <1 x double> %vd + %vg = call <1 x double> @llvm.fma.v1f64(<1 x double> %vf, <1 x double> %ve, <1 x double> %va) + ret <1 x double> %vg +} + +define <1 x double> @vfwnmsac_fv_nxv1f64(<1 x double> %va, <1 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmsac_fv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: 
vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <1 x float> poison, float %c, i32 0 + %splat = shufflevector <1 x float> %head, <1 x float> poison, <1 x i32> zeroinitializer + %vd = fpext <1 x float> %vb to <1 x double> + %ve = fpext <1 x float> %splat to <1 x double> + %vf = fneg <1 x double> %ve + %vg = call <1 x double> @llvm.fma.v1f64(<1 x double> %vd, <1 x double> %vf, <1 x double> %va) + ret <1 x double> %vg +} + +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) + +define <2 x double> @vfwmacc_vv_nxv2f64(<2 x double> %va, <2 x float> %vb, <2 x float> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %vc to <2 x double> + %vf = call <2 x double> @llvm.fma.v2f64(<2 x double> %vd, <2 x double> %ve, <2 x double> %va) + ret <2 x double> %vf +} + +define <2 x double> @vfwmacc_vf_nxv2f64(<2 x double> %va, <2 x float> %vb, float %c) { +; CHECK-LABEL: vfwmacc_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x float> poison, float %c, i32 0 + %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %splat to <2 x double> + %vf = call <2 x double> @llvm.fma.v2f64(<2 x double> %vd, <2 x double> %ve, <2 x double> %va) + ret <2 x double> %vf +} + +define <2 x double> @vfwnmacc_vv_nxv2f64(<2 x double> %va, <2 x float> %vb, <2 x float> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %vc to <2 x double> + %vf = fneg <2 x double> %va + 
%vg = fneg <2 x double> %vd + %vh = call <2 x double> @llvm.fma.v2f64(<2 x double> %vg, <2 x double> %ve, <2 x double> %vf) + ret <2 x double> %vh +} + +define <2 x double> @vfwnmacc_vf_nxv2f64(<2 x double> %va, <2 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x float> poison, float %c, i32 0 + %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %splat to <2 x double> + %vf = fneg <2 x double> %va + %vg = fneg <2 x double> %vd + %vh = call <2 x double> @llvm.fma.v2f64(<2 x double> %vg, <2 x double> %ve, <2 x double> %vf) + ret <2 x double> %vh +} + +define <2 x double> @vfwnmacc_fv_nxv2f64(<2 x double> %va, <2 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x float> poison, float %c, i32 0 + %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %splat to <2 x double> + %vf = fneg <2 x double> %va + %vg = fneg <2 x double> %ve + %vh = call <2 x double> @llvm.fma.v2f64(<2 x double> %vd, <2 x double> %vg, <2 x double> %vf) + ret <2 x double> %vh +} + +define <2 x double> @vfwmsac_vv_nxv2f64(<2 x double> %va, <2 x float> %vb, <2 x float> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %vc to <2 x double> + %vf = fneg <2 x double> %va + %vg = call <2 x double> @llvm.fma.v2f64(<2 x double> %vd, <2 x double> %ve, <2 x double> %vf) + ret <2 
x double> %vg +} + +define <2 x double> @vfwmsac_vf_nxv2f64(<2 x double> %va, <2 x float> %vb, float %c) { +; CHECK-LABEL: vfwmsac_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x float> poison, float %c, i32 0 + %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %splat to <2 x double> + %vf = fneg <2 x double> %va + %vg = call <2 x double> @llvm.fma.v2f64(<2 x double> %vd, <2 x double> %ve, <2 x double> %vf) + ret <2 x double> %vg +} + +define <2 x double> @vfwnmsac_vv_nxv2f64(<2 x double> %va, <2 x float> %vb, <2 x float> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %vc to <2 x double> + %vf = fneg <2 x double> %vd + %vg = call <2 x double> @llvm.fma.v2f64(<2 x double> %vf, <2 x double> %ve, <2 x double> %va) + ret <2 x double> %vg +} + +define <2 x double> @vfwnmsac_vf_nxv2f64(<2 x double> %va, <2 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x float> poison, float %c, i32 0 + %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %splat to <2 x double> + %vf = fneg <2 x double> %vd + %vg = call <2 x double> @llvm.fma.v2f64(<2 x double> %vf, <2 x double> %ve, <2 x double> %va) + ret <2 x double> %vg +} + +define <2 x double> @vfwnmsac_fv_nxv2f64(<2 x double> %va, <2 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmsac_fv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli 
zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x float> poison, float %c, i32 0 + %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer + %vd = fpext <2 x float> %vb to <2 x double> + %ve = fpext <2 x float> %splat to <2 x double> + %vf = fneg <2 x double> %ve + %vg = call <2 x double> @llvm.fma.v2f64(<2 x double> %vd, <2 x double> %vf, <2 x double> %va) + ret <2 x double> %vg +} + + +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) + +define <4 x double> @vfwmacc_vv_nxv4f64(<4 x double> %va, <4 x float> %vb, <4 x float> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %vc to <4 x double> + %vf = call <4 x double> @llvm.fma.v4f64(<4 x double> %vd, <4 x double> %ve, <4 x double> %va) + ret <4 x double> %vf +} + +define <4 x double> @vfwmacc_vf_nxv4f64(<4 x double> %va, <4 x float> %vb, float %c) { +; CHECK-LABEL: vfwmacc_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x float> poison, float %c, i32 0 + %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %splat to <4 x double> + %vf = call <4 x double> @llvm.fma.v4f64(<4 x double> %vd, <4 x double> %ve, <4 x double> %va) + ret <4 x double> %vf +} + +define <4 x double> @vfwnmacc_vv_nxv4f64(<4 x double> %va, <4 x float> %vb, <4 x float> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %vc to 
<4 x double> + %vf = fneg <4 x double> %va + %vg = fneg <4 x double> %vd + %vh = call <4 x double> @llvm.fma.v4f64(<4 x double> %vg, <4 x double> %ve, <4 x double> %vf) + ret <4 x double> %vh +} + +define <4 x double> @vfwnmacc_vf_nxv4f64(<4 x double> %va, <4 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x float> poison, float %c, i32 0 + %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %splat to <4 x double> + %vf = fneg <4 x double> %va + %vg = fneg <4 x double> %vd + %vh = call <4 x double> @llvm.fma.v4f64(<4 x double> %vg, <4 x double> %ve, <4 x double> %vf) + ret <4 x double> %vh +} + +define <4 x double> @vfwnmacc_fv_nxv4f64(<4 x double> %va, <4 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x float> poison, float %c, i32 0 + %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %splat to <4 x double> + %vf = fneg <4 x double> %va + %vg = fneg <4 x double> %ve + %vh = call <4 x double> @llvm.fma.v4f64(<4 x double> %vd, <4 x double> %vg, <4 x double> %vf) + ret <4 x double> %vh +} + +define <4 x double> @vfwmsac_vv_nxv4f64(<4 x double> %va, <4 x float> %vb, <4 x float> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %vc to <4 x double> + %vf = fneg <4 x double> %va + %vg = call <4 x double> @llvm.fma.v4f64(<4 x double> %vd, 
<4 x double> %ve, <4 x double> %vf) + ret <4 x double> %vg +} + +define <4 x double> @vfwmsac_vf_nxv4f64(<4 x double> %va, <4 x float> %vb, float %c) { +; CHECK-LABEL: vfwmsac_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x float> poison, float %c, i32 0 + %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %splat to <4 x double> + %vf = fneg <4 x double> %va + %vg = call <4 x double> @llvm.fma.v4f64(<4 x double> %vd, <4 x double> %ve, <4 x double> %vf) + ret <4 x double> %vg +} + +define <4 x double> @vfwnmsac_vv_nxv4f64(<4 x double> %va, <4 x float> %vb, <4 x float> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v10, v11 +; CHECK-NEXT: ret + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %vc to <4 x double> + %vf = fneg <4 x double> %vd + %vg = call <4 x double> @llvm.fma.v4f64(<4 x double> %vf, <4 x double> %ve, <4 x double> %va) + ret <4 x double> %vg +} + +define <4 x double> @vfwnmsac_vf_nxv4f64(<4 x double> %va, <4 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x float> poison, float %c, i32 0 + %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %splat to <4 x double> + %vf = fneg <4 x double> %vd + %vg = call <4 x double> @llvm.fma.v4f64(<4 x double> %vf, <4 x double> %ve, <4 x double> %va) + ret <4 x double> %vg +} + +define <4 x double> @vfwnmsac_fv_nxv4f64(<4 x double> %va, <4 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmsac_fv_nxv4f64: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x float> poison, float %c, i32 0 + %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer + %vd = fpext <4 x float> %vb to <4 x double> + %ve = fpext <4 x float> %splat to <4 x double> + %vf = fneg <4 x double> %ve + %vg = call <4 x double> @llvm.fma.v4f64(<4 x double> %vd, <4 x double> %vf, <4 x double> %va) + ret <4 x double> %vg +} + +declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) + +define <8 x double> @vfwmacc_vv_nxv8f64(<8 x double> %va, <8 x float> %vb, <8 x float> %vc) { +; CHECK-LABEL: vfwmacc_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwmacc.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %vc to <8 x double> + %vf = call <8 x double> @llvm.fma.v8f64(<8 x double> %vd, <8 x double> %ve, <8 x double> %va) + ret <8 x double> %vf +} + +define <8 x double> @vfwmacc_vf_nxv8f64(<8 x double> %va, <8 x float> %vb, float %c) { +; CHECK-LABEL: vfwmacc_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x float> poison, float %c, i32 0 + %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %splat to <8 x double> + %vf = call <8 x double> @llvm.fma.v8f64(<8 x double> %vd, <8 x double> %ve, <8 x double> %va) + ret <8 x double> %vf +} + +define <8 x double> @vfwnmacc_vv_nxv8f64(<8 x double> %va, <8 x float> %vb, <8 x float> %vc) { +; CHECK-LABEL: vfwnmacc_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwnmacc.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <8 x float> %vb to <8 x 
double> + %ve = fpext <8 x float> %vc to <8 x double> + %vf = fneg <8 x double> %va + %vg = fneg <8 x double> %vd + %vh = call <8 x double> @llvm.fma.v8f64(<8 x double> %vg, <8 x double> %ve, <8 x double> %vf) + ret <8 x double> %vh +} + +define <8 x double> @vfwnmacc_vf_nxv8f64(<8 x double> %va, <8 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x float> poison, float %c, i32 0 + %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %splat to <8 x double> + %vf = fneg <8 x double> %va + %vg = fneg <8 x double> %vd + %vh = call <8 x double> @llvm.fma.v8f64(<8 x double> %vg, <8 x double> %ve, <8 x double> %vf) + ret <8 x double> %vh +} + +define <8 x double> @vfwnmacc_fv_nxv8f64(<8 x double> %va, <8 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmacc_fv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x float> poison, float %c, i32 0 + %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %splat to <8 x double> + %vf = fneg <8 x double> %va + %vg = fneg <8 x double> %ve + %vh = call <8 x double> @llvm.fma.v8f64(<8 x double> %vd, <8 x double> %vg, <8 x double> %vf) + ret <8 x double> %vh +} + +define <8 x double> @vfwmsac_vv_nxv8f64(<8 x double> %va, <8 x float> %vb, <8 x float> %vc) { +; CHECK-LABEL: vfwmsac_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwmsac.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %vc to <8 x double> + %vf = fneg <8 x double> %va + %vg = call <8 x 
double> @llvm.fma.v8f64(<8 x double> %vd, <8 x double> %ve, <8 x double> %vf) + ret <8 x double> %vg +} + +define <8 x double> @vfwmsac_vf_nxv8f64(<8 x double> %va, <8 x float> %vb, float %c) { +; CHECK-LABEL: vfwmsac_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x float> poison, float %c, i32 0 + %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %splat to <8 x double> + %vf = fneg <8 x double> %va + %vg = call <8 x double> @llvm.fma.v8f64(<8 x double> %vd, <8 x double> %ve, <8 x double> %vf) + ret <8 x double> %vg +} + +define <8 x double> @vfwnmsac_vv_nxv8f64(<8 x double> %va, <8 x float> %vb, <8 x float> %vc) { +; CHECK-LABEL: vfwnmsac_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwnmsac.vv v8, v12, v14 +; CHECK-NEXT: ret + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %vc to <8 x double> + %vf = fneg <8 x double> %vd + %vg = call <8 x double> @llvm.fma.v8f64(<8 x double> %vf, <8 x double> %ve, <8 x double> %va) + ret <8 x double> %vg +} + +define <8 x double> @vfwnmsac_vf_nxv8f64(<8 x double> %va, <8 x float> %vb, float %c) { +; CHECK-LABEL: vfwnmsac_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x float> poison, float %c, i32 0 + %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %splat to <8 x double> + %vf = fneg <8 x double> %vd + %vg = call <8 x double> @llvm.fma.v8f64(<8 x double> %vf, <8 x double> %ve, <8 x double> %va) + ret <8 x double> %vg +} + +define <8 x double> @vfwnmsac_fv_nxv8f64(<8 x double> %va, <8 x float> %vb, float %c) 
{ +; CHECK-LABEL: vfwnmsac_fv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x float> poison, float %c, i32 0 + %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer + %vd = fpext <8 x float> %vb to <8 x double> + %ve = fpext <8 x float> %splat to <8 x double> + %vf = fneg <8 x double> %ve + %vg = call <8 x double> @llvm.fma.v8f64(<8 x double> %vd, <8 x double> %vf, <8 x double> %va) + ret <8 x double> %vg +} From 870f4421acc67899d1fe6db392fe555a4b186e80 Mon Sep 17 00:00:00 2001 From: Peixin-Qiao Date: Fri, 20 May 2022 15:16:03 +0800 Subject: [PATCH 058/908] [flang][OpenMP] Fix the types of worksharing-loop variables The types of lower bound, upper bound, and step are converted into the type of the loop variable if necessary. OpenMP runtime requires 32-bit or 64-bit loop variables. OpenMP loop iteration variable cannot have more than 64 bits size and will be narrowed. This patch is part of upstreaming code from the fir-dev branch of https://github.com/flang-compiler/f18-llvm-project. (#1256) Co-authored-by: kiranchandramohan Reviewed By: kiranchandramohan, shraiysh Differential Revision: https://reviews.llvm.org/D125740 --- flang/lib/Lower/OpenMP.cpp | 49 ++++++- .../test/Lower/OpenMP/omp-wsloop-variable.f90 | 126 ++++++++++++++++++ 2 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 flang/test/Lower/OpenMP/omp-wsloop-variable.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 35e0efb7ecca77..e59d80c3436380 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -120,6 +120,24 @@ static void genObjectList(const Fortran::parser::OmpObjectList &objectList, } } +static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize) { + // OpenMP runtime requires 32-bit or 64-bit loop variables. 
+ loopVarTypeSize = loopVarTypeSize * 8; + if (loopVarTypeSize < 32) { + loopVarTypeSize = 32; + } else if (loopVarTypeSize > 64) { + loopVarTypeSize = 64; + mlir::emitWarning(converter.getCurrentLocation(), + "OpenMP loop iteration variable cannot have more than 64 " + "bits size and will be narrowed into 64 bits."); + } + assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && + "OpenMP loop iteration variable size must be transformed into 32-bit " + "or 64-bit"); + return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); +} + /// Create the body (block) for an OpenMP Operation. /// /// \param [in] op - the operation the body belongs to. @@ -143,15 +161,19 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, // e.g. For loops the arguments are the induction variable. And all further // uses of the induction variable should use this mlir value. if (args.size()) { + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); SmallVector tiv; SmallVector locs; - int argIndex = 0; - for (auto &arg : args) { - tiv.push_back(converter.genType(*arg)); + for (int i = 0; i < (int)args.size(); i++) { + tiv.push_back(loopVarType); locs.push_back(loc); } firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs); - for (auto &arg : args) { + int argIndex = 0; + for (const Fortran::semantics::Symbol *arg : args) { fir::ExtendedValue exval = op.getRegion().front().getArgument(argIndex); converter.bindSymbol(*arg, exval); argIndex++; @@ -490,11 +512,12 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, TODO(converter.getCurrentLocation(), "Construct enclosing do loop"); } - int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList); - // Collect the loops to collapse. 
auto *doConstructEval = &eval.getFirstNestedEvaluation(); + std::int64_t collapseValue = + Fortran::lower::getCollapseValue(wsLoopOpClauseList); + std::size_t loopVarTypeSize = 0; SmallVector iv; do { auto *doLoop = &doConstructEval->getFirstNestedEvaluation(); @@ -518,12 +541,26 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, currentLocation, firOpBuilder.getIntegerType(32), 1)); } iv.push_back(bounds->name.thing.symbol); + loopVarTypeSize = std::max(loopVarTypeSize, + bounds->name.thing.symbol->GetUltimate().size()); collapseValue--; doConstructEval = &*std::next(doConstructEval->getNestedEvaluations().begin()); } while (collapseValue > 0); + // The types of lower bound, upper bound, and step are converted into the + // type of the loop variable if necessary. + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) { + lowerBound[it] = firOpBuilder.createConvert(currentLocation, loopVarType, + lowerBound[it]); + upperBound[it] = firOpBuilder.createConvert(currentLocation, loopVarType, + upperBound[it]); + step[it] = + firOpBuilder.createConvert(currentLocation, loopVarType, step[it]); + } + // FIXME: Add support for following clauses: // 1. linear // 2. order diff --git a/flang/test/Lower/OpenMP/omp-wsloop-variable.f90 b/flang/test/Lower/OpenMP/omp-wsloop-variable.f90 new file mode 100644 index 00000000000000..2bffcd77dae27d --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-variable.f90 @@ -0,0 +1,126 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing) for different +! types of loop iteration variable, lower bound, upper bound, and step. + +!REQUIRES: shell +!RUN: bbc -fopenmp -emit-fir %s -o - 2>&1 | FileCheck %s + +!CHECK: OpenMP loop iteration variable cannot have more than 64 bits size and will be narrowed into 64 bits. 
+ +program wsloop_variable + integer(kind=1) :: i1_lb, i1_ub + integer(kind=2) :: i2, i2_ub, i2_s + integer(kind=4) :: i4_s + integer(kind=8) :: i8, i8_s + integer(kind=16) :: i16, i16_lb + real :: x + +!CHECK: [[TMP0:%.*]] = arith.constant 1 : i32 +!CHECK: [[TMP1:%.*]] = arith.constant 100 : i32 +!CHECK: [[TMP2:%.*]] = fir.convert [[TMP0]] : (i32) -> i64 +!CHECK: [[TMP3:%.*]] = fir.convert %{{.*}} : (i8) -> i64 +!CHECK: [[TMP4:%.*]] = fir.convert %{{.*}} : (i16) -> i64 +!CHECK: [[TMP5:%.*]] = fir.convert %{{.*}} : (i128) -> i64 +!CHECK: [[TMP6:%.*]] = fir.convert [[TMP1]] : (i32) -> i64 +!CHECK: [[TMP7:%.*]] = fir.convert %{{.*}} : (i32) -> i64 +!CHECK: omp.wsloop collapse(2) for ([[TMP8:%.*]], [[TMP9:%.*]]) : i64 = ([[TMP2]], [[TMP5]]) to ([[TMP3]], [[TMP6]]) inclusive step ([[TMP4]], [[TMP7]]) { +!CHECK: [[TMP10:%.*]] = arith.addi [[TMP8]], [[TMP9]] : i64 +!CHECK: [[TMP11:%.*]] = fir.convert [[TMP10]] : (i64) -> f32 +!CHECK: fir.store [[TMP11]] to %{{.*}} : !fir.ref +!CHECK: omp.yield +!CHECK: } + + !$omp do collapse(2) + do i2 = 1, i1_ub, i2_s + do i8 = i16_lb, 100, i4_s + x = i2 + i8 + end do + end do + !$omp end do + +!CHECK: [[TMP12:%.*]] = arith.constant 1 : i32 +!CHECK: [[TMP13:%.*]] = fir.convert %{{.*}} : (i8) -> i32 +!CHECK: [[TMP14:%.*]] = fir.convert %{{.*}} : (i64) -> i32 +!CHECK: omp.wsloop for ([[TMP15:%.*]]) : i32 = ([[TMP12]]) to ([[TMP13]]) inclusive step ([[TMP14]]) { +!CHECK: [[TMP16:%.*]] = fir.convert [[TMP15]] : (i32) -> f32 +!CHECK: fir.store [[TMP16]] to %{{.*}} : !fir.ref +!CHECK: omp.yield +!CHECK: } + + !$omp do + do i2 = 1, i1_ub, i8_s + x = i2 + end do + !$omp end do + +!CHECK: [[TMP17:%.*]] = fir.convert %{{.*}} : (i8) -> i64 +!CHECK: [[TMP18:%.*]] = fir.convert %{{.*}} : (i16) -> i64 +!CHECK: [[TMP19:%.*]] = fir.convert %{{.*}} : (i32) -> i64 +!CHECK: omp.wsloop for ([[TMP20:%.*]]) : i64 = ([[TMP17]]) to ([[TMP18]]) inclusive step ([[TMP19]]) { +!CHECK: [[TMP21:%.*]] = fir.convert [[TMP20]] : (i64) -> f32 +!CHECK: fir.store 
[[TMP21]] to %{{.*}} : !fir.ref +!CHECK: omp.yield +!CHECK: } + + !$omp do + do i16 = i1_lb, i2_ub, i4_s + x = i16 + end do + !$omp end do + +end program wsloop_variable + +!CHECK-LABEL: func.func @_QPwsloop_variable_sub() { +!CHECK: %[[VAL_0:.*]] = fir.alloca i128 {bindc_name = "i16_lb", uniq_name = "_QFwsloop_variable_subEi16_lb"} +!CHECK: %[[VAL_1:.*]] = fir.alloca i8 {bindc_name = "i1_ub", uniq_name = "_QFwsloop_variable_subEi1_ub"} +!CHECK: %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i2", uniq_name = "_QFwsloop_variable_subEi2"} +!CHECK: %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i2_s", uniq_name = "_QFwsloop_variable_subEi2_s"} +!CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i4_s", uniq_name = "_QFwsloop_variable_subEi4_s"} +!CHECK: %[[VAL_5:.*]] = fir.alloca i64 {bindc_name = "i8", uniq_name = "_QFwsloop_variable_subEi8"} +!CHECK: %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFwsloop_variable_subEx"} +!CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +!CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_1]] : !fir.ref +!CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref +!CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32 +!CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 +!CHECK: omp.wsloop for (%[[VAL_12:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +!CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref +!CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i128) -> index +!CHECK: %[[VAL_15:.*]] = arith.constant 100 : i32 +!CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index +!CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_4]] : !fir.ref +!CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> index +!CHECK: %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_18]] -> index { +!CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (index) -> i64 +!CHECK: fir.store %[[VAL_21]] to %[[VAL_5]] : !fir.ref +!CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_12]] 
: (i32) -> i64 +!CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_5]] : !fir.ref +!CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_22]], %[[VAL_23]] : i64 +!CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i64) -> f32 +!CHECK: fir.store %[[VAL_25]] to %[[VAL_6]] : !fir.ref +!CHECK: %[[VAL_26:.*]] = arith.addi %[[VAL_20]], %[[VAL_18]] : index +!CHECK: fir.result %[[VAL_26]] : index +!CHECK: } +!CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_28:.*]] : (index) -> i64 +!CHECK: fir.store %[[VAL_27]] to %[[VAL_5]] : !fir.ref +!CHECK: omp.yield +!CHECK: } +!CHECK: return +!CHECK: } + +subroutine wsloop_variable_sub + integer(kind=1) :: i1_ub + integer(kind=2) :: i2, i2_s + integer(kind=4) :: i4_s + integer(kind=8) :: i8 + integer(kind=16) :: i16_lb + real :: x + + !$omp do + do i2 = 1, i1_ub, i2_s + do i8 = i16_lb, 100, i4_s + x = i2 + i8 + end do + end do + !$omp end do + +end From 5537b22ccbdc562929949844f9f816cf720e5205 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Sun, 15 May 2022 22:35:46 +0700 Subject: [PATCH 059/908] Make CompoundStmtBitfields::NumStmts not a bit-field Number of statements in CompoundStmt is kept in a bit-field of the common part of Stmt. The field has 24 bits for the number. To allocate a new bit field (as attempted in https://reviews.llvm.org/D123952), this number must be reduced, maximal number of statements in a compound statement becomes smaller. It can result in compilation errors of some programs. With this change the number of statements is kept in a field of type 'unsigned int' rather than in bit-field. To make room in CompoundStmtBitfields LBraceLoc is moved to fields of CompoundStmt. 
Differential Revision: https://reviews.llvm.org/D125635 --- clang/include/clang/AST/Stmt.h | 17 ++++++++--------- clang/lib/AST/Stmt.cpp | 3 +-- clang/lib/Serialization/ASTReaderStmt.cpp | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index 1135981319c1ac..653ce4b4b90c59 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -128,10 +128,7 @@ class alignas(void *) Stmt { unsigned : NumStmtBits; - unsigned NumStmts : 32 - NumStmtBits; - - /// The location of the opening "{". - SourceLocation LBraceLoc; + unsigned NumStmts; }; class LabelStmtBitfields { @@ -1406,7 +1403,10 @@ class CompoundStmt final : public Stmt, friend class ASTStmtReader; friend TrailingObjects; - /// The location of the closing "}". LBraceLoc is stored in CompoundStmtBits. + /// The location of the opening "{". + SourceLocation LBraceLoc; + + /// The location of the closing "}". SourceLocation RBraceLoc; CompoundStmt(ArrayRef Stmts, SourceLocation LB, SourceLocation RB); @@ -1420,9 +1420,8 @@ class CompoundStmt final : public Stmt, // Build an empty compound statement with a location. explicit CompoundStmt(SourceLocation Loc) - : Stmt(CompoundStmtClass), RBraceLoc(Loc) { + : Stmt(CompoundStmtClass), LBraceLoc(Loc), RBraceLoc(Loc) { CompoundStmtBits.NumStmts = 0; - CompoundStmtBits.LBraceLoc = Loc; } // Build an empty compound statement. 
@@ -1505,10 +1504,10 @@ class CompoundStmt final : public Stmt, return const_cast(this)->getStmtExprResult(); } - SourceLocation getBeginLoc() const { return CompoundStmtBits.LBraceLoc; } + SourceLocation getBeginLoc() const { return LBraceLoc; } SourceLocation getEndLoc() const { return RBraceLoc; } - SourceLocation getLBracLoc() const { return CompoundStmtBits.LBraceLoc; } + SourceLocation getLBracLoc() const { return LBraceLoc; } SourceLocation getRBracLoc() const { return RBraceLoc; } static bool classof(const Stmt *T) { diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp index be19d3b2cce2c1..d562717bf6b22d 100644 --- a/clang/lib/AST/Stmt.cpp +++ b/clang/lib/AST/Stmt.cpp @@ -363,10 +363,9 @@ int64_t Stmt::getID(const ASTContext &Context) const { CompoundStmt::CompoundStmt(ArrayRef Stmts, SourceLocation LB, SourceLocation RB) - : Stmt(CompoundStmtClass), RBraceLoc(RB) { + : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) { CompoundStmtBits.NumStmts = Stmts.size(); setStmts(Stmts); - CompoundStmtBits.LBraceLoc = LB; } void CompoundStmt::setStmts(ArrayRef Stmts) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 77ea1b95d5be51..09342de1eee96e 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -155,7 +155,7 @@ void ASTStmtReader::VisitCompoundStmt(CompoundStmt *S) { while (NumStmts--) Stmts.push_back(Record.readSubStmt()); S->setStmts(Stmts); - S->CompoundStmtBits.LBraceLoc = readSourceLocation(); + S->LBraceLoc = readSourceLocation(); S->RBraceLoc = readSourceLocation(); } From 83c431fb9e72abbd2eddf26388245eb4963370e2 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Fri, 20 May 2022 08:50:36 +0100 Subject: [PATCH 060/908] [amdgpu] Add amdgpu_kernel calling conv attribute to clang Allows emitting define amdgpu_kernel void @func() IR from C or C++. 
This replaces the current workflow which is to write a stub in opencl that calls an external C function implemented in C++ combined through llvm-link. Calling the resulting function still requires a manual implementation of the ABI from the host side. The primary application is for more rapid debugging of the amdgpu backend by permuting a C or C++ test file instead of manually updating an IR file. Implementation closely follows D54425. Non-amd reviewers from there. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D125970 --- clang/include/clang/Basic/Attr.td | 5 ++ clang/include/clang/Basic/Specifiers.h | 1 + clang/lib/AST/ItaniumMangle.cpp | 1 + clang/lib/AST/Type.cpp | 2 + clang/lib/AST/TypePrinter.cpp | 4 + clang/lib/Basic/Targets/AMDGPU.h | 1 + clang/lib/CodeGen/CGCall.cpp | 4 + clang/lib/CodeGen/CGDebugInfo.cpp | 1 + clang/lib/Sema/SemaDeclAttr.cpp | 7 ++ clang/lib/Sema/SemaType.cpp | 3 + .../amdgpu-kernel-arg-pointer-type.cpp | 83 +++++++++++++++++++ clang/test/Sema/callingconv.c | 2 + clang/tools/libclang/CXType.cpp | 1 + 13 files changed, 115 insertions(+) create mode 100644 clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0c95adfa237d72..fed29b03a8b141 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1857,6 +1857,11 @@ def AMDGPUNumVGPR : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } +def AMDGPUKernelCall : DeclOrTypeAttr { + let Spellings = [Clang<"amdgpu_kernel">]; + let Documentation = [Undocumented]; +} + def BPFPreserveAccessIndex : InheritableAttr, TargetSpecificAttr { let Spellings = [Clang<"preserve_access_index">]; diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 7a727e7088deb3..7657ae36d21bb0 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -281,6 
+281,7 @@ namespace clang { CC_PreserveAll, // __attribute__((preserve_all)) CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) + CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) }; /// Checks whether the given calling convention supports variadic diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 1be70487c1b4e3..b380e02fc8f7d0 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3150,6 +3150,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_AAPCS_VFP: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: + case CC_AMDGPUKernelCall: case CC_IntelOclBicc: case CC_SpirFunction: case CC_OpenCLKernel: diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 200a129437ed5a..ece4165c51f539 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3186,6 +3186,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { case CC_AAPCS_VFP: return "aapcs-vfp"; case CC_AArch64VectorCall: return "aarch64_vector_pcs"; case CC_AArch64SVEPCS: return "aarch64_sve_pcs"; + case CC_AMDGPUKernelCall: return "amdgpu_kernel"; case CC_IntelOclBicc: return "intel_ocl_bicc"; case CC_SpirFunction: return "spir_function"; case CC_OpenCLKernel: return "opencl_kernel"; @@ -3622,6 +3623,7 @@ bool AttributedType::isCallingConv() const { case attr::VectorCall: case attr::AArch64VectorPcs: case attr::AArch64SVEPcs: + case attr::AMDGPUKernelCall: case attr::Pascal: case attr::MSABI: case attr::SysVABI: diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 7bca45b5f56015..b5286de5b1cab1 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -964,6 +964,9 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_AArch64SVEPCS: OS << "__attribute__((aarch64_sve_pcs))"; break; + case CC_AMDGPUKernelCall: + OS << 
"__attribute__((amdgpu_kernel))"; + break; case CC_IntelOclBicc: OS << " __attribute__((intel_ocl_bicc))"; break; @@ -1754,6 +1757,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; + case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; case attr::PreserveMost: OS << "preserve_most"; diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 97492219148886..4e3db7a181f58e 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -411,6 +411,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { return CCCR_Warning; case CC_C: case CC_OpenCLKernel: + case CC_AMDGPUKernelCall: return CCCR_OK; } } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a35e3d811862d9..0d838b9aeb529a 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -63,6 +63,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall; case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall; case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall; + case CC_AMDGPUKernelCall: return llvm::CallingConv::AMDGPU_KERNEL; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); case CC_PreserveMost: return llvm::CallingConv::PreserveMost; @@ -232,6 +233,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr()) return CC_AArch64SVEPCS; + if (D->hasAttr()) + return CC_AMDGPUKernelCall; + if (D->hasAttr()) return CC_IntelOclBicc; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 75342702944159..1f528fe4065c2a 
100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1335,6 +1335,7 @@ static unsigned getDwarfCC(CallingConv CC) { case CC_SpirFunction: return llvm::dwarf::DW_CC_LLVM_SpirFunction; case CC_OpenCLKernel: + case CC_AMDGPUKernelCall: return llvm::dwarf::DW_CC_LLVM_OpenCLKernel; case CC_Swift: return llvm::dwarf::DW_CC_LLVM_Swift; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1b055543dc485b..f39e1af10e3d7a 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5011,6 +5011,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case ParsedAttr::AT_AArch64SVEPcs: D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL)); return; + case ParsedAttr::AT_AMDGPUKernelCall: + D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL)); + return; case ParsedAttr::AT_IntelOclBicc: D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL)); return; @@ -5171,6 +5174,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_AArch64SVEPcs: CC = CC_AArch64SVEPCS; break; + case ParsedAttr::AT_AMDGPUKernelCall: + CC = CC_AMDGPUKernelCall; + break; case ParsedAttr::AT_RegCall: CC = CC_X86RegCall; break; @@ -8784,6 +8790,7 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_PreserveAll: case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: + case ParsedAttr::AT_AMDGPUKernelCall: handleCallConvAttr(S, D, AL); break; case ParsedAttr::AT_Suppress: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 314110c926828a..5c4d3b9c2a2933 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -122,6 +122,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_VectorCall: \ case ParsedAttr::AT_AArch64VectorPcs: \ case ParsedAttr::AT_AArch64SVEPcs: \ + case 
ParsedAttr::AT_AMDGPUKernelCall: \ case ParsedAttr::AT_MSABI: \ case ParsedAttr::AT_SysVABI: \ case ParsedAttr::AT_Pcs: \ @@ -7482,6 +7483,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_AArch64SVEPcs: return createSimpleAttr(Ctx, Attr); + case ParsedAttr::AT_AMDGPUKernelCall: + return createSimpleAttr(Ctx, Attr); case ParsedAttr::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, diff --git a/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp new file mode 100644 index 00000000000000..90a9366ac9a200 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp @@ -0,0 +1,83 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s + +// Derived from CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu by deleting references to HOST +// The original test passes the result through opt O2, but that seems to introduce invalid +// addrspace casts which are not being fixed as part of the present change. 
+ +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32* {{.*}} %x) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel1(int *x) { + x[0]++; +} + +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32* {{.*}} nonnull align 4 dereferenceable(4) %x) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel2(int &x) { + x++; +} + +// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel3(__attribute__((address_space(2))) int *x, + __attribute__((address_space(1))) int *y) { + y[0] = x[0]; +} + +// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void func(int *x) { + x[0]++; +} + +struct S { + int *x; + float *y; +}; +// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect +// by-val). However, the enhanced address inferring pass should be able to +// assume they are global pointers. +// + +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0) +__attribute__((amdgpu_kernel)) void kernel4(struct S s) { + s.x[0]++; + s.y[0] += 1.f; +} + +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S* {{.*}} %s) +__attribute__((amdgpu_kernel)) void kernel5(struct S *s) { + s->x[0]++; + s->y[0] += 1.f; +} + +struct T { + float *x[2]; +}; +// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect +// by-val). However, the enhanced address inferring pass should be able to +// assume they are global pointers. 
+// +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0) +__attribute__((amdgpu_kernel)) void kernel6(struct T t) { + t.x[0][0] += 1.f; + t.x[1][0] += 2.f; +} + +// Check that coerced pointers retain the noalias attribute when qualified with __restrict. +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32* noalias{{.*}} %x) +__attribute__((amdgpu_kernel)) void kernel7(int *__restrict x) { + x[0]++; +} + +// Single element struct. +struct SS { + float *x; +}; +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float* %a.coerce) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel8(struct SS a) { + *a.x += 3.f; +} + diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c index 675200cc9cfc72..1555a993218f7c 100644 --- a/clang/test/Sema/callingconv.c +++ b/clang/test/Sema/callingconv.c @@ -54,6 +54,8 @@ int __attribute__((pcs("foo"))) pcs7(void); // expected-error {{invalid PCS type int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aarch64_vector_pcs' calling convention is not supported for this target}} int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}} +int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}} + // PR6361 void ctest3(); void __attribute__((cdecl)) ctest3() {} diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index 4aee32210df1f0..cac5d3d4fa90db 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -677,6 +677,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) { TCALLINGCONV(PreserveMost); TCALLINGCONV(PreserveAll); case CC_SpirFunction: return CXCallingConv_Unexposed; + case 
CC_AMDGPUKernelCall: return CXCallingConv_Unexposed; case CC_OpenCLKernel: return CXCallingConv_Unexposed; break; } From 1379b150991f70a5782e9a143c2ba5308da1161c Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 20 May 2022 09:31:00 +0100 Subject: [PATCH 061/908] [AArch64] Fix the generation of BE Nops Big endian Nops were being generated as d5 03 20 1f fnmadd s21, s30, s0, s0, getting the bytes of the NOP in the wrong order. This switches the bytes to not be dependent on the endianness. Differential Revision: https://reviews.llvm.org/D125980 --- .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 2 +- llvm/test/MC/AArch64/align.s | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/AArch64/align.s diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index c1188d1c139e56..a57656ffb38bbb 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -471,7 +471,7 @@ bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, // We are properly aligned, so write NOPs as requested.
Count /= 4; for (uint64_t i = 0; i != Count; ++i) - support::endian::write(OS, 0xd503201f, Endian); + OS.write("\x1f\x20\x03\xd5", 4); return true; } diff --git a/llvm/test/MC/AArch64/align.s b/llvm/test/MC/AArch64/align.s new file mode 100644 index 00000000000000..fa9950f9877546 --- /dev/null +++ b/llvm/test/MC/AArch64/align.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -filetype=obj -triple aarch64-none-eabi %s | llvm-objdump -d - | FileCheck %s +// RUN: llvm-mc -filetype=obj -triple aarch64_be-none-eabi %s | llvm-objdump -d - | FileCheck %s + +// CHECK: 0: 00 00 80 d2 mov x0, #0 +// CHECK: 4: 00 00 80 d2 mov x0, #0 +// CHECK: 8: 1f 20 03 d5 nop +// CHECK: c: 1f 20 03 d5 nop +// CHECK: 10: 00 00 80 d2 mov x0, #0 + + .text + mov x0, #0 + mov x0, #0 + .p2align 4 + mov x0, #0 From 632cfbc9f933afc9956f7e0b26086fb1cb7294b5 Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Fri, 20 May 2022 16:54:23 +0800 Subject: [PATCH 062/908] [NFC][test] Fix the line num of expected-error for CSKY at builtin-alloca-with-align.c --- clang/test/Sema/builtin-alloca-with-align.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Sema/builtin-alloca-with-align.c b/clang/test/Sema/builtin-alloca-with-align.c index 0a3e0f6a202209..5f55c070fa333b 100644 --- a/clang/test/Sema/builtin-alloca-with-align.c +++ b/clang/test/Sema/builtin-alloca-with-align.c @@ -31,7 +31,7 @@ void test7(int a) { void test8(void) { __builtin_alloca_with_align(sizeof(__INT64_TYPE__), __alignof__(__INT64_TYPE__)); // expected-warning {{second argument to __builtin_alloca_with_align is supposed to be in bits}} #if defined(__csky__) - // expected-error@-1 {{requested alignment must be 8 or greater}} + // expected-error@-2 {{requested alignment must be 8 or greater}} // Because the alignment of long long is 4 in CSKY target #endif } From cd61d4bd2fcd5ba212747bfe48f32f121010e1a6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 20 May 2022 09:58:40 +0100 Subject: [PATCH 063/908] [LV] Do not 
LoopSimplify/LCSSA after generating main vector loop. At the moment LV runs LoopSimplify and reconstructs LCSSA form after generating the main vector loop and before generating the epilogue vector loop. In practice, this adds a new exit block for the scalar loop because the middle block now also branches to the original exit block of the scalar loop. It also requires adding a new LCSSA phi in the newly created exit block. This complicates things when modeling exit values in VPlan, because we would need to update the VPlan for the epilogue loop to update the newly created LCSSA phi node. But none of that should be necessary, as all analysis requiring loop-simplify form is already done at this point and LCSSA form of the original loop is not broken. Reviewed By: bmahjour Differential Revision: https://reviews.llvm.org/D125810 --- .../Transforms/Vectorize/LoopVectorize.cpp | 3 - .../sve-epilog-vect-inloop-reductions.ll | 35 ++-- .../AArch64/sve-epilog-vect-reductions.ll | 33 ++-- .../sve-epilog-vect-strict-reductions.ll | 33 ++-- .../LoopVectorize/AArch64/sve-epilog-vect.ll | 48 ++--- .../PowerPC/optimal-epilog-vectorization.ll | 24 +-- .../X86/invariant-load-gather.ll | 39 ++-- .../X86/invariant-store-vectorization.ll | 21 +-- .../X86/limit-vf-by-tripcount.ll | 6 +- .../LoopVectorize/X86/masked_load_store.ll | 18 +- .../epilog-vectorization-reductions.ll | 174 ++++++++---------- .../optimal-epilog-vectorization-liveout.ll | 29 ++- .../optimal-epilog-vectorization.ll | 24 +-- 13 files changed, 207 insertions(+), 280 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e13c6e1fd1f65..335940337b4de9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10632,9 +10632,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { DT); ++LoopsVectorized; - simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */); - 
formLCSSARecursively(*L, *DT, LI, SE); - // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. EPI.MainLoopVF = EPI.EpilogueVF; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll index a1b0e43c1a2aa1..aed5d4abfa5c79 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -60,43 +60,40 @@ define i64 @int_reduction_and(i64* noalias nocapture %a, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP30:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP30:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds 
i64, i64* [[TMP26]], i32 0 ; CHECK-NEXT: [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD10]]) -; CHECK-NEXT: [[TMP30]] = and i64 [[TMP29]], [[VEC_PHI9]] -; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]] +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD9]]) +; CHECK-NEXT: [[TMP30]] = and i64 [[TMP29]], [[VEC_PHI8]] +; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]] -; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i64 [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br 
label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] ; CHECK-NEXT: [[L3:%.*]] = load i64, i64* [[L2]], align 4 ; CHECK-NEXT: [[AND]] = and i64 [[RDX]], [[L3]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[AND_LCSSA4:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[AND_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[AND_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll index 7b2cbca73ad8c0..24518f29392dba 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -59,44 +59,41 @@ define i64 @int_reduction_add(i64* %a, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ] ; 
CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <2 x i64> [ [[TMP24]], [[VEC_EPILOG_PH]] ], [ [[TMP29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x i64> [ [[TMP24]], [[VEC_EPILOG_PH]] ], [ [[TMP29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, i64* [[TMP26]], i32 0 ; CHECK-NEXT: [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29]] = add <2 x i64> [[WIDE_LOAD10]], [[VEC_PHI9]] -; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]] +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29]] = add <2 x i64> [[WIDE_LOAD9]], [[VEC_PHI8]] +; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP30]], label 
[[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP29]]) -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]] -; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = add i64 [[TMP32]], [[SUM]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label 
[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA4:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[ADD_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll index 61b4febe16ce7d..e09ec33d143cdc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -56,42 +56,39 @@ define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] +; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ 
[[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 0 ; CHECK-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to <2 x float>* -; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4 -; CHECK-NEXT: [[TMP27]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI8]], <2 x float> [[WIDE_LOAD9]]) -; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI7]], <2 x float> [[WIDE_LOAD8]]) +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = 
phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = fadd float [[TMP29]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA3:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA3]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[ADD_LCSSA]] ; entry: diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 75b08fc2ff9f7f..0ebb0172041290 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -74,20 +74,20 @@ define void @main_vf_vscale_x_16(i8* %A) #0 { ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, i8* [[TMP28]], i32 0 ; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to * ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i32 0), poison, zeroinitializer), * [[TMP30]], align 1 ; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 8 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP32]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[OFFSET_IDX]], [[TMP32]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 1024, [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: 
vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -97,9 +97,7 @@ define void @main_vf_vscale_x_16(i8* %A) #0 { ; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: exit.loopexit: -; CHECK-NEXT: br label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -151,18 +149,18 @@ define void @main_vf_vscale_x_16(i8* %A) #0 { ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-VF8-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP21]] ; CHECK-VF8-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP22]], i32 0 ; CHECK-VF8-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to <8 x i8>* ; CHECK-VF8-NEXT: store <8 x i8> , <8 x i8>* [[TMP24]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 +; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 ; CHECK-VF8-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-VF8-NEXT: br i1 
[[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1024, 1024 -; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: ; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] @@ -172,9 +170,7 @@ define void @main_vf_vscale_x_16(i8* %A) #0 { ; CHECK-VF8-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-VF8-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK-VF8: exit.loopexit: -; CHECK-VF8-NEXT: br label [[EXIT]] +; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF8: exit: ; CHECK-VF8-NEXT: ret void ; @@ -257,18 +253,18 @@ define void @main_vf_vscale_x_2(i64* %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP21]] ; 
CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP22]], i32 0 ; CHECK-NEXT: [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <8 x i64>* ; CHECK-NEXT: store <8 x i64> , <8 x i64>* [[TMP24]], align 1 -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1024, 1024 -; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -278,9 +274,7 @@ define void @main_vf_vscale_x_2(i64* %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: store i64 1, i64* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: exit.loopexit: -; CHECK-NEXT: br label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -332,18 +326,18 @@ define void @main_vf_vscale_x_2(i64* %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ 
[[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-VF8-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP21]] ; CHECK-VF8-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP22]], i32 0 ; CHECK-VF8-NEXT: [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <8 x i64>* ; CHECK-VF8-NEXT: store <8 x i64> , <8 x i64>* [[TMP24]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 +; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 ; CHECK-VF8-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1024, 1024 -; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: ; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] @@ -353,9 +347,7 @@ define void @main_vf_vscale_x_2(i64* %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: store i64 1, i64* [[ARRAYIDX]], align 1 ; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-VF8-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK-VF8: exit.loopexit: -; CHECK-VF8-NEXT: br label [[EXIT]] +; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], 
!llvm.loop [[LOOP7:![0-9]+]] ; CHECK-VF8: exit: ; CHECK-VF8-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index d71bbe28a1aa50..95d698ac6cc0f4 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -227,7 +227,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: ; VF-TWO-CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC26]] -; VF-TWO-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -242,9 +242,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-TWO-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; VF-TWO-CHECK: for.end.loopexit.loopexit: -; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit: 
; VF-TWO-CHECK-NEXT: br label [[FOR_END]] ; VF-TWO-CHECK: for.end: @@ -471,7 +469,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-FOUR-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; VF-FOUR-CHECK: vec.epilog.middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC26]] -; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-FOUR-CHECK: vec.epilog.scalar.ph: ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -486,9 +484,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; VF-FOUR-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; VF-FOUR-CHECK: for.end.loopexit.loopexit: -; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; VF-FOUR-CHECK: for.end.loopexit: ; VF-FOUR-CHECK-NEXT: br label [[FOR_END]] ; VF-FOUR-CHECK: for.end: @@ -725,7 +721,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: br i1 [[TMP126]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: ; 
VF-TWO-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] -; VF-TWO-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -744,9 +740,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOPID_MS_CM:![0-9]+]] -; VF-TWO-CHECK: for.end.loopexit.loopexit: -; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOPID_MS_CM:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit: ; VF-TWO-CHECK-NEXT: br label [[FOR_END]] ; VF-TWO-CHECK: for.end: @@ -952,7 +946,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: br i1 [[TMP126]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV_CM:![0-9]+]] ; VF-FOUR-CHECK: vec.epilog.middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] -; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label 
[[VEC_EPILOG_SCALAR_PH]] +; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-FOUR-CHECK: vec.epilog.scalar.ph: ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -971,9 +965,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-FOUR-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; VF-FOUR-CHECK: for.end.loopexit.loopexit: -; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; VF-FOUR-CHECK: for.end.loopexit: ; VF-FOUR-CHECK-NEXT: br label [[FOR_END]] ; VF-FOUR-CHECK: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 313c7a8a9f678f..692119d1e947e2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -50,29 +50,29 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, 
[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[SMAX6]], 9223372036854775800 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[SMAX6]], 9223372036854775800 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT15]], <8 x i32*> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT19]], <8 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[OFFSET_IDX]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC12]] +; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT18]], <8 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: 
[[INDEX_NEXT21]] = add nuw i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT21]], [[N_VEC11]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT17]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT17]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef) -; CHECK-NEXT: [[PREDPHI21:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER20]], <8 x i32> -; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC12]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI21]], i64 7 -; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT16]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER19:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT16]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef) +; CHECK-NEXT: [[PREDPHI20:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER19]], <8 x i32> +; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC11]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI20]], i64 7 +; CHECK-NEXT: br i1 [[CMP_N12]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ 
[[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -87,12 +87,9 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[ALOAD]], [[COND_LOAD]] ], [ 1, [[FOR_BODY]] ] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[A_LCSSA_LCSSA10:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA10]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[A_LCSSA_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index 048ebd51cf4129..1a44804a53bb36 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -85,7 +85,7 @@ define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP17]]) ; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC17]] -; CHECK-NEXT: br i1 [[CMP_N18]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N18]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] 
= phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i32 [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -99,12 +99,9 @@ define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b ; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[T3_LCSSA:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[T3_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[T4]] ; entry: @@ -199,7 +196,7 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC13]] -; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, 
[[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -216,9 +213,7 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK: latch: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP17:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -332,7 +327,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N24:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC23]] -; CHECK-NEXT: br i1 [[CMP_N24]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N24]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -351,9 +346,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK: latch: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP28:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index 19e10b316f8de6..532906c60367e7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -51,7 +51,7 @@ define void @test1(i8 * noalias %src, i8 * noalias %dst) #0 { ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i64 17, 16 -; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP_MEMCPY_EXPANSION:%.*]] @@ -63,9 +63,7 @@ define void @test1(i8 * noalias %src, i8 * noalias %dst) #0 { ; CHECK-NEXT: store i8 [[VAL]], i8* [[STADDR]], align 64 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 17 -; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP_MEMCPY_EXPANSION]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: exit.loopexit: -; CHECK-NEXT: br label [[EXIT]] +; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP_MEMCPY_EXPANSION]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 4014f9e30a173f..415ab45c34856b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -297,7 +297,7 @@ define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture ; 
AVX512-NEXT: br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N11:%.*]] = icmp eq i64 10000, 10000 -; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] @@ -317,9 +317,7 @@ define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; AVX512: for.end.loopexit: -; AVX512-NEXT: br label [[FOR_END]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AVX512: for.end: ; AVX512-NEXT: ret void ; @@ -632,7 +630,7 @@ define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* n ; AVX512-NEXT: br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N11:%.*]] = icmp eq i64 10000, 10000 -; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; AVX512-NEXT: br label 
[[FOR_BODY:%.*]] @@ -652,9 +650,7 @@ define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* n ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; AVX512: for.end.loopexit: -; AVX512-NEXT: br label [[FOR_END]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AVX512: for.end: ; AVX512-NEXT: ret void ; @@ -988,7 +984,7 @@ define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapt ; AVX512-NEXT: br i1 [[TMP68]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N11:%.*]] = icmp eq i64 10000, 10000 -; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] @@ -1009,9 +1005,7 @@ define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapt ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; AVX512: for.end.loopexit: -; AVX512-NEXT: br label [[FOR_END]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; AVX512: for.end: ; AVX512-NEXT: ret void ; diff --git 
a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 46e84ab01d0a4e..0ca133d9e25ac2 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -39,44 +39,41 @@ define i64 @int_reduction_add(i64* %a, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[TMP7]], [[VEC_EPILOG_PH]] ], [ [[TMP12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP7]], [[VEC_EPILOG_PH]] ], [ [[TMP12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <4 x i64>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, <4 x i64>* 
[[TMP11]], align 4 -; CHECK-NEXT: [[TMP12]] = add <4 x i64> [[WIDE_LOAD8]], [[VEC_PHI7]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, <4 x i64>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12]] = add <4 x i64> [[WIDE_LOAD7]], [[VEC_PHI6]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP12]]) -; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ 
[[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = add i64 [[TMP15]], [[SUM]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA2:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[ADD_LCSSA]] ; entry: @@ -135,47 +132,44 @@ define float @fp_reduction_max(float* noalias %a, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> 
[[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI7]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[TMP13]] = select <4 x i1> [[TMP12]], <4 x float> [[VEC_PHI7]], <4 x float> [[WIDE_LOAD8]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI6]], [[WIDE_LOAD7]] +; CHECK-NEXT: [[TMP13]] = select <4 x i1> [[TMP12]], <4 x float> [[VEC_PHI6]], <4 x float> [[WIDE_LOAD7]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; 
CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP13]]) -; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi float [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX9]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[C0:%.*]] = fcmp fast ogt float [[RESULT_08]], [[L0]] ; CHECK-NEXT: [[V0]] = select fast i1 [[C0]], float [[RESULT_08]], float [[L0]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: 
br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[V0_LCSSA2:%.*]] = phi float [ [[V0]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[V0_LCSSA2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi float [ [[V0]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[V0_LCSSA]] ; entry: @@ -237,18 +231,18 @@ define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) { ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ [[TMP13]], [[VEC_EPILOG_PH]] ], [ [[TMP23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ [[TMP13]], [[VEC_EPILOG_PH]] ], [ [[TMP23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i32> [[VEC_PHI4]], +; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i32> [[VEC_PHI3]], ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16* [[TMP16]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16* [[TMP17]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i16>, <4 x i16>* 
[[TMP18]], align 2 -; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[WIDE_LOAD5]] to <4 x i32> +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP18]], align 2 +; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[WIDE_LOAD4]] to <4 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP15]], [[TMP19]] -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 256 +; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP20]] to <4 x i16> ; CHECK-NEXT: [[TMP23]] = zext <4 x i16> [[TMP22]] to <4 x i32> ; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -256,15 +250,15 @@ define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) { ; CHECK-NEXT: [[TMP24:%.*]] = trunc <4 x i32> [[TMP23]] to <4 x i16> ; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP24]]) ; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP25]] to i32 -; CHECK-NEXT: [[CMP_N2:%.*]] = icmp eq i32 256, 256 -; CHECK-NEXT: br i1 [[CMP_N2]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i32 256, 256 +; CHECK-NEXT: br i1 [[CMP_N1]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 256, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 
[[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX7]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX6]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i16, i16* [[GEP]], align 2 @@ -272,12 +266,9 @@ define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) { ; CHECK-NEXT: [[XOR]] = or i32 [[SUM_02]], [[EXT]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[XOR_LCSSA1:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[XOR_LCSSA1]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[XOR_LCSSA]] to i16 ; CHECK-NEXT: ret i16 [[RET]] ; @@ -343,53 +334,49 @@ define float @multiple_fp_rdx(float* %A, i64 %N) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.500000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX3:%.*]] = phi float [ 1.000000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, 
[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF6:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC7:%.*]] = sub i64 [[N]], [[N_MOD_VF6]] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> , float [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> zeroinitializer, float [[BC_MERGE_RDX3]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x float> [ [[TMP9]], [[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x float> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <4 x float> [ [[TMP9]], [[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x float> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15]] = fadd fast <4 x float> [[VEC_PHI11]], [[WIDE_LOAD12]] -; CHECK-NEXT: [[TMP16]] = fmul fast <4 x float> [[VEC_PHI10]], [[WIDE_LOAD12]] -; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP17:%.*]] = icmp 
eq i64 [[INDEX_NEXT13]], [[N_VEC7]] +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15]] = fadd fast <4 x float> [[VEC_PHI9]], [[WIDE_LOAD10]] +; CHECK-NEXT: [[TMP16]] = fmul fast <4 x float> [[VEC_PHI8]], [[WIDE_LOAD10]] +; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP15]]) ; CHECK-NEXT: [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP16]]) -; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[N]], [[N_VEC7]] -; CHECK-NEXT: br i1 [[CMP_N8]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC7]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi float [ 1.500000e+01, [[ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX15:%.*]] = phi float [ 1.000000e+01, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi float [ 1.500000e+01, [[ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: 
[[BC_MERGE_RDX13:%.*]] = phi float [ 1.000000e+01, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[PROD:%.*]] = phi float [ [[BC_MERGE_RDX14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX15]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PROD:%.*]] = phi float [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX13]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = fadd fast float [[SUM]], [[TMP20]] ; CHECK-NEXT: [[MUL]] = fmul fast float [[PROD]], [[TMP20]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA5:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[MUL_LCSSA4:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA5]], [[FOR_END_LOOPEXIT]] ] -; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[MUL_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: 
[[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[MUL_LCSSA]], [[ADD_LCSSA]] ; CHECK-NEXT: ret float [[DIV]] ; @@ -456,44 +443,41 @@ define i32 @reduction_phi_start_val(i32* %A, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START_SUM]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ [[TMP8]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[TMP8]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 
-; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13]] = sub <4 x i32> [[VEC_PHI7]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13]] = sub <4 x i32> [[VEC_PHI6]], [[WIDE_LOAD7]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) -; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_COND_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[START_SUM]], [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i32 [ [[START_SUM]], [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; 
CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SUB:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SUB:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[SUB]] = sub nsw i32 [[SUM]], [[LOAD]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] -; CHECK: for.cond.loopexit: -; CHECK-NEXT: [[SUB_LCSSA2:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_COND]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.cond: -; CHECK-NEXT: [[SUB_LCSSA]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[SUB_LCSSA2]], [[FOR_COND_LOOPEXIT]] ] +; CHECK-NEXT: [[SUB_LCSSA]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 ; CHECK-NEXT: [[OUTER_EXITCOND_NOT:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[OUTER_EXITCOND_NOT]], label [[FOR_END:%.*]], label [[ITER_CHECK]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll index 5d177833f9ae05..f1a7817d016710 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll @@ -51,30 +51,30 @@ define signext i32 @f1(i32* noalias %A, i32* noalias %B, i32 signext %n) { ; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label 
[[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; VF-TWO-CHECK: vec.epilog.ph: ; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; VF-TWO-CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2 -; VF-TWO-CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF4]] +; VF-TWO-CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2 +; VF-TWO-CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF3]] ; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; VF-TWO-CHECK: vec.epilog.vector.body: -; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]] ; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <2 x i32>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x i32>, <2 x i32>* [[TMP13]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP13]], align 4 ; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP10]] ; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i32>, <2 x i32>* [[TMP16]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add nsw <2 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD9]] -; VF-TWO-CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 -; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 
[[INDEX_NEXT10]], [[N_VEC5]] +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x i32>, <2 x i32>* [[TMP16]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD8]] +; VF-TWO-CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 2 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: -; VF-TWO-CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC5]] +; VF-TWO-CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 -; VF-TWO-CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: -; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-TWO-CHECK: for.body: ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -85,12 +85,9 @@ define signext i32 @f1(i32* noalias %A, i32* noalias %B, i32 signext %n) { ; VF-TWO-CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], 
!llvm.loop [[LOOP4:![0-9]+]] -; VF-TWO-CHECK: for.end.loopexit.loopexit: -; VF-TWO-CHECK-NEXT: [[ADD_LCSSA3:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit: -; VF-TWO-CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA3]], [[FOR_END_LOOPEXIT_LOOPEXIT]] ] +; VF-TWO-CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_END]] ; VF-TWO-CHECK: for.end: ; VF-TWO-CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 1a8999233ea9e3..3f965d7eb09edd 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -80,7 +80,7 @@ define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; CHECK-NEXT: br i1 [[TMP23]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -95,9 +95,7 @@ define 
dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit.loopexit: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -244,7 +242,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -263,9 +261,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], 
label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.end.loopexit.loopexit: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -377,7 +373,7 @@ define void @f3(i8* noalias %A, i64 %n) { ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -387,9 +383,7 @@ define void @f3(i8* noalias %A, i64 %n) { ; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: for.end.loopexit.loopexit: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: @@ -440,7 +434,7 @@ define void @f3(i8* noalias %A, i64 %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: 
[[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_BODY:%.*]] @@ -450,9 +444,7 @@ define void @f3(i8* noalias %A, i64 %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit.loopexit: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: for.end: From a61835b1e3f5e074f4c8174edadfc6bef975138f Mon Sep 17 00:00:00 2001 From: Daniil Dudkin Date: Fri, 20 May 2022 12:11:36 +0300 Subject: [PATCH 064/908] [flang] Fix use-associated false-positive error For the program provided as the test case flang fired the following error: error: Semantic errors in main.f90 error: 'foo' is not a procedure This change fixes the error by postponing handling of `UseErrorDetails` from `CharacterizeProcedure` to a later stage. 
Reviewed By: kiranchandramohan Differential Revision: https://reviews.llvm.org/D125791 --- flang/lib/Evaluate/characteristics.cpp | 5 ++++ flang/test/Semantics/resolve112.f90 | 32 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 flang/test/Semantics/resolve112.f90 diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index 95bc5f13270344..2cfdb9d282426e 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -485,6 +485,11 @@ static std::optional CharacterizeProcedure( [&](const semantics::UseDetails &use) { return CharacterizeProcedure(use.symbol(), context, seenProcs); }, + [](const semantics::UseErrorDetails &) { + // Ambiguous use-association will be handled later during symbol + // checks, ignore UseErrorDetails here without actual symbol usage. + return std::optional{}; + }, [&](const semantics::HostAssocDetails &assoc) { return CharacterizeProcedure(assoc.symbol(), context, seenProcs); }, diff --git a/flang/test/Semantics/resolve112.f90 b/flang/test/Semantics/resolve112.f90 new file mode 100644 index 00000000000000..6c45854079ff6f --- /dev/null +++ b/flang/test/Semantics/resolve112.f90 @@ -0,0 +1,32 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +! If there are 2 or more use-associated symbols +! from different modules with the same name, +! the error should be generated only if +! the name is actually used. +module a + contains + function foo() + foo = 42 + end function foo +end module a + +module b + contains + function foo() + foo = 42 + end function foo +end module b + +subroutine without_error + use a + use b +end subroutine without_error + +subroutine with_error + use a + use b + integer :: res + ! 
ERROR: Reference to 'foo' is ambiguous + res = foo() +end subroutine with_error From 8765ad42cd01717b739a8389584cc8eb9d09a093 Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Fri, 13 May 2022 11:04:08 +0100 Subject: [PATCH 065/908] [AArch64][SME][NFC] Add implicit operands for SME instructions in the disassembly. This patch simplifies the switch statement in getInstruction to add implicit operands (register ZA and Immediate equal to zero) in the SME operands when disassembling. The register ZA and the zero immediate can be added by checking the operand in MCInstDesc. Differential Revision: https://reviews.llvm.org/D125534 --- .../lib/Target/AArch64/AArch64InstrFormats.td | 8 +- .../Disassembler/AArch64Disassembler.cpp | 88 +++++++------------ .../Disassembler/AArch64Disassembler.h | 8 +- .../MCTargetDesc/AArch64MCTargetDesc.h | 7 ++ 4 files changed, 50 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 37928aa6fe5dcb..6132e1f42831ac 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1278,8 +1278,12 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>; def VectorIndexSOperand : AsmVectorIndex<0, 3>; def VectorIndexDOperand : AsmVectorIndex<0, 1>; -defm VectorIndex0 : VectorIndex; + } +} defm VectorIndex1 : VectorIndex; defm VectorIndexB : VectorIndex, ImmLeaf { let ParserMatchClass = Imm0_0Operand; let PrintMethod = "printMatrixIndex"; + let OperandNamespace = "AArch64"; + let OperandType = "OPERAND_IMPLICIT_IMM_0"; } def sme_elm_idx0_1 : Operand, ImmLeafget(MI.getOpcode()); + // For Scalable Matrix Extension (SME) instructions that have an implicit - // operand for the accumulator (ZA) which isn't encoded, manually insert - // operand. 
- case AArch64::LDR_ZA: - case AArch64::STR_ZA: { - MI.insert(MI.begin(), MCOperand::createReg(AArch64::ZA)); - // Spill and fill instructions have a single immediate used for both the - // vector select offset and optional memory offset. Replicate the decoded - // immediate. + // operand for the accumulator (ZA) or implicit immediate zero which isn't + // encoded, manually insert operand. + for (unsigned i = 0; i < Desc.getNumOperands(); i++) { + if (Desc.OpInfo[i].OperandType == MCOI::OPERAND_REGISTER) { + switch (Desc.OpInfo[i].RegClass) { + default: + break; + case AArch64::MPRRegClassID: + MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZA)); + break; + case AArch64::MPR8RegClassID: + MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0)); + break; + } + } else if (Desc.OpInfo[i].OperandType == + AArch64::OPERAND_IMPLICIT_IMM_0) { + MI.insert(MI.begin() + i, MCOperand::createImm(0)); + } + } + + if (MI.getOpcode() == AArch64::LDR_ZA || + MI.getOpcode() == AArch64::STR_ZA) { + // Spill and fill instructions have a single immediate used for both + // the vector select offset and optional memory offset. Replicate + // the decoded immediate. const MCOperand &Imm4Op = MI.getOperand(2); assert(Imm4Op.isImm() && "Unexpected operand type!"); MI.addOperand(Imm4Op); - break; - } - case AArch64::LD1_MXIPXX_H_B: - case AArch64::LD1_MXIPXX_V_B: - case AArch64::ST1_MXIPXX_H_B: - case AArch64::ST1_MXIPXX_V_B: - case AArch64::INSERT_MXIPZ_H_B: - case AArch64::INSERT_MXIPZ_V_B: - // e.g. 
- // MOVA ZA0.B[, ], /M, .B - // ^ insert implicit 8-bit element tile - MI.insert(MI.begin(), MCOperand::createReg(AArch64::ZAB0)); - break; - case AArch64::EXTRACT_ZPMXI_H_B: - case AArch64::EXTRACT_ZPMXI_V_B: - // MOVA .B, /M, ZA0.B[, ] - // ^ insert implicit 8-bit element tile - MI.insert(MI.begin()+2, MCOperand::createReg(AArch64::ZAB0)); - break; - case AArch64::LD1_MXIPXX_H_Q: - case AArch64::LD1_MXIPXX_V_Q: - case AArch64::ST1_MXIPXX_H_Q: - case AArch64::ST1_MXIPXX_V_Q: - // 128-bit load/store have implicit zero vector index. - MI.insert(MI.begin()+2, MCOperand::createImm(0)); - break; - // 128-bit mova have implicit zero vector index. - case AArch64::INSERT_MXIPZ_H_Q: - case AArch64::INSERT_MXIPZ_V_Q: - MI.insert(MI.begin()+2, MCOperand::createImm(0)); - break; - case AArch64::EXTRACT_ZPMXI_H_Q: - case AArch64::EXTRACT_ZPMXI_V_Q: - MI.addOperand(MCOperand::createImm(0)); - break; - case AArch64::SMOVvi8to32_idx0: - case AArch64::SMOVvi8to64_idx0: - case AArch64::SMOVvi16to32_idx0: - case AArch64::SMOVvi16to64_idx0: - case AArch64::SMOVvi32to64_idx0: - case AArch64::UMOVvi8_idx0: - case AArch64::UMOVvi16_idx0: - case AArch64::UMOVvi32_idx0: - case AArch64::UMOVvi64_idx0: - MI.addOperand(MCOperand::createImm(0)); - break; } if (Result != MCDisassembler::Fail) diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h index 374a89edcb743c..6761d449a7f45f 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h @@ -13,13 +13,17 @@ #define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H #include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstrInfo.h" namespace llvm { class AArch64Disassembler : public MCDisassembler { + std::unique_ptr const MCII; + public: - AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx) {} + AArch64Disassembler(const 
MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII) {} ~AArch64Disassembler() override = default; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index ad2dc1027a5eec..049c49796dc656 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" #include @@ -64,6 +65,12 @@ bool isQForm(const MCInst &MI, const MCInstrInfo *MCII); bool isFpOrNEON(const MCInst &MI, const MCInstrInfo *MCII); } +namespace AArch64 { +enum OperandType { + OPERAND_IMPLICIT_IMM_0 = MCOI::OPERAND_FIRST_TARGET, +}; +} // namespace AArch64 + } // End llvm namespace // Defines symbolic names for AArch64 registers. This defines a mapping from From 4cd95104268200659252f03b4bbde6acb9f6306f Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Thu, 12 May 2022 13:19:50 +0100 Subject: [PATCH 066/908] [AArch64][SME]Tied up ZA operand for accumulate instructions This patch updates SMEInstrFormats.td to tie up ZA operand for instructions that accumulate their results into ZA or use part of ZA as input. 
Depends on: D125534 Differential Revision: https://reviews.llvm.org/D125537 --- llvm/lib/Target/AArch64/SMEInstrFormats.td | 31 ++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 41f2cead4cf8d5..4c78c17dfd1c70 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -17,7 +17,7 @@ class sme_fp_outer_product_inst : I<(outs za_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), + (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { @@ -34,6 +34,8 @@ class sme_fp_outer_product_inst @@ -53,7 +55,7 @@ class sme_int_outer_product_inst : I<(outs za_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), + (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { @@ -72,6 +74,8 @@ class sme_int_outer_product_inst opc, string mnemonic> @@ -91,7 +95,7 @@ class sme_int_outer_product_i64 opc, string mnemonic> class sme_outer_product_widening_inst : I<(outs TileOp32:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm), + (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { @@ -109,6 +113,8 @@ class sme_outer_product_widening_inst let Inst{4} = S; let Inst{3-2} = 0b00; let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; } multiclass sme_bf16_outer_product { @@ -126,7 +132,7 @@ multiclass sme_f16_outer_product { class sme_add_vector_to_tile_inst : I<(outs tile_ty:$ZAda), - (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), + (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn", "", []>, Sched<[]> { bits<3> Pm; @@ -140,6 +146,8 @@ class 
sme_add_vector_to_tile_inst @@ -429,8 +437,12 @@ class sme_vector_to_tile_inst sz, MatrixTileVectorOperand tile_ty bit is_col, Operand imm_ty, ZPRRegOp zpr_ty, string mnemonic> : sme_vector_to_tile_base; + (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), + mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{ + + let Constraints = "$ZAd = $_ZAd"; +} + multiclass sme_vector_to_tile_aliases sz, ZPRRegOp zpr_ty, MatrixTileVectorOperand tile_ty, bit is_col, Operand imm_ty, string mnemonic> : sme_tile_to_vector_base; + (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), + mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> { + + let Constraints = "$Zd = $_Zd"; +} multiclass sme_tile_to_vector_aliases Date: Fri, 20 May 2022 11:00:34 +0100 Subject: [PATCH 067/908] [ARM] Cost modelling for MVE vector fptoi_sat Building on top of D125665, this adds MVE costs for fptosi.sat and fptoui.sat, providing MVE is available and the types are legal. 
Differential Revision: https://reviews.llvm.org/D125666 --- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 13 ++- llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll | 98 +++++++++---------- 2 files changed, 60 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 874216303b7235..fcbd9e69450ff1 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1778,14 +1778,23 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32)) return LT.first; + // Equally for MVE vector types + if (ST->hasMVEFloatOps() && + (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) && + LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits()) + return LT.first * ST->getMVEVectorCostFactor(CostKind); + // Otherwise we use a legal convert followed by a min+max if (((ST->hasVFP2Base() && LT.second == MVT::f32) || (ST->hasFP64() && LT.second == MVT::f64) || - (ST->hasFullFP16() && LT.second == MVT::f16)) && + (ST->hasFullFP16() && LT.second == MVT::f16) || + (ST->hasMVEFloatOps() && + (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) && LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) { Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(), LT.second.getScalarSizeInBits()); - InstructionCost Cost = 1; + InstructionCost Cost = + LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1; IntrinsicCostAttributes Attrs1(IsSigned ? 
Intrinsic::smin : Intrinsic::umin, LegalTy, {LegalTy, LegalTy}); diff --git a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll index 8a7784c35599fc..cfa8554a6d81aa 100644 --- a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll @@ -127,14 +127,14 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> 
undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) @@ -147,14 +147,14 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v2f64s64 = call <2 x i64> 
@llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32s32 = call <4 x i32> 
@llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) @@ -167,14 +167,14 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost 
of 18 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1104 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated 
cost of 762 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) @@ -187,14 +187,14 @@ define void @casts() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1192 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 290 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32s1 = call <16 x i1> 
@llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4488 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4400 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2698 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) @@ -387,42 +387,42 @@ define void @fp16() { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f16s32 
= call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16u16 = call <4 x i16> 
@llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16s16 = call <8 x i16> 
@llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1102 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4484 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) From 122e685878991b63e414733094e9876e715d08e0 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Thu, 19 May 2022 16:13:51 +0200 Subject: [PATCH 068/908] [mlir] do not elide dialect prefix for ops with dots in the name For the hypothetical "a.b.c" op printed within a region that declares "a" as the default dialect, MLIR would currently elide the "a." prefix and only print "b.c". However, this becomes ambiguous while parsing as "b.c" may be exist as the "c" op in the "b" dialect. If it does not, the parsing currently fails. Do not elide the default dialect if the op name contains further dots to avoid the ambiguity. 
See https://discourse.llvm.org/t/dropping-dialect-prefix-for-ops-with-multiple-dots-in-the-name/62562 Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D125975 --- mlir/lib/IR/AsmPrinter.cpp | 5 ++++- mlir/lib/IR/Operation.cpp | 5 +++-- mlir/test/IR/parser.mlir | 8 ++++++++ mlir/test/lib/Dialect/Test/TestDialect.cpp | 5 +++++ mlir/test/lib/Dialect/Test/TestOps.td | 6 ++++++ 5 files changed, 26 insertions(+), 3 deletions(-) diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 08360554943104..62aab6dd9306d2 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -2718,7 +2718,10 @@ void OperationPrinter::printOperation(Operation *op) { if (auto opPrinter = dialect->getOperationPrinter(op)) { // Print the op name first. StringRef name = op->getName().getStringRef(); - name.consume_front((defaultDialectStack.back() + ".").str()); + // Only drop the default dialect prefix when it cannot lead to + // ambiguities. + if (name.count('.') == 1) + name.consume_front((defaultDialectStack.back() + ".").str()); printEscapedString(name, os); // Print the rest of the op now. opPrinter(op, *this); diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index d7d59055479ced..d529385c25e160 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -624,11 +624,12 @@ void OpState::print(Operation *op, OpAsmPrinter &p, StringRef defaultDialect) { } } -/// Print an operation name, eliding the dialect prefix if necessary. +/// Print an operation name, eliding the dialect prefix if necessary and doesn't +/// lead to ambiguities. 
void OpState::printOpName(Operation *op, OpAsmPrinter &p, StringRef defaultDialect) { StringRef name = op->getName().getStringRef(); - if (name.startswith((defaultDialect + ".").str())) + if (name.startswith((defaultDialect + ".").str()) && name.count('.') == 1) name = name.drop_front(defaultDialect.size() + 1); p.getStream() << name; } diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index 4187ae1ffc5d70..a28e63d5e498d4 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -1282,6 +1282,14 @@ func.func @default_dialect(%bool : i1) { // example. // CHECK: "test.op_with_attr"() {test.attr = "test.value"} : () -> () "test.op_with_attr"() {test.attr = "test.value"} : () -> () + // Verify that the prefix is not stripped when it can lead to ambiguity. + // CHECK: test.op.with_dot_in_name + test.op.with_dot_in_name + // This is an unregistered operation, the printing/parsing is handled by the + // dialect, and the dialect prefix should not be stripped while printing + // because of potential ambiguity. + // CHECK: test.dialect_custom_printer.with.dot + test.dialect_custom_printer.with.dot "test.terminator"() : ()->() } // The same operation outside of the region does not have an func. prefix. 
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index c0948020cf1c07..24e5bc61236034 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -371,6 +371,11 @@ TestDialect::getParseOperationHook(StringRef opName) const { return parser.parseKeyword("custom_format_fallback"); }}; } + if (opName == "test.dialect_custom_printer.with.dot") { + return ParseOpHook{[](OpAsmParser &parser, OperationState &state) { + return ParseResult::success(); + }}; + } return None; } diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 7826f842bcba8f..0fd0566803df6a 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -810,6 +810,12 @@ def DefaultDialectOp : TEST_Op<"default_dialect", [OpAsmOpInterface]> { let assemblyFormat = "regions attr-dict-with-keyword"; } +// This is used to test that the default dialect is not elided when printing an +// op with dots in the name to avoid parsing ambiguity. +def OpWithDotInNameOp : TEST_Op<"op.with_dot_in_name"> { + let assemblyFormat = "attr-dict"; +} + // This is used to test the OpAsmOpInterface::getAsmBlockName() feature: // blocks nested in a region under this op will have a name defined by the // interface. 
From f598dfb3bf8cc98ef9c1a5c3ddb82bdee30b9988 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Fri, 20 May 2022 13:58:54 +0300 Subject: [PATCH 069/908] [AMDGPU][MC][GFX8+] Correct SMEM offset parsing Differential Revision: https://reviews.llvm.org/D125907 --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 3 ++- llvm/test/MC/AMDGPU/gfx10_asm_smem.s | 6 ++++++ llvm/test/MC/AMDGPU/gfx8_asm_smem.s | 3 +++ llvm/test/MC/AMDGPU/gfx9_asm_smem.s | 3 +++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e401f03e80d93c..e3cddd64a4a583 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -7736,7 +7736,8 @@ bool AMDGPUOperand::isSMRDOffset8() const { } bool AMDGPUOperand::isSMEMOffset() const { - return isImm(); // Offset range is checked later by validator. + return isImmTy(ImmTyNone) || + isImmTy(ImmTyOffset); // Offset range is checked later by validator. 
} bool AMDGPUOperand::isSMRDLiteralOffset() const { diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s index 1a24725acb74a6..73559b450f4485 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s @@ -1342,3 +1342,9 @@ s_buffer_atomic_umin s5, s[4:7], s2 dlc s_buffer_atomic_xor s5, s[4:7], s2 dlc // GFX10: encoding: [0x42,0x41,0x28,0xf5,0x00,0x00,0x00,0x04] + +s_load_dword s1, s[2:3] glc +// GFX10: s_load_dword s1, s[2:3], 0x0 glc ; encoding: [0x41,0x00,0x01,0xf4,0x00,0x00,0x00,0xfa] + +s_load_dword s1, s[2:3] dlc +// GFX10: s_load_dword s1, s[2:3], 0x0 dlc ; encoding: [0x41,0x40,0x00,0xf4,0x00,0x00,0x00,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx8_asm_smem.s b/llvm/test/MC/AMDGPU/gfx8_asm_smem.s index a39729b4b323df..5cdee0f8858c9f 100644 --- a/llvm/test/MC/AMDGPU/gfx8_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx8_asm_smem.s @@ -1163,3 +1163,6 @@ s_atc_probe_buffer 0x0, s[8:11], m0 s_atc_probe_buffer 0x0, s[8:11], 0x7ffff // CHECK: [0x04,0x00,0x9e,0xc0,0xff,0xff,0x07,0x00] + +s_load_dword s1, s[2:3] glc +// CHECK: s_load_dword s1, s[2:3], 0x0 glc ; encoding: [0x41,0x00,0x03,0xc0,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s index 1989062e3a509d..d61dac88f457e3 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s @@ -4475,3 +4475,6 @@ s_atomic_dec_x2 s[10:11], s[2:3], 0x0 s_atomic_dec_x2 s[10:11], s[2:3], s0 glc // CHECK: [0x81,0x02,0xb1,0xc2,0x00,0x00,0x00,0x00] + +s_load_dword s1, s[2:3] glc +// CHECK: s_load_dword s1, s[2:3], 0x0 glc ; encoding: [0x41,0x00,0x03,0xc0,0x00,0x00,0x00,0x00] From d5999bd3f7528db844f95479d6a66be4ac6c79c6 Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Fri, 20 May 2022 18:26:13 +0800 Subject: [PATCH 070/908] [X86][AMX][NFC] Refactor X86LowerAMXCast.cpp Change static function to X86LowerAMXCast member function. 
Differential Revision: https://reviews.llvm.org/D126058 --- llvm/lib/Target/X86/X86LowerAMXType.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index 81f258dc9659e2..540182cb791182 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -703,6 +703,9 @@ class X86LowerAMXCast { public: X86LowerAMXCast(Function &F) : Func(F) {} + void combineCastStore(IntrinsicInst *Cast, StoreInst *ST); + void combineLoadCast(IntrinsicInst *Cast, LoadInst *LD); + bool combineLdSt(SmallVectorImpl &Casts); bool combineAMXcast(TargetLibraryInfo *TLI); bool transformAMXCast(IntrinsicInst *AMXCast); bool transformAllAMXCast(); @@ -913,7 +916,7 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi( // --> // call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %p, // i64 64, x86_amx %42) -static void combineCastStore(IntrinsicInst *Cast, StoreInst *ST) { +void X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) { Value *Tile = Cast->getOperand(0); // TODO: If it is cast intrinsic or phi node, we can propagate the // shape information through def-use chain. 
@@ -939,7 +942,7 @@ static void combineCastStore(IntrinsicInst *Cast, StoreInst *ST) { // --> // %66 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, // i8* %p, i64 64) -static void combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) { +void X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) { Value *Row = nullptr, *Col = nullptr; Use &U = *(Cast->use_begin()); unsigned OpNo = U.getOperandNo(); @@ -961,7 +964,7 @@ static void combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) { Cast->replaceAllUsesWith(NewInst); } -static bool combineLdSt(SmallVectorImpl &Casts) { +bool X86LowerAMXCast::combineLdSt(SmallVectorImpl &Casts) { bool Change = false; for (auto *Cast : Casts) { auto *II = cast(Cast); From 5deca650fdba22a20e24b397e905ad0abba6f9a9 Mon Sep 17 00:00:00 2001 From: Alexey Katranov Date: Fri, 20 May 2022 11:03:54 +0200 Subject: [PATCH 071/908] tsan: add lock free stack pattern test Add a set of tests that iterate over possible combinations of memory orders for lock free stack implementation. Reviewed By: dvyukov Differential Revision: https://reviews.llvm.org/D110552 --- compiler-rt/test/tsan/lock_free_stack.cpp | 247 ++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100755 compiler-rt/test/tsan/lock_free_stack.cpp diff --git a/compiler-rt/test/tsan/lock_free_stack.cpp b/compiler-rt/test/tsan/lock_free_stack.cpp new file mode 100755 index 00000000000000..5477adcfdf9767 --- /dev/null +++ b/compiler-rt/test/tsan/lock_free_stack.cpp @@ -0,0 +1,247 @@ +// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_tsan -O1 %s -DRACE -o %t && %deflake %run %t | FileCheck %s --check-prefix=CHECK-RACE +#include "test.h" + +const int kThreadCount = 4; +#if RACE +const int kTestCount = 16; +#else +const int kTestCount = 9; +#endif + +template F for_each_mo(int mo, F f) { + f(mo); + return f; +} + +template +auto for_each_mo(int mo, Rest... 
rest) -> decltype(for_each_mo(rest...)) { + auto f = for_each_mo(rest...); + f(mo); + return f; +} + +void LockFreeStackImpl(int test, bool main_thread, int mo2, int mo4) { + struct Node { + int data; + Node *next; + }; + static Node *heads[kTestCount]{}; + auto head = heads + test; + + auto concurrent_push = [head](Node *new_head, int mo1, int mo2, int mo3) { + auto expected = __atomic_load_n(head, mo1); + do { + new_head->next = expected; + } while (!__atomic_compare_exchange_n(head, &expected, new_head, + /*weak*/ true, mo2, mo3)); + }; + + auto concurrent_grab_all = [head](int mo4) { + volatile int sink{}; + (void)sink; + + auto h = __atomic_exchange_n(head, nullptr, mo4); + while (h) { + sink = ++h->data; + auto next = h->next; + delete h; + h = next; + } + }; + + if (main_thread) { + concurrent_grab_all(mo4); + } else { + int i = 0; + // We have 15 combinations of mo1 and mo3. Since we have two race reports + // for each combination (the first report is for 'data' and the second + // report for 'next'), there are 30 race reports in total that should match + // to "CHECK-RACE-COUNT{-number_of_reports}" below + for_each_mo( + __ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __ATOMIC_SEQ_CST, [&](int mo1) { + for_each_mo(__ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __ATOMIC_RELEASE, + __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST, [&](int mo3) { + concurrent_push(new Node{i++}, mo1, mo2, mo3); + }); + }); + } +} + +void LockFreeStack(int test, bool main_thread, int mo2, int mo4) { + barrier_wait(&barrier); + if (main_thread) { + // We need to call LockFreeStackImpl second time after the barrier + // to guarantee at least one grab_all and cleanup. + // However, it is better to have one instantiation of LockFreeStackImpl + // on the main thread to merge the call stacks and prevent double race + // reports. Therefore, we use two interation for loop and skip the barrier + // on the second iteration. 
+ for (int i = 0; i < 2; ++i) { + LockFreeStackImpl(test, main_thread, mo2, mo4); + if (i == 0) { + barrier_wait(&barrier); + } + } + } else { + LockFreeStackImpl(test, main_thread, mo2, mo4); + barrier_wait(&barrier); + } +} + +void Test(bool main_thread) { + for (int test = 0; test < kTestCount; test++) { + if (main_thread) { + fprintf(stderr, "Test %d\n", test); + } + switch (test) { +#if RACE + case 0: + LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + break; + case 1: + LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_ACQUIRE); + break; + case 2: + LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_RELEASE); + break; + case 3: + LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_ACQ_REL); + break; + case 4: + LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST); + break; + case 5: + LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + break; + case 6: + LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); + break; + case 7: + LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_RELEASE); + break; + case 8: + LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_ACQ_REL); + break; + case 9: + LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_SEQ_CST); + break; + case 10: + LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_RELAXED); + break; + case 11: + LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_RELEASE); + break; + case 12: + LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); + break; + case 13: + LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_RELEASE); + break; + case 14: + LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); + break; + case 15: + LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_RELEASE); + break; +#else + case 0: + LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE); + break; + case 1: + LockFreeStack(test, 
main_thread, __ATOMIC_RELEASE, __ATOMIC_ACQ_REL); + break; + case 2: + LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_SEQ_CST); + break; + case 3: + LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); + break; + case 4: + LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_ACQ_REL); + break; + case 5: + LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST); + break; + case 6: + LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); + break; + case 7: + LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL); + break; + case 8: + LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + break; +#endif + } + } +} + +void *Thread(void *p) { + Test(false); + return 0; +} + +int main() { + barrier_init(&barrier, kThreadCount); + pthread_t t[kThreadCount - 1]; + for (int i = 0; i < kThreadCount - 1; ++i) + pthread_create(t + i, 0, Thread, (void *)(uintptr_t)(i + 1)); + Test(true); + for (int i = 0; i < kThreadCount - 1; ++i) + pthread_join(t[i], 0); +} + +// No race tests +// CHECK-NOT: ThreadSanitizer: data race + +// Race tests +// 30 is the number of race reports for 15 possible combinations of mo1 and mo3 +// CHECK-RACE: Test 0 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 1 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 2 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 3 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 4 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 5 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data 
race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 6 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 7 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 8 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 9 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 10 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 11 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 12 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 13 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 14 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE: Test 15 +// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race +// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race From a71a4485b67a31fb48b4b90c7a8cc19119650df3 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 20 May 2022 12:19:59 +0100 Subject: [PATCH 072/908] [AMDGPU] Add a test case for an SIFoldOperands bug --- llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll diff --git a/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll new file mode 100644 index 00000000000000..bfa6f0c2598f61 --- 
/dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX10 + +declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) +declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) + +; FIXME: This instruction uses two different literal constants which is not +; allowed. +; GFX10-LABEL: _amdgpu_ps_main: +; GFX10: v_fmaak_f32 {{v[0-9]+}}, 0x40490fdb, {{v[0-9]+}}, 0xbfc90fdb +define amdgpu_ps void @_amdgpu_ps_main(float %arg) { +bb: + %i = fmul reassoc nnan nsz arcp contract afn float %arg, 0x400921FB60000000 + %i1 = fadd reassoc nnan nsz arcp contract afn float %i, 0xBFF921FB60000000 + %i2 = fmul reassoc nnan nsz arcp contract afn float %i1, %arg + br label %bb3 + +bb3: + br label %bb4 + +bb4: + %i5 = fadd reassoc nnan nsz arcp contract afn float 0x400921FB60000000, %i2 + br label %bb6 + +bb6: + %i7 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %i5, float 0.000000e+00) + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> zeroinitializer, <2 x half> %i7, i1 false, i1 false) + ret void +} From 52f2d057235f08cad3c3788eddcd2bf9567aa29d Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Fri, 20 May 2022 14:00:53 +0200 Subject: [PATCH 073/908] Minutes for pauth sync-ups have moved to Discourse. 
--- llvm/docs/GettingInvolved.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 33537159038aba..05253154c8f349 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -204,7 +204,7 @@ writing, the following sync-ups are organized: * - LLVM Pointer Authentication - Every month on Mondays - `ics `__ - - `Minutes/docs `__ + - `Minutes/docs `__ * - MemorySSA in LLVM - Every 8 weeks on Mondays - `ics `__ From a351070710f51cc41b181130080ac9da514989d7 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 19 May 2022 10:58:17 +0100 Subject: [PATCH 074/908] [RISCV] Add a test showing overlapping stack offsets with RVV This test (and its forthcoming fix) was split off from D125787. It shows that the logic we use to determine when we need to add extra RVV padding is insufficient. In this example, we may have a situation involving dynamic stack alignment -- but no variable-sized objects -- where we have no FP but must still use SP to index objects. In this case we also need the extra RVV padding, otherwise objects may overlap. Specifically, the test shows that the RVV vector object may clobber the lowest callee-save. |------------------------------| -- <-- Incoming SP | 4-byte callee-save (ra) | |------------------------------| -- <-- SP + VLENB*2 + 60 | 4-byte callee-save (s0) | |------------------------------| -- <-- SP + VLENB*2 + 56 -- | 4-byte callee-save (s9) | | |------------------------------| -- <-- SP + VLENB*2 + 52 | RVV object(!!) 
| VLENB*2 RVV object | | |------------------------------| -- <-- SP + 56 -- | 4-byte local object | |------------------------------| -- <-- SP + 32 | Dead area | |------------------------------| -- <-- InSP - 2*VLENB - 64 | Possibly-zero realignment | |------------------------------| -- <-- SP (realigned to 32) This diagram should help show that when SP==InSP -- e.g., when the incoming SP is 32-byte aligned, subtracting 2*VLENB+64 may keep it that way -- the RVV object clobbers the spill of s9. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D125962 --- .../RISCV/rvv/wrong-stack-slot-rv32.mir | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir index 21fef81e60d9e4..bf5a5d0954aaea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=riscv32 -mattr=+m,+v -o - %s \ # RUN: -start-before=prologepilog | FileCheck %s # -# This test checks that we are assigning the right stack slot to GPRs and to +# These tests check that we are assigning the right stack slot to GPRs and to # vector registers (VRs). If this test changes, make sure there is no overlap # between slots for GPRs and VRs. 
--- | @@ -27,6 +27,34 @@ ret void } + ; FIXME: If the stack realignment does nothing to sp (a possibility) then + ; the vlenb*2-sized RVV stack object at sp+56 overlaps with the slot + ; allocated to spilling s9 (sp+52+vlenb*2) + define void @rvv_clobbers_callee_save() #0 { + ; CHECK-LABEL: rvv_clobbers_callee_save: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: addi sp, sp, -64 + ; CHECK-NEXT: sw ra, 60(sp) # 4-byte Folded Spill + ; CHECK-NEXT: sw s0, 56(sp) # 4-byte Folded Spill + ; CHECK-NEXT: sw s9, 52(sp) # 4-byte Folded Spill + ; CHECK-NEXT: addi s0, sp, 64 + ; CHECK-NEXT: csrr a1, vlenb + ; CHECK-NEXT: slli a1, a1, 1 + ; CHECK-NEXT: sub sp, sp, a1 + ; CHECK-NEXT: andi sp, sp, -32 + ; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill + ; CHECK-NEXT: addi a0, sp, 56 + ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill + ; CHECK-NEXT: addi sp, s0, -64 + ; CHECK-NEXT: lw ra, 60(sp) # 4-byte Folded Reload + ; CHECK-NEXT: lw s0, 56(sp) # 4-byte Folded Reload + ; CHECK-NEXT: lw s9, 52(sp) # 4-byte Folded Reload + ; CHECK-NEXT: addi sp, sp, 64 + ; CHECK-NEXT: ret + entry: + ret void + } + attributes #0 = { nounwind } ... --- @@ -48,3 +76,22 @@ body: | PseudoRET ... +--- +name: rvv_clobbers_callee_save +alignment: 2 +frameInfo: + maxAlignment: 8 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 32 } + - { id: 1, type: spill-slot, size: 16, alignment: 8, stack-id: scalable-vector } +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10, $v30m2 + + $x25 = COPY $x10 + SW renamable $x25, %stack.0, 0 :: (store (s32) into %stack.0) + PseudoVSPILL_M2 renamable $v30m2, %stack.1 :: (store unknown-size into %stack.1, align 8) + PseudoRET + +... From d60ae47f9dabcbc42b894f988b51d4643447b599 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 19 May 2022 11:03:36 +0100 Subject: [PATCH 075/908] [RISCV] Fix logic for determining RVV stack padding We must add padding when using SP or BP to access stack objects. 
Checking whether we're missing FP is not sufficient as stack realignment uses SP too. The test in D125962 explains the specific issue in more detail. Split from D125787. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D125964 --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 8 ++++--- .../RISCV/rvv/wrong-stack-slot-rv32.mir | 23 ++++++++----------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 797a3515f9d9f0..122ab22aff8fd6 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -942,9 +942,11 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( } RVFI->setCalleeSavedStackSize(Size); - // Padding required to keep the RVV stack aligned to 8 bytes - // within the main stack. We only need this when not using FP. - if (RVVStackSize && !hasFP(MF) && Size % 8 != 0) { + // Padding required to keep the RVV stack aligned to 8 bytes within the main + // stack. We only need this when using SP or BP to access stack objects. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + if (RVVStackSize && (!hasFP(MF) || TRI->hasStackRealignment(MF)) && + Size % 8 != 0) { // Because we add the padding to the size of the stack, adding // getStackAlign() will keep it aligned. 
RVFI->setRVVPadding(getStackAlign().value()); diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir index bf5a5d0954aaea..202612ca147f19 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir @@ -27,17 +27,14 @@ ret void } - ; FIXME: If the stack realignment does nothing to sp (a possibility) then - ; the vlenb*2-sized RVV stack object at sp+56 overlaps with the slot - ; allocated to spilling s9 (sp+52+vlenb*2) define void @rvv_clobbers_callee_save() #0 { ; CHECK-LABEL: rvv_clobbers_callee_save: ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: addi sp, sp, -64 - ; CHECK-NEXT: sw ra, 60(sp) # 4-byte Folded Spill - ; CHECK-NEXT: sw s0, 56(sp) # 4-byte Folded Spill - ; CHECK-NEXT: sw s9, 52(sp) # 4-byte Folded Spill - ; CHECK-NEXT: addi s0, sp, 64 + ; CHECK-NEXT: addi sp, sp, -80 + ; CHECK-NEXT: sw ra, 76(sp) # 4-byte Folded Spill + ; CHECK-NEXT: sw s0, 72(sp) # 4-byte Folded Spill + ; CHECK-NEXT: sw s9, 68(sp) # 4-byte Folded Spill + ; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 @@ -45,11 +42,11 @@ ; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill ; CHECK-NEXT: addi a0, sp, 56 ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill - ; CHECK-NEXT: addi sp, s0, -64 - ; CHECK-NEXT: lw ra, 60(sp) # 4-byte Folded Reload - ; CHECK-NEXT: lw s0, 56(sp) # 4-byte Folded Reload - ; CHECK-NEXT: lw s9, 52(sp) # 4-byte Folded Reload - ; CHECK-NEXT: addi sp, sp, 64 + ; CHECK-NEXT: addi sp, s0, -80 + ; CHECK-NEXT: lw ra, 76(sp) # 4-byte Folded Reload + ; CHECK-NEXT: lw s0, 72(sp) # 4-byte Folded Reload + ; CHECK-NEXT: lw s9, 68(sp) # 4-byte Folded Reload + ; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret entry: ret void From 534ea8bca51d9e2673a7170c34f1d91b17fabda2 Mon Sep 17 00:00:00 2001 From: Rahul Anand R Date: Fri, 20 May 2022 13:41:32 +0100 Subject: [PATCH 076/908] 
[AArch64] Generate AND in place of CSEL for predicated CTTZ This patch implements a target-specific optimization that replaces the cmp and csel from cttz with an and mask. Recommitted with a fix for truncated value sizes. Differential Revision: https://reviews.llvm.org/D123782 --- .../Target/AArch64/AArch64ISelLowering.cpp | 46 +++++ .../CodeGen/AArch64/fold-csel-cttz-and.ll | 160 ++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 503ac7cb359eae..e3fa268732b308 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17691,6 +17691,47 @@ static SDValue performBRCONDCombine(SDNode *N, return SDValue(); } +static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) { + unsigned CC = N->getConstantOperandVal(2); + SDValue SUBS = N->getOperand(3); + SDValue Zero, CTTZ; + + if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) { + Zero = N->getOperand(0); + CTTZ = N->getOperand(1); + } else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) { + Zero = N->getOperand(1); + CTTZ = N->getOperand(0); + } else + return SDValue(); + + if ((CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::TRUNCATE) || + (CTTZ.getOpcode() == ISD::TRUNCATE && + CTTZ.getOperand(0).getOpcode() != ISD::CTTZ)) + return SDValue(); + + assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) && + "Illegal type in CTTZ folding"); + + if (!isNullConstant(Zero) || !isNullConstant(SUBS.getOperand(1))) + return SDValue(); + + SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE + ? CTTZ.getOperand(0).getOperand(0) + : CTTZ.getOperand(0); + + if (X != SUBS.getOperand(0)) + return SDValue(); + + unsigned BitWidth = CTTZ.getOpcode() == ISD::TRUNCATE + ? 
CTTZ.getOperand(0).getValueSizeInBits() + : CTTZ.getValueSizeInBits(); + SDValue BitWidthMinusOne = + DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType()); + return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ, + BitWidthMinusOne); +} + // Optimize CSEL instructions static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -17699,6 +17740,11 @@ static SDValue performCSELCombine(SDNode *N, if (N->getOperand(0) == N->getOperand(1)) return N->getOperand(0); + // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1 + // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1 + if (SDValue Folded = foldCSELofCTTZ(N, DAG)) + return Folded; + return performCONDCombine(N, DCI, DAG, 2, 3); } diff --git a/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll b/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll new file mode 100644 index 00000000000000..89d1d0b1f1d4da --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +;; Check the transformation +;; CSEL 0, cttz, cc -> AND cttz numbits-1 +;; for cttz in the case of i32 and i64 respectively + +;; Cases for which the optimzation takes place +define i32 @cttzi32(i32 %x) { +; CHECK-LABEL: cttzi32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit w8, w0 +; CHECK-NEXT: clz w8, w8 +; CHECK-NEXT: and w0, w8, #0x1f +; CHECK-NEXT: ret +entry: + %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) + %1 = icmp eq i32 %x, 0 + %2 = select i1 %1, i32 0, i32 %0 + ret i32 %2 +} + +define i64 @cttzi64(i64 %x) { +; CHECK-LABEL: cttzi64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit x8, x0 +; CHECK-NEXT: clz x8, x8 +; CHECK-NEXT: and x0, x8, #0x3f +; CHECK-NEXT: ret +entry: + %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true) + %1 = icmp eq i64 %x, 0 + %2 = select i1 %1, i64 0, i64 %0 + ret i64 %2 +} + +define i32 @cttzi32ne(i32 %x) { +; CHECK-LABEL: cttzi32ne: 
+; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit w8, w0 +; CHECK-NEXT: clz w8, w8 +; CHECK-NEXT: and w0, w8, #0x1f +; CHECK-NEXT: ret +entry: + %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) + %1 = icmp ne i32 %x, 0 + %2 = select i1 %1, i32 %0, i32 0 + ret i32 %2 +} + +define i64 @cttzi64ne(i64 %x) { +; CHECK-LABEL: cttzi64ne: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit x8, x0 +; CHECK-NEXT: clz x8, x8 +; CHECK-NEXT: and x0, x8, #0x3f +; CHECK-NEXT: ret +entry: + %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true) + %1 = icmp ne i64 %x, 0 + %2 = select i1 %1, i64 %0, i64 0 + ret i64 %2 +} + +define i32 @cttztrunc(i64 %x) { +; CHECK-LABEL: cttztrunc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit x8, x0 +; CHECK-NEXT: clz x8, x8 +; CHECK-NEXT: and w0, w8, #0x3f +; CHECK-NEXT: ret +entry: + %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true) + %1 = icmp eq i64 %x, 0 + %2 = select i1 %1, i64 0, i64 %0 + %3 = trunc i64 %2 to i32 + ret i32 %3 +} + +;; Cases for which the optimization does not take place +define i32 @cttzne(i32 %x) { +; CHECK-LABEL: cttzne: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit w8, w0 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: clz w8, w8 +; CHECK-NEXT: csel w0, wzr, w8, ne +; CHECK-NEXT: ret +entry: + %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) + %1 = icmp ne i32 %x, 0 + %2 = select i1 %1, i32 0, i32 %0 + ret i32 %2 +} + +define i32 @cttzxnot0(i32 %x) { +; CHECK-LABEL: cttzxnot0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit w8, w0 +; CHECK-NEXT: cmp w0, #10 +; CHECK-NEXT: clz w8, w8 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret +entry: + %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) + %1 = icmp eq i32 %x, 10 + %2 = select i1 %1, i32 0, i32 %0 + ret i32 %2 +} + +define i32 @cttzlhsnot0(i32 %x) { +; CHECK-LABEL: cttzlhsnot0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit w9, w0 +; CHECK-NEXT: mov w8, #10 +; CHECK-NEXT: clz w9, w9 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: csel w0, w8, w9, eq +; CHECK-NEXT: ret +entry: + %0 = call 
i32 @llvm.cttz.i32(i32 %x, i1 true) + %1 = icmp eq i32 %x, 0 + %2 = select i1 %1, i32 10, i32 %0 + ret i32 %2 +} + +define i32 @notcttz(i32 %x) { +; CHECK-LABEL: notcttz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: clz w8, w0 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret +entry: + %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %1 = icmp eq i32 %x, 0 + %2 = select i1 %1, i32 0, i32 %0 + ret i32 %2 +} + +define i32 @cttzlhsnotx(i32 %x, i32 %y) { +; CHECK-LABEL: cttzlhsnotx: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rbit w8, w0 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: clz w8, w8 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret +entry: + %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) + %1 = icmp eq i32 %y, 0 + %2 = select i1 %1, i32 0, i32 %0 + ret i32 %2 +} + +declare i32 @llvm.cttz.i32(i32, i1) + +declare i64 @llvm.cttz.i64(i64, i1) + +declare i32 @llvm.ctlz.i32(i32, i1) From 5b0022a9df3f3a35661f71b23a66c667e8501c32 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 16 May 2022 14:46:09 +0200 Subject: [PATCH 077/908] [clangd] Support UnresolvedUsingTypeLoc AST node in FindTarget. to make features like hover, go-to-def work when the cursor is on the UnresolvedUsingTypeLoc. Differential Revision: https://reviews.llvm.org/D125684 --- clang-tools-extra/clangd/FindTarget.cpp | 12 +++++++----- .../clangd/unittests/FindTargetTests.cpp | 11 +++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index dcdac55c26b706..40859158905130 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -127,11 +127,6 @@ bool shouldSkipTypedef(const TypedefNameDecl *TD) { // template using pvec = vector; pvec x; // There's no Decl `pvec`, we must choose `pvec` or `vector` // and both are lossy. We must know upfront what the caller ultimately wants. 
-// -// FIXME: improve common dependent scope using name lookup in primary templates. -// We currently handle several dependent constructs, but some others remain to -// be handled: -// - UnresolvedUsingTypenameDecl struct TargetFinder { using RelSet = DeclRelationSet; using Rel = DeclRelation; @@ -207,6 +202,10 @@ struct TargetFinder { } } Flags |= Rel::Alias; // continue with the alias + } else if (isa(D)) { + // FIXME: improve common dependent scope using name lookup in primary + // templates. + Flags |= Rel::Alias; } else if (const UsingShadowDecl *USD = dyn_cast(D)) { // Include the Introducing decl, but don't traverse it. This may end up // including *all* shadows, which we don't want. @@ -382,6 +381,9 @@ struct TargetFinder { // TypeLoc never has a deduced type. https://llvm.org/PR42914 Outer.add(DT->getDeducedType(), Flags); } + void VisitUnresolvedUsingType(const UnresolvedUsingType *UUT) { + Outer.add(UUT->getDecl(), Flags); + } void VisitDeducedTemplateSpecializationType( const DeducedTemplateSpecializationType *DTST) { if (const auto *USD = DTST->getTemplateName().getAsUsingShadowDecl()) diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index f7d547bd960cdb..51eae572ed904e 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -268,6 +268,17 @@ TEST_F(TargetDeclTest, UsingDecl) { EXPECT_DECLS("DeducedTemplateSpecializationTypeLoc", {"using ns::S", Rel::Alias}, {"template class S"}, {"class S", Rel::TemplatePattern}); + + Code = R"cpp( + template + class Foo { public: class foo {}; }; + template class A : public Foo { + using typename Foo::foo; + [[foo]] abc; + }; + )cpp"; + EXPECT_DECLS("UnresolvedUsingTypeLoc", + {"using typename Foo::foo", Rel::Alias}); } TEST_F(TargetDeclTest, BaseSpecifier) { From 4e271fc49517362a9333371fb1ab7e865d4c1b0e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 20 May 
2022 05:31:03 -0700 Subject: [PATCH 078/908] [SLP][NFC]Use SmallPtrSet to avoid n*m complexity, NFC. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 18a776436c78b5..e366044353b1f9 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3192,7 +3192,7 @@ class BoUpSLP { void scheduleBlock(BlockScheduling *BS); /// List of users to ignore during scheduling and that don't need extracting. - ArrayRef UserIgnoreList; + SmallPtrSet UserIgnoreList; /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of /// sorted SmallVectors of unsigned. @@ -4119,7 +4119,7 @@ void BoUpSLP::buildExternalUses( } // Ignore users in the user ignore list. - if (is_contained(UserIgnoreList, UserInst)) + if (UserIgnoreList.contains(UserInst)) continue; LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " @@ -4278,7 +4278,8 @@ BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const { void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { deleteTree(); - UserIgnoreList = UserIgnoreLst; + UserIgnoreList.clear(); + UserIgnoreList.insert(UserIgnoreLst.begin(), UserIgnoreLst.end()); if (!allSameType(Roots)) return; buildTree_rec(Roots, 0, EdgeInfo()); @@ -4638,7 +4639,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // The reduction nodes (stored in UserIgnoreList) also should stay scalar. 
for (Value *V : VL) { - if (is_contained(UserIgnoreList, V)) { + if (UserIgnoreList.contains(V)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); if (TryToFindDuplicates(S)) newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, @@ -8260,7 +8261,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); // It is legal to delete users in the ignorelist. - assert((getTreeEntry(U) || is_contained(UserIgnoreList, U) || + assert((getTreeEntry(U) || UserIgnoreList.contains(U) || (isa_and_nonnull(U) && isDeleted(cast(U)))) && "Deleting out-of-tree value"); From fc9c59c355cb255446e571b4515b5e41a76503c4 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 4 Aug 2021 10:58:37 -0700 Subject: [PATCH 079/908] [SLP]Do not emit extract elements for insertelements users, replace with shuffles directly. SLP vectorizer emits extracts for externally used vectorized scalars and estimates the cost for each such extract. But in many cases these scalars are input for insertelement instructions, forming buildvector, and instead of extractelement/insertelement pair we can emit/cost estimate shuffle(s) cost and generate series of shuffles, which can be further optimized. Tested using test-suite (+SPEC2017), the tests passed, SLP was able to generate/vectorize more instructions in many cases and it allowed reducing the number of re-vectorization attempts (where we could try to vectorize buildvector insertelements again and again).
Differential Revision: https://reviews.llvm.org/D107966 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 307 ++++++++++++++++++ .../SLPVectorizer/AArch64/loadorder.ll | 8 +- .../AArch64/transpose-inseltpoison.ll | 2 +- .../SLPVectorizer/AArch64/transpose.ll | 2 +- .../SLPVectorizer/AArch64/tsc-s116.ll | 18 +- .../X86/buildvector-same-lane-insert.ll | 19 +- .../SLPVectorizer/X86/buildvector-shuffle.ll | 17 +- .../SLPVectorizer/X86/cmp-as-alternate-ops.ll | 16 +- .../SLPVectorizer/X86/crash_7zip.ll | 14 +- .../SLPVectorizer/X86/crash_bullet3.ll | 10 +- .../SLPVectorizer/X86/crash_cmpop.ll | 2 +- .../X86/crash_exceed_scheduling.ll | 2 +- .../SLPVectorizer/X86/crash_lencod.ll | 9 +- .../X86/crash_scheduling-inseltpoison.ll | 13 +- .../SLPVectorizer/X86/crash_scheduling.ll | 13 +- .../SLPVectorizer/X86/crash_smallpt.ll | 15 +- .../SLPVectorizer/X86/extractelement.ll | 24 +- .../SLPVectorizer/X86/horizontal-list.ll | 11 +- .../SLPVectorizer/X86/horizontal-minmax.ll | 17 +- ...nsert-element-build-vector-inseltpoison.ll | 27 +- .../X86/insert-element-build-vector.ll | 27 +- .../SLPVectorizer/X86/insert-shuffle.ll | 14 +- .../X86/jumbled-load-multiuse.ll | 7 +- .../SLPVectorizer/X86/jumbled-load.ll | 17 +- .../SLPVectorizer/X86/jumbled_store_crash.ll | 15 +- .../X86/load-merge-inseltpoison.ll | 5 +- .../SLPVectorizer/X86/load-merge.ll | 5 +- .../Transforms/SLPVectorizer/X86/lookahead.ll | 32 +- .../SLPVectorizer/X86/ordering-bug.ll | 7 +- llvm/test/Transforms/SLPVectorizer/X86/phi.ll | 60 ++-- .../SLPVectorizer/X86/pr42022-inseltpoison.ll | 14 +- .../Transforms/SLPVectorizer/X86/pr42022.ll | 14 +- .../SLPVectorizer/X86/reduction-same-vals.ll | 16 +- .../X86/remark_extract_broadcast.ll | 10 +- .../X86/reorder_diamond_match.ll | 25 +- .../X86/reordered-top-scalars.ll | 15 +- .../X86/vec_list_bias-inseltpoison.ll | 12 +- .../SLPVectorizer/X86/vec_list_bias.ll | 12 +- .../X86/vectorize-widest-phis.ll | 16 +- 39 files changed, 519 insertions(+), 350 deletions(-) diff --git 
a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e366044353b1f9..ca9345f5233e46 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6668,6 +6668,23 @@ static bool isFirstInsertElement(const InsertElementInst *IE1, llvm_unreachable("Two different buildvectors not expected."); } +namespace { +/// Returns incoming Value *, if the requested type is Value * too, or a default +/// value, otherwise. +struct ValueSelect { + template + static typename std::enable_if::value, Value *>::type + get(Value *V) { + return V; + } + template + static typename std::enable_if::value, U>::type + get(Value *) { + return U(); + } +}; +} // namespace + /// Does the analysis of the provided shuffle masks and performs the requested /// actions on the vectors with the given shuffle masks. It tries to do it in /// several steps. @@ -6700,6 +6717,9 @@ static T *performExtractsShuffleAction( else Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; } + auto *V = ValueSelect::get(Base); + assert((!V || GetVF(V) == Mask.size()) && + "Expected base vector of VF number of elements."); Prev = Action(Mask, {nullptr, Res.first}); } else if (ShuffleMask.size() == 1) { // Base is undef and only 1 vector is shuffled - perform the action only for @@ -8105,6 +8125,17 @@ Value *BoUpSLP::vectorizeTree() { return vectorizeTree(ExternallyUsedValues); } +namespace { +/// Data type for handling buildvector sequences with the reused scalars from +/// other tree entries. +struct ShuffledInsertData { + /// List of insertelements to be replaced by shuffles. + SmallVector InsertElements; + /// The parent vectors and shuffle mask for the given list of inserts. + MapVector> ValueMasks; +}; +} // namespace + Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. 
@@ -8138,6 +8169,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); + SmallVector ShuffledInserts; + // Maps vector instruction to original insertelement instruction + DenseMap VectorToInsertElement; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -8177,6 +8211,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { assert(isa(Scalar->getType()) && isa(Scalar) && "In-tree scalar of vector type is not insertelement?"); + auto *IE = cast(Scalar); + VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; // If User == nullptr, the Scalar is used as extra arg. Generate @@ -8205,6 +8241,64 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { continue; } + if (auto *VU = dyn_cast(User)) { + if (!Scalar->getType()->isVectorTy()) { + if (auto *FTy = dyn_cast(User->getType())) { + Optional InsertIdx = getInsertIndex(VU); + if (InsertIdx) { + auto *It = + find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) { + // Checks if 2 insertelements are from the same buildvector. + InsertElementInst *VecInsert = Data.InsertElements.front(); + return areTwoInsertFromSameBuildVector(VU, VecInsert); + }); + unsigned Idx = *InsertIdx; + if (It == ShuffledInserts.end()) { + (void)ShuffledInserts.emplace_back(); + It = std::next(ShuffledInserts.begin(), + ShuffledInserts.size() - 1); + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + // Find the insertvector, vectorized in tree, if any. + Value *Base = VU; + while (auto *IEBase = dyn_cast(Base)) { + if (IEBase != User && + (!IEBase->hasOneUse() || + getInsertIndex(IEBase).getValueOr(Idx) == Idx)) + break; + // Build the mask for the vectorized insertelement instructions. 
+ if (const TreeEntry *E = getTreeEntry(IEBase)) { + do { + IEBase = cast(Base); + int IEIdx = *getInsertIndex(IEBase); + assert(Mask[Idx] == UndefMaskElem && + "InsertElementInstruction used already."); + Mask[IEIdx] = IEIdx; + Base = IEBase->getOperand(0); + } while (E == getTreeEntry(Base)); + break; + } + Base = cast(Base)->getOperand(0); + // After the vectorization the def-use chain has changed, need + // to look through original insertelement instructions, if they + // get replaced by vector instructions. + auto It = VectorToInsertElement.find(Base); + if (It != VectorToInsertElement.end()) + Base = It->second; + } + } + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + Mask[Idx] = ExternalUse.Lane; + It->InsertElements.push_back(cast(User)); + continue; + } + } + } + } + // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. if (auto *VecI = dyn_cast(Vec)) { @@ -8240,6 +8334,219 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } + // Checks if the mask is an identity mask. + auto &&IsIdentityMask = [](ArrayRef Mask, FixedVectorType *VecTy) { + int Limit = Mask.size(); + return VecTy->getNumElements() == Mask.size() && + all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && + ShuffleVectorInst::isIdentityMask(Mask); + }; + // Tries to combine 2 different masks into single one. + auto &&CombineMasks = [](SmallVectorImpl &Mask, ArrayRef ExtMask) { + SmallVector NewMask(ExtMask.size(), UndefMaskElem); + for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) { + if (ExtMask[I] == UndefMaskElem) + continue; + NewMask[I] = Mask[ExtMask[I]]; + } + Mask.swap(NewMask); + }; + // Peek through shuffles, trying to simplify the final shuffle code. 
+ auto &&PeekThroughShuffles = + [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl &Mask, + bool CheckForLengthChange = false) { + while (auto *SV = dyn_cast(V)) { + // Exit if not a fixed vector type or changing size shuffle. + if (!isa(SV->getType()) || + (CheckForLengthChange && SV->changesLength())) + break; + // Exit if the identity or broadcast mask is found. + if (IsIdentityMask(Mask, cast(SV->getType())) || + SV->isZeroEltSplat()) + break; + bool IsOp1Undef = isUndefVector(SV->getOperand(0)); + bool IsOp2Undef = isUndefVector(SV->getOperand(1)); + if (!IsOp1Undef && !IsOp2Undef) + break; + SmallVector ShuffleMask(SV->getShuffleMask().begin(), + SV->getShuffleMask().end()); + CombineMasks(ShuffleMask, Mask); + Mask.swap(ShuffleMask); + if (IsOp2Undef) + V = SV->getOperand(0); + else + V = SV->getOperand(1); + } + }; + // Smart shuffle instruction emission, walks through shuffles trees and + // tries to find the best matching vector for the actual shuffle + // instruction. + auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles, + &CombineMasks](Value *V1, Value *V2, + ArrayRef Mask) -> Value * { + assert(V1 && "Expected at least one vector value."); + if (V2 && !isUndefVector(V2)) { + // Peek through shuffles. + Value *Op1 = V1; + Value *Op2 = V2; + int VF = + cast(V1->getType())->getElementCount().getKnownMinValue(); + SmallVector CombinedMask1(Mask.size(), UndefMaskElem); + SmallVector CombinedMask2(Mask.size(), UndefMaskElem); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (Mask[I] < VF) + CombinedMask1[I] = Mask[I]; + else + CombinedMask2[I] = Mask[I] - VF; + } + Value *PrevOp1; + Value *PrevOp2; + do { + PrevOp1 = Op1; + PrevOp2 = Op2; + PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true); + PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true); + // Check if we have 2 resizing shuffles - need to peek through operands + // again. 
+ if (auto *SV1 = dyn_cast(Op1)) + if (auto *SV2 = dyn_cast(Op2)) + if (SV1->getOperand(0)->getType() == + SV2->getOperand(0)->getType() && + SV1->getOperand(0)->getType() != SV1->getType() && + isUndefVector(SV1->getOperand(1)) && + isUndefVector(SV2->getOperand(1))) { + Op1 = SV1->getOperand(0); + Op2 = SV2->getOperand(0); + SmallVector ShuffleMask1(SV1->getShuffleMask().begin(), + SV1->getShuffleMask().end()); + CombineMasks(ShuffleMask1, CombinedMask1); + CombinedMask1.swap(ShuffleMask1); + SmallVector ShuffleMask2(SV2->getShuffleMask().begin(), + SV2->getShuffleMask().end()); + CombineMasks(ShuffleMask2, CombinedMask2); + CombinedMask2.swap(ShuffleMask2); + } + } while (PrevOp1 != Op1 || PrevOp2 != Op2); + VF = cast(Op1->getType()) + ->getElementCount() + .getKnownMinValue(); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (CombinedMask2[I] != UndefMaskElem) { + assert(CombinedMask1[I] == UndefMaskElem && + "Expected undefined mask element"); + CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); + } + } + Value *Vec = Builder.CreateShuffleVector( + Op1, Op1 == Op2 ? 
PoisonValue::get(Op1->getType()) : Op2, + CombinedMask1); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + if (isa(V1)) + return PoisonValue::get(FixedVectorType::get( + cast(V1->getType())->getElementType(), Mask.size())); + Value *Op = V1; + SmallVector CombinedMask(Mask.begin(), Mask.end()); + PeekThroughShuffles(Op, CombinedMask); + if (!isa(Op->getType()) || + !IsIdentityMask(CombinedMask, cast(Op->getType()))) { + Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + return Op; + }; + + auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef Mask) { + unsigned VF = Mask.size(); + unsigned VecVF = cast(Vec->getType())->getNumElements(); + if (VF != VecVF) { + if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); })) { + Vec = CreateShuffle(Vec, nullptr, Mask); + return std::make_pair(Vec, true); + } + SmallVector ResizeMask(VF, UndefMaskElem); + for (unsigned I = 0; I < VF; ++I) { + if (Mask[I] != UndefMaskElem) + ResizeMask[Mask[I]] = Mask[I]; + } + Vec = CreateShuffle(Vec, nullptr, ResizeMask); + } + + return std::make_pair(Vec, false); + }; + // Perform shuffling of the vectorize tree entries for better handling of + // external extracts. + for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) { + // Find the first and the last instruction in the list of insertelements. 
+ sort(ShuffledInserts[I].InsertElements, isFirstInsertElement); + InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front(); + InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back(); + Builder.SetInsertPoint(LastInsert); + auto Vector = ShuffledInserts[I].ValueMasks.takeVector(); + Value *NewInst = performExtractsShuffleAction( + makeMutableArrayRef(Vector.data(), Vector.size()), + FirstInsert->getOperand(0), + [](Value *Vec) { + return cast(Vec->getType()) + ->getElementCount() + .getKnownMinValue(); + }, + ResizeToVF, + [FirstInsert, &CreateShuffle](ArrayRef Mask, + ArrayRef Vals) { + assert((Vals.size() == 1 || Vals.size() == 2) && + "Expected exactly 1 or 2 input values."); + if (Vals.size() == 1) { + // Do not create shuffle if the mask is a simple identity + // non-resizing mask. + if (Mask.size() != cast(Vals.front()->getType()) + ->getNumElements() || + !ShuffleVectorInst::isIdentityMask(Mask)) + return CreateShuffle(Vals.front(), nullptr, Mask); + return Vals.front(); + } + return CreateShuffle(Vals.front() ? Vals.front() + : FirstInsert->getOperand(0), + Vals.back(), Mask); + }); + auto It = ShuffledInserts[I].InsertElements.rbegin(); + // Rebuild buildvector chain. 
+ InsertElementInst *II = nullptr; + if (It != ShuffledInserts[I].InsertElements.rend()) + II = *It; + SmallVector Inserts; + while (It != ShuffledInserts[I].InsertElements.rend()) { + assert(II && "Must be an insertelement instruction."); + if (*It == II) + ++It; + else + Inserts.push_back(cast(II)); + II = dyn_cast(II->getOperand(0)); + } + for (Instruction *II : reverse(Inserts)) { + II->replaceUsesOfWith(II->getOperand(0), NewInst); + if (auto *NewI = dyn_cast(NewInst)) + if (II->getParent() == NewI->getParent() && II->comesBefore(NewI)) + II->moveAfter(NewI); + NewInst = II; + } + LastInsert->replaceAllUsesWith(NewInst); + for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) { + IE->replaceUsesOfWith(IE->getOperand(1), + PoisonValue::get(IE->getOperand(1)->getType())); + eraseInstruction(IE); + } + CSEBlocks.insert(LastInsert->getParent()); + } + // For each vectorized value: for (auto &TEPtr : VectorizableTree) { TreeEntry *Entry = TEPtr.get(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5dc03ab1a3cd1f..68c8555bd6d435 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1312,19 +1312,19 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP56]] ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> 
[[TMP62]], <16 x i32> [[TMP63]], <16 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = sub nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP72]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP74:%.*]] = add nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = sub nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 12a2cd2a6834fe..148169fb64de42 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; 
CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index 03a29f48a739e7..f5079fab51a6ab 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll index aab848ba73ddbe..e25fd9b206334e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -26,19 +26,15 @@ define void @s116_modified(float* %a) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x 
float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[GEP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4 ; CHECK-NEXT: ret void ; %gep0 = getelementptr inbounds float, float* %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll index 
77174151e96350..281169f7a69fea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -10,12 +10,9 @@ define void @test() { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: ret void ; @@ -46,14 +43,10 @@ define void @test1() { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1 -; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 
x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1 -; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP9]], ptr null, align 4 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds float, ptr undef, i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll index 3b0ec888de661c..610049129d3983 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -4,13 +4,12 @@ define void @b() { ; CHECK-LABEL: @b( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) -; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer -; CHECK-NEXT: store <4 x float> [[TMP5]], ptr undef, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> , <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) +; 
CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP3]], zeroinitializer +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr undef, align 4 ; CHECK-NEXT: ret void ; entry: @@ -50,8 +49,7 @@ define void @test(float %a) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP]] ; entry: @@ -71,7 +69,6 @@ define internal void @test1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[DOTSROA_025_4_VEC_INSERT_US_I:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll index 4463f94b1c557c..36347b4843fe76 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll @@ -52,16 +52,12 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) { ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 -; CHECK-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT4:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 -; CHECK-NEXT: [[RETVAL_SROA_0_4_VEC_INSERT7:%.*]] = 
insertelement <2 x float> [[RETVAL_SROA_0_0_VEC_INSERT4]], float [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 -; CHECK-NEXT: [[RETVAL_SROA_7_8_VEC_INSERT11:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP6]], i32 3 -; CHECK-NEXT: [[RETVAL_SROA_7_12_VEC_INSERT13:%.*]] = insertelement <2 x float> [[RETVAL_SROA_7_8_VEC_INSERT11]], float [[TMP10]], i64 1 -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[RETVAL_SROA_0_4_VEC_INSERT7]], 0 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[RETVAL_SROA_7_12_VEC_INSERT13]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll index c9590ae76f405d..402211b4ef73eb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll @@ -13,21 +13,19 @@ define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145. 
; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4 ; CHECK-NEXT: br label [[DO_BODY66_I:%.*]] ; CHECK: do.body66.i: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> , i32 [[TMP2]], i32 1 ; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]] ; CHECK: if.else.i: -; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], undef +; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef ; CHECK-NEXT: br label [[DO_COND_I]] ; CHECK: do.cond.i: -; CHECK-NEXT: [[TMP5]] = phi <2 x i32> [ [[TMP4]], [[IF_ELSE_I]] ], [ [[TMP3]], [[DO_BODY66_I]] ] +; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ] ; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]] ; CHECK: do.end1006.i: -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll index 30abc6f9fbe87a..9b183969001c80 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -36,17 +36,15 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class. ; CHECK: if.then325: ; CHECK-NEXT: br label [[IF_END327]] ; CHECK: if.end327: -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> , float [[TMP4]], i32 0 ; CHECK-NEXT: br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]] ; CHECK: if.then329: ; CHECK-NEXT: br label [[IF_END332]] ; CHECK: if.end332: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[IF_THEN329]] ], [ [[TMP5]], [[IF_END327]] ], [ , [[IF_THEN291]] ] -; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ , [[IF_THEN291]] ] +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP7]], <2 x float>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index ef1ed33ffcb995..0344c5a82fbed5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ 
-32,7 +32,7 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 ; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 94739340c8b5ab..0ab2cf94db8272 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -35,7 +35,7 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll index fa8c8a14e891ce..e87a5190d47216 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ 
-129,11 +129,10 @@ define fastcc void @dct36(double* %inbuf) { ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index bbd71825f96c6a..1c7cd3fd265d58 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -20,18 +20,15 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; 
CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index cd05b940be221f..6b9fdb23714fb1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -20,18 +20,15 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] 
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index 723c12d9b05b67..9bfde3976d2cc3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -115,16 +115,15 @@ define void @_Z8radianceRK3RayiPt() #0 { ; CHECK-NEXT: br i1 undef, label 
[[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = fmul <2 x double> undef, undef +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> undef, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> undef, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> undef, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> undef, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8 ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.then78: ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll index 4c15a89f8f0057..ea4de7916097df 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,23 +82,19 @@ define float @f_used_twice_in_tree(<2 
x float> %x) { ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: ret 
float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index f446f3682e580e..bafc67c9ea31f1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -896,12 +896,11 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 1 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[MUL]], i32 0 ; THRESHOLD-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x float> -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <2 x float> , float [[TMP6]], i32 0 -; THRESHOLD-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP7]] -; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 -; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP9]], [[TMP10]] +; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> , <2 x float> [[TMP5]], <2 x i32> +; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP6]] +; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 +; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[TMP2]], [[OP_RDX2]] ; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 7c4aed1453c97f..801b4863e7c19e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ 
-1100,15 +1100,14 @@ define i32 @maxi8_wrong_parent(i32) { ; THRESH-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] ; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i1> poison, i1 [[OP_RDX]], i32 0 ; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i1> [[TMP11]], i1 [[TMP5]], i32 1 -; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 -; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP3]], i32 1 -; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 -; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP4]], i32 1 -; THRESH-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP14]], <2 x i32> [[TMP16]] -; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 -; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 -; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP18]], i32 [[TMP19]] +; THRESH-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <2 x i32> +; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> +; THRESH-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP2]], <2 x i32> +; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP15]] +; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 +; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1 +; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP17]], i32 [[TMP18]] ; THRESH-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP8]], [[OP_RDX3]] ; THRESH-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP8]], i32 [[OP_RDX3]] ; THRESH-NEXT: ret i32 [[OP_RDX5]] diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index 470fa0caa1cd17..01979d12fb3ac4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -439,12 +439,9 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -477,12 +474,9 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] 
= shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -539,12 +533,9 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index d7adf196f7c2a7..06cdff548a91fc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -474,12 +474,9 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float 
[[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -512,12 +509,9 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -574,12 +568,9 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; 
MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll index 5a9273d1e5b3e9..b64c0e4e35a235 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -19,16 +19,10 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) { ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[VEC2:%.*]] = insertelement <2 x float> [[VEC1]], float [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[VEC3:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[VEC4:%.*]] = insertelement <2 x float> [[VEC3]], float [[TMP13]], i32 1 -; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VEC2]], 0 -; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[VEC4]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> 
[[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0 +; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index d785f8e76798b5..011ce7352079ca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,10 +9,9 @@ define i32 @fn1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll index 31c5d5b07f0e0f..85aa8aed5f5a59 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -58,18 +58,11 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca ; 
CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll index 6bbde0957743ce..78572209687b87 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -39,14 +39,13 @@ define dso_local void @j() 
local_unnamed_addr { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 1 ; CHECK-NEXT: store float [[TMP15]], float* @f, align 4 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> , float [[CONV19]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2 -; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP11]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x float> [[TMP11]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> [[SHUFFLE]], <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP19]], <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = fptosi <4 x float> [[TMP20]] to <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP22]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll index 3d7d632cca3d63..6beff0162ffd26 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -59,9 +59,8 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = 
shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll index 509c37f5da28c0..dd08e7983d70cb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -59,9 +59,8 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index 36b867d4a148fb..faf86fbcafb9d5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll 
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -722,15 +722,11 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] -; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 -; SSE-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 -; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] ; SSE-NEXT: ret double [[ADD3]] ; ; AVX-LABEL: @splat_loads( @@ -791,17 +787,13 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2, ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = 
extractelement <2 x double> [[SHUFFLE]], i32 0 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] -; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 -; SSE-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP10]], [[TMP11]] -; SSE-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 0 -; SSE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP12]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP13]], [[TMP14]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] +; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll index 464288ef340c80..fc18281c3f35ed 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll @@ -29,10 +29,9 @@ define void @f(i1 %x) #0 { ; CHECK: if.then: ; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]] -; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast 
(%struct.a* @a to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index df767259b4f477..c6725da8a98693 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -151,7 +151,7 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00 @@ -163,25 +163,22 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x 
float> [[TMP12]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP17]], -; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], +; CHECK-NEXT: [[TMP16]] = fadd <4 x float> [[TMP6]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 0 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd 
float [[ADD30]], [[TMP21]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: @@ -244,27 +241,20 @@ define float @sort_phi_type(float* nocapture readonly %A) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; 
CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll index 8ab5fb7c7ceec6..595b5c9d1079b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll @@ -10,16 +10,10 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { 
<2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll index 3177f8557315dc..e97a3a4521482b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll @@ -10,16 +10,10 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> 
[[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll index 1fd2f30bf93cc9..aa93abaf4d9cbd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll @@ -10,18 +10,10 @@ define i64 @test() { ; CHECK: bb3: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = call i32 
@llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP65]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index 7ab7f3b67634b3..f71e214fe89382 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,12 +9,10 @@ define void @fextr(i16* %ptr) { ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll 
index 3315e71ae2aa60..fc1a673d818f92 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll @@ -12,22 +12,15 @@ define void @test() { ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]] ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 
x i32> [[TMP11]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 16 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i8, i8* undef, i64 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll index 9bbe2d5d673c76..bb0a39b7b6fb8e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll @@ -7,21 +7,18 @@ define i32 @test(i32* %isec) { ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]] ; CHECK: block1: ; CHECK-NEXT: br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]] ; CHECK: block2: ; CHECK-NEXT: br label [[BLOCK3]] ; CHECK: block3: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1 -; 
CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP7]] -; CHECK-NEXT: ret i32 [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP2]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: %arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 297aa493ed0715..0a07e03bac72c5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -49,15 +49,9 @@ define void @test(i32* nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], 
i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index b0a406e6a8f514..3cf139ea0469b8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -49,15 +49,9 @@ define void @test(i32* nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index 109c27e4f4f4e5..f2fbf1dfc756a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,7 +12,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP18:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: @@ -23,17 +23,13 @@ define void @foo() { ; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float> -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> +; CHECK-NEXT: [[TMP13:%.*]] = 
select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: From 7fcbf133ba9d1228bdc305b5183d9879e516045a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 20 May 2022 09:01:41 -0400 Subject: [PATCH 080/908] [InstCombine] add casted bitwise logic tests to show missing use check; NFC While here, update the auto-generated checks to also check the match the function parameters - there was a potential miscompile that would go unnoticed with the more lenient check lines. --- .../test/Transforms/InstCombine/and-xor-or.ll | 1686 ++++++++++------- 1 file changed, 976 insertions(+), 710 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll index f3ab4aca124465..13ad55d83db4b9 100644 --- a/llvm/test/Transforms/InstCombine/and-xor-or.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; RUN: opt < %s -passes=instcombine -S | FileCheck %s declare void @use(i32) @@ -6,9 +6,10 @@ declare void @use(i32) ; a & (a ^ b) --> a & ~b define i32 @and_xor_common_op(i32 %pa, i32 %pb) { -; CHECK-LABEL: @and_xor_common_op( -; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]] -; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op +; CHECK-SAME: (i32 [[PA:%.*]], i32 [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 ; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]] ; CHECK-NEXT: ret i32 [[R]] @@ -23,9 +24,10 
@@ define i32 @and_xor_common_op(i32 %pa, i32 %pb) { ; a & (b ^ a) --> a & ~b define i32 @and_xor_common_op_commute1(i32 %pa, i32 %pb) { -; CHECK-LABEL: @and_xor_common_op_commute1( -; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]] -; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op_commute1 +; CHECK-SAME: (i32 [[PA:%.*]], i32 [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 ; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]] ; CHECK-NEXT: ret i32 [[R]] @@ -40,9 +42,10 @@ define i32 @and_xor_common_op_commute1(i32 %pa, i32 %pb) { ; (b ^ a) & a --> a & ~b define i32 @and_xor_common_op_commute2(i32 %pa, i32 %pb) { -; CHECK-LABEL: @and_xor_common_op_commute2( -; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]] -; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op_commute2 +; CHECK-SAME: (i32 [[PA:%.*]], i32 [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 ; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]] ; CHECK-NEXT: ret i32 [[R]] @@ -57,9 +60,10 @@ define i32 @and_xor_common_op_commute2(i32 %pa, i32 %pb) { ; (a ^ b) & a --> a & ~b define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) { -; CHECK-LABEL: @and_xor_common_op_commute3( -; CHECK-NEXT: [[A:%.*]] = udiv <2 x i32> , [[PA:%.*]] -; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> , [[PB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op_commute3 +; CHECK-SAME: (<2 x i32> [[PA:%.*]], <2 x i32> [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = udiv <2 x i32> , [[PA]] +; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> , [[PB]] ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[B]], ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[R]] @@ -75,8 +79,9 @@ define <2 x i32> 
@and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) { ; The xor should be a 'not' op (-1 constant). define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) { -; CHECK-LABEL: @and_xor_common_op_constant( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], +; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op_constant +; CHECK-SAME: (<4 x i32> [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A]], ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -88,8 +93,9 @@ define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) { ; a & (a ^ ~b) --> a & b define i32 @and_xor_not_common_op(i32 %a, i32 %b) { -; CHECK-LABEL: @and_xor_not_common_op( -; CHECK-NEXT: [[T4:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_not_common_op +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[T4:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[T4]] ; %b2 = xor i32 %b, -1 @@ -101,10 +107,11 @@ define i32 @and_xor_not_common_op(i32 %a, i32 %b) { ; a & (a ^ ~b) --> a & b define i32 @and_xor_not_common_op_extrause(i32 %a, i32 %b, i32* %dst) { -; CHECK-LABEL: @and_xor_not_common_op_extrause( -; CHECK-NEXT: [[B2:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: store i32 [[B2]], i32* [[DST:%.*]], align 4 -; CHECK-NEXT: [[T4:%.*]] = and i32 [[A:%.*]], [[B]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_not_common_op_extrause +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32* [[DST:%.*]]) { +; CHECK-NEXT: [[B2:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: store i32 [[B2]], i32* [[DST]], align 4 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[T4]] ; %b2 = xor i32 %b, -1 @@ -117,8 +124,9 @@ define i32 @and_xor_not_common_op_extrause(i32 %a, i32 %b, i32* %dst) { ; a & ~(a ^ b) --> a & b define i32 @and_not_xor_common_op(i32 %a, i32 %b) { -; CHECK-LABEL: @and_not_xor_common_op( -; CHECK-NEXT: [[T4:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define 
{{[^@]+}}@and_not_xor_common_op +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[T4:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[T4]] ; %b2 = xor i32 %b, %a @@ -129,9 +137,10 @@ define i32 @and_not_xor_common_op(i32 %a, i32 %b) { declare i32 @gen32() define i32 @and_not_xor_common_op_commutative(i32 %b) { -; CHECK-LABEL: @and_not_xor_common_op_commutative( +; CHECK-LABEL: define {{[^@]+}}@and_not_xor_common_op_commutative +; CHECK-SAME: (i32 [[B:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = call i32 @gen32() -; CHECK-NEXT: [[T4:%.*]] = and i32 [[A]], [[B:%.*]] +; CHECK-NEXT: [[T4:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[T4]] ; %a = call i32 @gen32() @@ -145,8 +154,9 @@ define i32 @and_not_xor_common_op_commutative(i32 %b) { ; (x & y) | (x ^ y) -> x | y define i64 @or(i64 %x, i64 %y) { -; CHECK-LABEL: @or( -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y:%.*]], [[X:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or +; CHECK-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y]], [[X]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %1 = and i64 %y, %x @@ -158,8 +168,9 @@ define i64 @or(i64 %x, i64 %y) { ; (x & y) + (x ^ y) -> x | y define i64 @or2(i64 %x, i64 %y) { -; CHECK-LABEL: @or2( -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y:%.*]], [[X:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or2 +; CHECK-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y]], [[X]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %1 = and i64 %y, %x @@ -171,10 +182,11 @@ define i64 @or2(i64 %x, i64 %y) { ; ((x & y) ^ z) | y -> (z | y) define i64 @and_xor_or1(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or1( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z:%.*]], [[Y:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or1 +; CHECK-SAME: (i64 [[PX:%.*]], i64 [[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, 
[[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z]], [[Y]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -189,10 +201,11 @@ define i64 @and_xor_or1(i64 %px, i64 %py, i64 %pz) { ; ((y & x) ^ z) | y -> (z | y) define i64 @and_xor_or2(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or2( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z:%.*]], [[Y:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or2 +; CHECK-SAME: (i64 [[PX:%.*]], i64 [[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z]], [[Y]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -207,10 +220,11 @@ define i64 @and_xor_or2(i64 %px, i64 %py, i64 %pz) { ; (z ^ (x & y)) | y -> (z | y) define i64 @and_xor_or3(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or3( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z:%.*]], [[Y:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or3 +; CHECK-SAME: (i64 [[PX:%.*]], i64 [[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z]], [[Y]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -225,10 +239,11 @@ define i64 @and_xor_or3(i64 %px, i64 %py, i64 %pz) { ; (z ^ (y & x)) | y -> (z | y) define i64 @and_xor_or4(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or4( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z:%.*]], [[Y:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or4 +; CHECK-SAME: (i64 [[PX:%.*]], i64 
[[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Z]], [[Y]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -243,10 +258,11 @@ define i64 @and_xor_or4(i64 %px, i64 %py, i64 %pz) { ; y | ((x & y) ^ z) -> (y | z) define i64 @and_xor_or5(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or5( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y:%.*]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or5 +; CHECK-SAME: (i64 [[PX:%.*]], i64 [[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y]], [[Z]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -261,10 +277,11 @@ define i64 @and_xor_or5(i64 %px, i64 %py, i64 %pz) { ; y | ((y & x) ^ z) -> (y | z) define i64 @and_xor_or6(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or6( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y:%.*]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or6 +; CHECK-SAME: (i64 [[PX:%.*]], i64 [[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y]], [[Z]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -279,10 +296,11 @@ define i64 @and_xor_or6(i64 %px, i64 %py, i64 %pz) { ; y | (z ^ (x & y)) -> (y | z) define i64 @and_xor_or7(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or7( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or 
i64 [[Y:%.*]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or7 +; CHECK-SAME: (i64 [[PX:%.*]], i64 [[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y]], [[Z]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -297,10 +315,11 @@ define i64 @and_xor_or7(i64 %px, i64 %py, i64 %pz) { ; y | (z ^ (y & x)) -> (y | z) define i64 @and_xor_or8(i64 %px, i64 %py, i64 %pz) { -; CHECK-LABEL: @and_xor_or8( -; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY:%.*]] -; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y:%.*]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or8 +; CHECK-SAME: (i64 [[PX:%.*]], i64 [[PY:%.*]], i64 [[PZ:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = udiv i64 42, [[PY]] +; CHECK-NEXT: [[Z:%.*]] = udiv i64 42, [[PZ]] +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y]], [[Z]] ; CHECK-NEXT: ret i64 [[TMP1]] ; %x = udiv i64 42, %px ; thwart complexity-based canonicalization @@ -315,10 +334,11 @@ define i64 @and_xor_or8(i64 %px, i64 %py, i64 %pz) { ; w | (z ^ (y & x)) define i64 @and_xor_or_negative(i64 %x, i64 %y, i64 %z, i64 %w) { -; CHECK-LABEL: @and_xor_or_negative( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], [[Z:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP2]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_xor_or_negative +; CHECK-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]], i64 [[W:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[Y]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], [[Z]] +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP2]], [[W]] ; CHECK-NEXT: ret i64 [[TMP3]] ; %1 = and i64 %y, %x @@ -333,10 +353,11 @@ define i64 @and_xor_or_negative(i64 %x, i64 %y, i64 %z, i64 %w) { ; Mix the commutation options to provide coverage using less tests. 
define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @and_shl( -; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_shl +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]] ; CHECK-NEXT: ret i8 [[R]] ; @@ -348,10 +369,11 @@ define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { } define i8 @or_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @or_shl( -; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_shl +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]] ; CHECK-NEXT: ret i8 [[R]] ; @@ -363,10 +385,11 @@ define i8 @or_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { } define i8 @xor_shl(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { -; CHECK-LABEL: @xor_shl( -; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]] -; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]] +; CHECK-LABEL: define {{[^@]+}}@xor_shl +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]] +; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]] ; CHECK-NEXT: [[A:%.*]] = xor i8 [[Z]], [[SX]] ; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]] ; CHECK-NEXT: ret i8 [[R]] @@ -380,10 +403,11 
@@ define i8 @xor_shl(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { } define i8 @and_lshr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { -; CHECK-LABEL: @and_lshr( -; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]] -; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]] +; CHECK-LABEL: define {{[^@]+}}@and_lshr +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]] +; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] ; CHECK-NEXT: [[A:%.*]] = and i8 [[Z]], [[SX]] ; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]] ; CHECK-NEXT: ret i8 [[R]] @@ -397,10 +421,11 @@ define i8 @and_lshr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { } define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @or_lshr( -; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_lshr +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[A]] ; CHECK-NEXT: ret i8 [[R]] ; @@ -412,10 +437,11 @@ define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) { } define i8 @xor_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @xor_lshr( -; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@xor_lshr +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z]] ; 
CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]] ; CHECK-NEXT: ret i8 [[R]] ; @@ -427,10 +453,11 @@ define i8 @xor_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) { } define i8 @and_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { -; CHECK-LABEL: @and_ashr( -; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]] -; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]] +; CHECK-LABEL: define {{[^@]+}}@and_ashr +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]] +; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y]], [[SHAMT]] ; CHECK-NEXT: [[A:%.*]] = and i8 [[Z]], [[SX]] ; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]] ; CHECK-NEXT: ret i8 [[R]] @@ -444,10 +471,11 @@ define i8 @and_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { } define i8 @or_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { -; CHECK-LABEL: @or_ashr( -; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]] -; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]] +; CHECK-LABEL: define {{[^@]+}}@or_ashr +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[ZARG:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG]] +; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y]], [[SHAMT]] ; CHECK-NEXT: [[A:%.*]] = or i8 [[Z]], [[SX]] ; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[A]] ; CHECK-NEXT: ret i8 [[R]] @@ -461,10 +489,11 @@ define i8 @or_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) { } define <2 x i8> @xor_ashr(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z, <2 x i8> %shamt) { -; CHECK-LABEL: @xor_ashr( -; CHECK-NEXT: [[SX:%.*]] = ashr <2 x i8> [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = ashr <2 x i8> [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = xor <2 x i8> [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@xor_ashr +; CHECK-SAME: (<2 x i8> [[X:%.*]], <2 x i8> 
[[Y:%.*]], <2 x i8> [[Z:%.*]], <2 x i8> [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = ashr <2 x i8> [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = ashr <2 x i8> [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = xor <2 x i8> [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[A]], [[SY]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -478,10 +507,11 @@ define <2 x i8> @xor_ashr(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z, <2 x i8> %shamt ; Negative test - different logic ops define i8 @or_and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @or_and_shl( -; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_shl +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]] ; CHECK-NEXT: ret i8 [[R]] ; @@ -495,10 +525,11 @@ define i8 @or_and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; Negative test - different shift ops define i8 @or_lshr_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @or_lshr_shl( -; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_lshr_shl +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]] ; CHECK-NEXT: ret i8 [[R]] ; @@ -512,10 +543,11 @@ define i8 @or_lshr_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; Negative test - different shift amounts define i8 @or_lshr_shamt2(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @or_lshr_shamt2( -; 
CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], 5 -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_lshr_shamt2 +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], 5 +; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[A]] ; CHECK-NEXT: ret i8 [[R]] ; @@ -529,10 +561,11 @@ define i8 @or_lshr_shamt2(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; Negative test - multi-use define i8 @xor_lshr_multiuse(i8 %x, i8 %y, i8 %z, i8 %shamt) { -; CHECK-LABEL: @xor_lshr_multiuse( -; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]] -; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]] -; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]] +; CHECK-LABEL: define {{[^@]+}}@xor_lshr_multiuse +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]], i8 [[SHAMT:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X]], [[SHAMT]] +; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y]], [[SHAMT]] +; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z]] ; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]] ; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[A]], [[R]] ; CHECK-NEXT: ret i8 [[R2]] @@ -549,10 +582,11 @@ define i8 @xor_lshr_multiuse(i8 %x, i8 %y, i8 %z, i8 %shamt) { ; Check that logical op is performed on a smaller type and then extended. 
define i64 @sext_or_chain(i64 %a, i16 %b, i16 %c) { -; CHECK-LABEL: @sext_or_chain( -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@sext_or_chain +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C]] to i64 +; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A]] ; CHECK-NEXT: [[OR2:%.*]] = or i64 [[OR]], [[CONV2]] ; CHECK-NEXT: ret i64 [[OR2]] ; @@ -564,10 +598,11 @@ define i64 @sext_or_chain(i64 %a, i16 %b, i16 %c) { } define i64 @zext_or_chain(i64 %a, i16 %b, i16 %c) { -; CHECK-LABEL: @zext_or_chain( -; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = zext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@zext_or_chain +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = zext i16 [[C]] to i64 +; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A]] ; CHECK-NEXT: [[OR2:%.*]] = or i64 [[OR]], [[CONV2]] ; CHECK-NEXT: ret i64 [[OR2]] ; @@ -579,10 +614,11 @@ define i64 @zext_or_chain(i64 %a, i16 %b, i16 %c) { } define i64 @sext_and_chain(i64 %a, i16 %b, i16 %c) { -; CHECK-LABEL: @sext_and_chain( -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[AND:%.*]] = and i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@sext_and_chain +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C]] to i64 +; CHECK-NEXT: [[AND:%.*]] = and i64 [[CONV]], [[A]] ; CHECK-NEXT: [[AND2:%.*]] = and i64 [[AND]], [[CONV2]] ; CHECK-NEXT: ret i64 [[AND2]] ; @@ -594,10 +630,11 @@ 
define i64 @sext_and_chain(i64 %a, i16 %b, i16 %c) { } define i64 @zext_and_chain(i64 %a, i16 %b, i16 %c) { -; CHECK-LABEL: @zext_and_chain( -; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = zext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[AND:%.*]] = and i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@zext_and_chain +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = zext i16 [[C]] to i64 +; CHECK-NEXT: [[AND:%.*]] = and i64 [[CONV]], [[A]] ; CHECK-NEXT: [[AND2:%.*]] = and i64 [[AND]], [[CONV2]] ; CHECK-NEXT: ret i64 [[AND2]] ; @@ -609,10 +646,11 @@ define i64 @zext_and_chain(i64 %a, i16 %b, i16 %c) { } define i64 @sext_xor_chain(i64 %a, i16 %b, i16 %c) { -; CHECK-LABEL: @sext_xor_chain( -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@sext_xor_chain +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C]] to i64 +; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[CONV]], [[A]] ; CHECK-NEXT: [[XOR2:%.*]] = xor i64 [[XOR]], [[CONV2]] ; CHECK-NEXT: ret i64 [[XOR2]] ; @@ -624,10 +662,11 @@ define i64 @sext_xor_chain(i64 %a, i16 %b, i16 %c) { } define i64 @zext_xor_chain(i64 %a, i16 %b, i16 %c) { -; CHECK-LABEL: @zext_xor_chain( -; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = zext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@zext_xor_chain +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = zext i16 [[C]] to i64 +; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[CONV]], [[A]] ; CHECK-NEXT: [[XOR2:%.*]] = xor i64 
[[XOR]], [[CONV2]] ; CHECK-NEXT: ret i64 [[XOR2]] ; @@ -640,12 +679,13 @@ define i64 @zext_xor_chain(i64 %a, i16 %b, i16 %c) { ; Negative test with more uses. define i64 @sext_or_chain_two_uses1(i64 %a, i16 %b, i16 %c, i64 %d) { -; CHECK-LABEL: @sext_or_chain_two_uses1( -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@sext_or_chain_two_uses1 +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C]] to i64 +; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A]] ; CHECK-NEXT: [[OR2:%.*]] = or i64 [[OR]], [[CONV2]] -; CHECK-NEXT: [[USE:%.*]] = udiv i64 [[OR]], [[D:%.*]] +; CHECK-NEXT: [[USE:%.*]] = udiv i64 [[OR]], [[D]] ; CHECK-NEXT: [[RETVAL:%.*]] = udiv i64 [[OR2]], [[USE]] ; CHECK-NEXT: ret i64 [[RETVAL]] ; @@ -659,12 +699,13 @@ define i64 @sext_or_chain_two_uses1(i64 %a, i16 %b, i16 %c, i64 %d) { ret i64 %retval } define i64 @sext_or_chain_two_uses2(i64 %a, i16 %b, i16 %c, i64 %d) { -; CHECK-LABEL: @sext_or_chain_two_uses2( -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i64 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C:%.*]] to i64 -; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@sext_or_chain_two_uses2 +; CHECK-SAME: (i64 [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B]] to i64 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[C]] to i64 +; CHECK-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[A]] ; CHECK-NEXT: [[OR2:%.*]] = or i64 [[OR]], [[CONV2]] -; CHECK-NEXT: [[USE1:%.*]] = udiv i64 [[OR2]], [[D:%.*]] +; CHECK-NEXT: [[USE1:%.*]] = udiv i64 [[OR2]], [[D]] ; CHECK-NEXT: [[USE2:%.*]] = udiv i64 [[OR2]], [[USE1]] ; CHECK-NEXT: ret i64 [[USE2]] ; @@ -681,9 +722,10 @@ define i64 @sext_or_chain_two_uses2(i64 %a, i16 %b, i16 %c, 
i64 %d) { ; (a & ~b) & ~c --> a & ~(b | c) define i32 @not_and_and_not(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_not( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_not +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[TMP2]] ; CHECK-NEXT: ret i32 [[AND2]] @@ -697,9 +739,10 @@ define i32 @not_and_and_not(i32 %a0, i32 %b, i32 %c) { } define <4 x i64> @not_and_and_not_4i64(<4 x i64> %a0, <4 x i64> %b, <4 x i64> %c) { -; CHECK-LABEL: @not_and_and_not_4i64( -; CHECK-NEXT: [[A:%.*]] = sdiv <4 x i64> , [[A0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i64> [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_not_4i64 +; CHECK-SAME: (<4 x i64> [[A0:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv <4 x i64> , [[A0]] +; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i64> [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[TMP1]], ; CHECK-NEXT: [[AND2:%.*]] = and <4 x i64> [[A]], [[TMP2]] ; CHECK-NEXT: ret <4 x i64> [[AND2]] @@ -715,10 +758,11 @@ define <4 x i64> @not_and_and_not_4i64(<4 x i64> %a0, <4 x i64> %b, <4 x i64> %c ; (~b & a) & ~c --> a & ~(b | c) define i32 @not_and_and_not_commute1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_not_commute1( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_not_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[TMP2]], [[A:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[TMP2]], [[A]] ; CHECK-NEXT: ret i32 [[AND2]] ; %not1 = xor i32 %b, -1 @@ -731,10 
+775,11 @@ define i32 @not_and_and_not_commute1(i32 %a, i32 %b, i32 %c) { ; ~c & (a & ~b) --> a & ~(b | c) define i32 @not_and_and_not_commute2_extra_not_use(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_not_commute2_extra_not_use( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B:%.*]], [[C]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_not_commute2_extra_not_use +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[TMP2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) @@ -750,10 +795,11 @@ define i32 @not_and_and_not_commute2_extra_not_use(i32 %a0, i32 %b, i32 %c) { } define i32 @not_and_and_not_extra_and1_use(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_not_extra_and1_use( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@not_and_and_not_extra_and1_use +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C]], -1 ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[NOT1]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[AND1]]) @@ -771,9 +817,10 @@ define i32 @not_and_and_not_extra_and1_use(i32 %a0, i32 %b, i32 %c) { ; (a | ~b) | ~c --> a | ~(b & c) define i32 @not_or_or_not(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_not( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_not +; 
CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR2]] @@ -787,9 +834,10 @@ define i32 @not_or_or_not(i32 %a0, i32 %b, i32 %c) { } define <2 x i6> @not_or_or_not_2i6(<2 x i6> %a0, <2 x i6> %b, <2 x i6> %c) { -; CHECK-LABEL: @not_or_or_not_2i6( -; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i6> , [[A0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i6> [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_not_2i6 +; CHECK-SAME: (<2 x i6> [[A0:%.*]], <2 x i6> [[B:%.*]], <2 x i6> [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i6> , [[A0]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i6> [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i6> [[TMP1]], ; CHECK-NEXT: [[OR2:%.*]] = or <2 x i6> [[A]], [[TMP2]] ; CHECK-NEXT: ret <2 x i6> [[OR2]] @@ -805,10 +853,11 @@ define <2 x i6> @not_or_or_not_2i6(<2 x i6> %a0, <2 x i6> %b, <2 x i6> %c) { ; (~b | a) | ~c --> a | ~(b & c) define i32 @not_or_or_not_commute1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_not_commute1( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_not_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[TMP2]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[TMP2]], [[A]] ; CHECK-NEXT: ret i32 [[OR2]] ; %not1 = xor i32 %b, -1 @@ -821,10 +870,11 @@ define i32 @not_or_or_not_commute1(i32 %a, i32 %b, i32 %c) { ; ~c | (a | ~b) --> a | ~(b & c) define i32 @not_or_or_not_commute2_extra_not_use(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_not_commute2_extra_not_use( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C:%.*]], 
-1 -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], [[C]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_not_commute2_extra_not_use +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[TMP2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) @@ -840,10 +890,11 @@ define i32 @not_or_or_not_commute2_extra_not_use(i32 %a0, i32 %b, i32 %c) { } define i32 @not_or_or_not_extra_or1_use(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_not_extra_or1_use( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@not_or_or_not_extra_or1_use +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[C]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[NOT1]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[OR1]]) @@ -861,9 +912,10 @@ define i32 @not_or_or_not_extra_or1_use(i32 %a0, i32 %b, i32 %c) { ; (c & ~(a | b)) | (b & ~(a | c)) --> ~a & (b ^ c) define i32 @or_not_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@or_not_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -878,10 +930,11 @@ define i32 @or_not_and(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_commute1(i32 %a, 
i32 %b0, i32 %c) { -; CHECK-LABEL: @or_not_and_commute1( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -897,10 +950,11 @@ define i32 @or_not_and_commute1(i32 %a, i32 %b0, i32 %c) { } define i32 @or_not_and_commute2(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @or_not_and_commute2( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -916,9 +970,10 @@ define i32 @or_not_and_commute2(i32 %a, i32 %b0, i32 %c) { } define i32 @or_not_and_commute3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_commute3( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -933,10 +988,11 @@ define i32 @or_not_and_commute3(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_commute4(i32 %a, i32 %b, i32 %c0) { -; 
CHECK-LABEL: @or_not_and_commute4( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -952,10 +1008,11 @@ define i32 @or_not_and_commute4(i32 %a, i32 %b, i32 %c0) { } define i32 @or_not_and_commute5(i32 %a0, i32 %b, i32 %c0) { -; CHECK-LABEL: @or_not_and_commute5( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute5 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] @@ -973,9 +1030,10 @@ define i32 @or_not_and_commute5(i32 %a0, i32 %b, i32 %c0) { } define i32 @or_not_and_commute6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_commute6( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -990,9 +1048,10 @@ define i32 @or_not_and_commute6(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_commute7(i32 %a, i32 %b, 
i32 %c) { -; CHECK-LABEL: @or_not_and_commute7( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute7 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1007,10 +1066,11 @@ define i32 @or_not_and_commute7(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_commute8(i32 %a0, i32 %b0, i32 %c) { -; CHECK-LABEL: @or_not_and_commute8( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute8 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i32 [[OR3]] @@ -1028,10 +1088,11 @@ define i32 @or_not_and_commute8(i32 %a0, i32 %b0, i32 %c) { } define i32 @or_not_and_commute9(i32 %a0, i32 %b0, i32 %c0) { -; CHECK-LABEL: @or_not_and_commute9( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_commute9 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B0:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] @@ -1051,10 +1112,11 @@ define i32 
@or_not_and_commute9(i32 %a0, i32 %b0, i32 %c0) { } define i32 @or_not_and_extra_not_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_extra_not_use1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_extra_not_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: call void @use(i32 [[NOT1]]) @@ -1072,10 +1134,11 @@ define i32 @or_not_and_extra_not_use1(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_extra_not_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_extra_not_use2( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_extra_not_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[B]] @@ -1095,10 +1158,11 @@ define i32 @or_not_and_extra_not_use2(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_extra_and_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_extra_and_use1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_extra_and_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 
[[NOT1]], [[C]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] @@ -1117,10 +1181,11 @@ define i32 @or_not_and_extra_and_use1(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_extra_and_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_extra_and_use2( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_extra_and_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[B]] @@ -1140,9 +1205,10 @@ define i32 @or_not_and_extra_and_use2(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_extra_or_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_extra_or_use1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_extra_or_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: call void @use(i32 [[OR1]]) @@ -1160,9 +1226,10 @@ define i32 @or_not_and_extra_or_use1(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_extra_or_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_extra_or_use2( -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_extra_or_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 
[[C:%.*]]) { +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR3:%.*]] = and i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: call void @use(i32 [[OR2]]) @@ -1180,11 +1247,12 @@ define i32 @or_not_and_extra_or_use2(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_wrong_c(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @or_not_and_wrong_c( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_wrong_c +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C:%.*]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[D:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[D]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[AND2]] @@ -1201,13 +1269,14 @@ define i32 @or_not_and_wrong_c(i32 %a, i32 %b, i32 %c, i32 %d) { } define i32 @or_not_and_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @or_not_and_wrong_b( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_wrong_b +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[D:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[D]] ; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[AND2]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1224,9 
+1293,10 @@ define i32 @or_not_and_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { ; (c | ~(a & b)) & (b | ~(a & c)) --> ~(a & (b ^ c)) define i32 @and_not_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1241,10 +1311,11 @@ define i32 @and_not_or(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_commute1(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @and_not_or_commute1( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1260,10 +1331,11 @@ define i32 @and_not_or_commute1(i32 %a, i32 %b0, i32 %c) { } define i32 @and_not_or_commute2(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @and_not_or_commute2( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 
[[AND3]] ; @@ -1279,9 +1351,10 @@ define i32 @and_not_or_commute2(i32 %a, i32 %b0, i32 %c) { } define i32 @and_not_or_commute3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_commute3( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1296,10 +1369,11 @@ define i32 @and_not_or_commute3(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_commute4(i32 %a, i32 %b, i32 %c0) { -; CHECK-LABEL: @and_not_or_commute4( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1315,10 +1389,11 @@ define i32 @and_not_or_commute4(i32 %a, i32 %b, i32 %c0) { } define i32 @and_not_or_commute5(i32 %a0, i32 %b, i32 %c0) { -; CHECK-LABEL: @and_not_or_commute5( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute5 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor 
i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] @@ -1336,9 +1411,10 @@ define i32 @and_not_or_commute5(i32 %a0, i32 %b, i32 %c0) { } define i32 @and_not_or_commute6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_commute6( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1353,9 +1429,10 @@ define i32 @and_not_or_commute6(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_commute7(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_commute7( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute7 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1370,10 +1447,11 @@ define i32 @and_not_or_commute7(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_commute8(i32 %a0, i32 %b0, i32 %c) { -; CHECK-LABEL: @and_not_or_commute8( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute8 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] @@ -1391,10 
+1469,11 @@ define i32 @and_not_or_commute8(i32 %a0, i32 %b0, i32 %c) { } define i32 @and_not_or_commute9(i32 %a0, i32 %b0, i32 %c0) { -; CHECK-LABEL: @and_not_or_commute9( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_commute9 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B0:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 @@ -1414,10 +1493,11 @@ define i32 @and_not_or_commute9(i32 %a0, i32 %b0, i32 %c0) { } define i32 @and_not_or_extra_not_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_extra_not_use1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_extra_not_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[NOT1]]) @@ -1435,10 +1515,11 @@ define i32 @and_not_or_extra_not_use1(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_extra_not_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_extra_not_use2( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_extra_not_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], 
[[C:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOT2]], [[B]] @@ -1458,10 +1539,11 @@ define i32 @and_not_or_extra_not_use2(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_extra_and_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_extra_and_use1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_extra_and_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 @@ -1480,10 +1562,11 @@ define i32 @and_not_or_extra_and_use1(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_extra_and_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_extra_and_use2( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_extra_and_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOT2]], [[B]] @@ -1503,9 +1586,10 @@ define i32 @and_not_or_extra_and_use2(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_extra_or_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_extra_or_use1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], 
[[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_extra_or_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[AND1]]) @@ -1523,9 +1607,10 @@ define i32 @and_not_or_extra_or_use1(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_extra_or_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_extra_or_use2( -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_extra_or_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[AND2]]) @@ -1543,11 +1628,12 @@ define i32 @and_not_or_extra_or_use2(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_wrong_c(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @and_not_or_wrong_c( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_wrong_c +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C:%.*]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[D:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[D]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = and i32 [[OR1]], [[OR2]] @@ -1564,13 +1650,14 @@ define i32 @and_not_or_wrong_c(i32 %a, i32 %b, i32 %c, i32 %d) { } 
define i32 @and_not_or_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @and_not_or_wrong_b( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_wrong_b +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOT2]], [[D:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOT2]], [[D]] ; CHECK-NEXT: [[AND3:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1587,9 +1674,10 @@ define i32 @and_not_or_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b & ~(a | c)) | ~(a | b) --> ~((b & c) | a) define i32 @or_and_not_not(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1603,10 +1691,11 @@ define i32 @or_and_not_not(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_commute1(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @or_and_not_not_commute1( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], [[C]] +; 
CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1621,9 +1710,10 @@ define i32 @or_and_not_not_commute1(i32 %a, i32 %b0, i32 %c) { } define i32 @or_and_not_not_commute2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_commute2( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1637,9 +1727,10 @@ define i32 @or_and_not_not_commute2(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_commute3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_commute3( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1653,9 +1744,10 @@ define i32 @or_and_not_not_commute3(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_commute4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_commute4( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_commute4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1669,9 +1761,10 @@ define 
i32 @or_and_not_not_commute4(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_commute5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_commute5( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_commute5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1685,10 +1778,11 @@ define i32 @or_and_not_not_commute5(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_commute6(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @or_and_not_not_commute6( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_commute6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1703,9 +1797,10 @@ define i32 @or_and_not_not_commute6(i32 %a, i32 %b0, i32 %c) { } define i32 @or_and_not_not_commute7(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_commute7( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_commute7 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1719,10 +1814,11 @@ define i32 @or_and_not_not_commute7(i32 %a, i32 %b, i32 %c) { } define 
i32 @or_and_not_not_extra_not_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_extra_not_use1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_extra_not_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND]], [[NOT1]] @@ -1740,10 +1836,11 @@ define i32 @or_and_not_not_extra_not_use1(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_extra_not_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_extra_not_use2( -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_extra_not_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[NOT2]]) @@ -1760,10 +1857,11 @@ define i32 @or_and_not_not_extra_not_use2(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_extra_and_use(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_extra_and_use( -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_extra_and_use +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: 
[[TMP1:%.*]] = and i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 @@ -1781,10 +1879,11 @@ define i32 @or_and_not_not_extra_and_use(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_extra_or_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_extra_or_use1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_extra_or_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND]], [[NOT1]] @@ -1802,9 +1901,10 @@ define i32 @or_and_not_not_extra_or_use1(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_extra_or_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_extra_or_use2( -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_extra_or_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[OR2]]) @@ -1823,11 +1923,12 @@ define i32 @or_and_not_not_extra_or_use2(i32 %a, i32 %b, i32 %c) { ; Check the use limit. It can be adjusted in the future in terms of ; LHS and RHS uses distribution to be more flexible. 
define i32 @or_and_not_not_2_extra_uses(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_and_not_not_2_extra_uses( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_2_extra_uses +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: call void @use(i32 [[OR1]]) ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: call void @use(i32 [[AND]]) @@ -1846,10 +1947,11 @@ define i32 @or_and_not_not_2_extra_uses(i32 %a, i32 %b, i32 %c) { } define i32 @or_and_not_not_wrong_a(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @or_and_not_not_wrong_a( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[D:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_wrong_a +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[D]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND]], [[NOT1]] @@ -1865,12 +1967,13 @@ define i32 @or_and_not_not_wrong_a(i32 %a, i32 %b, i32 %c, i32 %d) { } define i32 @or_and_not_not_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @or_and_not_not_wrong_b( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[D:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_and_not_not_wrong_b +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[D]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[C:%.*]] +; CHECK-NEXT: 
[[OR2:%.*]] = or i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND]], [[NOT1]] ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -1886,9 +1989,10 @@ define i32 @or_and_not_not_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b | ~(a & c)) & ~(a & b) --> ~((b | c) & a) define i32 @and_or_not_not(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1902,10 +2006,11 @@ define i32 @and_or_not_not(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_commute1(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @and_or_not_not_commute1( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1920,9 +2025,10 @@ define i32 @and_or_not_not_commute1(i32 %a, i32 %b0, i32 %c) { } define i32 @and_or_not_not_commute2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_commute2( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_commute2 +; CHECK-SAME: 
(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1936,9 +2042,10 @@ define i32 @and_or_not_not_commute2(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_commute3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_commute3( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1952,9 +2059,10 @@ define i32 @and_or_not_not_commute3(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_commute4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_commute4( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_commute4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1968,9 +2076,10 @@ define i32 @and_or_not_not_commute4(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_commute5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_commute5( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_commute5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; 
CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -1984,10 +2093,11 @@ define i32 @and_or_not_not_commute5(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_commute6(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @and_or_not_not_commute6( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_commute6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -2002,9 +2112,10 @@ define i32 @and_or_not_not_commute6(i32 %a, i32 %b0, i32 %c) { } define i32 @and_or_not_not_commute7(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_commute7( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_commute7 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -2018,10 +2129,11 @@ define i32 @and_or_not_not_commute7(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_extra_not_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_extra_not_use1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_extra_not_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], 
[[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND1]], [[OR]] @@ -2039,10 +2151,11 @@ define i32 @and_or_not_not_extra_not_use1(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_extra_not_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_extra_not_use2( -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_extra_not_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[NOT2]]) @@ -2059,10 +2172,11 @@ define i32 @and_or_not_not_extra_not_use2(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_extra_and_use(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_extra_and_use( -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_extra_and_use +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 @@ -2080,9 +2194,10 @@ define i32 @and_or_not_not_extra_and_use(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_extra_or_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_extra_or_use1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C:%.*]] +; CHECK-LABEL: define 
{{[^@]+}}@and_or_not_not_extra_or_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND1]], [[OR]] @@ -2100,9 +2215,10 @@ define i32 @and_or_not_not_extra_or_use1(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_extra_or_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_extra_or_use2( -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_extra_or_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[AND2]]) @@ -2119,10 +2235,11 @@ define i32 @and_or_not_not_extra_or_use2(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_2_extra_uses(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_or_not_not_2_extra_uses( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_2_extra_uses +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] ; CHECK-NEXT: call void @use(i32 [[AND1]]) -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: call void @use(i32 [[OR]]) @@ -2141,9 +2258,10 @@ define i32 @and_or_not_not_2_extra_uses(i32 %a, i32 %b, i32 %c) { } define i32 @and_or_not_not_wrong_a(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @and_or_not_not_wrong_a( -; CHECK-NEXT: 
[[AND1:%.*]] = and i32 [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_wrong_a +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[D]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND1]], [[OR]] @@ -2159,12 +2277,13 @@ define i32 @and_or_not_not_wrong_a(i32 %a, i32 %b, i32 %c, i32 %d) { } define i32 @and_or_not_not_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: @and_or_not_not_wrong_b( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[D:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_or_not_not_wrong_b +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[D]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[A]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = and i32 [[OR]], [[NOT1]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -2180,10 +2299,11 @@ define i32 @and_or_not_not_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { ; (a & ~(b | c)) | ~(a | (b ^ c)) --> (~a & b & c) | ~(b | c) define i32 @and_not_or_or_not_or_xor(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; 
CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR3]] @@ -2199,10 +2319,11 @@ define i32 @and_not_or_or_not_or_xor(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_commute1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_commute1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[C]], [[B]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR3]] @@ -2218,9 +2339,10 @@ define i32 @and_not_or_or_not_or_xor_commute1(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_commute2(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_commute2( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_commute2 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] @@ -2239,10 +2361,11 @@ define i32 @and_not_or_or_not_or_xor_commute2(i32 %a0, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_commute3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_commute3( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; 
CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[C]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR3]] @@ -2258,9 +2381,10 @@ define i32 @and_not_or_or_not_or_xor_commute3(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_commute4(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_commute4( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_commute4 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[XOR1]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] @@ -2279,10 +2403,11 @@ define i32 @and_not_or_or_not_or_xor_commute4(i32 %a0, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_commute5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_commute5( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_commute5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[OR3]] @@ -2298,10 +2423,11 @@ define i32 @and_not_or_or_not_or_xor_commute5(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: 
@and_not_or_or_not_or_xor_use1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: call void @use(i32 [[OR1]]) @@ -2319,11 +2445,12 @@ define i32 @and_not_or_or_not_or_xor_use1(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_use2( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: call void @use(i32 [[NOT1]]) @@ -2341,10 +2468,11 @@ define i32 @and_not_or_or_not_or_xor_use2(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_use3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_use3( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_use3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor 
i32 [[B]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 @@ -2364,10 +2492,11 @@ define i32 @and_not_or_or_not_or_xor_use3(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_use4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_use4( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_use4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: call void @use(i32 [[XOR1]]) @@ -2385,10 +2514,11 @@ define i32 @and_not_or_or_not_or_xor_use4(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_use5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_use5( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_use5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[OR1]], [[OR2]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: call void @use(i32 [[OR2]]) @@ -2406,10 +2536,11 @@ define i32 @and_not_or_or_not_or_xor_use5(i32 %a, i32 %b, i32 %c) { } define i32 @and_not_or_or_not_or_xor_use6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @and_not_or_or_not_or_xor_use6( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_not_or_or_not_or_xor_use6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) 
{ +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 @@ -2433,10 +2564,11 @@ define i32 @and_not_or_or_not_or_xor_use6(i32 %a, i32 %b, i32 %c) { ; It is invalid as is, but feezing %a and %b will make it valid. define i32 @or_not_and_and_not_and_xor(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2453,10 +2585,11 @@ define i32 @or_not_and_and_not_and_xor(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_commute1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_commute1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[C]], [[B]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2473,9 +2606,10 @@ define i32 
@or_not_and_and_not_and_xor_commute1(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_commute2(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_commute2( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_commute2 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[NOT1]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] @@ -2495,10 +2629,11 @@ define i32 @or_not_and_and_not_and_xor_commute2(i32 %a0, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_commute3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_commute3( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2515,9 +2650,10 @@ define i32 @or_not_and_and_not_and_xor_commute3(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_commute4(i32 %a0, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_commute4( -; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_commute4 +; CHECK-SAME: (i32 [[A0:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0]] +; 
CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[NOT1]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] @@ -2537,10 +2673,11 @@ define i32 @or_not_and_and_not_and_xor_commute4(i32 %a0, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_commute5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_commute5( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_commute5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2557,10 +2694,11 @@ define i32 @or_not_and_and_not_and_xor_commute5(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_use1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2579,10 +2717,11 @@ define i32 @or_not_and_and_not_and_xor_use1(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_use2( -; CHECK-NEXT: 
[[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2601,10 +2740,11 @@ define i32 @or_not_and_and_not_and_xor_use2(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_use3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_use3( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_use3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2623,10 +2763,11 @@ define i32 @or_not_and_and_not_and_xor_use3(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_use4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_use4( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_use4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = 
and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2645,10 +2786,11 @@ define i32 @or_not_and_and_not_and_xor_use4(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_use5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_use5( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_use5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND2]], [[OR1]] @@ -2667,10 +2809,11 @@ define i32 @or_not_and_and_not_and_xor_use5(i32 %a, i32 %b, i32 %c) { } define i32 @or_not_and_and_not_and_xor_use6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @or_not_and_and_not_and_xor_use6( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@or_not_and_and_not_and_xor_use6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT1]], [[A]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[XOR1]], [[A]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[AND2]], -1 @@ -2692,9 +2835,10 @@ define i32 @or_not_and_and_not_and_xor_use6(i32 %a, i32 %b, i32 %c) { ; (~a & b & c) | ~(a | b | c) -> ~(a | (b ^ c)) define i32 @not_and_and_or_not_or_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define 
{{[^@]+}}@not_and_and_or_not_or_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2709,9 +2853,10 @@ define i32 @not_and_and_or_not_or_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_commute1_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute1_or( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_commute1_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2726,9 +2871,10 @@ define i32 @not_and_and_or_not_or_or_commute1_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_commute2_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute2_or( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_commute2_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2743,9 +2889,10 @@ define i32 @not_and_and_or_not_or_or_commute2_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_commute1_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute1_and( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define 
{{[^@]+}}@not_and_and_or_not_or_or_commute1_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2760,9 +2907,10 @@ define i32 @not_and_and_or_not_or_or_commute1_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_commute2_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute2_and( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_commute2_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2777,9 +2925,10 @@ define i32 @not_and_and_or_not_or_or_commute2_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_commute1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute1( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2794,10 +2943,11 @@ define i32 @not_and_and_or_not_or_or_commute1(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_commute2(i32 %a, i32 %b, i32 %c0) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute2( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 
[[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2813,10 +2963,11 @@ define i32 @not_and_and_or_not_or_or_commute2(i32 %a, i32 %b, i32 %c0) { } define i32 @not_and_and_or_not_or_or_commute3(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute3( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2832,10 +2983,11 @@ define i32 @not_and_and_or_not_or_or_commute3(i32 %a, i32 %b0, i32 %c) { } define i32 @not_and_and_or_not_or_or_commute4(i32 %a, i32 %b, i32 %c0) { -; CHECK-LABEL: @not_and_and_or_not_or_or_commute4( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_commute4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[OR3]] ; @@ -2851,9 +3003,10 @@ define i32 @not_and_and_or_not_or_or_commute4(i32 %a, i32 %b, i32 %c0) { } define i32 
@not_and_and_or_not_or_or_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_use1( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[OR1]]) @@ -2871,9 +3024,10 @@ define i32 @not_and_and_or_not_or_or_use1(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_use2( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 @@ -2892,9 +3046,10 @@ define i32 @not_and_and_or_not_or_or_use2(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_use3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_use3( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_use3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] @@ -2915,9 +3070,10 
@@ define i32 @not_and_and_or_not_or_or_use3(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_use4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_use4( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_use4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[NOT2]]) @@ -2935,10 +3091,11 @@ define i32 @not_and_and_or_not_or_or_use4(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_use5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_use5( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_use5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A]] ; CHECK-NEXT: [[OR3:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: call void @use(i32 [[AND1]]) @@ -2956,9 +3113,10 @@ define i32 @not_and_and_or_not_or_or_use5(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_not_or_or_use6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_not_or_or_use6( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_not_or_or_use6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 
[[OR1]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] @@ -2981,9 +3139,10 @@ define i32 @not_and_and_or_not_or_or_use6(i32 %a, i32 %b, i32 %c) { ; (~a | b | c) & ~(a & b & c) -> ~a | (b ^ c) define i32 @not_or_or_and_not_and_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -2998,9 +3157,10 @@ define i32 @not_or_or_and_not_and_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_commute1_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute1_and( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute1_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3015,9 +3175,10 @@ define i32 @not_or_or_and_not_and_and_commute1_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_commute2_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute2_and( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute2_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = 
xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3032,9 +3193,10 @@ define i32 @not_or_or_and_not_and_and_commute2_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_commute1_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute1_or( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute1_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3049,9 +3211,10 @@ define i32 @not_or_or_and_not_and_and_commute1_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_commute2_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute2_or( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute2_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3066,9 +3229,10 @@ define i32 @not_or_or_and_not_and_and_commute2_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_commute1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute1( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 
[[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3083,10 +3247,11 @@ define i32 @not_or_or_and_not_and_and_commute1(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_commute2(i32 %a, i32 %b, i32 %c0) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute2( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3102,10 +3267,11 @@ define i32 @not_or_or_and_not_and_and_commute2(i32 %a, i32 %b, i32 %c0) { } define i32 @not_or_or_and_not_and_and_commute3(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute3( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[C]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3121,10 +3287,11 @@ define i32 @not_or_or_and_not_and_and_commute3(i32 %a, i32 %b0, i32 %c) { } define i32 @not_or_or_and_not_and_and_commute4(i32 %a, i32 %b, i32 %c0) { -; CHECK-LABEL: @not_or_or_and_not_and_and_commute4( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: 
[[TMP1:%.*]] = xor i32 [[C]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_commute4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND3]] ; @@ -3140,10 +3307,11 @@ define i32 @not_or_or_and_not_and_and_commute4(i32 %a, i32 %b, i32 %c0) { } define i32 @not_or_or_and_not_and_and_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_use1( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[AND1]]) ; CHECK-NEXT: ret i32 [[AND3]] @@ -3160,9 +3328,10 @@ define i32 @not_or_or_and_not_and_and_use1(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_use2( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] @@ -3181,9 +3350,10 @@ define i32 @not_or_or_and_not_and_and_use2(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_use3(i32 
%a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_use3( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_use3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND2]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] @@ -3204,9 +3374,10 @@ define i32 @not_or_or_and_not_and_and_use3(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_use4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_use4( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_use4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[AND3]] @@ -3223,10 +3394,11 @@ define i32 @not_or_or_and_not_and_and_use4(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_use5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_use5( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C:%.*]], [[B]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_use5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[AND3:%.*]] = or i32 [[TMP1]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[OR1]]) ; 
CHECK-NEXT: ret i32 [[AND3]] @@ -3243,9 +3415,10 @@ define i32 @not_or_or_and_not_and_and_use5(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_not_and_and_use6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_not_and_and_use6( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_not_and_and_use6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] @@ -3267,10 +3440,11 @@ define i32 @not_or_or_and_not_and_and_use6(i32 %a, i32 %b, i32 %c) { ; (~a & b & c) | ~(a | b) -> (c | ~b) & ~a define i32 @not_and_and_or_no_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[OR2]] ; @@ -3284,10 +3458,11 @@ define i32 @not_and_and_or_no_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_commute1_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_commute1_and( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_commute1_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; 
CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[OR2]] ; @@ -3301,10 +3476,11 @@ define i32 @not_and_and_or_no_or_commute1_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_commute2_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_commute2_and( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_commute2_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[OR2]] ; @@ -3318,10 +3494,11 @@ define i32 @not_and_and_or_no_or_commute2_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_commute1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_commute1( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[OR2]] ; @@ -3335,11 +3512,12 @@ define i32 @not_and_and_or_no_or_commute1(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_commute2(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_commute2( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: 
[[NOT2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[OR2]] ; @@ -3354,10 +3532,11 @@ define i32 @not_and_and_or_no_or_commute2(i32 %a, i32 %b0, i32 %c) { } define i32 @not_and_and_or_no_or_commute3(i32 %a, i32 %b, i32 %c0) { -; CHECK-LABEL: @not_and_and_or_no_or_commute3( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[C]], [[TMP1]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[OR2]] @@ -3373,10 +3552,11 @@ define i32 @not_and_and_or_no_or_commute3(i32 %a, i32 %b, i32 %c0) { } define i32 @not_and_and_or_no_or_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use1( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 
[[NOT2]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3392,10 +3572,11 @@ define i32 @not_and_and_or_no_or_use1(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use2( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3411,10 +3592,11 @@ define i32 @not_and_and_or_no_or_use2(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_use3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use3( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3430,10 +3612,11 @@ define i32 @not_and_and_or_no_or_use3(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_use4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use4( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use4 +; CHECK-SAME: (i32 
[[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3449,12 +3632,13 @@ define i32 @not_and_and_or_no_or_use4(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_use5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use5( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[OR1]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3470,12 +3654,13 @@ define i32 @not_and_and_or_no_or_use5(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_use6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use6( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[NOT1]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3491,11 +3676,12 @@ define i32 
@not_and_and_or_no_or_use6(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_use7(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use7( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use7 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = and i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[AND1]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3511,12 +3697,13 @@ define i32 @not_and_and_or_no_or_use7(i32 %a, i32 %b, i32 %c) { } define i32 @not_and_and_or_no_or_use8(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_and_and_or_no_or_use8( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_and_and_or_no_or_use8 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] ; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[AND2]]) ; CHECK-NEXT: ret i32 [[OR2]] @@ -3534,10 +3721,11 @@ define i32 @not_and_and_or_no_or_use8(i32 %a, i32 %b, i32 %c) { ; (~a | b | c) & ~(a & b) -> (c & ~b) | ~a define i32 @not_or_or_and_no_and(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; 
CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND2]] ; @@ -3551,10 +3739,11 @@ define i32 @not_or_or_and_no_and(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_commute1_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_commute1_or( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_commute1_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND2]] ; @@ -3568,10 +3757,11 @@ define i32 @not_or_or_and_no_and_commute1_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_commute2_or(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_commute2_or( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_commute2_or +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND2]] ; @@ -3585,10 +3775,11 @@ define i32 @not_or_or_and_no_and_commute2_or(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_commute1(i32 %a, i32 %b, i32 %c) 
{ -; CHECK-LABEL: @not_or_or_and_no_and_commute1( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_commute1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND2]] ; @@ -3602,11 +3793,12 @@ define i32 @not_or_or_and_no_and_commute1(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_commute2(i32 %a, i32 %b0, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_commute2( -; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B0:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND2]] ; @@ -3621,10 +3813,11 @@ define i32 @not_or_or_and_no_and_commute2(i32 %a, i32 %b0, i32 %c) { } define i32 @not_or_or_and_no_and_commute3(i32 %a, i32 %b, i32 %c0) { -; CHECK-LABEL: @not_or_or_and_no_and_commute3( -; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C0:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 ; 
CHECK-NEXT: [[TMP2:%.*]] = and i32 [[C]], [[TMP1]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: ret i32 [[AND2]] @@ -3640,10 +3833,11 @@ define i32 @not_or_or_and_no_and_commute3(i32 %a, i32 %b, i32 %c0) { } define i32 @not_or_or_and_no_and_use1(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use1( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[AND2]] @@ -3659,10 +3853,11 @@ define i32 @not_or_or_and_no_and_use1(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_use2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use2( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[AND2]] @@ -3678,10 +3873,11 @@ define i32 @not_or_or_and_no_and_use2(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_use3(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use3( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and 
i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[AND2]] @@ -3697,10 +3893,11 @@ define i32 @not_or_or_and_no_and_use3(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_use4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use4( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use4 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[NOT2]]) ; CHECK-NEXT: ret i32 [[AND2]] @@ -3716,12 +3913,13 @@ define i32 @not_or_or_and_no_and_use4(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_use5(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use5( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use5 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[AND1]]) ; CHECK-NEXT: ret i32 [[AND2]] 
@@ -3737,12 +3935,13 @@ define i32 @not_or_or_and_no_and_use5(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_use6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use6( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use6 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[NOT1]]) ; CHECK-NEXT: ret i32 [[AND2]] @@ -3758,11 +3957,12 @@ define i32 @not_or_or_and_no_and_use6(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_use7(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use7( -; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use7 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = or i32 [[TMP2]], [[NOT2]] ; CHECK-NEXT: call void @use(i32 [[OR1]]) ; CHECK-NEXT: ret i32 [[AND2]] @@ -3778,12 +3978,13 @@ define i32 @not_or_or_and_no_and_use7(i32 %a, i32 %b, i32 %c) { } define i32 @not_or_or_and_no_and_use8(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @not_or_or_and_no_and_use8( -; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use8 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 
[[C:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[A]] ; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[AND1]], -1 ; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOT2]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]] ; CHECK-NEXT: call void @use(i32 [[OR2]]) ; CHECK-NEXT: ret i32 [[AND2]] @@ -3799,9 +4000,10 @@ define i32 @not_or_or_and_no_and_use8(i32 %a, i32 %b, i32 %c) { } define i4 @and_orn_xor(i4 %a, i4 %b) { -; CHECK-LABEL: @and_orn_xor( -; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[A:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor +; CHECK-SAME: (i4 [[A:%.*]], i4 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[B]] ; CHECK-NEXT: ret i4 [[R]] ; %xor = xor i4 %a, %b @@ -3812,9 +4014,10 @@ define i4 @and_orn_xor(i4 %a, i4 %b) { } define <2 x i4> @and_orn_xor_commute1(<2 x i4> %a, <2 x i4> %b) { -; CHECK-LABEL: @and_orn_xor_commute1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[TMP1]], [[B:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute1 +; CHECK-SAME: (<2 x i4> [[A:%.*]], <2 x i4> [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[A]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[TMP1]], [[B]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %xor = xor <2 x i4> %a, %b @@ -3825,8 +4028,9 @@ define <2 x i4> @and_orn_xor_commute1(<2 x i4> %a, <2 x i4> %b) { } define i32 @and_orn_xor_commute2(i32 %a, i32 %b) { -; CHECK-LABEL: @and_orn_xor_commute2( -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[A:%.*]] +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]] ; CHECK-NEXT: call void @use(i32 [[XOR]]) ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], -1 ; 
CHECK-NEXT: [[R:%.*]] = and i32 [[TMP1]], [[B]] @@ -3841,11 +4045,12 @@ define i32 @and_orn_xor_commute2(i32 %a, i32 %b) { } define i32 @and_orn_xor_commute3(i32 %a, i32 %b) { -; CHECK-LABEL: @and_orn_xor_commute3( -; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A:%.*]], -1 +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute3 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: call void @use(i32 [[NOTA]]) ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: [[R:%.*]] = and i32 [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i32 [[TMP1]], [[B]] ; CHECK-NEXT: ret i32 [[R]] ; %xor = xor i32 %b, %a @@ -3857,9 +4062,10 @@ define i32 @and_orn_xor_commute3(i32 %a, i32 %b) { } define i32 @and_orn_xor_commute5(i32 %pa, i32 %pb) { -; CHECK-LABEL: @and_orn_xor_commute5( -; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA:%.*]], [[PA]] -; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB:%.*]], [[PB]] +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute5 +; CHECK-SAME: (i32 [[PA:%.*]], i32 [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA]], [[PA]] +; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB]], [[PB]] ; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[B]], [[NOTA]] ; CHECK-NEXT: call void @use(i32 [[OR]]) @@ -3878,9 +4084,10 @@ define i32 @and_orn_xor_commute5(i32 %pa, i32 %pb) { } define i32 @and_orn_xor_commute6(i32 %pa, i32 %pb) { -; CHECK-LABEL: @and_orn_xor_commute6( -; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA:%.*]], [[PA]] -; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB:%.*]], [[PB]] +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute6 +; CHECK-SAME: (i32 [[PA:%.*]], i32 [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA]], [[PA]] +; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB]], [[PB]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]] ; CHECK-NEXT: call void @use(i32 [[XOR]]) ; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1 @@ -3901,9 +4108,10 @@ define i32 @and_orn_xor_commute6(i32 %pa, i32 %pb) { } define i32 
@and_orn_xor_commute7(i32 %pa, i32 %pb) { -; CHECK-LABEL: @and_orn_xor_commute7( -; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA:%.*]], [[PA]] -; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB:%.*]], [[PB]] +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute7 +; CHECK-SAME: (i32 [[PA:%.*]], i32 [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA]], [[PA]] +; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB]], [[PB]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]] ; CHECK-NEXT: call void @use(i32 [[XOR]]) ; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1 @@ -3927,9 +4135,10 @@ define i32 @and_orn_xor_commute7(i32 %pa, i32 %pb) { } define i32 @and_orn_xor_commute8(i32 %pa, i32 %pb) { -; CHECK-LABEL: @and_orn_xor_commute8( -; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA:%.*]], [[PA]] -; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB:%.*]], [[PB]] +; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute8 +; CHECK-SAME: (i32 [[PA:%.*]], i32 [[PB:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = mul i32 [[PA]], [[PA]] +; CHECK-NEXT: [[B:%.*]] = mul i32 [[PB]], [[PB]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], -1 ; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[TMP1]] ; CHECK-NEXT: ret i32 [[R]] @@ -3942,3 +4151,60 @@ define i32 @and_orn_xor_commute8(i32 %pa, i32 %pb) { %r = and i32 %xor, %or ret i32 %r } + +define i32 @zext_zext_and_uses(i8 %x, i8 %y) { +; CHECK-LABEL: define {{[^@]+}}@zext_zext_and_uses +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[ZX:%.*]] = zext i8 [[X]] to i32 +; CHECK-NEXT: call void @use(i32 [[ZX]]) +; CHECK-NEXT: [[ZY:%.*]] = zext i8 [[Y]] to i32 +; CHECK-NEXT: call void @use(i32 [[ZY]]) +; CHECK-NEXT: [[R1:%.*]] = and i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = zext i8 [[R1]] to i32 +; CHECK-NEXT: ret i32 [[R]] +; + %zx = zext i8 %x to i32 + call void @use(i32 %zx) + %zy = zext i8 %y to i32 + call void @use(i32 %zy) + %r = and i32 %zx, %zy + ret i32 %r +} + +define i32 @sext_sext_or_uses(i8 %x, i8 %y) { +; CHECK-LABEL: define {{[^@]+}}@sext_sext_or_uses +; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) { 
+; CHECK-NEXT: [[SX:%.*]] = sext i8 [[X]] to i32 +; CHECK-NEXT: call void @use(i32 [[SX]]) +; CHECK-NEXT: [[SY:%.*]] = sext i8 [[Y]] to i32 +; CHECK-NEXT: call void @use(i32 [[SY]]) +; CHECK-NEXT: [[R1:%.*]] = or i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = sext i8 [[R1]] to i32 +; CHECK-NEXT: ret i32 [[R]] +; + %sx = sext i8 %x to i32 + call void @use(i32 %sx) + %sy = sext i8 %y to i32 + call void @use(i32 %sy) + %r = or i32 %sx, %sy + ret i32 %r +} + +define i32 @trunc_trunc_xor_uses(i65 %x, i65 %y) { +; CHECK-LABEL: define {{[^@]+}}@trunc_trunc_xor_uses +; CHECK-SAME: (i65 [[X:%.*]], i65 [[Y:%.*]]) { +; CHECK-NEXT: [[SX:%.*]] = trunc i65 [[X]] to i32 +; CHECK-NEXT: call void @use(i32 [[SX]]) +; CHECK-NEXT: [[SY:%.*]] = trunc i65 [[Y]] to i32 +; CHECK-NEXT: call void @use(i32 [[SY]]) +; CHECK-NEXT: [[R1:%.*]] = xor i65 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = trunc i65 [[R1]] to i32 +; CHECK-NEXT: ret i32 [[R]] +; + %sx = trunc i65 %x to i32 + call void @use(i32 %sx) + %sy = trunc i65 %y to i32 + call void @use(i32 %sy) + %r = xor i32 %sx, %sy + ret i32 %r +} From f0071d43e4d30a0bc224020abb52fa77054d2520 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 20 May 2022 09:07:21 -0400 Subject: [PATCH 081/908] [InstCombine] add use check to fold of bitwise logic with cast ops This was shown as a potential regression in D126040. 
--- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 5 +++-- llvm/test/Transforms/InstCombine/and-xor-or.ll | 9 +++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 8e3956dd05fdcf..6068a4e3922443 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1431,9 +1431,10 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) { Value *Cast1Src = Cast1->getOperand(0); // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) - if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { + if ((Cast0->hasOneUse() || Cast1->hasOneUse()) && + shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src, - I.getName()); + I.getName()); return CastInst::Create(CastOpcode, NewOp, DestTy); } diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll index 13ad55d83db4b9..6e31dbe16f62c0 100644 --- a/llvm/test/Transforms/InstCombine/and-xor-or.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -4159,8 +4159,7 @@ define i32 @zext_zext_and_uses(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i32 [[ZX]]) ; CHECK-NEXT: [[ZY:%.*]] = zext i8 [[Y]] to i32 ; CHECK-NEXT: call void @use(i32 [[ZY]]) -; CHECK-NEXT: [[R1:%.*]] = and i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = zext i8 [[R1]] to i32 +; CHECK-NEXT: [[R:%.*]] = and i32 [[ZX]], [[ZY]] ; CHECK-NEXT: ret i32 [[R]] ; %zx = zext i8 %x to i32 @@ -4178,8 +4177,7 @@ define i32 @sext_sext_or_uses(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i32 [[SX]]) ; CHECK-NEXT: [[SY:%.*]] = sext i8 [[Y]] to i32 ; CHECK-NEXT: call void @use(i32 [[SY]]) -; CHECK-NEXT: [[R1:%.*]] = or i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sext i8 [[R1]] to i32 +; CHECK-NEXT: [[R:%.*]] = or i32 [[SX]], [[SY]] ; 
CHECK-NEXT: ret i32 [[R]] ; %sx = sext i8 %x to i32 @@ -4197,8 +4195,7 @@ define i32 @trunc_trunc_xor_uses(i65 %x, i65 %y) { ; CHECK-NEXT: call void @use(i32 [[SX]]) ; CHECK-NEXT: [[SY:%.*]] = trunc i65 [[Y]] to i32 ; CHECK-NEXT: call void @use(i32 [[SY]]) -; CHECK-NEXT: [[R1:%.*]] = xor i65 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = trunc i65 [[R1]] to i32 +; CHECK-NEXT: [[R:%.*]] = xor i32 [[SX]], [[SY]] ; CHECK-NEXT: ret i32 [[R]] ; %sx = trunc i65 %x to i32 From 7a1d5ef703f676f03e2f623c7d209ded9a5f94e4 Mon Sep 17 00:00:00 2001 From: "J. Ryan Stinnett" Date: Mon, 9 May 2022 15:45:56 +0100 Subject: [PATCH 082/908] [DebugInfo][NFC] Add instr-ref documentation, migration guide This used to be D102158, but all the code it describes got re-written, so I figured I'd take another shot at documenting the new instruction referencing variable locations, this time from a higher level. Happily there's no longer any need to describe LiveDebugValues in any detail seeing how it's all SSA-based now. Probably the most important part is the explanation of what targets need to do to support instruction referencing. The list is small, mostly because there's nothing especially complicated that targets need to do: just instrument their target-specific optimisations and implement the stack spill/restore recognition target hooks. This is a small amount of text (which is a virtue), I'm extremely happy to expand on anything. 
Differential Revision: https://reviews.llvm.org/D113586 Co-authored-by: Jeremy Morse --- llvm/docs/InstrRefDebugInfo.md | 180 +++++++++++++++++++++++++++++++++ llvm/docs/MIRLangRef.rst | 5 +- llvm/docs/UserGuides.rst | 10 ++ 3 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 llvm/docs/InstrRefDebugInfo.md diff --git a/llvm/docs/InstrRefDebugInfo.md b/llvm/docs/InstrRefDebugInfo.md new file mode 100644 index 00000000000000..b7cd0f82740b8c --- /dev/null +++ b/llvm/docs/InstrRefDebugInfo.md @@ -0,0 +1,180 @@ +# Instruction referencing for debug info + +This document explains how LLVM uses value tracking, or instruction +referencing, to determine variable locations for debug info in the code +generation stage of compilation. This content is aimed at those working on code +generation targets and optimisation passes. It may also be of interest to anyone +curious about low-level debug info handling. + +# Problem statement + +At the end of compilation, LLVM must produce a DWARF location list (or similar) +describing what register or stack location a variable can be found in, for each +instruction in that variable's lexical scope. We could track the virtual +register that the variable resides in through compilation, however this is +vulnerable to register optimisations during regalloc, and instruction +movements. + +# Solution: instruction referencing + +Rather than identify the virtual register that a variable value resides in, +instead in instruction referencing mode, LLVM refers to the machine instruction +and operand position that the value is defined in. Consider the LLVM IR way of +referring to instruction values: + + %2 = add i32 %0, %1 + call void @llvm.dbg.value(metadata i32 %2, + +In LLVM IR, the IR Value is synonymous with the instruction that computes the +value, to the extent that in memory a Value is a pointer to the computing +instruction. 
Instruction referencing implements this relationship in the +codegen backend of LLVM, after instruction selection. Consider the X86 assembly +below and instruction referencing debug info, corresponding to the earlier +LLVM IR: + + %2:gr32 = ADD32rr %0, %1, implicit-def $eflags, debug-instr-number 1 + DBG_INSTR_REF 1, 0, !123, !456, debug-location !789 + +While the function remains in SSA form, virtual register %2 is sufficient to +identify the value computed by the instruction -- however the function +eventually leaves SSA form, and register optimisations will obscure which +register the desired value is in. Instead, a more consistent way of identifying +the instruction's value is to refer to the MachineOperand where the value is +defined: independently of which register is defined by that MachineOperand. In +the code above, the DBG_INSTR_REF instruction refers to instruction number one, +operand zero, while the ADD32rr has a debug-instr-number attribute attached +indicating that it is instruction number one. + +De-coupling variable locations from registers avoids difficulties involving +register allocation and optimisation, but requires additional instrumentation +when the instructions are optimised instead. Optimisations that replace +instructions with optimised versions that compute the same value must either +preserve the instruction number, or record a substitution from the old +instruction / operand number pair to the new instruction / operand pair -- see +MachineFunction::substituteDebugValuesForInst. If debug info maintenance is not +performed, or an instruction is eliminated as dead code, the variable location +is safely dropped and marked "optimised out". The exception is instructions +that are mutated rather than replaced, which always need debug info +maintenance. 
+ +# Register allocator considerations + +When the register allocator runs, debugging instructions do not directly refer +to any virtual registers, and thus there is no need for expensive location +maintenance during regalloc (i.e., LiveDebugVariables). Debug instructions are +unlinked from the function, then linked back in after register allocation +completes. + +The exception is PHI instructions: these become implicit definitions at control +flow merges once regalloc finishes, and any debug numbers attached to PHI +instructions are lost. To circumvent this, debug numbers of PHIs are recorded +at the start of register allocation (phi-node-elimination), then DBG_PHI +instructions are inserted after regalloc finishes. This requires some +maintenance of which register a variable is located in during regalloc, but at +single positions (block entry points) rather than ranges of instructions. + +An example, before regalloc: + + bb.2: + %2 = PHI %1, %bb.0, %2, %bb.1, debug-instr-number 1 + +After: + + bb.2: + DBG_PHI $rax, 1 + +# LiveDebugValues + +After optimisations and code layout complete, information about variable +values must be translated into variable locations, i.e. registers and stack +slots. This is performed in the [LiveDebugValues pass][LiveDebugValues], where +the debug instructions and machine code are separated out into two independent +functions: + * One that assigns values to variable names, + * One that assigns values to machine registers and stack slots. + +LLVM's existing SSA tools are used to place PHIs for each function, between +variable values and the values contained in machine locations, with value +propagation eliminating any unnecessary PHIs. The two can then be joined up +to map variables to values, then values to locations, for each instruction in +the function. 
+ +Key to this process is being able to identify the movement of values between +registers and stack locations, so that the location of values can be preserved +for the full time that they are resident in the machine. + +# Required target support and transition guide + +Instruction referencing will work on any target, but likely with poor coverage. +Supporting instruction referencing well requires: + * Target hooks to be implemented to allow LiveDebugValues to follow values through the machine, + * Target-specific optimisations to be instrumented, to preserve instruction numbers. + +## Target hooks + +TargetInstrInfo::isCopyInstrImpl must be implemented to recognise any +instructions that are copy-like -- LiveDebugValues uses this to identify when +values move between registers. + +TargetInstrInfo::isLoadFromStackSlotPostFE and +TargetInstrInfo::isStoreToStackSlotPostFE are needed to identify spill and +restore instructions. Each should return the destination or source register +respectively. LiveDebugValues will track the movement of a value from / to +the stack slot. In addition, any instruction that writes to a stack spill +should have a MachineMemOperand attached, so that LiveDebugValues can +recognise that a slot has been clobbered. + +## Target-specific optimisation instrumentation + +Optimisations come in two flavours: those that mutate a MachineInstr to make +it do something different, and those that create a new instruction to replace +the operation of the old. + +The former _must_ be instrumented -- the relevant question is whether any +register def in any operand will produce a different value, as a result of the +mutation. If the answer is yes, then there is a risk that a DBG_INSTR_REF +instruction referring to that operand will end up assigning the different +value to a variable, presenting the debugging developer with an unexpected +variable value. 
In such scenarios, call MachineInstr::dropDebugNumber() on the +mutated instruction to erase its instruction number. Any DBG_INSTR_REF +referring to it will produce an empty variable location instead, which appears +as "optimised out" in the debugger. + +For the latter flavour of optimisation, to increase coverage you should record +an instruction number substitution: a mapping from the old instruction number / +operand pair to new instruction number / operand pair. Consider if we replace +a three-address add instruction with a two-address add: + + %2:gr32 = ADD32rr %0, %1, debug-instr-number 1 + +becomes + + %2:gr32 = ADD32rr %0(tied-def 0), %1, debug-instr-number 2 + +With a substitution from "instruction number 1 operand 0" to "instruction number +2 operand 0" recorded in the MachineFunction. In LiveDebugValues, DBG_INSTR_REFs +will be mapped through the substitution table to find the most recent +instruction number / operand number of the value it refers to. + +Use MachineFunction::substituteDebugValuesForInst to automatically produce +substitutions between an old and new instruction. It assumes that any operand +that is a def in the old instruction is a def in the new instruction at the +same operand position. This works most of the time, as in the example +above. + +If operand numbers do not line up between the old and new instruction, use +MachineInstr::getDebugInstrNum to acquire the instruction number for the new +instruction, and MachineFunction::makeDebugValueSubstitution to record the +mapping between register definitions in the old and new instructions. If some +values computed by the old instruction are no longer computed by the new +instruction, record no substitution -- LiveDebugValues will safely drop the +now unavailable variable value. + +Should your target clone instructions, much the same as the TailDuplicator +optimisation pass, do not attempt to preserve the instruction numbers or +record any substitutions. 
MachineFunction::CloneMachineInstr should drop the +instruction number of any cloned instruction, to avoid duplicate numbers +appearing to LiveDebugValues. Dealing with duplicated instructions is a +natural extension to instruction referencing that's currently unimplemented. + +[LiveDebugValues]: SourceLevelDebugging.html#livedebugvalues-expansion-of-variable-locations diff --git a/llvm/docs/MIRLangRef.rst b/llvm/docs/MIRLangRef.rst index e977b3b161e9db..e7b368d38f2b9d 100644 --- a/llvm/docs/MIRLangRef.rst +++ b/llvm/docs/MIRLangRef.rst @@ -911,5 +911,6 @@ The additional operands to ``DBG_INSTR_REF`` are identical to ``DBG_VALUE``, and the ``DBG_INSTR_REF`` s position records where the variable takes on the designated value in the same way. -More information about how these constructs are used will appear on the source -level debugging page in due course, see also :doc:`SourceLevelDebugging` and :doc:`HowToUpdateDebugInfo`. +More information about how these constructs are used is available in +:doc:`InstrRefDebugInfo`. The related documents :doc:`SourceLevelDebugging` and +:doc:`HowToUpdateDebugInfo` may be useful as well. diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index c344c6e6b7c036..efee7d3ba74571 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -38,6 +38,7 @@ intermediate LLVM representation. HowToCrossCompileBuiltinsOnArm HowToCrossCompileLLVM HowToUpdateDebugInfo + InstrRefDebugInfo LinkTimeOptimization LoopTerminology MarkdownQuickstartTemplate @@ -160,6 +161,15 @@ Optimizations This document describes the design and philosophy behind the LLVM source-level debugger. +:doc:`How to Update Debug Info <HowToUpdateDebugInfo>` + This document specifies how to correctly update debug info in various kinds + of code transformations. + +:doc:`InstrRefDebugInfo` + This document explains how LLVM uses value tracking, or instruction + referencing, to determine variable locations for debug info in the final + stages of compilation.
+ Code Generation --------------- From 0ef7ca86cf1e28d91113dcf834a61ad155dbb7cb Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Fri, 20 May 2022 06:20:13 -0700 Subject: [PATCH 083/908] Fix test from df2a4e to work with 32 bit windows targets. 32 Bit windows includes attribute 'thiscall' on member functions as a default calling convention. This test was not written in a way that works with that, so added wildcards so it is tolerant of it. --- clang/test/SemaCXX/co_await-ast.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/test/SemaCXX/co_await-ast.cpp b/clang/test/SemaCXX/co_await-ast.cpp index 1221e8623c2613..1145be364f51c7 100644 --- a/clang/test/SemaCXX/co_await-ast.cpp +++ b/clang/test/SemaCXX/co_await-ast.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -std=c++20 -fsyntax-only -ast-dump -ast-dump-filter=foo %s | FileCheck %s --strict-whitespace +// RUN: %clang_cc1 -std=c++20 -triple i386-windows-pc -fsyntax-only -ast-dump -ast-dump-filter=foo %s | FileCheck %s --strict-whitespace namespace std { template struct coroutine_traits; @@ -46,13 +47,13 @@ awaitable foo() { // CHECK: |-CompoundStmt {{.*}} // CHECK: | `-ExprWithCleanups {{.*}} 'void' // CHECK: | `-CoawaitExpr {{.*}} 'void' -// CHECK: | |-CXXTemporaryObjectExpr {{.*}} 'executor' 'void () noexcept' zeroing +// CHECK: | |-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // CHECK: | |-MaterializeTemporaryExpr {{.*}} 'awaitable_frame::result_t' lvalue // CHECK: | | `-CXXBindTemporaryExpr {{.*}} 'awaitable_frame::result_t' (CXXTemporary {{.*}}) // CHECK: | | `-CXXMemberCallExpr {{.*}} 'awaitable_frame::result_t' // CHECK: | | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void () noexcept' zeroing +// CHECK: | | 
`-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // CHECK: | |-ExprWithCleanups {{.*}} 'bool' // CHECK: | | `-CXXMemberCallExpr {{.*}} 'bool' // CHECK: | | `-MemberExpr {{.*}} '' .await_ready {{.*}} @@ -63,7 +64,7 @@ awaitable foo() { // CHECK: | | `-CXXMemberCallExpr {{.*}} 'awaitable_frame::result_t' // CHECK: | | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void () noexcept' zeroing +// CHECK: | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // CHECK: | |-ExprWithCleanups {{.*}} 'void' // CHECK: | | `-CXXMemberCallExpr {{.*}} 'void' // CHECK: | | |-MemberExpr {{.*}} '' .await_suspend {{.*}} @@ -73,9 +74,9 @@ awaitable foo() { // CHECK: | | | `-CXXMemberCallExpr {{.*}} 'awaitable_frame::result_t' // CHECK: | | | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void () noexcept' zeroing +// CHECK: | | | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // CHECK: | | `-ImplicitCastExpr {{.*}} 'std::coroutine_handle':'std::coroutine_handle' -// CHECK: | | `-CXXConstructExpr {{.*}} 'std::coroutine_handle':'std::coroutine_handle' 'void (coroutine_handle &&) noexcept' +// CHECK: | | `-CXXConstructExpr {{.*}} 'std::coroutine_handle':'std::coroutine_handle' 'void (coroutine_handle &&){{.*}} noexcept' // CHECK: | | `-MaterializeTemporaryExpr {{.*}} 'std::coroutine_handle' xvalue // CHECK: | | `-CallExpr {{.*}} 'std::coroutine_handle' // CHECK: | | |-ImplicitCastExpr {{.*}} 'std::coroutine_handle (*)(void *) noexcept' 
@@ -92,6 +93,6 @@ awaitable foo() { // CHECK: | `-CXXMemberCallExpr {{.*}} 'awaitable_frame::result_t' // CHECK: | |-MemberExpr {{.*}} '' .await_transform {{.*}} // CHECK: | | `-DeclRefExpr {{.*}} 'std::coroutine_traits::promise_type':'awaitable_frame' lvalue Var {{.*}} '__promise' 'std::coroutine_traits::promise_type':'awaitable_frame' -// CHECK: | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void () noexcept' zeroing +// CHECK: | `-CXXTemporaryObjectExpr {{.*}} 'executor' 'void (){{.*}} noexcept' zeroing // Rest of the generated coroutine statements omitted. From e3f990b4e997a10aa3112ee8dff786e46ae9e12f Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 20 May 2022 09:35:44 -0400 Subject: [PATCH 084/908] [libc++abi] Add missing XFAIL on test --- libcxxabi/test/native/AArch64/ra_sign_state.pass.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libcxxabi/test/native/AArch64/ra_sign_state.pass.cpp b/libcxxabi/test/native/AArch64/ra_sign_state.pass.cpp index 1e09c936d664b5..35c05fc40efd4b 100644 --- a/libcxxabi/test/native/AArch64/ra_sign_state.pass.cpp +++ b/libcxxabi/test/native/AArch64/ra_sign_state.pass.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: linux && target={{aarch64-.+}} +// XFAIL: no-exceptions // This test ensures the .cfi_negate_ra_state the RA_SIGN_STATE pseudo register // could be set directly set by a DWARF expression and the unwinder handles it From 422ec524388b524f48d49fccd436105bb848f833 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 19 May 2022 11:16:37 -0400 Subject: [PATCH 085/908] [libunwind] Introduce a cmake-bridge.cfg.in file to reduce test config duplication Differential Revision: https://reviews.llvm.org/D125981 --- libcxx/test/configs/cmake-bridge.cfg.in | 3 +- libunwind/test/CMakeLists.txt | 4 ++ libunwind/test/configs/cmake-bridge.cfg.in | 37 ++++++++++++++++++ .../test/configs/llvm-libunwind-merged.cfg.in | 30 ++------------- 
.../test/configs/llvm-libunwind-shared.cfg.in | 34 ++--------------- .../test/configs/llvm-libunwind-static.cfg.in | 38 +++---------------- 6 files changed, 55 insertions(+), 91 deletions(-) create mode 100644 libunwind/test/configs/cmake-bridge.cfg.in diff --git a/libcxx/test/configs/cmake-bridge.cfg.in b/libcxx/test/configs/cmake-bridge.cfg.in index 05ac7e594aea24..df58522d6cbb2f 100644 --- a/libcxx/test/configs/cmake-bridge.cfg.in +++ b/libcxx/test/configs/cmake-bridge.cfg.in @@ -2,8 +2,6 @@ @SERIALIZED_LIT_PARAMS@ -import shlex - # # This file performs the bridge between the CMake configuration and the Lit # configuration files by setting up the LitConfig object and various Lit @@ -25,6 +23,7 @@ config.recursiveExpansionLimit = 10 config.test_exec_root = os.path.join('@CMAKE_BINARY_DIR@', 'test') # Add substitutions for bootstrapping the test suite configuration +import shlex config.substitutions.append(('%{cxx}', shlex.quote('@CMAKE_CXX_COMPILER@'))) config.substitutions.append(('%{libcxx}', '@LIBCXX_SOURCE_DIR@')) config.substitutions.append(('%{include}', '@LIBCXX_GENERATED_INCLUDE_DIR@')) diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt index 736b692969e732..94fc9c35958687 100644 --- a/libunwind/test/CMakeLists.txt +++ b/libunwind/test/CMakeLists.txt @@ -48,6 +48,10 @@ foreach(param IN LISTS LIBUNWIND_TEST_PARAMS) serialize_lit_param("${name}" "\"${value}\"") endforeach() +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/configs/cmake-bridge.cfg.in" + "${CMAKE_CURRENT_BINARY_DIR}/cmake-bridge.cfg" + @ONLY) + configure_lit_site_cfg( "${LIBUNWIND_TEST_CONFIG}" ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg diff --git a/libunwind/test/configs/cmake-bridge.cfg.in b/libunwind/test/configs/cmake-bridge.cfg.in new file mode 100644 index 00000000000000..21768702f6d84d --- /dev/null +++ b/libunwind/test/configs/cmake-bridge.cfg.in @@ -0,0 +1,37 @@ +@AUTO_GEN_COMMENT@ + +@SERIALIZED_LIT_PARAMS@ + +# +# This file performs the bridge between the CMake 
configuration and the Lit +# configuration files by setting up the LitConfig object and various Lit +# substitutions from CMake variables. +# +# Individual configuration files can take advantage of this bridge by +# loading the file and then setting up the remaining Lit substitutions. +# + +import os, site +site.addsitedir(os.path.join('@LIBUNWIND_LIBCXX_PATH@', 'utils')) +import libcxx.test.format + +# Basic configuration of the test suite +config.name = os.path.basename('@LIBUNWIND_TEST_CONFIG@') +config.test_source_root = os.path.join('@LIBUNWIND_SOURCE_DIR@', 'test') +config.test_format = libcxx.test.format.CxxStandardLibraryTest() +config.recursiveExpansionLimit = 10 +config.test_exec_root = '@CMAKE_BINARY_DIR@' +config.target_info = "@LIBUNWIND_TARGET_INFO@" + +# Add a few features that are common to all the configurations +if @LIBUNWIND_USES_ARM_EHABI@: + config.available_features.add('libunwind-arm-ehabi') +if not @LIBUNWIND_ENABLE_THREADS@: + config.available_features.add('libunwind-no-threads') + +# Add substitutions for bootstrapping the test suite configuration +import shlex +config.substitutions.append(('%{cxx}', shlex.quote('@CMAKE_CXX_COMPILER@'))) +config.substitutions.append(('%{executor}', '@LIBUNWIND_EXECUTOR@')) +config.substitutions.append(('%{include}', '@LIBUNWIND_SOURCE_DIR@/include')) +config.substitutions.append(('%{lib}', '@LIBUNWIND_LIBRARY_DIR@')) diff --git a/libunwind/test/configs/llvm-libunwind-merged.cfg.in b/libunwind/test/configs/llvm-libunwind-merged.cfg.in index 76b572c4cdbf79..d01ee7470de7bb 100644 --- a/libunwind/test/configs/llvm-libunwind-merged.cfg.in +++ b/libunwind/test/configs/llvm-libunwind-merged.cfg.in @@ -1,31 +1,12 @@ -@AUTO_GEN_COMMENT@ - -@SERIALIZED_LIT_PARAMS@ - # # Configuration file for running the libunwind tests against a libc++ shared library # into which the unwinder was merged. 
# -import os, site -site.addsitedir(os.path.join('@LIBUNWIND_LIBCXX_PATH@', 'utils')) -import libcxx.test.format - -# Basic configuration of the test suite -config.name = os.path.basename('@LIBUNWIND_TEST_CONFIG@') -config.test_source_root = os.path.join('@LIBUNWIND_SOURCE_DIR@', 'test') -config.test_format = libcxx.test.format.CxxStandardLibraryTest() -config.recursiveExpansionLimit = 10 -config.test_exec_root = '@CMAKE_BINARY_DIR@' -config.target_info = "@LIBUNWIND_TARGET_INFO@" +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') compile_flags = [] link_flags = [] -if @LIBUNWIND_USES_ARM_EHABI@: - config.available_features.add('libunwind-arm-ehabi') - -if not @LIBUNWIND_ENABLE_THREADS@: - config.available_features.add('libunwind-no-threads') if @LIBUNWIND_ENABLE_CET@: compile_flags.append('-fcf-protection=full') @@ -36,21 +17,18 @@ if '@CMAKE_SYSTEM_NAME@' == 'Linux': # Stack unwinding tests need unwinding tables and these are not generated by default on all targets. 
compile_flags.append('-funwind-tables') -config.substitutions.append(('%{executor}', '@LIBUNWIND_EXECUTOR@')) - -config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) local_sysroot = '@CMAKE_OSX_SYSROOT@' or '@CMAKE_SYSROOT@' config.substitutions.append(('%{flags}', '-isysroot {}'.format(local_sysroot) if local_sysroot else '' )) config.substitutions.append(('%{compile_flags}', - '-nostdinc++ -I {}/include {}'.format('@LIBUNWIND_SOURCE_DIR@', ' '.join(compile_flags)) + '-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags)) )) config.substitutions.append(('%{link_flags}', - '-L {0} -Wl,-rpath,{0} -lc++ -ldl {1}'.format('@LIBUNWIND_LIBRARY_DIR@', ' '.join(link_flags)) + '-L %{{lib}} -Wl,-rpath,%{{lib}} -lc++ -ldl {}'.format(' '.join(link_flags)) )) config.substitutions.append(('%{exec}', - '%{executor} --execdir %T -- ' if '@LIBUNWIND_EXECUTOR@' else '' + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libunwind/test/configs/llvm-libunwind-shared.cfg.in b/libunwind/test/configs/llvm-libunwind-shared.cfg.in index 11c9faedf0b847..b82eec71b02ff0 100644 --- a/libunwind/test/configs/llvm-libunwind-shared.cfg.in +++ b/libunwind/test/configs/llvm-libunwind-shared.cfg.in @@ -1,34 +1,11 @@ -@AUTO_GEN_COMMENT@ - -@SERIALIZED_LIT_PARAMS@ - # # Configuration file for running the libunwind tests against the shared library. # -# This file is a lot simpler than the ones for libc++ and libc++abi because -# while libunwind is written in C++, it doesn't use the C++ Standard Library -# so we don't need to set that up to run the tests correctly. 
-# - -import os, site -site.addsitedir(os.path.join('@LIBUNWIND_LIBCXX_PATH@', 'utils')) -import libcxx.test.format -# Basic configuration of the test suite -config.name = os.path.basename('@LIBUNWIND_TEST_CONFIG@') -config.test_source_root = os.path.join('@LIBUNWIND_SOURCE_DIR@', 'test') -config.test_format = libcxx.test.format.CxxStandardLibraryTest() -config.recursiveExpansionLimit = 10 -config.test_exec_root = '@CMAKE_BINARY_DIR@' -config.target_info = "@LIBUNWIND_TARGET_INFO@" +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') compile_flags = [] link_flags = [] -if @LIBUNWIND_USES_ARM_EHABI@: - config.available_features.add('libunwind-arm-ehabi') - -if not @LIBUNWIND_ENABLE_THREADS@: - config.available_features.add('libunwind-no-threads') if @LIBUNWIND_ENABLE_CET@: compile_flags.append('-fcf-protection=full') @@ -39,21 +16,18 @@ if '@CMAKE_SYSTEM_NAME@' == 'Linux': # Stack unwinding tests need unwinding tables and these are not generated by default on all targets. 
compile_flags.append('-funwind-tables') -config.substitutions.append(('%{executor}', '@LIBUNWIND_EXECUTOR@')) - -config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) local_sysroot = '@CMAKE_OSX_SYSROOT@' or '@CMAKE_SYSROOT@' config.substitutions.append(('%{flags}', '-isysroot {}'.format(local_sysroot) if local_sysroot else '' )) config.substitutions.append(('%{compile_flags}', - '-nostdinc++ -I {}/include {}'.format('@LIBUNWIND_SOURCE_DIR@', ' '.join(compile_flags)) + '-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags)) )) config.substitutions.append(('%{link_flags}', - '-L {0} -Wl,-rpath,{0} -lunwind -ldl {1}'.format('@LIBUNWIND_LIBRARY_DIR@', ' '.join(link_flags)) + '-L %{{lib}} -Wl,-rpath,%{{lib}} -lunwind -ldl {}'.format(' '.join(link_flags)) )) config.substitutions.append(('%{exec}', - '%{executor} --execdir %T -- ' if '@LIBUNWIND_EXECUTOR@' else '' + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libunwind/test/configs/llvm-libunwind-static.cfg.in b/libunwind/test/configs/llvm-libunwind-static.cfg.in index ce5a6c82caa067..a947a5d08f58bb 100644 --- a/libunwind/test/configs/llvm-libunwind-static.cfg.in +++ b/libunwind/test/configs/llvm-libunwind-static.cfg.in @@ -1,36 +1,13 @@ -@AUTO_GEN_COMMENT@ - -@SERIALIZED_LIT_PARAMS@ - # # Configuration file for running the libunwind tests against the static library. # -# This file is a lot simpler than the ones for libc++ and libc++abi because -# while libunwind is written in C++, it doesn't use the C++ Standard Library -# so we don't need to set that up to run the tests correctly. 
-# - -import os, site -site.addsitedir(os.path.join('@LIBUNWIND_LIBCXX_PATH@', 'utils')) -import libcxx.test.format -# Basic configuration of the test suite -config.name = os.path.basename('@LIBUNWIND_TEST_CONFIG@') -config.test_source_root = os.path.join('@LIBUNWIND_SOURCE_DIR@', 'test') -config.test_format = libcxx.test.format.CxxStandardLibraryTest() -config.recursiveExpansionLimit = 10 -config.test_exec_root = '@CMAKE_BINARY_DIR@' -config.target_info = "@LIBUNWIND_TARGET_INFO@" +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') compile_flags = [] link_flags = [] -if @LIBUNWIND_USES_ARM_EHABI@: - config.available_features.add('libunwind-arm-ehabi') - -if not @LIBUNWIND_ENABLE_THREADS@: - config.available_features.add('libunwind-no-threads') -else: +if @LIBUNWIND_ENABLE_THREADS@: link_flags.append('-lpthread') if @LIBUNWIND_ENABLE_CET@: @@ -39,26 +16,21 @@ if @LIBUNWIND_ENABLE_CET@: if '@CMAKE_SYSTEM_NAME@' == 'Linux': link_flags.append('-Wl,--export-dynamic') - # Stack unwinding tests need unwinding tables and these are not generated by default on all targets. 
compile_flags.append('-funwind-tables') -config.substitutions.append(('%{executor}', '@LIBUNWIND_EXECUTOR@')) - -config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) - local_sysroot = '@CMAKE_OSX_SYSROOT@' or '@CMAKE_SYSROOT@' config.substitutions.append(('%{flags}', '-isysroot {}'.format(local_sysroot) if local_sysroot else '' )) config.substitutions.append(('%{compile_flags}', - '-nostdinc++ -I {}/include {}'.format('@LIBUNWIND_SOURCE_DIR@', ' '.join(compile_flags)) + '-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags)) )) config.substitutions.append(('%{link_flags}', - '{}/libunwind.a -ldl {}'.format('@LIBUNWIND_LIBRARY_DIR@', ' '.join(link_flags)) + '%{{lib}}/libunwind.a -ldl {}'.format(' '.join(link_flags)) )) config.substitutions.append(('%{exec}', - '%{executor} --execdir %T -- ' if '@LIBUNWIND_EXECUTOR@' else '' + '%{executor} --execdir %T -- ' )) import os, site From 1c4b31c38b3c36d6e6bbb071ce66abfa52d64c10 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 18 May 2022 13:17:14 -0400 Subject: [PATCH 086/908] [libc++] Improve error messages for disabled modes We should not surface CMake-level options like LIBCXX_ENABLE_FILESYSTEM to our users, since they don't know what it means. Instead, use a slightly more general wording. Also, add an error in <ios> to improve the quality of errors for people trying to use the iostreams library when localization is disabled.
Differential Revision: https://reviews.llvm.org/D125910 --- libcxx/include/barrier | 2 +- libcxx/include/filesystem | 2 +- libcxx/include/future | 6 ++---- libcxx/include/ios | 7 ++++++- libcxx/include/latch | 2 +- libcxx/include/locale.h | 2 +- libcxx/include/semaphore | 2 +- libcxx/include/shared_mutex | 6 ++---- libcxx/include/thread | 6 ++---- 9 files changed, 17 insertions(+), 18 deletions(-) diff --git a/libcxx/include/barrier b/libcxx/include/barrier index 6f8a1f9f38be3b..36ea6f589d7e6e 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -59,7 +59,7 @@ namespace std #endif #ifdef _LIBCPP_HAS_NO_THREADS -# error is not supported on this single threaded system +# error " is not supported since libc++ has been configured without support for threads." #endif _LIBCPP_PUSH_MACROS diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index c23cac9828b18a..fd20ec44576bba 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -261,7 +261,7 @@ inline constexpr bool std::ranges::enable_view #if defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY) -# error "The Filesystem library is not supported since libc++ has been configured with LIBCXX_ENABLE_FILESYSTEM disabled" +# error "The library is not supported since libc++ has been configured without support for a filesystem." #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/future b/libcxx/include/future index b39747792261dd..4f49bf158bacd4 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -383,8 +383,8 @@ template struct uses_allocator, Alloc>; #endif #ifdef _LIBCPP_HAS_NO_THREADS -#error is not supported on this single threaded system -#else // !_LIBCPP_HAS_NO_THREADS +# error " is not supported since libc++ has been configured without support for threads." 
+#endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -2432,6 +2432,4 @@ future::share() _NOEXCEPT _LIBCPP_END_NAMESPACE_STD -#endif // !_LIBCPP_HAS_NO_THREADS - #endif // _LIBCPP_FUTURE diff --git a/libcxx/include/ios b/libcxx/include/ios index a790ba5c177581..d1b8c43a8ac9c4 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -210,8 +210,13 @@ storage-class-specifier const error_category& iostream_category() noexcept; */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> + +#if defined(_LIBCPP_HAS_NO_LOCALIZATION) +# error "The iostreams library is not supported since libc++ has been configured without support for localization." +#endif + +#include <__assert> // all public C++ headers provide the assertion handler #include <__ios/fpos.h> #include <__locale> #include <__utility/swap.h> diff --git a/libcxx/include/latch b/libcxx/include/latch index d6cb47622268b5..21abbad2171fbc 100644 --- a/libcxx/include/latch +++ b/libcxx/include/latch @@ -51,7 +51,7 @@ namespace std #endif #ifdef _LIBCPP_HAS_NO_THREADS -# error is not supported on this single threaded system +# error " is not supported since libc++ has been configured without support for threads." #endif _LIBCPP_PUSH_MACROS diff --git a/libcxx/include/locale.h b/libcxx/include/locale.h index 3a05c18abb7319..17c0a705a00996 100644 --- a/libcxx/include/locale.h +++ b/libcxx/include/locale.h @@ -36,7 +36,7 @@ #include <__config> #if defined(_LIBCPP_HAS_NO_LOCALIZATION) -# error "The Localization library is not supported since libc++ has been configured with LIBCXX_ENABLE_LOCALIZATION disabled" +# error " is not supported since libc++ has been configured without support for localization." 
#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore index b174eba34dbcd5..ce1fa6987c4a91 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -59,7 +59,7 @@ using binary_semaphore = counting_semaphore<1>; #endif #ifdef _LIBCPP_HAS_NO_THREADS -# error is not supported on this single threaded system +# error " is not supported since libc++ has been configured without support for threads." #endif _LIBCPP_PUSH_MACROS diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index 68a2bbae0f53aa..a089aa9fa8177a 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -140,8 +140,8 @@ _LIBCPP_PUSH_MACROS #endif #ifdef _LIBCPP_HAS_NO_THREADS -#error is not supported on this single threaded system -#else // !_LIBCPP_HAS_NO_THREADS +# error " is not supported since libc++ has been configured without support for threads." +#endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -501,8 +501,6 @@ swap(shared_lock<_Mutex>& __x, shared_lock<_Mutex>& __y) _NOEXCEPT _LIBCPP_END_NAMESPACE_STD -#endif // !_LIBCPP_HAS_NO_THREADS - #endif // _LIBCPP_STD_VER > 11 _LIBCPP_POP_MACROS diff --git a/libcxx/include/thread b/libcxx/include/thread index 837ee4a9972e7e..20708f119c9611 100644 --- a/libcxx/include/thread +++ b/libcxx/include/thread @@ -105,8 +105,8 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> #ifdef _LIBCPP_HAS_NO_THREADS -#error is not supported on this single threaded system -#else // !_LIBCPP_HAS_NO_THREADS +# error " is not supported since libc++ has been configured without support for threads." 
+#endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -401,8 +401,6 @@ void yield() _NOEXCEPT {__libcpp_thread_yield();} _LIBCPP_END_NAMESPACE_STD -#endif // !_LIBCPP_HAS_NO_THREADS - _LIBCPP_POP_MACROS #endif // _LIBCPP_THREAD From ae80024fbe51339afabfa2ff43ae532356fa3c93 Mon Sep 17 00:00:00 2001 From: Stephen Long Date: Fri, 20 May 2022 06:41:37 -0700 Subject: [PATCH 087/908] [clang] Honor __attribute__((no_builtin("foo"))) on functions Support for `__attribute__((no_builtin("foo")))` was added in https://reviews.llvm.org/D68028, but builtins were still being used even when the attribute was placed on a function. Reviewed By: hans Differential Revision: https://reviews.llvm.org/D124701 --- clang/docs/ReleaseNotes.rst | 4 ++ clang/include/clang/Basic/AttrDocs.td | 3 -- clang/lib/CodeGen/CGExpr.cpp | 16 ++++++- clang/test/CodeGen/no-builtin-2.c | 63 +++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/no-builtin-2.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 69b5dc1dd1fd39..5d1b12504f787b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -318,6 +318,10 @@ Attribute Changes in Clang - The ``__declspec(naked)`` attribute can no longer be written on a member function in Microsoft compatibility mode, matching the behavior of cl.exe. +- Attribute ``no_builtin`` should now affect the generated code. It now disables + builtins (corresponding to the specific names listed in the attribute) in the + body of the function the attribute is on. + Windows Support --------------- diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index d635da6b84b845..aedf11a7075300 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6000,9 +6000,6 @@ attribute `clang_builtin_alias`. def NoBuiltinDocs : Documentation { let Category = DocCatFunction; let Content = [{ -.. 
Note:: This attribute is not yet fully implemented, it is validated but has - no effect on the generated code. - The ``__attribute__((no_builtin))`` is similar to the ``-fno-builtin`` flag except it is specific to the body of a function. The attribute may also be applied to a virtual function but has no effect on the behavior of overriding diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index fbe1586001b192..3d7f13aed0abaa 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5034,7 +5034,16 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { const FunctionDecl *FD = cast(GD.getDecl()); if (auto builtinID = FD->getBuiltinID()) { + std::string NoBuiltinFD = ("no-builtin-" + FD->getName()).str(); + std::string NoBuiltins = "no-builtins"; std::string FDInlineName = (FD->getName() + ".inline").str(); + + bool IsPredefinedLibFunction = + CGF.getContext().BuiltinInfo.isPredefinedLibFunction(builtinID); + bool HasAttributeNoBuiltin = + CGF.CurFn->getAttributes().hasFnAttr(NoBuiltinFD) || + CGF.CurFn->getAttributes().hasFnAttr(NoBuiltins); + // When directing calling an inline builtin, call it through it's mangled // name to make it clear it's not the actual builtin. if (CGF.CurFn->getName() != FDInlineName && @@ -5054,8 +5063,11 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { // Replaceable builtins provide their own implementation of a builtin. If we // are in an inline builtin implementation, avoid trivial infinite - // recursion. - else + // recursion. Honor __attribute__((no_builtin("foo"))) or + // __attribute__((no_builtin)) on the current function unless foo is + // not a predefined library function which means we must generate the + // builtin no matter what. 
+ else if (!IsPredefinedLibFunction || !HasAttributeNoBuiltin) return CGCallee::forBuiltin(builtinID, FD); } diff --git a/clang/test/CodeGen/no-builtin-2.c b/clang/test/CodeGen/no-builtin-2.c new file mode 100644 index 00000000000000..f236f29779e070 --- /dev/null +++ b/clang/test/CodeGen/no-builtin-2.c @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s + +typedef typeof(sizeof(0)) size_t; + +void bar(char *s); +void *memset(void *s, int c, size_t n); +void *memcpy(void *d, const void *s, size_t n); +void *memmove(void *d, const void *s, size_t n); + +// CHECK: define{{.*}} void @foo1({{.*}}) #[[NO_NOBUILTIN:[0-9]+]] +// CHECK: call void @bar +// CHECK: call void @llvm.memset +// CHECK: call void @llvm.memcpy +// CHECK: call void @llvm.memmove +void foo1(char *s, char *d, size_t n) { + bar(s); + memset(s, 0, n); + memcpy(d, s, n); + memmove(d, s, n); +} + +// CHECK: define{{.*}} void @foo2({{.*}}) #[[NOBUILTIN_MEMSET:[0-9]+]] +// CHECK: call void @bar +// CHECK: {{.*}}call {{.*}} @memset +// CHECK: call void @llvm.memcpy +// CHECK: call void @llvm.memmove +void foo2(char *s, char *d, size_t n) __attribute__((no_builtin("memset"))) { + bar(s); + memset(s, 1, n); + memcpy(d, s, n); + memmove(d, s, n); +} + +// CHECK: define{{.*}} void @foo3({{.*}}) #[[NOBUILTIN_MEMSET_MEMCPY:[0-9]+]] +// CHECK: call void @bar +// CHECK: {{.*}}call {{.*}} @memset +// CHECK: {{.*}}call {{.*}} @memcpy +// CHECK: call void @llvm.memmove +void foo3(char *s, char *d, size_t n) __attribute__((no_builtin("memset", "memcpy"))) { + bar(s); + memset(s, 2, n); + memcpy(d, s, n); + memmove(d, s, n); +} + +// CHECK: define{{.*}} void @foo4({{.*}}) #[[NOBUILTINS:[0-9]+]] +// CHECK: call void @bar +// CHECK: {{.*}}call {{.*}} @memset +// CHECK: {{.*}}call {{.*}} @memcpy +// CHECK: {{.*}}call {{.*}} @memmove +void foo4(char *s, char *d, size_t n) __attribute__((no_builtin)) { + bar(s); + memset(s, 2, n); + memcpy(d, s, n); + memmove(s, d, n); +} + +// CHECK-NOT: attributes 
#[[NO_NOBUILTIN]] = {{{.*}}"no-builtin-memset"{{.*}}} +// CHECK-NOT: attributes #[[NO_NOBUILTIN]] = {{{.*}}"no-builtin-memcpy"{{.*}}"no-builtin-memset"{{.*}}} +// CHECK-NOT: attributes #[[NO_NOBUILTIN]] = {{{.*}}"no-builtins"{{.*}}} +// CHECK: attributes #[[NOBUILTIN_MEMSET]] = {{{.*}}"no-builtin-memset"{{.*}}} +// CHECK: attributes #[[NOBUILTIN_MEMSET_MEMCPY]] = {{{.*}}"no-builtin-memcpy"{{.*}}"no-builtin-memset"{{.*}}} +// CHECK: attributes #[[NOBUILTINS]] = {{{.*}}"no-builtins"{{.*}}} From 5450db5f54b86f908ce67cb681fbdf86b88abc54 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 20 May 2022 16:06:46 +0200 Subject: [PATCH 088/908] [analyzer][NFC] Remove the unused LocAsInteger::getPersistentLoc() Reviewed By: martong Differential Revision: https://reviews.llvm.org/D125920 --- .../clang/StaticAnalyzer/Core/PathSensitive/SVals.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index f4b229070d67fb..60a59cbeecf9d9 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -432,13 +432,6 @@ class LocAsInteger : public NonLoc { return D->first.castAs(); } - Loc getPersistentLoc() const { - const std::pair *D = - static_cast *>(Data); - const SVal& V = D->first; - return V.castAs(); - } - unsigned getNumBits() const { const std::pair *D = static_cast *>(Data); From 749fb33e82ff19d656af9c9205f3ac81c1ce52d8 Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Fri, 20 May 2022 16:11:02 +0200 Subject: [PATCH 089/908] [clang-format] Don't break lines after pragma region We have autogenerated pragma regions in our code which were awkwardly broken up like this: ``` #pragma region foo(bar : hello) ``` becomes ``` #pragma region foo(bar \ : hello) ``` This fixes the problem by adding region as a keyword and handling it the same way as pragma mark Reviewed
By: curdeius Differential Revision: https://reviews.llvm.org/D125961 --- clang/lib/Format/FormatToken.h | 2 ++ clang/lib/Format/TokenAnnotator.cpp | 4 ++-- clang/unittests/Format/FormatTest.cpp | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 9990933fcb20fc..71acdf2f094bab 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -930,6 +930,7 @@ struct AdditionalKeywords { kw___has_include_next = &IdentTable.get("__has_include_next"); kw_mark = &IdentTable.get("mark"); + kw_region = &IdentTable.get("region"); kw_extend = &IdentTable.get("extend"); kw_option = &IdentTable.get("option"); @@ -1053,6 +1054,7 @@ struct AdditionalKeywords { // Pragma keywords. IdentifierInfo *kw_mark; + IdentifierInfo *kw_region; // Proto keywords. IdentifierInfo *kw_extend; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 68f35643bbf2f5..9afe4f8fb0868e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1251,9 +1251,9 @@ class AnnotatingParser { void parsePragma() { next(); // Consume "pragma". if (CurrentToken && - CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) { + CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option, Keywords.kw_region)) { bool IsMark = CurrentToken->is(Keywords.kw_mark); - next(); // Consume "mark". + next(); next(); // Consume first token (so we fix leading whitespace). 
while (CurrentToken) { if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index e54a6db2ca46be..762ccf7dbd8d3a 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -19597,6 +19597,13 @@ TEST_F(FormatTest, UnderstandPragmaOption) { EXPECT_EQ("#pragma option -C -A", format("#pragma option -C -A")); } +TEST_F(FormatTest, UnderstandPragmaRegion) { + auto Style = getLLVMStyleWithColumns(0); + verifyFormat("#pragma region TEST(FOO : BAR)", Style); + + EXPECT_EQ("#pragma region TEST(FOO : BAR)", format("#pragma region TEST(FOO : BAR)", Style)); +} + TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { FormatStyle Style = getLLVMStyleWithColumns(20); From edc7a0814575002abecb8ac7b49c669bf5f5ab7a Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 20 May 2022 15:09:53 +0200 Subject: [PATCH 090/908] [clangd] Provide links to clang-tidy and include-cleaner diagnostic docs LSP supports Diagnostic.codeDescription since 3.16. In VSCode, this turns the code (e.g. "unused-includes" or "bugprone-foo") into a clickable link that opens the docs in a web browser. 
Differential Revision: https://reviews.llvm.org/D126065 --- clang-tools-extra/clangd/Diagnostics.cpp | 30 +++++++++++++++++++ clang-tools-extra/clangd/Diagnostics.h | 4 +++ clang-tools-extra/clangd/Protocol.cpp | 6 ++++ clang-tools-extra/clangd/Protocol.h | 12 +++++++- .../clangd/test/diagnostics-tidy.test | 3 ++ .../clangd/unittests/DiagnosticsTests.cpp | 6 +++- 6 files changed, 59 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp index 46a16a9f2eeb39..316bc7d1ac6668 100644 --- a/clang-tools-extra/clangd/Diagnostics.cpp +++ b/clang-tools-extra/clangd/Diagnostics.cpp @@ -477,6 +477,10 @@ void toLSPDiags( } Main.code = D.Name; + if (auto URI = getDiagnosticDocURI(D.Source, D.ID, D.Name)) { + Main.codeDescription.emplace(); + Main.codeDescription->href = std::move(*URI); + } switch (D.Source) { case Diag::Clang: Main.source = "clang"; @@ -903,5 +907,31 @@ llvm::StringRef normalizeSuppressedCode(llvm::StringRef Code) { return Code; } +llvm::Optional getDiagnosticDocURI(Diag::DiagSource Source, + unsigned ID, + llvm::StringRef Name) { + switch (Source) { + case Diag::Unknown: + break; + case Diag::Clang: + // There is a page listing many warning flags, but it provides too little + // information to be worth linking. + // https://clang.llvm.org/docs/DiagnosticsReference.html + break; + case Diag::ClangTidy: + return {("https://clang.llvm.org/extra/clang-tidy/checks/" + Name + ".html") + .str()}; + case Diag::Clangd: + if (Name == "unused-includes") + return {"https://clangd.llvm.org/guides/include-cleaner"}; + break; + case Diag::ClangdConfig: + // FIXME: we should link to https://clangd.llvm.org/config + // However we have no diagnostic codes, which the link should describe! 
+ break; + } + return llvm::None; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Diagnostics.h b/clang-tools-extra/clangd/Diagnostics.h index 0c8af97e6695a1..14627ea3d6a017 100644 --- a/clang-tools-extra/clangd/Diagnostics.h +++ b/clang-tools-extra/clangd/Diagnostics.h @@ -126,6 +126,10 @@ CodeAction toCodeAction(const Fix &D, const URIForFile &File); /// Convert from clang diagnostic level to LSP severity. int getSeverity(DiagnosticsEngine::Level L); +/// Returns a URI providing more information about a particular diagnostic. +llvm::Optional getDiagnosticDocURI(Diag::DiagSource, unsigned ID, + llvm::StringRef Name); + /// StoreDiags collects the diagnostics that can later be reported by /// clangd. It groups all notes for a diagnostic into a single Diag /// and filters out diagnostics that don't mention the main file (i.e. neither diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index 5f8e4938c65ad5..aed4ac09d4869c 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -592,6 +592,10 @@ llvm::json::Value toJSON(const DiagnosticRelatedInformation &DRI) { llvm::json::Value toJSON(DiagnosticTag Tag) { return static_cast(Tag); } +llvm::json::Value toJSON(const CodeDescription &D) { + return llvm::json::Object{{"href", D.href}}; +} + llvm::json::Value toJSON(const Diagnostic &D) { llvm::json::Object Diag{ {"range", D.range}, @@ -604,6 +608,8 @@ llvm::json::Value toJSON(const Diagnostic &D) { Diag["codeActions"] = D.codeActions; if (!D.code.empty()) Diag["code"] = D.code; + if (D.codeDescription.hasValue()) + Diag["codeDescription"] = *D.codeDescription; if (!D.source.empty()) Diag["source"] = D.source; if (D.relatedInformation) diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 3f66cb2a5fcddc..fd3b671d15f31c 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ 
-835,6 +835,13 @@ enum DiagnosticTag { }; llvm::json::Value toJSON(DiagnosticTag Tag); +/// Structure to capture a description for an error code. +struct CodeDescription { + /// An URI to open with more information about the diagnostic error. + std::string href; +}; +llvm::json::Value toJSON(const CodeDescription &); + struct CodeAction; struct Diagnostic { /// The range at which the message applies. @@ -847,6 +854,9 @@ struct Diagnostic { /// The diagnostic's code. Can be omitted. std::string code; + /// An optional property to describe the error code. + llvm::Optional codeDescription; + /// A human-readable string describing the source of this /// diagnostic, e.g. 'typescript' or 'super lint'. std::string source; @@ -874,7 +884,7 @@ struct Diagnostic { /// A data entry field that is preserved between a /// `textDocument/publishDiagnostics` notification - /// and`textDocument/codeAction` request. + /// and `textDocument/codeAction` request. /// Mutating users should associate their data with a unique key they can use /// to retrieve later on. 
llvm::json::Object data; diff --git a/clang-tools-extra/clangd/test/diagnostics-tidy.test b/clang-tools-extra/clangd/test/diagnostics-tidy.test index 1d10541be8bf07..c7e79b0c6d5f59 100644 --- a/clang-tools-extra/clangd/test/diagnostics-tidy.test +++ b/clang-tools-extra/clangd/test/diagnostics-tidy.test @@ -8,6 +8,9 @@ # CHECK-NEXT: "diagnostics": [ # CHECK-NEXT: { # CHECK-NEXT: "code": "bugprone-sizeof-expression", +# CHECK-NEXT: "codeDescription": { +# CHECK-NEXT: "href": "https://clang.llvm.org/extra/clang-tidy/checks/bugprone-sizeof-expression.html" +# CHECK-NEXT: }, # CHECK-NEXT: "message": "Suspicious usage of 'sizeof(K)'; did you mean 'K'?", # CHECK-NEXT: "range": { # CHECK-NEXT: "end": { diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index d83f43d83f3fe5..45ceee01ea9a1c 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -1828,13 +1828,17 @@ TEST(DiagnosticsTest, IncludeCleaner) { Cfg.Diagnostics.Includes.IgnoreHeader.emplace_back( [](llvm::StringRef Header) { return Header.endswith("ignore.h"); }); WithContextValue WithCfg(Config::Key, std::move(Cfg)); + auto AST = TU.build(); EXPECT_THAT( - *TU.build().getDiagnostics(), + *AST.getDiagnostics(), UnorderedElementsAre(AllOf( Diag(Test.range("diag"), "included header unused.h is not used directly"), withTag(DiagnosticTag::Unnecessary), diagSource(Diag::Clangd), withFix(Fix(Test.range("fix"), "", "remove #include directive"))))); + auto &Diag = AST.getDiagnostics()->front(); + EXPECT_EQ(getDiagnosticDocURI(Diag.Source, Diag.ID, Diag.Name), + std::string("https://clangd.llvm.org/guides/include-cleaner")); Cfg.Diagnostics.SuppressAll = true; WithContextValue SuppressAllWithCfg(Config::Key, std::move(Cfg)); EXPECT_THAT(*TU.build().getDiagnostics(), IsEmpty()); From 0443bfabe7ba0bc5365a34de14ab8d108d2f0cd0 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet 
Date: Fri, 20 May 2022 14:37:17 +0000 Subject: [PATCH 091/908] Revert "[libc] Apply no-builtin everywhere, remove unnecessary flags" This reverts commit 94d6dd90576637fa0eb2c40ca92320ad4c1a6942. --- libc/cmake/modules/LLVMLibCObjectRules.cmake | 2 +- libc/src/string/CMakeLists.txt | 14 +++++++ .../llvm-project-overlay/libc/BUILD.bazel | 37 +++++++++++++++---- .../libc/libc_build_rules.bzl | 1 - 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index 2544a648c8a590..8e2f489752a641 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -3,7 +3,7 @@ set(OBJECT_LIBRARY_TARGET_TYPE "OBJECT_LIBRARY") function(_get_common_compile_options output_var) set(compile_options ${LLVM_CXX_STD_default} ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN}) if(NOT ${LIBC_TARGET_OS} STREQUAL "windows") - set(compile_options ${compile_options} -fpie -ffreestanding -fno-builtin) + set(compile_options ${compile_options} -fpie -ffreestanding) endif() if(LLVM_COMPILER_IS_GCC_COMPATIBLE) list(APPEND compile_options "-fno-exceptions") diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 68962e21c15d10..c4f45e63311843 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -279,6 +279,7 @@ function(add_implementation name impl_name) ${ARGN}) if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + list(APPEND ADD_IMPL_MLLVM_COMPILE_OPTIONS "-combiner-global-alias-analysis") # Note that '-mllvm' needs to be prefixed with 'SHELL:' to prevent CMake flag deduplication. 
foreach(opt IN LISTS ADD_IMPL_MLLVM_COMPILE_OPTIONS) list(APPEND ADD_IMPL_COMPILE_OPTIONS "SHELL:-mllvm ${opt}") @@ -308,6 +309,9 @@ function(add_bcmp bcmp_name) DEPENDS .memory_utils.memory_utils libc.include.string + COMPILE_OPTIONS + -fno-builtin-memcmp + -fno-builtin-bcmp ${ARGN} ) endfunction() @@ -335,6 +339,8 @@ function(add_bzero bzero_name) DEPENDS .memory_utils.memset_implementation libc.include.string + COMPILE_OPTIONS + -fno-builtin-bzero ${ARGN} ) endfunction() @@ -362,6 +368,8 @@ function(add_memcmp memcmp_name) DEPENDS .memory_utils.memcmp_implementation libc.include.string + COMPILE_OPTIONS + -fno-builtin-memcmp ${ARGN} ) endfunction() @@ -392,6 +400,8 @@ function(add_memcpy memcpy_name) DEPENDS .memory_utils.memcpy_implementation libc.include.string + COMPILE_OPTIONS + -fno-builtin-memcpy ${ARGN} ) endfunction() @@ -424,6 +434,8 @@ function(add_memmove memmove_name) DEPENDS .memory_utils.memory_utils libc.include.string + COMPILE_OPTIONS + -fno-builtin ${ARGN} ) endfunction() @@ -456,6 +468,8 @@ function(add_memset memset_name) DEPENDS .memory_utils.memset_implementation libc.include.string + COMPILE_OPTIONS + -fno-builtin-memset ${ARGN} ) endfunction() diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index c23a6fe53b7fd5..5f13a26c9fd8ab 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -85,10 +85,7 @@ cc_library( hdrs = [ "src/__support/CPP/UInt.h", ], - deps = [ - "__support_cpp_array", - ":libc_root", - ], + deps = [":libc_root","__support_cpp_array"], ) cc_library( @@ -96,10 +93,7 @@ cc_library( hdrs = [ "src/__support/CPP/TypeTraits.h", ], - deps = [ - "__support_cpp_uint", - ":libc_root", - ], + deps = [":libc_root","__support_cpp_uint"], ) cc_library( @@ -812,6 +806,12 @@ libc_function( name = "memcpy", srcs = ["src/string/memcpy.cpp"], hdrs = ["src/string/memcpy.h"], + copts = [ + "-fno-builtin-memcpy", + 
"-fno-builtin-memmove", + "-mllvm -combiner-global-alias-analysis", + "-mllvm --tail-merge-threshold=0", + ], features = no_sanitize_features, deps = [ ":__support_common", @@ -823,6 +823,10 @@ libc_function( name = "memset", srcs = ["src/string/memset.cpp"], hdrs = ["src/string/memset.h"], + copts = [ + "-fno-builtin-memset", + "-mllvm -combiner-global-alias-analysis", + ], features = no_sanitize_features, deps = [ ":__support_common", @@ -834,6 +838,10 @@ libc_function( name = "memmove", srcs = ["src/string/memmove.cpp"], hdrs = ["src/string/memmove.h"], + copts = [ + "-fno-builtin-memmove", + "-mllvm -combiner-global-alias-analysis", + ], features = no_sanitize_features, deps = [ ":__support_common", @@ -847,6 +855,10 @@ libc_function( name = "memcmp", srcs = ["src/string/memcmp.cpp"], hdrs = ["src/string/memcmp.h"], + copts = [ + "-fno-builtin-memcmp", + "-mllvm -combiner-global-alias-analysis", + ], features = no_sanitize_features, deps = [ ":__support_common", @@ -859,6 +871,10 @@ libc_function( name = "bcmp", srcs = ["src/string/bcmp.cpp"], hdrs = ["src/string/bcmp.h"], + copts = [ + "-fno-builtin-bcmp", + "-fno-builtin-memcmp", + ], features = no_sanitize_features, deps = [ ":__support_common", @@ -870,6 +886,11 @@ libc_function( name = "bzero", srcs = ["src/string/bzero.cpp"], hdrs = ["src/string/bzero.h"], + copts = [ + "-fno-builtin-bzero", + "-fno-builtin-memset", + "-mllvm -combiner-global-alias-analysis", + ], features = no_sanitize_features, deps = [ ":__support_common", diff --git a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl index 8b4882d0c504db..8e86e8f8a9940a 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl @@ -31,7 +31,6 @@ def libc_function(name, srcs, deps = None, copts = None, **kwargs): deps.append(LIBC_ROOT_TARGET) copts = copts or [] copts.append("-O3") - 
copts.append("-fno-builtin") # We compile the code twice, the first target is suffixed with ".__internal__" and contains the # C++ functions in the "__llvm_libc" namespace. This allows us to test the function in the From 64748efc27406244496e0c2ee6714e76b794c277 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 19 May 2022 13:10:37 +0200 Subject: [PATCH 092/908] [libc++] Use _LIBCPP_ASSERT by default for _PSTL_ASSERTions Reviewed By: ldionne, #libc Spies: jwakely, rodgert, libcxx-commits Differential Revision: https://reviews.llvm.org/D125634 --- pstl/include/pstl/internal/pstl_config.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pstl/include/pstl/internal/pstl_config.h b/pstl/include/pstl/internal/pstl_config.h index 80a1f9a882bb2c..ad0cb5d312b470 100644 --- a/pstl/include/pstl/internal/pstl_config.h +++ b/pstl/include/pstl/internal/pstl_config.h @@ -27,7 +27,12 @@ # define _PSTL_USAGE_WARNINGS #endif -#if !defined(_PSTL_ASSERT) +#if defined(_LIBCPP_VERSION) +# include <__assert> +# define _PSTL_ASSERT(pred) _LIBCPP_ASSERT(pred, "") +#elif defined(__GLIBCXX__) +# define _PSTL_ASSERT(pred) __glibcxx_assert(pred) +#else # include # define _PSTL_ASSERT(pred) (assert((pred))) #endif From 11a09af76d11ad5a9f1f95b561112af17ff81f80 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 20 May 2022 17:11:18 +0200 Subject: [PATCH 093/908] Fix an unused variable warning in no-asserts build mode --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ca9345f5233e46..ab556d699f1633 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6718,6 +6718,7 @@ static T *performExtractsShuffleAction( Mask[Idx] = (Res.second ? 
Idx : Mask[Idx]) + VF; } auto *V = ValueSelect::get(Base); + (void)V; assert((!V || GetVF(V) == Mask.size()) && "Expected base vector of VF number of elements."); Prev = Action(Mask, {nullptr, Res.first}); From 5b18ef7256a129273b6786545f8b66e928442e81 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 20 May 2022 13:06:03 +0100 Subject: [PATCH 094/908] [AMDGPU] Add verification for mandatory literals Extend the literal operand checking in SIInstrInfo::verifyInstruction to check VOP2 instructions like V_FMAAK_F32 which have a mandatory literal operand. The rule is that src0 can also be a literal, but only if it is the same literal value. AMDGPUAsmParser::validateConstantBusLimitations already handles this correctly. Differential Revision: https://reviews.llvm.org/D126063 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +++++++--- llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll | 5 +++-- .../AMDGPU/verify-constant-bus-violations.mir | 15 +++++++++++---- .../CodeGen/AMDGPU/verify-duplicate-literal.mir | 9 +++++++++ 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index cb1a4a269c9468..0b0f2c4a64b194 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4252,8 +4252,12 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, bool UsesLiteral = false; const MachineOperand *LiteralVal = nullptr; - if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) + int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); + if (ImmIdx != -1) { ++ConstantBusCount; + UsesLiteral = true; + LiteralVal = &MI.getOperand(ImmIdx); + } SmallVector SGPRsUsed; Register SGPRUsed; @@ -4280,8 +4284,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, UsesLiteral = true; LiteralVal = &MO; } else if (!MO.isIdenticalTo(*LiteralVal)) { - assert(isVOP3(MI)); - ErrInfo = "VOP3 instruction uses more than one literal"; + 
assert(isVOP2(MI) || isVOP3(MI)); + ErrInfo = "VOP2/VOP3 instruction uses more than one literal"; return false; } } diff --git a/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll index bfa6f0c2598f61..8ac8b6b427989d 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX10 +; RUN: not --crash llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s 2>&1 | FileCheck %s -check-prefix GFX10 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) @@ -6,7 +6,8 @@ declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 ; FIXME: This instruction uses two different literal constants which is not ; allowed. ; GFX10-LABEL: _amdgpu_ps_main: -; GFX10: v_fmaak_f32 {{v[0-9]+}}, 0x40490fdb, {{v[0-9]+}}, 0xbfc90fdb +; GFX10: Bad machine code: VOP2/VOP3 instruction uses more than one literal +; GFX10: instruction: %4:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_FMAAK_F32 1078530011, %0:vgpr_32, -1077342245, implicit $mode, implicit $exec define amdgpu_ps void @_amdgpu_ps_main(float %arg) { bb: %i = fmul reassoc nnan nsz arcp contract afn float %arg, 0x400921FB60000000 diff --git a/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir b/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir index a036bf5c4807f0..a9e34c0279491c 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir @@ -1,21 +1,26 @@ -# RUN: not --crash llc -march=amdgcn -mcpu=gfx900 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX9-ERR %s -# RUN: not --crash llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass 
machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX10PLUS-ERR %s -# RUN: not --crash llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX10PLUS-ERR %s +# RUN: not --crash llc -march=amdgcn -mcpu=gfx900 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" -check-prefix=GFX9-ERR %s +# RUN: not --crash llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" -check-prefix=GFX10PLUS-ERR %s +# RUN: not --crash llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" -check-prefix=GFX10PLUS-ERR %s # GFX9-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** # GFX9-ERR: $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, -1, killed $sgpr0_sgpr1, implicit $exec +# GFX9-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** +# GFX9-ERR: $vgpr0 = V_FMAAK_F32 $sgpr2, $vgpr0, 1077936128, implicit $mode, implicit $exec --- name: sgpr_reuse_2sgpr liveins: - { reg: '$sgpr0_sgpr1', virtual-reg: '' } body: | bb.0: - liveins: $sgpr0_sgpr1 + liveins: $sgpr0_sgpr1, $sgpr2 $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, -1, killed $sgpr0_sgpr1, implicit $exec + $vgpr0 = V_FMAAK_F32 $sgpr2, $vgpr0, 1077936128, implicit $mode, implicit $exec ... 
# GFX10PLUS-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** # GFX10PLUS-ERR: $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, $sgpr2, killed $sgpr0_sgpr1, implicit $exec +# GFX10PLUS-ERR: *** Bad machine code: VOP2/VOP3 instruction uses more than one literal *** +# GFX10PLUS-ERR: $vgpr0 = V_FMAAK_F32 1077936128, $vgpr0, 0, implicit $mode, implicit $exec --- name: sgpr_reuse_3sgpr liveins: @@ -25,4 +30,6 @@ body: | bb.0: liveins: $sgpr0_sgpr1, $sgpr2 $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, $sgpr2, killed $sgpr0_sgpr1, implicit $exec + $vgpr0 = V_FMAAK_F32 1077936128, $vgpr0, 0, implicit $mode, implicit $exec + $vgpr0 = V_FMAAK_F32 $sgpr2, $vgpr0, 1077936128, implicit $mode, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/verify-duplicate-literal.mir b/llvm/test/CodeGen/AMDGPU/verify-duplicate-literal.mir index a5920d3cac07c1..ba303fe8f8c855 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-duplicate-literal.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-duplicate-literal.mir @@ -27,6 +27,15 @@ body: | $vgpr0 = V_FMA_F32_e64 0, $vgpr0, 0, 1077936128, 0, 1077936128, 0, 0, implicit $mode, implicit $exec ... +--- +name: use_duplicate_literal_fmaak +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + $vgpr0 = V_FMAAK_F32 1077936128, $vgpr0, 1077936128, implicit $mode, implicit $exec +... + --- name: use_duplicate_literal_sop2 tracksRegLiveness: true From 78ec59e6aea9c29f204877bacfa12d70c21289fc Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 20 May 2022 13:23:53 +0100 Subject: [PATCH 095/908] [AMDGPU] Handle mandatory literals in isOperandLegal Extend SIInstrInfo::isOperandLegal to enforce a limit on the number of literal operands for all VALU instructions, not just VOP3. In particular it now handles VOP2 instructions with a mandatory literal operand like V_FMAAK_F32. 
Differential Revision: https://reviews.llvm.org/D126064 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 ++++++-------- llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll | 9 ++++----- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0b0f2c4a64b194..b23e884a5d0e1f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4965,9 +4965,9 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, MO = &MI.getOperand(OpIdx); int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); - int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0; + int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0; if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { - if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--) + if (isLiteralConstantLike(*MO, OpInfo) && !LiteralLimit--) return false; SmallDenseSet SGPRsUsed; @@ -4986,12 +4986,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; SGPRsUsed.insert(SGPR); } - } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) { - if (--ConstantBusLimit <= 0) - return false; - } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) && - isLiteralConstantLike(Op, InstDesc.OpInfo[i])) { - if (!VOP3LiteralLimit--) + } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32 || + (AMDGPU::isSISrcOperand(InstDesc, i) && + isLiteralConstantLike(Op, InstDesc.OpInfo[i]))) { + if (!LiteralLimit--) return false; if (--ConstantBusLimit <= 0) return false; diff --git a/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll index 8ac8b6b427989d..4af5b66bb12259 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll @@ -1,13 +1,12 @@ -; RUN: not --crash llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s 2>&1 | FileCheck 
%s -check-prefix GFX10 +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX10 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) -; FIXME: This instruction uses two different literal constants which is not -; allowed. +; Check that this constant is not folded into the v_fmaak_f32 instruction. ; GFX10-LABEL: _amdgpu_ps_main: -; GFX10: Bad machine code: VOP2/VOP3 instruction uses more than one literal -; GFX10: instruction: %4:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_FMAAK_F32 1078530011, %0:vgpr_32, -1077342245, implicit $mode, implicit $exec +; GFX10: v_mov_b32_e32 v1, 0x40490fdb +; GFX10: v_fmaak_f32 v1, v0, v1, 0xbfc90fdb define amdgpu_ps void @_amdgpu_ps_main(float %arg) { bb: %i = fmul reassoc nnan nsz arcp contract afn float %arg, 0x400921FB60000000 From 80c836ec557acd8ffb9fd6b3483dc75c4bcf0b11 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 20 May 2022 08:24:42 -0700 Subject: [PATCH 096/908] [lldb] Disable scripted_crashlog_json.test on Apple Silicon Disable scripted_crashlog_json.test on Apple Silicon until Ismail has bandwidth to investigate. 
rdar://93655633 --- .../Python/Crashlog/scripted_crashlog_json.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/scripted_crashlog_json.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/scripted_crashlog_json.test index 4cf61599dd877d..2bf8d3e022baf1 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/scripted_crashlog_json.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/scripted_crashlog_json.test @@ -1,5 +1,8 @@ # REQUIRES: python, native && target-aarch64 && system-darwin +# rdar://93655633 +# UNSUPPORTED: arm64 + # RUN: %clangxx_host -std=c++17 -g %S/Inputs/multithread-test.cc -o %t.out # RUN: cp %S/Inputs/scripted_crashlog.ips %t.crash From aed49eac87b8aa77298252ea781bae4725ae8046 Mon Sep 17 00:00:00 2001 From: Nabeel Omer Date: Fri, 20 May 2022 15:24:09 +0100 Subject: [PATCH 097/908] [X86] Add tests for FREM Introduces basic test coverage for frem on x86. Split off from https://reviews.llvm.org/D125988 Differential Revision: https://reviews.llvm.org/D126055 --- llvm/test/CodeGen/X86/frem-libcall.ll | 57 + llvm/test/CodeGen/X86/frem.ll | 1462 +++++++++++++++++++++++++ 2 files changed, 1519 insertions(+) create mode 100644 llvm/test/CodeGen/X86/frem-libcall.ll create mode 100644 llvm/test/CodeGen/X86/frem.ll diff --git a/llvm/test/CodeGen/X86/frem-libcall.ll b/llvm/test/CodeGen/X86/frem-libcall.ll new file mode 100644 index 00000000000000..b48139215d6ff3 --- /dev/null +++ b/llvm/test/CodeGen/X86/frem-libcall.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s + +; FIXME: Ensure vectorized FREMs are not widened/unrolled such that they get lowered +; into libcalls on undef elements. 
+ +define float @frem(<2 x float> %a0, <2 x float> %a1, <2 x float> %a2, <2 x float> *%p3) nounwind { +; CHECK-LABEL: frem: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $80, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1] 
+; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: movlps %xmm1, (%rbx) +; CHECK-NEXT: addq $80, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <2 x float> %a0, %a1 + %fdiv = fdiv <2 x float> %frem, %a2 + %ex0 = extractelement <2 x float> %fdiv, i32 0 + %ex1 = extractelement <2 x float> %fdiv, i32 1 + %res = fadd float %ex0, %ex1 + store <2 x float> %fdiv, <2 x float> *%p3 + ret float %res +} + diff --git a/llvm/test/CodeGen/X86/frem.ll b/llvm/test/CodeGen/X86/frem.ll new file mode 100644 index 00000000000000..50e7d62fb4a2e1 --- /dev/null +++ b/llvm/test/CodeGen/X86/frem.ll @@ -0,0 +1,1462 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s + +; Basic test coverage for FREM + +define void @frem_f16(half %a0, half %a1, half *%p3) nounwind { +; CHECK-LABEL: frem_f16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rdx, %rbx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movzwl %si, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %bp, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, (%rbx) +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %frem = frem half %a0, %a1 + store half %frem, half *%p3 + ret void +} + +define void @frem_f32(float %a0, float %a1, float *%p3) nounwind { +; CHECK-LABEL: frem_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movss %xmm0, (%rbx) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem float %a0, %a1 + store float %frem, float *%p3 + ret void +} 
+ +define void @frem_f64(double %a0, double %a1, double *%p3) nounwind { +; CHECK-LABEL: frem_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movsd %xmm0, (%rbx) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem double %a0, %a1 + store double %frem, double *%p3 + ret void +} + +define void @frem_f80(x86_fp80 %a0, x86_fp80 %a1, x86_fp80 *%p3) nounwind { +; CHECK-LABEL: frem_f80: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq fmodl@PLT +; CHECK-NEXT: fstpt (%rbx) +; CHECK-NEXT: addq $32, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem x86_fp80 %a0, %a1 + store x86_fp80 %frem, x86_fp80 *%p3 + ret void +} + +define void @frem_f128(fp128 %a0, fp128 %a1, fp128 *%p3) nounwind { +; CHECK-LABEL: frem_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: callq fmodl@PLT +; CHECK-NEXT: movaps %xmm0, (%rbx) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem fp128 %a0, %a1 + store fp128 %frem, fp128 *%p3 + ret void +} + +define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, <16 x float> *%p3) nounwind { +; CHECK-LABEL: frem_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $160, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm5, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: movaps %xmm4, %xmm1 +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm4[3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps 
{{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, 48(%rbx) +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 32(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 16(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rbx) +; CHECK-NEXT: addq $160, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <16 x float> %a0, %a1 + store <16 x float> %frem, <16 x float> *%p3 + ret void +} + +define void @frem_v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> *%p3) nounwind { +; CHECK-LABEL: frem_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $96, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: movaps %xmm2, %xmm1 +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm2[3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), 
%xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: 
movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, 16(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rbx) +; CHECK-NEXT: addq $96, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <8 x float> %a0, %a1 + store <8 x float> %frem, <8 x float> *%p3 + ret void +} + +define void @frem_v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> *%p3) nounwind { +; CHECK-LABEL: frem_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $64, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: 
movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, (%rbx) +; CHECK-NEXT: addq $64, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <4 x float> %a0, %a1 + store <4 x float> %frem, <4 x float> *%p3 + ret void +} + +define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, <8 x double> *%p3) nounwind { +; CHECK-LABEL: frem_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $144, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm5, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm4, %xmm1 +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: 
callq fmod@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, 48(%rbx) +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 32(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 16(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rbx) +; CHECK-NEXT: addq $144, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <8 x double> %a0, %a1 + store <8 x double> %frem, <8 x double> *%p3 + ret void +} + +define void @frem_v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> *%p3) nounwind { +; CHECK-LABEL: frem_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $80, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, %xmm1 +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, 16(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rbx) +; CHECK-NEXT: addq $80, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <4 x double> %a0, %a1 + store <4 x double> %frem, <4 x double> *%p3 + ret void +} + +define void @frem_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> *%p3) nounwind { +; CHECK-LABEL: frem_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $48, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmod@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, (%rbx) +; CHECK-NEXT: addq $48, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <2 x double> %a0, %a1 + store <2 x double> %frem, <2 x double> *%p3 + ret void +} + +define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, <32 x half> *%p3) nounwind { +; CHECK-LABEL: frem_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq 
%rbx +; CHECK-NEXT: subq $248, %rsp +; CHECK-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %esi, %r14d +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), 
%eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), 
%eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), 
%eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r13d +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %bx, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl %r12d, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %r14w, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl %r13d, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; 
CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: 
callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq 
__gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte 
Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = 
mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload 
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r14d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r12d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 
{{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r13d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, 62(%r15) +; CHECK-NEXT: movw %bx, 60(%r15) +; CHECK-NEXT: movw %r13w, 58(%r15) +; CHECK-NEXT: movw %r12w, 56(%r15) +; CHECK-NEXT: movw %r14w, 54(%r15) +; CHECK-NEXT: movw %bp, 52(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 50(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 48(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 46(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 44(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload 
+; CHECK-NEXT: movw %ax, 42(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 40(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 38(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 36(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 34(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 32(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 30(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 28(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 26(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 24(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 22(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 20(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 18(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 16(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 14(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 12(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 10(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 8(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw 
%ax, 6(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 4(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 2(%r15) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, (%r15) +; CHECK-NEXT: addq $248, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %frem = frem <32 x half> %a0, %a1 + store <32 x half> %frem, <32 x half> *%p3 + ret void +} + +define void @frem_v16f16(<16 x half> %a0, <16 x half> %a1, <16 x half> *%p3) nounwind { +; CHECK-LABEL: frem_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $120, %rsp +; CHECK-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %esi, %r15d +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; 
CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r13d +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; 
CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %r14w, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %r15w, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl %r12d, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl %r13d, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, 
{{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r13d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = 
mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r12d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r14d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r15d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = 
mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, 30(%rbp) +; CHECK-NEXT: movw %r15w, 28(%rbp) +; CHECK-NEXT: movw %r14w, 26(%rbp) +; CHECK-NEXT: movw %bx, 24(%rbp) +; CHECK-NEXT: movw %r12w, 22(%rbp) +; CHECK-NEXT: movw %r13w, 20(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 18(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 16(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 14(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 12(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 10(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 8(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 6(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 4(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, 2(%rbp) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, (%rbp) +; CHECK-NEXT: addq $120, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %frem = frem <16 x half> %a0, %a1 + store <16 x half> %frem, <16 x half> *%p3 + ret void +} + +define void @frem_v8f16(<8 x half> %a0, <8 x half> %a1, <8 x half> *%p3) nounwind { +; CHECK-LABEL: frem_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: 
subq $56, %rsp +; CHECK-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %esi, %r13d +; CHECK-NEXT: movl %edi, %r15d +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %r15w, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movl %r12d, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %r13w, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; 
CHECK-NEXT: movl %eax, %r12d +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl %bx, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r13d +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r14d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %r15d +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi 
# 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq fmodf@PLT +; CHECK-NEXT: callq __gnu_f2h_ieee@PLT +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: movw %ax, 14(%rcx) +; CHECK-NEXT: movw %bx, 12(%rcx) +; CHECK-NEXT: movw %bp, 10(%rcx) +; CHECK-NEXT: movw %r15w, 8(%rcx) +; CHECK-NEXT: movw %r14w, 6(%rcx) +; CHECK-NEXT: movw %r13w, 4(%rcx) +; CHECK-NEXT: movw %r12w, 2(%rcx) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movw %ax, (%rcx) +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %frem = frem <8 x half> %a0, %a1 + store <8 x half> %frem, <8 x half> *%p3 + ret void +} + +define void @frem_v4f80(<4 x x86_fp80> %a0, <4 x x86_fp80> %a1, <4 x x86_fp80> *%p3) nounwind { +; CHECK-LABEL: frem_v4f80: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $128, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt 
{{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq fmodl@PLT +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq fmodl@PLT +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq fmodl@PLT +; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq fmodl@PLT +; CHECK-NEXT: fstpt 30(%rbx) +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt 20(%rbx) +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt 10(%rbx) +; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload +; CHECK-NEXT: fstpt (%rbx) +; CHECK-NEXT: addq $128, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %frem = frem <4 x x86_fp80> %a0, %a1 + 
store <4 x x86_fp80> %frem, <4 x x86_fp80> *%p3 + ret void +} From 480dcdc8975d895ebb1f35ccf324900c816a0d6d Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 20 May 2022 15:28:18 +0000 Subject: [PATCH 098/908] [ifs] Switch to using OptTable Differential revision: https://reviews.llvm.org/D125658 --- llvm/test/tools/llvm-ifs/help.test | 7 +- llvm/tools/llvm-ifs/CMakeLists.txt | 8 + llvm/tools/llvm-ifs/Opts.td | 37 +++ llvm/tools/llvm-ifs/llvm-ifs.cpp | 379 +++++++++++++++++------------ 4 files changed, 270 insertions(+), 161 deletions(-) create mode 100644 llvm/tools/llvm-ifs/Opts.td diff --git a/llvm/test/tools/llvm-ifs/help.test b/llvm/test/tools/llvm-ifs/help.test index 1cfdbd25cca438..76c15bdf7b6658 100644 --- a/llvm/test/tools/llvm-ifs/help.test +++ b/llvm/test/tools/llvm-ifs/help.test @@ -1,6 +1,5 @@ # RUN: llvm-ifs --help | FileCheck %s --check-prefix HELP --implicit-check-not='{{[Oo]}}ptions:' -# HELP: USAGE -# HELP: Color Options -# HELP: Generic Options -# HELP: Ifs Options +# HELP: OVERVIEW: +# HELP: USAGE: +# HELP: OPTIONS: diff --git a/llvm/tools/llvm-ifs/CMakeLists.txt b/llvm/tools/llvm-ifs/CMakeLists.txt index 5a0cfb91383815..483610dce94a0c 100644 --- a/llvm/tools/llvm-ifs/CMakeLists.txt +++ b/llvm/tools/llvm-ifs/CMakeLists.txt @@ -5,9 +5,17 @@ set(LLVM_LINK_COMPONENTS Support TextAPI ObjectYAML + Option ) +set(LLVM_TARGET_DEFINITIONS Opts.td) +tablegen(LLVM Opts.inc -gen-opt-parser-defs) +add_public_tablegen_target(IFSOptsTableGen) + add_llvm_tool(llvm-ifs ErrorCollector.cpp llvm-ifs.cpp + +DEPENDS + IFSOptsTableGen ) diff --git a/llvm/tools/llvm-ifs/Opts.td b/llvm/tools/llvm-ifs/Opts.td new file mode 100644 index 00000000000000..7288cf0123a726 --- /dev/null +++ b/llvm/tools/llvm-ifs/Opts.td @@ -0,0 +1,37 @@ +include "llvm/Option/OptParser.td" + +class F : Flag<["-"], letter>, HelpText; +class FF : Flag<["--"], name>, HelpText; + +multiclass Eq { + def NAME #_EQ : Joined<["--"], name #"=">, HelpText; + def : Separate<["--"], name>, Alias(NAME 
#_EQ)>; +} + +defm arch : Eq<"arch", "Specify the architecture, e.g. x86_64">; +defm bitwidth : Eq<"bitwidth", "Specify the bit width">; +defm endianness : Eq<"endianness", "Specify the endianness">; +defm exclude : Eq<"exclude", "Remove symbols which match the pattern. Can be specified multiple times">; +def help : FF<"help", "Display this help">; +def : F<"h", "Alias for --help">, Alias; +defm hint_ifs_target : Eq<"hint-ifs-target", "When --output-format is 'IFS', this flag will hint the expected target triple for IFS output">; +defm input : Eq<"input", "input">; +defm input_format : Eq<"input-format", "Specify the input file format">; +defm output : Eq<"output", "Output file **DEPRECATED**">; +def : Separate<["-"], "o">, HelpText<"Alias for --output">, Alias; +defm output_elf : Eq<"output-elf", "Output path for ELF file">; +defm output_format : Eq<"output-format", "Specify the output file format **DEPRECATED**">; +defm output_ifs : Eq<"output-ifs", "Output path for IFS file">; +defm output_tbd : Eq<"output-tbd", "Output path for TBD file">; +defm soname : Eq<"soname", "name">; +def strip_ifs_arch : FF<"strip-ifs-arch", "Strip target architecture information away from IFS output">; +def strip_ifs_bitwidth : FF<"strip-ifs-bitwidth", "Strip target bit width information away from IFS output">; +def strip_ifs_endianness : FF<"strip-ifs-endianness", "Strip target endianness information away from IFS output">; +def strip_ifs_target : FF<"strip-ifs-target", "Strip all target information away from IFS output">; +def strip_needed : FF<"strip-needed", "Strip needed libs from output">; +def strip_size : FF<"strip-size", "Remove object size from the output">; +def strip_undefined : FF<"strip-undefined", "Strip undefined symbols from IFS output">; +defm target : Eq<"target", "Specify the target triple, e.g. 
x86_64-linux-gnu">; +def version : FF<"version", "Display the version">; +def : F<"V", "Alias for --version">, Alias; +def write_if_changed : FF<"write-if-changed", "Write the output file only if it is new or has changed">; diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp index 444a6d2dd58a5f..b1026042e23ec0 100644 --- a/llvm/tools/llvm-ifs/llvm-ifs.cpp +++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp @@ -15,6 +15,9 @@ #include "llvm/InterfaceStub/IFSHandler.h" #include "llvm/InterfaceStub/IFSStub.h" #include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" @@ -46,100 +49,68 @@ const VersionTuple IfsVersionCurrent(3, 0); enum class FileFormat { IFS, ELF, TBD }; } // end anonymous namespace -cl::OptionCategory IfsCategory("Ifs Options"); - -// TODO: Use OptTable for option parsing in the future. -// Command line flags: -cl::list InputFilePaths(cl::Positional, cl::desc("input"), - cl::ZeroOrMore, cl::cat(IfsCategory)); -cl::opt InputFormat( - "input-format", cl::desc("Specify the input file format"), - cl::values(clEnumValN(FileFormat::IFS, "IFS", "Text based ELF stub file"), - clEnumValN(FileFormat::ELF, "ELF", "ELF object file")), - cl::cat(IfsCategory)); -cl::opt OutputFormat( - "output-format", cl::desc("Specify the output file format **DEPRECATED**"), - cl::values(clEnumValN(FileFormat::IFS, "IFS", "Text based ELF stub file"), - clEnumValN(FileFormat::ELF, "ELF", "ELF stub file"), - clEnumValN(FileFormat::TBD, "TBD", "Apple TBD text stub file")), - cl::cat(IfsCategory)); -cl::opt OptArch("arch", - cl::desc("Specify the architecture, e.g. 
x86_64"), - cl::cat(IfsCategory)); -cl::opt - OptBitWidth("bitwidth", cl::desc("Specify the bit width"), - cl::values(clEnumValN(IFSBitWidthType::IFS32, "32", "32 bits"), - clEnumValN(IFSBitWidthType::IFS64, "64", "64 bits")), - cl::cat(IfsCategory)); -cl::opt OptEndianness( - "endianness", cl::desc("Specify the endianness"), - cl::values(clEnumValN(IFSEndiannessType::Little, "little", "Little Endian"), - clEnumValN(IFSEndiannessType::Big, "big", "Big Endian")), - cl::cat(IfsCategory)); -cl::opt OptTargetTriple( - "target", cl::desc("Specify the target triple, e.g. x86_64-linux-gnu"), - cl::cat(IfsCategory)); -cl::opt OptTargetTripleHint( - "hint-ifs-target", - cl::desc("When --output-format is 'IFS', this flag will hint the expected " - "target triple for IFS output"), - cl::cat(IfsCategory)); -cl::opt StripIFSArch( - "strip-ifs-arch", - cl::desc("Strip target architecture information away from IFS output"), - cl::cat(IfsCategory)); -cl::opt StripIFSBitWidth( - "strip-ifs-bitwidth", - cl::desc("Strip target bit width information away from IFS output"), - cl::cat(IfsCategory)); -cl::opt StripIFSEndiannessWidth( - "strip-ifs-endianness", - cl::desc("Strip target endianness information away from IFS output"), - cl::cat(IfsCategory)); -cl::opt StripIFSTarget( - "strip-ifs-target", - cl::desc("Strip all target information away from IFS output"), - cl::cat(IfsCategory)); -cl::opt - StripUndefined("strip-undefined", - cl::desc("Strip undefined symbols from IFS output"), - cl::cat(IfsCategory)); -cl::opt StripNeededLibs("strip-needed", - cl::desc("Strip needed libs from output"), - cl::cat(IfsCategory)); -cl::opt StripSize("strip-size", - cl::desc("Remove object size from the output"), - cl::cat(IfsCategory)); - -cl::list - ExcludeSyms("exclude", - cl::desc("Remove symbols which match the pattern. 
Can be " - "specified multiple times"), - cl::cat(IfsCategory)); - -cl::opt - SoName("soname", - cl::desc("Manually set the DT_SONAME entry of any emitted files"), - cl::value_desc("name"), cl::cat(IfsCategory)); -cl::opt OutputFilePath("output", - cl::desc("Output file **DEPRECATED**"), - cl::cat(IfsCategory)); -cl::alias OutputFilePathA("o", cl::desc("Alias for --output"), - cl::aliasopt(OutputFilePath), cl::cat(IfsCategory)); -cl::opt OutputELFFilePath("output-elf", - cl::desc("Output path for ELF file"), - cl::cat(IfsCategory)); -cl::opt OutputIFSFilePath("output-ifs", - cl::desc("Output path for IFS file"), - cl::cat(IfsCategory)); -cl::opt OutputTBDFilePath("output-tbd", - cl::desc("Output path for TBD file"), - cl::cat(IfsCategory)); - -cl::opt WriteIfChanged( - "write-if-changed", - cl::desc("Write the output file only if it is new or has changed."), - cl::cat(IfsCategory)); +using namespace llvm::opt; +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + OPT_##ID, +#include "Opts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Opts.inc" +#undef PREFIX + +const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS, VALUES}, +#include "Opts.inc" +#undef OPTION +}; + +class IFSOptTable : public opt::OptTable { +public: + IFSOptTable() : OptTable(InfoTable) { setGroupedShortOptions(true); } +}; + +struct DriverConfig { + std::vector InputFilePaths; + + Optional InputFormat; + Optional OutputFormat; + + Optional HintIfsTarget; + Optional OptTargetTriple; + Optional OverrideArch; + Optional OverrideBitWidth; + Optional OverrideEndianness; + + bool StripIfsArch = 
false; + bool StripIfsBitwidth = false; + bool StripIfsEndianness = false; + bool StripIfsTarget = false; + bool StripNeeded = false; + bool StripSize = false; + bool StripUndefined = false; + + std::vector Exclude; + + Optional SoName; + + Optional Output; + Optional OutputElf; + Optional OutputIfs; + Optional OutputTbd; + + bool WriteIfChanged = false; +}; static std::string getTypeName(IFSSymbolType Type) { switch (Type) { @@ -157,7 +128,8 @@ static std::string getTypeName(IFSSymbolType Type) { llvm_unreachable("Unexpected ifs symbol type."); } -static Expected> readInputFile(StringRef FilePath) { +static Expected> +readInputFile(Optional &InputFormat, StringRef FilePath) { // Read in file. ErrorOr> BufOrError = MemoryBuffer::getFileOrSTDIN(FilePath, /*IsText=*/true); @@ -169,10 +141,11 @@ static Expected> readInputFile(StringRef FilePath) { ErrorCollector EC(/*UseFatalErrors=*/false); // First try to read as a binary (fails fast if not binary). - if (InputFormat.getNumOccurrences() == 0 || InputFormat == FileFormat::ELF) { + if (!InputFormat || *InputFormat == FileFormat::ELF) { Expected> StubFromELF = readELFFile(FileReadBuffer->getMemBufferRef()); if (StubFromELF) { + InputFormat = FileFormat::ELF; (*StubFromELF)->IfsVersion = IfsVersionCurrent; return std::move(*StubFromELF); } @@ -180,10 +153,11 @@ static Expected> readInputFile(StringRef FilePath) { } // Fall back to reading as a ifs. 
- if (InputFormat.getNumOccurrences() == 0 || InputFormat == FileFormat::IFS) { + if (!InputFormat || *InputFormat == FileFormat::IFS) { Expected> StubFromIFS = readIFSFromBuffer(FileReadBuffer->getBuffer()); if (StubFromIFS) { + InputFormat = FileFormat::IFS; if ((*StubFromIFS)->IfsVersion > IfsVersionCurrent) EC.addError( createStringError(errc::not_supported, @@ -271,9 +245,14 @@ static void fatalError(Error Err) { exit(1); } +static void fatalError(Twine T) { + WithColor::error() << T.str() << '\n'; + exit(1); +} + /// writeIFS() writes a Text-Based ELF stub to a file using the latest version /// of the YAML parser. -static Error writeIFS(StringRef FilePath, IFSStub &Stub) { +static Error writeIFS(StringRef FilePath, IFSStub &Stub, bool WriteIfChanged) { // Write IFS to memory first. std::string IFSStr; raw_string_ostream OutStr(IFSStr); @@ -285,7 +264,8 @@ static Error writeIFS(StringRef FilePath, IFSStub &Stub) { if (WriteIfChanged) { if (ErrorOr> BufOrError = MemoryBuffer::getFile(FilePath)) { - // Compare IFS output with the existing IFS file. If unchanged, avoid changing the file. + // Compare IFS output with the existing IFS file. If unchanged, avoid + // changing the file. 
if ((*BufOrError)->getBuffer() == IFSStr) return Error::success(); } @@ -300,24 +280,119 @@ static Error writeIFS(StringRef FilePath, IFSStub &Stub) { return Error::success(); } +static DriverConfig parseArgs(int argc, char *const *argv) { + BumpPtrAllocator A; + StringSaver Saver(A); + IFSOptTable Tbl; + StringRef ToolName = argv[0]; + llvm::opt::InputArgList Args = Tbl.parseArgs( + argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { fatalError(Msg); }); + if (Args.hasArg(OPT_help)) { + Tbl.printHelp(llvm::outs(), + (Twine(ToolName) + " [options]") + .str() + .c_str(), + "shared object stubbing tool"); + std::exit(0); + } + if (Args.hasArg(OPT_version)) { + llvm::outs() << ToolName << '\n'; + cl::PrintVersionMessage(); + std::exit(0); + } + + DriverConfig Config; + for (const opt::Arg *A : Args.filtered(OPT_INPUT)) + Config.InputFilePaths.push_back(A->getValue()); + if (const opt::Arg *A = Args.getLastArg(OPT_input_format_EQ)) { + Config.InputFormat = StringSwitch>(A->getValue()) + .Case("IFS", FileFormat::IFS) + .Case("ELF", FileFormat::ELF) + .Default(None); + if (!Config.InputFormat) + fatalError(Twine("invalid argument '") + A->getValue()); + } + + auto OptionNotFound = [ToolName](StringRef FlagName, StringRef OptionName) { + fatalError(Twine(ToolName) + ": for the " + FlagName + + " option: Cannot find option named '" + OptionName + "'!"); + }; + if (const opt::Arg *A = Args.getLastArg(OPT_output_format_EQ)) { + Config.OutputFormat = StringSwitch>(A->getValue()) + .Case("IFS", FileFormat::IFS) + .Case("ELF", FileFormat::ELF) + .Case("TBD", FileFormat::TBD) + .Default(None); + if (!Config.OutputFormat) + OptionNotFound("--output-format", A->getValue()); + } + if (const opt::Arg *A = Args.getLastArg(OPT_arch_EQ)) + Config.OverrideArch = ELF::convertArchNameToEMachine(A->getValue()); + + if (const opt::Arg *A = Args.getLastArg(OPT_bitwidth_EQ)) { + size_t Width; + llvm::StringRef S(A->getValue()); + if (!S.getAsInteger(10, Width) || Width == 64 || Width == 32) 
+ Config.OverrideBitWidth = + Width == 64 ? IFSBitWidthType::IFS64 : IFSBitWidthType::IFS32; + else + OptionNotFound("--bitwidth", A->getValue()); + } + if (const opt::Arg *A = Args.getLastArg(OPT_endianness_EQ)) { + Config.OverrideEndianness = + StringSwitch>(A->getValue()) + .Case("little", IFSEndiannessType::Little) + .Case("big", IFSEndiannessType::Big) + .Default(None); + if (!Config.OverrideEndianness) + OptionNotFound("--endianness", A->getValue()); + } + if (const opt::Arg *A = Args.getLastArg(OPT_target_EQ)) + Config.OptTargetTriple = A->getValue(); + if (const opt::Arg *A = Args.getLastArg(OPT_hint_ifs_target_EQ)) + Config.HintIfsTarget = A->getValue(); + + Config.StripIfsArch = Args.hasArg(OPT_strip_ifs_arch); + Config.StripIfsBitwidth = Args.hasArg(OPT_strip_ifs_bitwidth); + Config.StripIfsEndianness = Args.hasArg(OPT_strip_ifs_endianness); + Config.StripIfsTarget = Args.hasArg(OPT_strip_ifs_target); + Config.StripUndefined = Args.hasArg(OPT_strip_undefined); + Config.StripNeeded = Args.hasArg(OPT_strip_needed); + Config.StripSize = Args.hasArg(OPT_strip_size); + + for (const opt::Arg *A : Args.filtered(OPT_exclude_EQ)) + Config.Exclude.push_back(A->getValue()); + if (const opt::Arg *A = Args.getLastArg(OPT_soname_EQ)) + Config.SoName = A->getValue(); + if (const opt::Arg *A = Args.getLastArg(OPT_output_EQ)) + Config.Output = A->getValue(); + if (const opt::Arg *A = Args.getLastArg(OPT_output_elf_EQ)) + Config.OutputElf = A->getValue(); + if (const opt::Arg *A = Args.getLastArg(OPT_output_ifs_EQ)) + Config.OutputIfs = A->getValue(); + if (const opt::Arg *A = Args.getLastArg(OPT_output_tbd_EQ)) + Config.OutputTbd = A->getValue(); + Config.WriteIfChanged = Args.hasArg(OPT_write_if_changed); + return Config; +} + int main(int argc, char *argv[]) { - // Parse arguments. 
- cl::HideUnrelatedOptions({&IfsCategory, &getColorCategory()}); - cl::ParseCommandLineOptions(argc, argv); + DriverConfig Config = parseArgs(argc, argv); - if (InputFilePaths.empty()) - InputFilePaths.push_back("-"); + if (Config.InputFilePaths.empty()) + Config.InputFilePaths.push_back("-"); // If input files are more than one, they can only be IFS files. - if (InputFilePaths.size() > 1) - InputFormat.setValue(FileFormat::IFS); + if (Config.InputFilePaths.size() > 1) + Config.InputFormat = FileFormat::IFS; // Attempt to merge input. IFSStub Stub; std::map SymbolMap; std::string PreviousInputFilePath; - for (const std::string &InputFilePath : InputFilePaths) { - Expected> StubOrErr = readInputFile(InputFilePath); + for (const std::string &InputFilePath : Config.InputFilePaths) { + Expected> StubOrErr = + readInputFile(Config.InputFormat, InputFilePath); if (!StubOrErr) fatalError(StubOrErr.takeError()); @@ -411,57 +486,45 @@ int main(int argc, char *argv[]) { Stub.Symbols.push_back(Entry.second); // Change SoName before emitting stubs. 
- if (SoName.getNumOccurrences() == 1) - Stub.SoName = SoName; - Optional OverrideArch; - Optional OverrideEndianness; - Optional OverrideBitWidth; - Optional OverrideTriple; - if (OptArch.getNumOccurrences() == 1) - OverrideArch = ELF::convertArchNameToEMachine(OptArch.getValue()); - if (OptEndianness.getNumOccurrences() == 1) - OverrideEndianness = OptEndianness.getValue(); - if (OptBitWidth.getNumOccurrences() == 1) - OverrideBitWidth = OptBitWidth.getValue(); - if (OptTargetTriple.getNumOccurrences() == 1) - OverrideTriple = OptTargetTriple.getValue(); - Error OverrideError = overrideIFSTarget( - Stub, OverrideArch, OverrideEndianness, OverrideBitWidth, OverrideTriple); + if (Config.SoName) + Stub.SoName = *Config.SoName; + + Error OverrideError = + overrideIFSTarget(Stub, Config.OverrideArch, Config.OverrideEndianness, + Config.OverrideBitWidth, Config.OptTargetTriple); if (OverrideError) fatalError(std::move(OverrideError)); - if (StripNeededLibs) + if (Config.StripNeeded) Stub.NeededLibs.clear(); - if (Error E = filterIFSSyms(Stub, StripUndefined, ExcludeSyms)) + if (Error E = filterIFSSyms(Stub, Config.StripUndefined, Config.Exclude)) fatalError(std::move(E)); - if (StripSize) + if (Config.StripSize) for (IFSSymbol &Sym : Stub.Symbols) Sym.Size.reset(); - if (OutputELFFilePath.getNumOccurrences() == 0 && - OutputIFSFilePath.getNumOccurrences() == 0 && - OutputTBDFilePath.getNumOccurrences() == 0) { - if (OutputFormat.getNumOccurrences() == 0) { + if (!Config.OutputElf && !Config.OutputIfs && !Config.OutputTbd) { + if (!Config.OutputFormat) { WithColor::error() << "at least one output should be specified."; return -1; } - } else if (OutputFormat.getNumOccurrences() == 1) { + } else if (Config.OutputFormat) { WithColor::error() << "'--output-format' cannot be used with " "'--output-{FILE_FORMAT}' options at the same time"; return -1; } - if (OutputFormat.getNumOccurrences() == 1) { + if (Config.OutputFormat) { // TODO: Remove OutputFormat flag in the next 
revision. WithColor::warning() << "--output-format option is deprecated, please use " "--output-{FILE_FORMAT} options instead\n"; - switch (OutputFormat.getValue()) { + switch (Config.OutputFormat.getValue()) { case FileFormat::TBD: { std::error_code SysErr; - raw_fd_ostream Out(OutputFilePath, SysErr); + raw_fd_ostream Out(*Config.Output, SysErr); if (SysErr) { - WithColor::error() << "Couldn't open " << OutputFilePath + WithColor::error() << "Couldn't open " << *Config.Output << " for writing.\n"; return -1; } @@ -475,10 +538,10 @@ int main(int argc, char *argv[]) { } case FileFormat::IFS: { Stub.IfsVersion = IfsVersionCurrent; - if (InputFormat.getValue() == FileFormat::ELF && - OptTargetTripleHint.getNumOccurrences() == 1) { + if (Config.InputFormat.getValue() == FileFormat::ELF && + Config.HintIfsTarget) { std::error_code HintEC(1, std::generic_category()); - IFSTarget HintTarget = parseTriple(OptTargetTripleHint); + IFSTarget HintTarget = parseTriple(*Config.HintIfsTarget); if (Stub.Target.Arch.getValue() != HintTarget.Arch.getValue()) fatalError(make_error( "Triple hint does not match the actual architecture", HintEC)); @@ -491,12 +554,13 @@ int main(int argc, char *argv[]) { "Triple hint does not match the actual bit width", HintEC)); stripIFSTarget(Stub, true, false, false, false); - Stub.Target.Triple = OptTargetTripleHint.getValue(); + Stub.Target.Triple = Config.HintIfsTarget.getValue(); } else { - stripIFSTarget(Stub, StripIFSTarget, StripIFSArch, - StripIFSEndiannessWidth, StripIFSBitWidth); + stripIFSTarget(Stub, Config.StripIfsTarget, Config.StripIfsArch, + Config.StripIfsEndianness, Config.StripIfsBitwidth); } - Error IFSWriteError = writeIFS(OutputFilePath.getValue(), Stub); + Error IFSWriteError = + writeIFS(Config.Output.getValue(), Stub, Config.WriteIfChanged); if (IFSWriteError) fatalError(std::move(IFSWriteError)); break; @@ -506,7 +570,7 @@ int main(int argc, char *argv[]) { if (TargetError) fatalError(std::move(TargetError)); Error 
BinaryWriteError = - writeBinaryStub(OutputFilePath, Stub, WriteIfChanged); + writeBinaryStub(*Config.Output, Stub, Config.WriteIfChanged); if (BinaryWriteError) fatalError(std::move(BinaryWriteError)); break; @@ -514,21 +578,21 @@ int main(int argc, char *argv[]) { } } else { // Check if output path for individual format. - if (OutputELFFilePath.getNumOccurrences() == 1) { + if (Config.OutputElf) { Error TargetError = validateIFSTarget(Stub, true); if (TargetError) fatalError(std::move(TargetError)); Error BinaryWriteError = - writeBinaryStub(OutputELFFilePath, Stub, WriteIfChanged); + writeBinaryStub(*Config.OutputElf, Stub, Config.WriteIfChanged); if (BinaryWriteError) fatalError(std::move(BinaryWriteError)); } - if (OutputIFSFilePath.getNumOccurrences() == 1) { + if (Config.OutputIfs) { Stub.IfsVersion = IfsVersionCurrent; - if (InputFormat.getValue() == FileFormat::ELF && - OptTargetTripleHint.getNumOccurrences() == 1) { + if (Config.InputFormat.getValue() == FileFormat::ELF && + Config.HintIfsTarget) { std::error_code HintEC(1, std::generic_category()); - IFSTarget HintTarget = parseTriple(OptTargetTripleHint); + IFSTarget HintTarget = parseTriple(*Config.HintIfsTarget); if (Stub.Target.Arch.getValue() != HintTarget.Arch.getValue()) fatalError(make_error( "Triple hint does not match the actual architecture", HintEC)); @@ -541,20 +605,21 @@ int main(int argc, char *argv[]) { "Triple hint does not match the actual bit width", HintEC)); stripIFSTarget(Stub, true, false, false, false); - Stub.Target.Triple = OptTargetTripleHint.getValue(); + Stub.Target.Triple = Config.HintIfsTarget.getValue(); } else { - stripIFSTarget(Stub, StripIFSTarget, StripIFSArch, - StripIFSEndiannessWidth, StripIFSBitWidth); + stripIFSTarget(Stub, Config.StripIfsTarget, Config.StripIfsArch, + Config.StripIfsEndianness, Config.StripIfsBitwidth); } - Error IFSWriteError = writeIFS(OutputIFSFilePath.getValue(), Stub); + Error IFSWriteError = + writeIFS(Config.OutputIfs.getValue(), Stub, 
Config.WriteIfChanged); if (IFSWriteError) fatalError(std::move(IFSWriteError)); } - if (OutputTBDFilePath.getNumOccurrences() == 1) { + if (Config.OutputTbd) { std::error_code SysErr; - raw_fd_ostream Out(OutputTBDFilePath, SysErr); + raw_fd_ostream Out(*Config.OutputTbd, SysErr); if (SysErr) { - WithColor::error() << "Couldn't open " << OutputTBDFilePath + WithColor::error() << "Couldn't open " << *Config.OutputTbd << " for writing.\n"; return -1; } From 1ca772ed951e6412ef006459b56ae9a21691a97c Mon Sep 17 00:00:00 2001 From: Christopher Bate Date: Tue, 17 May 2022 17:54:29 -0600 Subject: [PATCH 099/908] [MLIR][GPU] Add NvGpu mma.sync path to the VectorToGPU pass This change adds the option to lower to NvGpu dialect ops during the VectorToGPU conversion pass. Because this transformation reuses existing VectorToGPU logic, a separate VectorToNvGpu conversion pass is not created. The option `use-nvgpu` is added to the VectorToGPU pass. When this is true, the pass will attempt to convert slices rooted at `vector.contract` operations into `nvgpu.mma.sync` ops, and `vector.transfer_read` ops are converted to either `nvgpu.ldmatrix` or one or more `vector.load` operations. The specific data loaded will depend on the thread id within a subgroup (warp). These index calculations depend on data type and shape of the MMA op according to the downstream PTX specification. The code for supporting these details is separated into `NvGpuSupport.cpp|h`. 
Differential Revision: https://reviews.llvm.org/D122940 --- mlir/include/mlir/Conversion/Passes.td | 9 +- .../mlir/Conversion/VectorToGPU/VectorToGPU.h | 15 +- mlir/lib/Conversion/PassDetail.h | 4 + .../lib/Conversion/VectorToGPU/CMakeLists.txt | 1 + .../Conversion/VectorToGPU/NvGpuSupport.cpp | 327 ++++++++++++++ .../lib/Conversion/VectorToGPU/NvGpuSupport.h | 100 +++++ .../Conversion/VectorToGPU/VectorToGPU.cpp | 404 +++++++++++++++++- .../vector-to-mma-ops-mma-sync.mlir | 349 +++++++++++++++ 8 files changed, 1181 insertions(+), 28 deletions(-) create mode 100644 mlir/lib/Conversion/VectorToGPU/NvGpuSupport.cpp create mode 100644 mlir/lib/Conversion/VectorToGPU/NvGpuSupport.h create mode 100644 mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 41e7b29f15d0d4..6d9863e7234879 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -851,8 +851,13 @@ def ConvertVectorToGPU : Pass<"convert-vector-to-gpu"> { "dialect"; let constructor = "mlir::createConvertVectorToGPUPass()"; let dependentDialects = [ - "memref::MemRefDialect", - "gpu::GPUDialect" + "memref::MemRefDialect", "gpu::GPUDialect", "AffineDialect", + "vector::VectorDialect", "nvgpu::NVGPUDialect" + ]; + + let options = [ + Option<"useNvGpu", "use-nvgpu", "bool", /*default=*/"false", + "convert to NvGPU ops instead of GPU dialect ops"> ]; } diff --git a/mlir/include/mlir/Conversion/VectorToGPU/VectorToGPU.h b/mlir/include/mlir/Conversion/VectorToGPU/VectorToGPU.h index 266fa0eac4c481..1ba5b3f90d9a8c 100644 --- a/mlir/include/mlir/Conversion/VectorToGPU/VectorToGPU.h +++ b/mlir/include/mlir/Conversion/VectorToGPU/VectorToGPU.h @@ -17,16 +17,25 @@ class Pass; class RewritePatternSet; /// Patterns to transform vector ops into a canonical form to convert to MMA -/// matrix operations. 
-void populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns); +/// matrix operations. If `useNvGpu` is true, then the populated patterns +/// will prepare for conversion to `nvgpu` mma operations rather than the `gpu` +/// dialect WMMA operations. +void populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns, + bool useNvGpu = false); /// Convert vector ops to MMA matrix operations nested under `rootOp`. This will /// convert slice of operations that can be legally converted to MMA operations. /// The rest of the vector operations are left untouched. void convertVectorToMMAOps(Operation *rootOp); +/// Convert vector ops nested under `rootOp` to vector and GPU operations +/// compatible with the `nvvm.mma.sync` lowering path. This will convert a slice +/// of operations that can be legally lowered on this path while the rest of +/// the vector operations are left untouched. +LogicalResult convertVectorToNVVMCompatibleMMASync(Operation *rootOp); + /// Convert from vector to GPU ops. 
-std::unique_ptr createConvertVectorToGPUPass(); +std::unique_ptr createConvertVectorToGPUPass(bool useNvGpu = false); } // namespace mlir diff --git a/mlir/lib/Conversion/PassDetail.h b/mlir/lib/Conversion/PassDetail.h index e05004061dd4dc..530e156024fdc9 100644 --- a/mlir/lib/Conversion/PassDetail.h +++ b/mlir/lib/Conversion/PassDetail.h @@ -55,6 +55,10 @@ namespace LLVM { class LLVMDialect; } // namespace LLVM +namespace nvgpu { +class NVGPUDialect; +} + namespace NVVM { class NVVMDialect; } // namespace NVVM diff --git a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt index 06758c5fe12663..778f2c42eebe5f 100644 --- a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_conversion_library(MLIRVectorToGPU VectorToGPU.cpp + NvGpuSupport.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/VectorToGPU diff --git a/mlir/lib/Conversion/VectorToGPU/NvGpuSupport.cpp b/mlir/lib/Conversion/VectorToGPU/NvGpuSupport.cpp new file mode 100644 index 00000000000000..a2820c3e88f8c4 --- /dev/null +++ b/mlir/lib/Conversion/VectorToGPU/NvGpuSupport.cpp @@ -0,0 +1,327 @@ +//===- NvGpuSupport.cpp - MLIR Vector to GPU lowering support --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides utilities to assist in the lowering of Vector operations +// to NvGPU dialect MMA operations. 
+// +//===----------------------------------------------------------------------===// + +#include "NvGpuSupport.h" +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/NVGPU/NVGPUDialect.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" + +namespace mlir { +namespace nvgpu { +namespace { + +/// There are always 4 threads per [128|256|512] bit row. +constexpr int64_t kThreadsPerRow = 4; + +constexpr int64_t kNumRowsPerTile = 8; + +bool isAccumulatorOrResult(MatMulOperandRole operandType) { + return operandType == MatMulOperandRole::C; +} + +/// Returns the number of registers which compose a matrix fragment held by a +/// single thread. +int64_t inferNumRegistersPerMatrixFragment(const WarpMatrixInfo &type) { + int64_t lineSize = inferTileWidthInBits(type); + auto shape = type.vectorType.getShape(); + return (shape[0] / kNumRowsPerTile) * + (shape[1] * type.vectorType.getElementType().getIntOrFloatBitWidth()) / + lineSize; +} + +/// Returns the number of 8 x [128|256|512] bit tiles that compose the given +/// operand shape. +std::array getTileShape(ArrayRef operandShape, + Type elementType, int64_t lineSizeBits) { + // For each 8x128bit square, a thread is responsible for one 32bit register. + return {operandShape[0] / kNumRowsPerTile, + (operandShape[1] * elementType.getIntOrFloatBitWidth()) / + lineSizeBits}; +} + +} // namespace + +FailureOr getWarpMatrixInfo(Operation *op) { + WarpMatrixInfo info; + + // Determine the vector type. + if (vector::TransferWriteOp writeOp = dyn_cast(op)) { + info.vectorType = writeOp.getVectorType(); + } else if (isa(op)) { + info.vectorType = op->getResult(0).getType().cast(); + } else { + return op->emitError() + << "unhandled operation type in nvgpu.mma.sync conversion path"; + } + + // Determine the operand role. We assume it is an accumulator/result unless it + // is directly consumed by a `vector.contract` op. 
+ info.operandRole = MatMulOperandRole::C; + for (Operation *user : op->getUsers()) { + auto contract = dyn_cast(user); + if (!contract) + continue; + if (contract.getLhs() == op->getResult(0)) { + info.operandRole = MatMulOperandRole::A; + break; + } + if (contract.getRhs() == op->getResult(0)) { + info.operandRole = MatMulOperandRole::B; + break; + } + } + return info; +} + +int64_t inferTileWidthInBits(const WarpMatrixInfo &type) { + bool isAcc = isAccumulatorOrResult(type.operandRole); + Type elType = type.vectorType.getElementType(); + if (isAcc && elType.getIntOrFloatBitWidth() == 32) { + return 256; + } + if (elType.getIntOrFloatBitWidth() == 64) { + return isAcc ? 512 : 256; + } + return 128; +} + +FailureOr +getMmaSyncRegisterType(const WarpMatrixInfo &type) { + MLIRContext *ctx = type.vectorType.getContext(); + const bool isAccum = isAccumulatorOrResult(type.operandRole); + + Type elType = type.vectorType.getElementType(); + if (elType.isF16()) { + return FragmentElementInfo{ + LLVM::getFixedVectorType(Float16Type::get(ctx), 2), 2, 32, + inferNumRegistersPerMatrixFragment(type)}; + } + + // f64 operand + Type f64Ty = Float64Type::get(ctx); + if (elType.isF64()) { + return isAccum + ? FragmentElementInfo{LLVM::getFixedVectorType(f64Ty, 2), 2, 128, + inferNumRegistersPerMatrixFragment(type)} + : FragmentElementInfo{f64Ty, 1, 64, + inferNumRegistersPerMatrixFragment(type)}; + } + + // int8 operand + if (elType.isInteger(8)) { + return FragmentElementInfo{ + LLVM::getFixedVectorType(IntegerType::get(ctx, 8), 4), 4, 32, + inferNumRegistersPerMatrixFragment(type)}; + } + // Integer 32bit acc operands + if (elType.isInteger(32)) { + return FragmentElementInfo{ + LLVM::getFixedVectorType(IntegerType::get(ctx, 32), 2), 2, 64, + inferNumRegistersPerMatrixFragment(type)}; + } + + // Floating point 32bit operands + if (elType.isF32()) { + Type f32Ty = Float32Type::get(ctx); + return isAccum + ? 
FragmentElementInfo{LLVM::getFixedVectorType(f32Ty, 2), 2, 64, + inferNumRegistersPerMatrixFragment(type)} + : FragmentElementInfo{f32Ty, 1, 32, + inferNumRegistersPerMatrixFragment(type)}; + } + return failure(); +} + +static AffineMap getRegisterIndexToTileOffsetMap(int64_t lineSize, + Type elementType, + ArrayRef operandShape, + bool isAccumulator, + int64_t elementsPerRegister, + AffineExpr logicalValueId) { + const int64_t elementsPerLine = + lineSize / elementType.getIntOrFloatBitWidth(); + const std::array num8x128bTiles = + getTileShape(operandShape, elementType, lineSize); + AffineExpr registerIdx = logicalValueId.floorDiv(elementsPerRegister); + return AffineMap::get( + 2, 0, + {(registerIdx % num8x128bTiles[0]) * 8, + (registerIdx.floorDiv(num8x128bTiles[0])) * elementsPerLine}, + elementType.getContext()); +} + +FailureOr +getLaneIdAndValueIdToOperandCoord(Location loc, OpBuilder &builder, + const WarpMatrixInfo &fragmentType) { + Type elementType = fragmentType.vectorType.getElementType(); + ArrayRef operandShape = fragmentType.vectorType.getShape(); + FailureOr regInfo = + getMmaSyncRegisterType(fragmentType); + if (failed(regInfo)) + return failure(); + + const int64_t elementBitWidth = elementType.getIntOrFloatBitWidth(); + const int64_t elementsPerRegister = + regInfo->registerWidthBits / elementBitWidth; + const int64_t lineSize = inferTileWidthInBits(fragmentType); + + AffineExpr laneId, logicalValueIdDim; + bindDims(builder.getContext(), laneId, logicalValueIdDim); + + // Determine what register logicalValueId corresponds to. Use that as a + // linear index into the coordinate mapping `index -> (tile row, tile col)`. 
+ AffineMap registerIndexToTileCoord = getRegisterIndexToTileOffsetMap( + lineSize, elementType, operandShape, + isAccumulatorOrResult(fragmentType.operandRole), elementsPerRegister, + logicalValueIdDim); + + auto makeMap = [&](ArrayRef dimExprs) -> AffineMap { + return AffineMap::get(2, 0, dimExprs, builder.getContext()); + }; + + auto tileRow = registerIndexToTileCoord.getResult(0); + auto tileCol = registerIndexToTileCoord.getResult(1); + return makeMap({tileRow + laneId.floorDiv(kThreadsPerRow), + tileCol + (laneId % kThreadsPerRow) * elementsPerRegister + + (logicalValueIdDim % elementsPerRegister)}); +} + +FailureOr getLdMatrixParams(const WarpMatrixInfo &type, + bool transpose) { + LdMatrixParams params; + Type elType = type.vectorType.getElementType(); + params.fragmentType = type.vectorType; + if (type.operandRole == MatMulOperandRole::A || + type.operandRole == MatMulOperandRole::C) { + params.targetLayout = NVVM::MMALayout::row; + } else { + params.targetLayout = NVVM::MMALayout::col; + } + ArrayRef shape = type.vectorType.getShape(); + params.contiguousDimType = + transpose ? IteratorType::Parallel : IteratorType::Reduction; + + if (params.targetLayout == NVVM::MMALayout::row) { + params.numTiles = (shape[0] / kNumRowsPerTile) * + ((shape[1] * elType.getIntOrFloatBitWidth()) / 128); + } else { + params.numTiles = (shape[1] / kNumRowsPerTile) * + ((shape[0] * elType.getIntOrFloatBitWidth()) / 128); + } + + if (params.numTiles == 0) + return failure(); + + return params; +} + +FailureOr +getLaneIdToLdMatrixMatrixCoord(Location loc, OpBuilder &builder, + const LdMatrixParams &params) { + // One thread per 128b row.
+ const int64_t kNumThreadsPerTile = kNumRowsPerTile; + const int bitsPerElement = static_cast( + params.fragmentType.getElementType().getIntOrFloatBitWidth()); + const int kElementsPer128b = (128 / bitsPerElement); + ArrayRef operandShape = params.fragmentType.getShape(); + AffineExpr d0 = getAffineDimExpr(0, builder.getContext()); + + auto makeMap = [&](ArrayRef dimExprs) -> AffineMap { + return AffineMap::get(1, 0, dimExprs, builder.getContext()); + }; + + // This case corresponds to row-major A|C or col-major B operands. + if (params.contiguousDimType == IteratorType::Reduction) { + AffineExpr row = d0 % (operandShape[0]); + AffineExpr col = d0.floorDiv(operandShape[0]) * (kElementsPer128b); + return makeMap({row, col}); + } + + // This case corresponds to col-major A|C or row-major B operands. The + // operandShape given is already pre-transposed (e.g. 8x16 = KxN). + if (params.contiguousDimType == IteratorType::Parallel) { + const int64_t num8x128bCols = (operandShape[0] * bitsPerElement) / 128; + // Threads are assigned in groups of 8 first across columns, then to + // rows. This is the transpose of what `ldmatrix` expects, but when + // `ldmatrix` gets the `.trans` qualifier, the final effect will be to + // transpose just the blocks. + auto groupIdx = d0.floorDiv(kNumThreadsPerTile); + auto tileCol = (groupIdx % num8x128bCols); + auto tileRow = groupIdx.floorDiv(num8x128bCols); + return makeMap({tileCol * kElementsPer128b, + tileRow * kNumRowsPerTile + (d0 % kNumRowsPerTile)}); + } + return failure(); +} + +LogicalResult +PrepareContractToGPUMMASync::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const { + Location loc = op.getLoc(); + Value lhs = op.getLhs(); + Value rhs = op.getRhs(); + Value res = op.getAcc(); + + // Set up the parallel/reduction structure in the right form.
+ using MapList = ArrayRef>; + auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; + AffineExpr m; + AffineExpr n; + AffineExpr k; + bindDims(rewriter.getContext(), m, n, k); + static constexpr std::array perm = {1, 0}; + auto iteratorTypes = op.getIteratorTypes().getValue(); + SmallVector maps = op.getIndexingMaps(); + if (iteratorTypes.size() != 3) + return failure(); + if (!(isParallelIterator(iteratorTypes[0]) && + isParallelIterator(iteratorTypes[1]) && + isReductionIterator(iteratorTypes[2]))) + return failure(); + + // The canonical form is "TNT" = A row-major, B col-major, C row-major. + const auto canonicalForm = infer({{m, k}, {n, k}, {m, n}}); + if (maps == canonicalForm) { + return failure(); + } + if (maps == infer({{m, k}, {k, n}, {m, n}})) { + rhs = rewriter.create(loc, rhs, perm); + } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { // transpose A only + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { // transpose A and B + rhs = rewriter.create(loc, rhs, perm); + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { + std::swap(rhs, lhs); + rhs = rewriter.create(loc, rhs, perm); + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { + std::swap(rhs, lhs); + rhs = rewriter.create(loc, rhs, perm); + } else if (maps == infer({{m, k}, {k, n}, {n, m}})) { + std::swap(lhs, rhs); + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { + std::swap(lhs, rhs); + } else { + return failure(); + } + rewriter.replaceOpWithNewOp( + op, lhs, rhs, res, rewriter.getAffineMapArrayAttr(canonicalForm), + op.getIteratorTypes()); + return success(); +} + +} // namespace nvgpu +} // namespace mlir \ No newline at end of file diff --git a/mlir/lib/Conversion/VectorToGPU/NvGpuSupport.h b/mlir/lib/Conversion/VectorToGPU/NvGpuSupport.h new file mode 100644 index 00000000000000..9902faa835a6f9 --- /dev/null +++
b/mlir/lib/Conversion/VectorToGPU/NvGpuSupport.h @@ -0,0 +1,100 @@ +//===- NvGpuSupport.h - MLIR Vector to GPU lowering support ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides utilities to assist in the lowering of Vector operations +// to NvGPU dialect MMA operations. +// +//===----------------------------------------------------------------------===// +#ifndef MLIR_CONVERSION_VECTORTOGPU_NVGPUSUPPORT_H +#define MLIR_CONVERSION_VECTORTOGPU_NVGPUSUPPORT_H + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Types.h" + +namespace mlir { +namespace nvgpu { + +enum class MatMulOperandRole : int32_t { A = 0, B, C }; + +/// Collects information about a warp-level matrix operand represented by a +/// VectorType. +struct WarpMatrixInfo { + VectorType vectorType; + MatMulOperandRole operandRole; +}; + +/// Given an op that operates on a VectorType representing a warp-level matrix +/// operand, the function returns a struct containing relevant type information. +FailureOr getWarpMatrixInfo(Operation *op); + +/// Returns the number of bits in a single tile row. It is either 128, 256, or +/// 512 bits depending on the data type and whether the operand is an +/// accumulator/result operand. +int64_t inferTileWidthInBits(const WarpMatrixInfo &type); + +/// Specifies information about the registers which compose a matrix fragment +/// according to the PTX documentation.
+struct FragmentElementInfo { + Type registerLLVMType; + int64_t elementsPerRegister; + int64_t registerWidthBits; + int64_t numRegistersPerFragment; +}; + +/// Returns a FragmentElementInfo struct describing the register types for the +/// given matrix fragment type. +FailureOr +getMmaSyncRegisterType(const WarpMatrixInfo &type); + +/// Returns an AffineMap which maps two dimensions representing (laneId, +/// logicalValueId) to two results representing offsets within a +/// matrix operand. The offsets point to the values the thread is responsible +/// for (AKA the matrix fragment values) during a warp-collective matrix +/// operation. For a visual reference of this LaneId -> (row, col) mapping, +/// please see NVIDIA's PTX documentation: +/// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-mma +FailureOr +getLaneIdAndValueIdToOperandCoord(Location loc, OpBuilder &builder, + const WarpMatrixInfo &fragmentType); + +struct LdMatrixParams { + VectorType fragmentType; + bool isAccum; + int64_t numTiles; + IteratorType contiguousDimType; + NVVM::MMALayout targetLayout; +}; + +FailureOr getLdMatrixParams(const WarpMatrixInfo &type, + bool transpose); +/// Returns an AffineMap which maps a single dimension representing the laneId +/// to two results representing offsets within the matrix operand that should +/// be the pointer locations a thread should pass to the ldmatrix instruction. +FailureOr +getLaneIdToLdMatrixMatrixCoord(Location loc, OpBuilder &builder, + const LdMatrixParams &params); + +// Transform contract into (m, k)x(n, k)x(m, n) form so that it can be converted +// to MMA matmul.
+struct PrepareContractToGPUMMASync + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; +}; + +} // namespace nvgpu +} // namespace mlir + +#endif // MLIR_CONVERSION_VECTORTOGPU_NVGPUSUPPORT_H diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 9ed1c3483c11dc..a6e122c3803155 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -12,6 +12,7 @@ #include +#include "NvGpuSupport.h" #include "mlir/Conversion/VectorToGPU/VectorToGPU.h" #include "../PassDetail.h" @@ -19,6 +20,7 @@ #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/NVGPU/NVGPUDialect.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" @@ -27,11 +29,39 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" +#include "llvm/ADT/TypeSwitch.h" using namespace mlir; +/// For a vector TransferOpType `xferOp`, an empty `indices` vector, and an +/// AffineMap representing offsets to apply to indices, the function fills +/// `indices` with the original indices plus the offsets. The offsets are +/// applied by taking into account the permutation map of the transfer op. If +/// the `offsetMap` has dimension placeholders, those should be provided in +/// `dimValues`. 
+template +static void getXferIndices(OpBuilder &b, TransferOpType xferOp, + AffineMap offsetMap, ArrayRef dimValues, + SmallVector &indices) { + indices.append(xferOp.getIndices().begin(), xferOp.getIndices().end()); + Location loc = xferOp.getLoc(); + unsigned offsetsIdx = 0; + for (auto expr : xferOp.getPermutationMap().getResults()) { + if (auto dim = expr.template dyn_cast()) { + Value prevIdx = indices[dim.getPosition()]; + SmallVector dims(dimValues.begin(), dimValues.end()); + dims.push_back(prevIdx); + AffineExpr d0 = b.getAffineDimExpr(offsetMap.getNumDims()); + indices[dim.getPosition()] = makeComposedAffineApply( + b, loc, d0 + offsetMap.getResult(offsetsIdx++), dims); + continue; + } + } +} + // Return true if the contract op can be convert to MMA matmul. -static bool contractSupportsMMAMatrixType(vector::ContractionOp contract) { +static bool contractSupportsMMAMatrixType(vector::ContractionOp contract, + bool useNvGpu) { if (llvm::size(contract.getMasks()) != 0) return false; @@ -47,7 +77,10 @@ static bool contractSupportsMMAMatrixType(vector::ContractionOp contract) { // The contract needs to represent a matmul to be able to convert to // MMAMatrix matmul. - if (contract.getIndexingMaps() != infer({{m, k}, {k, n}, {m, n}})) + if (!useNvGpu && + contract.getIndexingMaps() != infer({{m, k}, {k, n}, {m, n}})) + return false; + if (useNvGpu && contract.getIndexingMaps() != infer({{m, k}, {n, k}, {m, n}})) return false; return true; @@ -61,7 +94,7 @@ getMemrefConstantHorizontalStride(ShapedType type) { if (!memrefType) return false; // If the memref is 0 or 1D the horizontal stride is 0. - if(memrefType.getRank() < 2) + if (memrefType.getRank() < 2) return 0; int64_t offset = 0; SmallVector strides; @@ -75,7 +108,8 @@ getMemrefConstantHorizontalStride(ShapedType type) { } // Return true if the transfer op can be converted to a MMA matrix load. 
-static bool transferReadSupportsMMAMatrixType(vector::TransferReadOp readOp) { +static bool transferReadSupportsMMAMatrixType(vector::TransferReadOp readOp, + bool useNvGpu) { if (readOp.getMask() || readOp.hasOutOfBoundsDim() || readOp.getVectorType().getRank() != 2) return false; @@ -87,9 +121,14 @@ static bool transferReadSupportsMMAMatrixType(vector::TransferReadOp readOp) { AffineExpr zero = b.getAffineConstantExpr(0); auto broadcastInnerDim = AffineMap::get(map.getNumDims(), 0, {zero, innerDim}, readOp.getContext()); - // TODO: Support transpose once it is added to GPU dialect ops. - // For now we only support (d0, d1) -> (d0, d1) and (d0, d1) -> (0, d1). - return !(!map.isMinorIdentity() && map != broadcastInnerDim); + + if (!useNvGpu) { + // TODO: Support transpose once it is added to GPU dialect ops. + // For now we only support (d0, d1) -> (d0, d1) and (d0, d1) -> (0, d1). + return map.isMinorIdentity() || map == broadcastInnerDim; + } + + return true; } // Return true if the transfer op can be converted to a MMA matrix store. 
@@ -147,15 +186,15 @@ static bool elementwiseSupportsMMAMatrixType(Operation *op) { return convertElementwiseOpToMMA(op).hasValue(); } -static bool supportsMMaMatrixType(Operation *op) { +static bool supportsMMaMatrixType(Operation *op, bool useNvGpu) { if (isa(op)) return true; if (auto transferRead = dyn_cast(op)) - return transferReadSupportsMMAMatrixType(transferRead); + return transferReadSupportsMMAMatrixType(transferRead, useNvGpu); if (auto transferWrite = dyn_cast(op)) return transferWriteSupportsMMAMatrixType(transferWrite); if (auto contract = dyn_cast(op)) - return contractSupportsMMAMatrixType(contract); + return contractSupportsMMAMatrixType(contract, useNvGpu); if (auto constant = dyn_cast(op)) return constantSupportsMMAMatrixType(constant); if (auto broadcast = dyn_cast(op)) @@ -203,7 +242,8 @@ static SetVector getSliceContract(Operation *op, // Analyze slice of operations based on convert op to figure out if the whole // slice can be converted to MMA operations. -static SetVector getOpToConvert(mlir::Operation *op) { +static SetVector getOpToConvert(mlir::Operation *op, + bool useNvGpu) { auto hasVectorDest = [](Operation *op) { return llvm::any_of(op->getResultTypes(), [](Type t) { return t.isa(); }); @@ -221,8 +261,9 @@ static SetVector getOpToConvert(mlir::Operation *op) { // If any instruction cannot use MMA matrix type drop the whole // chain. MMA matrix are stored in an opaque type so they cannot be used // by all operations. 
- if (llvm::any_of(dependentOps, - [](Operation *op) { return !supportsMMaMatrixType(op); })) + if (llvm::any_of(dependentOps, [useNvGpu](Operation *op) { + return !supportsMMaMatrixType(op, useNvGpu); + })) return; opToConvert.insert(dependentOps.begin(), dependentOps.end()); }); @@ -351,7 +392,7 @@ static const char *inferFragType(OpTy op) { static void convertTransferReadOp(vector::TransferReadOp op, llvm::DenseMap &valueMapping) { assert(op.getTransferRank() > 0 && "unexpected 0-d transfer"); - assert(transferReadSupportsMMAMatrixType(op)); + assert(transferReadSupportsMMAMatrixType(op, /*useNvGpu=*/false)); Optional stride = getMemrefConstantHorizontalStride(op.getShapedType()); AffineMap map = op.getPermutationMap(); @@ -386,6 +427,250 @@ static void convertTransferWriteOp(vector::TransferWriteOp op, op.erase(); } +/// Returns the vector type which represents a matrix fragment. +static VectorType +getMmaSyncVectorOperandType(const nvgpu::FragmentElementInfo &regInfo) { + SmallVector shape{regInfo.numRegistersPerFragment, + regInfo.elementsPerRegister}; + Type elType = regInfo.registerLLVMType; + if (auto vecType = elType.dyn_cast()) + elType = vecType.getElementType(); + return VectorType::get(shape, elType); +} + +/// Convert a splat ConstantOp to a constant of the mma.sync fragment type.
+static LogicalResult +convertConstantOpMmaSync(arith::ConstantOp op, + llvm::DenseMap &valueMapping) { + OpBuilder b(op); + FailureOr warpMatrixInfo = + nvgpu::getWarpMatrixInfo(op); + if (failed(warpMatrixInfo)) + return failure(); + + FailureOr regInfo = + nvgpu::getMmaSyncRegisterType(*warpMatrixInfo); + if (failed(regInfo)) + return failure(); + + VectorType vectorType = getMmaSyncVectorOperandType(*regInfo); + auto dense = op.getValue().dyn_cast(); + if (!dense) + return failure(); + Value result = b.create( + op.getLoc(), vectorType, + DenseElementsAttr::get(vectorType, dense.getSplatValue())); + valueMapping[op.getResult()] = result; + return success(); +} + +static LogicalResult +creatLdMatrixCompatibleLoads(vector::TransferReadOp op, OpBuilder &builder, + llvm::DenseMap &valueMapping) { + Location loc = op->getLoc(); + + FailureOr warpMatrixInfo = + nvgpu::getWarpMatrixInfo(op); + if (failed(warpMatrixInfo)) + return failure(); + + FailureOr regInfo = + nvgpu::getMmaSyncRegisterType(*warpMatrixInfo); + if (failed(regInfo)) + return failure(); + + FailureOr params = nvgpu::getLdMatrixParams( + *warpMatrixInfo, + /*transpose=*/!op.getPermutationMap().isMinorIdentity()); + if (failed(params)) { + return op->emitError() + << "failed to convert vector.transfer_read to ldmatrix; this op " + "likely " + "should not be converted to a nvgpu.ldmatrix call."; + } + + // Adjust the load offset. 
+ auto laneId = builder.create(loc); + FailureOr offsets = + nvgpu::getLaneIdToLdMatrixMatrixCoord(loc, builder, *params); + if (failed(offsets)) + return failure(); + + VectorType vectorType = getMmaSyncVectorOperandType(*regInfo); + + SmallVector indices; + getXferIndices(builder, op, *offsets, {laneId}, + indices); + nvgpu::LdMatrixOp newOp = builder.create( + loc, vectorType, op.getSource(), indices, + !op.getPermutationMap().isMinorIdentity(), params->numTiles); + valueMapping[op] = newOp->getResult(0); + return success(); +} + +static LogicalResult +createNonLdMatrixLoads(vector::TransferReadOp op, OpBuilder &builder, + llvm::DenseMap &valueMapping) { + Location loc = op.getLoc(); + FailureOr warpMatrixInfo = + nvgpu::getWarpMatrixInfo(op); + if (failed(warpMatrixInfo)) + return failure(); + FailureOr regInfo = + nvgpu::getMmaSyncRegisterType(*warpMatrixInfo); + if (failed(regInfo)) { + op->emitError() << "Failed to deduce register fragment type during " + "conversion to distributed non-ldmatrix compatible load"; + return failure(); + } + + NVVM::MMALayout targetLayout = + warpMatrixInfo->operandRole == nvgpu::MatMulOperandRole::B + ? NVVM::MMALayout::col + : NVVM::MMALayout::row; + + Value laneId = builder.create(loc); + SmallVector elements; + + // This is the individual element type. + Type loadedElType = regInfo->registerLLVMType; + VectorType vectorType = getMmaSyncVectorOperandType(*regInfo); + + Value fill = builder.create( + op.getLoc(), vectorType.getElementType(), + builder.getZeroAttr(vectorType.getElementType())); + Value result = builder.create(op.getLoc(), fill, vectorType); + + bool isTransposeLoad = !op.getPermutationMap().isMinorIdentity(); + + // Vectorized loads. 
+ if (!isTransposeLoad && targetLayout == NVVM::MMALayout::row) { + if (!loadedElType.isa()) { + loadedElType = VectorType::get({1}, loadedElType); + } + + for (int i = 0; i < vectorType.getShape()[0]; i++) { + FailureOr coords = nvgpu::getLaneIdAndValueIdToOperandCoord( + op.getLoc(), builder, *warpMatrixInfo); + if (failed(coords)) + return failure(); + Value logicalValueId = builder.create( + loc, builder.getIndexType(), + builder.getIndexAttr(i * regInfo->elementsPerRegister)); + SmallVector newIndices; + getXferIndices( + builder, op, *coords, {laneId, logicalValueId}, newIndices); + + Value el = builder.create(loc, loadedElType, + op.getSource(), newIndices); + result = builder.create(loc, el, result, + builder.getI64ArrayAttr(i)); + } + } else if (isTransposeLoad && targetLayout == NVVM::MMALayout::col) { + if (auto vecType = loadedElType.dyn_cast()) { + loadedElType = vecType.getElementType(); + } + // Load each element individually. + for (int i = 0; i < vectorType.getShape()[0]; i++) { + for (unsigned innerIdx = 0; innerIdx < vectorType.getShape()[1]; + innerIdx++) { + + Value logicalValueId = builder.create( + loc, builder.getIndexType(), + builder.getIndexAttr(i * regInfo->elementsPerRegister + innerIdx)); + FailureOr coords = nvgpu::getLaneIdAndValueIdToOperandCoord( + op.getLoc(), builder, *warpMatrixInfo); + if (failed(coords)) + return failure(); + + SmallVector newIndices; + getXferIndices( + builder, op, *coords, {laneId, logicalValueId}, newIndices); + Value el = builder.create(op.getLoc(), loadedElType, + op.getSource(), newIndices); + result = builder.create( + op.getLoc(), el, result, builder.getI64ArrayAttr({i, innerIdx})); + } + } + } else { + return failure(); + } + + valueMapping[op.getResult()] = result; + return success(); +} + +/// Converts a `vector.transfer_read` operation directly to either a +/// `vector.load` or a `nvgpu.ldmatrix` operation. This function should only be +/// used when converting to `nvgpu.mma.sync` operations. 
+static LogicalResult +convertTransferReadToLoads(vector::TransferReadOp op, + llvm::DenseMap &valueMapping) { + OpBuilder b(op); + + FailureOr warpMatrixInfo = + nvgpu::getWarpMatrixInfo(op); + if (failed(warpMatrixInfo)) + return failure(); + + bool isLdMatrixCompatible = + op.getSource().getType().cast().getMemorySpaceAsInt() == 3 && + nvgpu::inferTileWidthInBits(*warpMatrixInfo) == 128; + + VectorType vecTy = op.getVectorType(); + int64_t bitWidth = vecTy.getElementType().getIntOrFloatBitWidth(); + + // When we are transposing the B operand, ldmatrix will only work if we have + // at least 8 rows to read and the width to read for the transpose is 128 + // bits. + if (!op.getPermutationMap().isMinorIdentity() && + (vecTy.getDimSize(1) < 8 || vecTy.getDimSize(0) * bitWidth < 128)) + isLdMatrixCompatible = false; + + if (!isLdMatrixCompatible) + return createNonLdMatrixLoads(op, b, valueMapping); + + return creatLdMatrixCompatibleLoads(op, b, valueMapping); +} + +static LogicalResult +convertTransferWriteToStores(vector::TransferWriteOp op, + llvm::DenseMap &valueMapping) { + OpBuilder b(op); + Location loc = op->getLoc(); + Value matrix = valueMapping.find(op.getVector())->second; + + FailureOr warpMatrixInfo = + nvgpu::getWarpMatrixInfo(op); + if (failed(warpMatrixInfo)) + return failure(); + FailureOr regInfo = + nvgpu::getMmaSyncRegisterType(*warpMatrixInfo); + if (failed(regInfo)) + return failure(); + + VectorType vectorType = getMmaSyncVectorOperandType(*regInfo); + Value laneId = b.create(loc); + + for (unsigned i = 0; i < vectorType.getShape()[0]; i++) { + Value logicalValueId = b.create( + loc, b.getIndexType(), + b.getIndexAttr(i * regInfo->elementsPerRegister)); + FailureOr coords = nvgpu::getLaneIdAndValueIdToOperandCoord( + op.getLoc(), b, *warpMatrixInfo); + if (failed(coords)) + return failure(); + + Value el = b.create(loc, matrix, ArrayRef{i}); + SmallVector newIndices; + getXferIndices( + b, op, *coords, {laneId, logicalValueId}, newIndices); + 
b.create(loc, el, op.getSource(), newIndices); + } + op->erase(); + return success(); +} + static void convertContractOp(vector::ContractionOp op, llvm::DenseMap &valueMapping) { OpBuilder b(op); @@ -397,6 +682,22 @@ static void convertContractOp(vector::ContractionOp op, valueMapping[op.getResult()] = matmul; } +static LogicalResult +convertContractOpToMmaSync(vector::ContractionOp op, + llvm::DenseMap &valueMapping) { + OpBuilder b(op); + Value opA = valueMapping.find(op.getLhs())->second; + Value opB = valueMapping.find(op.getRhs())->second; + Value opC = valueMapping.find(op.getAcc())->second; + int64_t m = op.getLhs().getType().cast().getShape()[0]; + int64_t n = op.getRhs().getType().cast().getShape()[0]; + int64_t k = op.getLhs().getType().cast().getShape()[1]; + Value matmul = b.create( + op.getLoc(), opC.getType(), opA, opB, opC, b.getI64ArrayAttr({m, n, k})); + valueMapping[op.getResult()] = matmul; + return success(); +} + /// Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. 
static void convertConstantOp(arith::ConstantOp op, llvm::DenseMap &valueMapping) { @@ -509,13 +810,20 @@ static void convertElementwiseOp(Operation *op, gpu::MMAElementwiseOp opType, valueMapping[op->getResult(0)] = newOp; } -void mlir::populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns) { - patterns.add( - patterns.getContext()); +void mlir::populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns, + bool useNvGpu) { + if (!useNvGpu) { + patterns.add( + patterns.getContext()); + return; + } + patterns + .add( + patterns.getContext()); } void mlir::convertVectorToMMAOps(Operation *rootOp) { - SetVector ops = getOpToConvert(rootOp); + SetVector ops = getOpToConvert(rootOp, /*useNvGpu=*/false); llvm::DenseMap valueMapping; for (Operation *op : ops) { if (auto transferRead = dyn_cast(op)) { @@ -538,21 +846,71 @@ void mlir::convertVectorToMMAOps(Operation *rootOp) { } } +LogicalResult mlir::convertVectorToNVVMCompatibleMMASync(Operation *rootOp) { + SetVector ops = getOpToConvert(rootOp, /*useNvGpu=*/true); + llvm::DenseMap valueMapping; + for (Operation *op : ops) { + if (llvm::TypeSwitch(op) + .Case([&](vector::TransferReadOp transferReadOp) { + return convertTransferReadToLoads(transferReadOp, valueMapping); + }) + .Case([&](vector::TransferWriteOp transferWriteOp) { + return convertTransferWriteToStores(transferWriteOp, + valueMapping); + }) + .Case([&](vector::ContractionOp contractionOp) { + return convertContractOpToMmaSync(contractionOp, valueMapping); + }) + .Case([&](scf::ForOp forOp) { + convertForOp(forOp, valueMapping); + return success(); + }) + .Case([&](scf::YieldOp yieldOp) { + convertYieldOp(yieldOp, valueMapping); + return success(); + }) + .Case([&](arith::ConstantOp constOp) { + return convertConstantOpMmaSync(constOp, valueMapping); + }) + .Default([&](Operation *op) { + op->emitError() << "unhandled vector to mma type: " << *op; + return failure(); + }) + .failed()) { + op->emitError() << "Failed to convert op " << *op; + 
return failure(); + } + } + return success(); +} + namespace { struct ConvertVectorToGPUPass : public ConvertVectorToGPUBase { + + explicit ConvertVectorToGPUPass(bool useNvGpu_) { + useNvGpu.setValue(useNvGpu_); + } + void runOnOperation() override { RewritePatternSet patterns(&getContext()); - populatePrepareVectorToMMAPatterns(patterns); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + populatePrepareVectorToMMAPatterns(patterns, useNvGpu.getValue()); + if (failed( + applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) + return signalPassFailure(); + + if (useNvGpu.getValue()) { + if (failed(convertVectorToNVVMCompatibleMMASync(getOperation()))) + return signalPassFailure(); + } - convertVectorToMMAOps(getOperation()); + (void)convertVectorToMMAOps(getOperation()); } }; } // namespace -std::unique_ptr mlir::createConvertVectorToGPUPass() { - return std::make_unique(); +std::unique_ptr mlir::createConvertVectorToGPUPass(bool useNvGpu) { + return std::make_unique(useNvGpu); } diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir new file mode 100644 index 00000000000000..be8d08be06ce6a --- /dev/null +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir @@ -0,0 +1,349 @@ +// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(convert-vector-to-gpu{use-nvgpu=true})" | FileCheck %s + +//######################################################### +// INT8 row-row-row +//######################################################### + +// CHECK-DAG: [[$rowA0_map:#.+]] = affine_map<()[s0] -> (s0 mod 16 + 1)> +// CHECK-DAG: [[$colA0_map:#.+]] = affine_map<()[s0] -> ((s0 floordiv 16) * 16 + 1)> + +// CHECK-DAG: [[$rowB0_map:#.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 4) * 16 + 39)> +// CHECK-DAG: [[$colB0_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4 + 40)> +// CHECK-DAG: [[$rowB1_map:#.+]] = affine_map<()[s0] 
-> (s0 * 4 - (s0 floordiv 4) * 16 + 40)> +// CHECK-DAG: [[$rowB2_map:#.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 4) * 16 + 41)> +// CHECK-DAG: [[$rowB3_map:#.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 4) * 16 + 42)> +// CHECK-DAG: [[$rowB4_map:#.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 4) * 16 + 55)> +// CHECK-DAG: [[$rowB5_map:#.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 4) * 16 + 56)> +// CHECK-DAG: [[$rowB6_map:#.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 4) * 16 + 57)> +// CHECK-DAG: [[$rowB7_map:#.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 4) * 16 + 58)> + +// CHECK-DAG: [[$rowC0_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4 + 49)> +// CHECK-DAG: [[$colC0_map:#.+]] = affine_map<()[s0] -> (s0 * 2 - (s0 floordiv 4) * 8 + 40)> +// CHECK-DAG: [[$rowC8_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4 + 57)> + + +#map0 = affine_map<(d0, d1) -> (d1, d0)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> + +// CHECK-LABEL: func @m16n8k32_int8_row_row_row +func.func @m16n8k32_int8_row_row_row(%arg0: memref<128x128xi8, 3>, %arg1: memref<128x128xi8, 3>, %arg2: memref<128x128xi32>) { + %cst_0 = arith.constant dense<0> : vector<32x8xi8> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c17 = arith.constant 17 : index + %c39 = arith.constant 39 : index + %c40 = arith.constant 40 : index + %c49 = arith.constant 49 : index + %c50 = arith.constant 50 : index + %cst = arith.constant 0 : i8 + %cst0 = arith.constant 0 : i32 + + // Verify that the operand A is distributed to loads correctly. + + // CHECK: [[row:%.+]] = affine.apply [[$rowA0_map]]()[{{%.+}}] + // CHECK: [[col:%.+]] = affine.apply [[$colA0_map]]()[{{%.+}}] + // CHECK: nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32, transpose = false} : memref<128x128xi8, 3> -> vector<4x4xi8> + + // Verify that the operand B is distributed to loads correctly. 
Its elements + // must be loaded in a non-vectorized manner to do the transpose. + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB0_map]]()[{{%.+}}] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB1_map]]()[{{%.+}}] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB2_map]]()[{{%.+}}] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB3_map]]()[{{%.+}}] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB4_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB5_map]]()[{{%.+}}] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB6_map]]()[{{%.+}}] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB7_map]]()[{{%.+}}] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3> + // CHECK-NOT: memref.load %arg1 + + // Verify that the operand C is distributed to loads correctly. 
+ // CHECK: [[row:%.+]] = affine.apply [[$rowC0_map]]()[{{%.+}}] + // CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[{{%.+}}] + // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32> + // CHECK: [[row:%.+]] = affine.apply [[$rowC8_map]]()[{{%.+}}] + // CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[{{%.+}}] + // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32> + // CHECK-NOT: vector.load %arg2{{.*}} + + %A = vector.transfer_read %arg0[%c1, %c1], %cst {in_bounds = [true, true]} : memref<128x128xi8, 3>, vector<16x32xi8> + %B = vector.transfer_read %arg1[%c39, %c40], %cst {in_bounds = [true, true], permutation_map = #map0} : memref<128x128xi8, 3>, vector<8x32xi8> + %C = vector.transfer_read %arg2[%c49, %c40], %cst0 {in_bounds = [true, true]} : memref<128x128xi32>, vector<16x8xi32> + // CHECK: [[d:%.+]] = nvgpu.mma.sync({{.*}}) {mmaShape = [16, 8, 32]} : (vector<4x4xi8>, vector<2x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x32xi8>, vector<8x32xi8> into vector<16x8xi32> + + // CHECK: [[row:%.+]] = affine.apply [[$rowC0_map]]()[{{%.+}}] + // CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[{{%.+}}] + // CHECK: vector.store {{%.+}}, %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32> + // CHECK: [[row:%.+]] = affine.apply [[$rowC8_map]]()[{{%.+}}] + // CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[{{%.+}}] + // CHECK: vector.store {{%.+}}, %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32> + vector.transfer_write %D, %arg2[%c49, %c40] {in_bounds = [true, true]} : vector<16x8xi32>, memref<128x128xi32> + return +} + +// ----- + +//######################################################### +// f64 row-row-row +//######################################################### +// CHECK-DAG: [[$rowA0_map:#.+]] = affine_map<()[s0] -> 
(s0 floordiv 4 + 1)> +// CHECK-DAG: [[$colA0_map:#.+]] = affine_map<()[s0] -> (s0 mod 4 + 1)> + +// CHECK-DAG: [[$rowb0_map:#.+]] = affine_map<()[s0] -> (s0 mod 4 + 39)> +// CHECK-DAG: [[$colb0_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4 + 40)> + +// CHECK-DAG: [[$rowC0_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4 + 49)> +// CHECK-DAG: [[$colC0_map:#.+]] = affine_map<()[s0] -> (s0 * 2 - (s0 floordiv 4) * 8 + 40) + +#map0 = affine_map<(d0, d1) -> (d1, d0)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> + +// CHECK-LABEL: func @m8n8k4_f64_row_row_row +func.func @m8n8k4_f64_row_row_row(%arg0: memref<128x128xf64>, %arg1: memref<128x128xf64>, %arg2: memref<128x128xf64>) { + %cst_0 = arith.constant dense<0.0> : vector<4x8xf64> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c17 = arith.constant 17 : index + %c39 = arith.constant 39 : index + %c40 = arith.constant 40 : index + %c49 = arith.constant 49 : index + %c50 = arith.constant 50 : index + %cst = arith.constant 0.0 : f64 + %cst0 = arith.constant 0.0 : f64 + + // Verify that the operand A is distributed to loads correctly. + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA0_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colA0_map]] + // CHECK: vector.load %arg0[[[row]], [[col]]] : memref<128x128xf64>, vector<1xf64> + + // Verify that the operand B is distributed to loads correctly. Its elements + // must be loaded in a non-vectorized manner to do the transpose. 
+ + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowb0_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colb0_map]] + // CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xf64> + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowC0_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colC0_map]] + // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64> + + %A = vector.transfer_read %arg0[%c1, %c1], %cst {in_bounds = [true, true]} : memref<128x128xf64>, vector<8x4xf64> + %B = vector.transfer_read %arg1[%c39, %c40], %cst {in_bounds = [true, true], permutation_map = #map0} : memref<128x128xf64>, vector<8x4xf64> + %C = vector.transfer_read %arg2[%c49, %c40], %cst0 {in_bounds = [true, true]} : memref<128x128xf64>, vector<8x8xf64> + // CHECK: [[d:%.+]] = nvgpu.mma.sync({{.*}}) {mmaShape = [8, 8, 4]} : (vector<1x1xf64>, vector<1x1xf64>, vector<1x2xf64>) -> vector<1x2xf64> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<8x4xf64>, vector<8x4xf64> into vector<8x8xf64> + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowC0_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colC0_map]] + // CHECK: vector.store {{%.+}}, %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64> + vector.transfer_write %D, %arg2[%c49, %c40] {in_bounds = [true, true]} : vector<8x8xf64>, memref<128x128xf64> + return +} + +// ----- + +//######################################################### +// FP16 row-row-row +//######################################################### + +#map0 = affine_map<(d0, d1) -> (d1, d0)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> + +// CHECK-DAG: [[$rowA_map:#.+]] = affine_map<()[s0] -> (s0 mod 16 + 1)> +// CHECK-DAG: [[$colA_map:#.+]] = affine_map<()[s0] -> ((s0 floordiv 16) * 8 + 3)> + +// CHECK-DAG: [[$rowB_map:#.+]] = 
affine_map<()[s0] -> (s0 + 3)> +// CHECK-DAG: [[$colB_map:#.+]] = affine_map<() -> (3)> + +// CHECK-LABEL: func @m16n8k16_fp16_row_row_row +func.func @m16n8k16_fp16_row_row_row(%arg0: memref<20x20xf16, 3>, %arg1: memref<20x20xf16, 3>, %arg2: memref<20x20xf16, 3>) { + %cst_0 = arith.constant dense<0.000000e+00> : vector<16x8xf16> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %cst = arith.constant 0.000000e+00 : f16 + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]] + // CHECK: nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32, transpose = false} + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB_map]] + // CHECK: nvgpu.ldmatrix %arg1[[[row]], [[col]]] {numTiles = 2 : i32, transpose = true} + %A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf16, 3>, vector<16x16xf16> + %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<20x20xf16, 3>, vector<8x16xf16> + %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf16, 3>, vector<16x8xf16> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<20x20xf16, 3> + return +} + +// ----- + +// CHECK-DAG: [[$Arow_map:#.+]] = affine_map<()[s0] -> (s0 mod 16 + 1)> +// CHECK-DAG: [[$Acol_map:#.+]] = affine_map<()[s0] -> ((s0 floordiv 16) * 8 + 3)> +// CHECK-DAG: [[$Bcol_map:#.+]] = affine_map<() -> (3)> +// CHECK-DAG: [[$Brow_map:#.+]] = affine_map<()[s0] -> (s0 + 3)> + +#map0 = affine_map<(d0, d1, d2) -> (d2, d1)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = 
affine_map<(d0, d1, d2) -> (d1, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> + +// CHECK-LABEL: func @batch_m16n8k16_fp16_row_row_row +func.func @batch_m16n8k16_fp16_row_row_row(%arg0: memref<2x20x20xf16, 3>, %arg1: memref<2x20x20xf16, 3>, %arg2: memref<2x20x20xf16, 3>) { + %cst_0 = arith.constant dense<0.000000e+00> : vector<20x20xf16> + // CHECK: [[C0:%.+]] = arith.constant 0 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %cst = arith.constant 0.000000e+00 : f16 + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$Arow_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$Acol_map]] + // CHECK: nvgpu.ldmatrix %arg0[[[C0]], [[row]], [[col]]] {numTiles = 4 : i32, transpose = false} : memref<2x20x20xf16, 3> -> vector<4x2xf16> + %A = vector.transfer_read %arg0[%c0, %c1, %c3], %cst {in_bounds = [true, true]} : memref<2x20x20xf16, 3>, vector<16x16xf16> + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$Brow_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$Bcol_map]] + // CHECK: nvgpu.ldmatrix %arg1[[[C0]], [[row]], [[col]]] {numTiles = 2 : i32, transpose = true} : memref<2x20x20xf16, 3> -> vector<2x2xf16> + %B = vector.transfer_read %arg1[%c0, %c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<2x20x20xf16, 3>, vector<8x16xf16> + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$Arow_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$Acol_map]] + // CHECK: nvgpu.ldmatrix %arg2[[[C0]], [[row]], [[col]]] {numTiles = 2 : i32, transpose = false} : memref<2x20x20xf16, 3> -> vector<2x2xf16> + %C = vector.transfer_read %arg2[%c0, %c1, %c3], %cst {in_bounds = [true, true]} : memref<2x20x20xf16, 3>, vector<16x8xf16> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> + vector.transfer_write %D, %arg2[%c0, %c1, %c3] {in_bounds = [true, 
true]} : vector<16x8xf16>, memref<2x20x20xf16, 3> + return +} + +// ----- + +//######################################################### +// FP16 row-col-row +//######################################################### + +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> + +// CHECK: [[$rowA_map:#.+]] = affine_map<()[s0] -> (s0 mod 16 + 1)> +// CHECK: [[$colA_map:#.+]] = affine_map<()[s0] -> ((s0 floordiv 16) * 8 + 3)> + +// CHECK: [[$rowB_map:#.+]] = affine_map<()[s0] -> (s0 mod 8 + 1)> +// CHECK: [[$colB_map:#.+]] = affine_map<()[s0] -> ((s0 floordiv 8) * 8 + 3)> + +// CHECK-LABEL: func @m16n8k16_fp16_row_col_row +func.func @m16n8k16_fp16_row_col_row(%arg0: memref<20x20xf16, 3>, %arg1: memref<20x20xf16, 3>, %arg2: memref<20x20xf16, 3>) { + %cst_0 = arith.constant dense<0.000000e+00> : vector<16x8xf16> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %cst = arith.constant 0.000000e+00 : f16 + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]] + // CHECK: nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32 + // CHECK-SAME: transpose = false + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB_map]] + // CHECK: nvgpu.ldmatrix %arg1[[[row]], [[col]]] {numTiles = 2 : i32 + // CHECK-SAME: transpose = false + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]] + // CHECK: nvgpu.ldmatrix %arg2[[[row]], [[col]]] {numTiles = 2 : i32 + // CHECK-SAME: transpose = false + %A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf16, 3>, vector<16x16xf16> + %B = vector.transfer_read %arg1[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf16, 3>, vector<8x16xf16> + %C = vector.transfer_read %arg2[%c1, %c3], %cst 
{in_bounds = [true, true]} : memref<20x20xf16, 3>, vector<16x8xf16> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<20x20xf16, 3> + return +} + +// ----- + +//######################################################### +// TF32 (multiplicand) F32 (accumulator) row-row-row +//######################################################### + +#map0 = affine_map<(d0, d1) -> (d1, d0)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d1, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> + +// CHECK-DAG: [[$rowA_map:#.+]] = affine_map<()[s0] -> (s0 mod 16 + 1)> +// CHECK-DAG: [[$colA_map:#.+]] = affine_map<()[s0] -> ((s0 floordiv 16) * 4 + 3)> + +// CHECK-DAG: [[$rowB_map:#.+]] = affine_map<()[s0] -> (s0 mod 4 + 3)> +// CHECK-DAG: [[$colB_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4 + 3)> + +// CHECK-DAG: [[$rowC_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4)> +// CHECK-DAG: [[$rowC8_map:#.+]] = affine_map<()[s0] -> (s0 floordiv 4 + 8)> +// CHECK-DAG: [[$colC_map:#.+]] = affine_map<()[s0] -> (s0 * 2 - (s0 floordiv 4) * 8)> + +// CHECK-LABEL: func @m16n8k4_tf32_f32_row_row_row +func.func @m16n8k4_tf32_f32_row_row_row(%arg0: memref<20x20xf32, 3>, %arg1: memref<20x20xf32, 3>, %arg2: memref<20x20xf32>) { + %cst_0 = arith.constant dense<0.000000e+00> : vector<16x8xf32> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %cst = arith.constant 0.000000e+00 : f32 + + // CHECK: [[c_frag:%.+]] = arith.constant {{.*}} : vector<2x2xf32> + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]] + // CHECK: [[a_frag:%.+]] = nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 2 : i32, transpose 
= false} + + // b and c are not loaded by ldmatrix in this test. + // CHECK-NOT: nvgpu.ldmatrix + + // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB_map]] + // CHECK-DAG: [[col:%.+]] = affine.apply [[$colB_map]] + // CHECK: [[b_el:%.+]] = memref.load {{%.+}} : memref<20x20xf32, 3> + // CHECK: [[b_frag:%.+]] = vector.insert [[b_el]], {{.*}} : f32 into vector<1x1xf32> + + // CHECK: [[d_frag:%.+]] = nvgpu.mma.sync([[a_frag]], [[b_frag]], [[c_frag]]) + // CHECK-SAME: mmaShape = [16, 8, 4] + // CHECK-SAME: -> vector<2x2xf32> + %A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf32, 3>, vector<16x4xf32> + %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<20x20xf32, 3>, vector<8x4xf32> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %cst_0 : vector<16x4xf32>, vector<8x4xf32> into vector<16x8xf32> + + // CHECK: vector.extract [[d_frag]][0] : vector<2x2xf32> + // CHECK: affine.apply [[$rowC_map]] + // CHECK: affine.apply [[$colC_map]] + // CHECK: vector.store + // CHECK: vector.extract [[d_frag]][1] : vector<2x2xf32> + // CHECK: affine.apply [[$rowC8_map]] + // CHECK: affine.apply [[$colC_map]] + // CHECK: vector.store + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf32>, memref<20x20xf32> + return +} From 322e2a3b40fa5f6bfcb868e827960c812b185710 Mon Sep 17 00:00:00 2001 From: David Goldman Date: Thu, 28 Apr 2022 15:13:21 -0400 Subject: [PATCH 100/908] [clangd][ObjC] Filter ObjC method completions on the remaining selector Previously, clangd would filter completions only on the first part of the selector (first typed chunk) instead of all remaining selector fragments (all typed chunks). 
Differential Revision: https://reviews.llvm.org/D124637 --- clang-tools-extra/clangd/CodeComplete.cpp | 19 +++++++++++--- clang-tools-extra/clangd/CodeComplete.h | 4 +++ .../clangd/unittests/CodeCompleteTests.cpp | 26 +++++++++++++++++++ .../include/clang/Sema/CodeCompleteConsumer.h | 5 +++- clang/lib/Sema/CodeCompleteConsumer.cpp | 9 +++++++ 5 files changed, 59 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 920fb77f07dee6..527b6cdc304dbd 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -303,6 +303,7 @@ struct CodeCompletionBuilder { assert(ASTCtx); Completion.Origin |= SymbolOrigin::AST; Completion.Name = std::string(llvm::StringRef(SemaCCS->getTypedText())); + Completion.FilterText = SemaCCS->getAllTypedText(); if (Completion.Scope.empty()) { if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) || (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern)) @@ -335,6 +336,8 @@ struct CodeCompletionBuilder { Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind); if (Completion.Name.empty()) Completion.Name = std::string(C.IndexResult->Name); + if (Completion.FilterText.empty()) + Completion.FilterText = Completion.Name; // If the completion was visible to Sema, no qualifier is needed. This // avoids unneeded qualifiers in cases like with `using ns::X`. if (Completion.RequiredQualifier.empty() && !C.SemaResult) { @@ -352,6 +355,7 @@ struct CodeCompletionBuilder { Completion.Origin |= SymbolOrigin::Identifier; Completion.Kind = CompletionItemKind::Text; Completion.Name = std::string(C.IdentifierResult->Name); + Completion.FilterText = Completion.Name; } // Turn absolute path into a literal string that can be #included. 
@@ -860,7 +864,15 @@ struct CompletionRecorder : public CodeCompleteConsumer { return Result.Pattern->getTypedText(); } auto *CCS = codeCompletionString(Result); - return CCS->getTypedText(); + const CodeCompletionString::Chunk *OnlyText = nullptr; + for (auto &C : *CCS) { + if (C.Kind != CodeCompletionString::CK_TypedText) + continue; + if (OnlyText) + return CCAllocator->CopyString(CCS->getAllTypedText()); + OnlyText = &C; + } + return OnlyText ? OnlyText->Text : llvm::StringRef(); } // Build a CodeCompletion string for R, which must be from Results. @@ -1980,6 +1992,7 @@ CodeCompleteResult codeCompleteComment(PathRef FileName, unsigned Offset, continue; CodeCompletion Item; Item.Name = Name.str() + "="; + Item.FilterText = Item.Name; Item.Kind = CompletionItemKind::Text; Result.Completions.push_back(Item); } @@ -2118,8 +2131,8 @@ CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const { Doc.append(*Documentation); LSP.documentation = renderDoc(Doc, Opts.DocumentationFormat); } - LSP.sortText = sortText(Score.Total, Name); - LSP.filterText = Name; + LSP.sortText = sortText(Score.Total, FilterText); + LSP.filterText = FilterText; LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name}; // Merge continuous additionalTextEdits into main edit. The main motivation // behind this is to help LSP clients, it seems most of them are confused when diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h index b76dd642943a90..73139ba40e7655 100644 --- a/clang-tools-extra/clangd/CodeComplete.h +++ b/clang-tools-extra/clangd/CodeComplete.h @@ -155,6 +155,10 @@ struct CodeCompleteOptions { struct CodeCompletion { // The unqualified name of the symbol or other completion item. std::string Name; + // The name of the symbol for filtering and sorting purposes. Typically the + // same as `Name`, but may be different e.g. 
for ObjC methods, `Name` is the + // first selector fragment but the `FilterText` is the entire selector. + std::string FilterText; // The scope qualifier for the symbol name. e.g. "ns1::ns2::" // Empty for non-symbol completions. Not inserted, but may be displayed. std::string Scope; diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 2e42786f76ad6d..316fcb6293b12e 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -58,6 +58,7 @@ MATCHER_P(scopeRefs, Refs, "") { return arg.ScopeRefsInFile == Refs; } MATCHER_P(nameStartsWith, Prefix, "") { return llvm::StringRef(arg.Name).startswith(Prefix); } +MATCHER_P(filterText, F, "") { return arg.FilterText == F; } MATCHER_P(scope, S, "") { return arg.Scope == S; } MATCHER_P(qualifier, Q, "") { return arg.RequiredQualifier == Q; } MATCHER_P(labeled, Label, "") { @@ -1918,6 +1919,7 @@ TEST(CompletionTest, QualifiedNames) { TEST(CompletionTest, Render) { CodeCompletion C; C.Name = "x"; + C.FilterText = "x"; C.Signature = "(bool) const"; C.SnippetSuffix = "(${0:bool})"; C.ReturnType = "int"; @@ -1950,6 +1952,11 @@ TEST(CompletionTest, Render) { EXPECT_FALSE(R.deprecated); EXPECT_EQ(R.score, .5f); + C.FilterText = "xtra"; + R = C.render(Opts); + EXPECT_EQ(R.filterText, "xtra"); + EXPECT_EQ(R.sortText, sortText(1.0, "xtra")); + Opts.EnableSnippets = true; R = C.render(Opts); EXPECT_EQ(R.insertText, "Foo::x(${0:bool})"); @@ -3051,6 +3058,25 @@ TEST(CompletionTest, ObjectiveCMethodTwoArgumentsFromMiddle) { EXPECT_THAT(C, ElementsAre(snippetSuffix("${1:(unsigned int)}"))); } +TEST(CompletionTest, ObjectiveCMethodFilterOnEntireSelector) { + auto Results = completions(R"objc( + @interface Foo + + (id)player:(id)player willRun:(id)run; + @end + id val = [Foo wi^] + )objc", + /*IndexSymbols=*/{}, + /*Opts=*/{}, "Foo.m"); + + auto C = Results.Completions; + EXPECT_THAT(C, 
ElementsAre(named("player:"))); + EXPECT_THAT(C, ElementsAre(filterText("player:willRun:"))); + EXPECT_THAT(C, ElementsAre(kind(CompletionItemKind::Method))); + EXPECT_THAT(C, ElementsAre(returnType("id"))); + EXPECT_THAT(C, ElementsAre(signature("(id) willRun:(id)"))); + EXPECT_THAT(C, ElementsAre(snippetSuffix("${1:(id)} willRun:${2:(id)}"))); +} + TEST(CompletionTest, ObjectiveCSimpleMethodDeclaration) { auto Results = completions(R"objc( @interface Foo diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h index 41c495882b27b5..3869fb5b839881 100644 --- a/clang/include/clang/Sema/CodeCompleteConsumer.h +++ b/clang/include/clang/Sema/CodeCompleteConsumer.h @@ -606,9 +606,12 @@ class CodeCompletionString { return begin()[I]; } - /// Returns the text in the TypedText chunk. + /// Returns the text in the first TypedText chunk. const char *getTypedText() const; + /// Returns the combined text from all TypedText chunks. + std::string getAllTypedText() const; + /// Retrieve the priority of this code completion result. 
unsigned getPriority() const { return Priority; } diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index 927b067797275d..8e8a1be38c0f57 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -346,6 +346,15 @@ const char *CodeCompletionString::getTypedText() const { return nullptr; } +std::string CodeCompletionString::getAllTypedText() const { + std::string Res; + for (const Chunk &C : *this) + if (C.Kind == CK_TypedText) + Res += C.Text; + + return Res; +} + const char *CodeCompletionAllocator::CopyString(const Twine &String) { SmallString<128> Data; StringRef Ref = String.toStringRef(Data); From 28b6d412afc52d46e44960de429e6688826d9f4f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 19 May 2022 19:33:33 -0700 Subject: [PATCH 101/908] [mlir][sparse] add support for complex zero/one building Reviewed By: bixia Differential Revision: https://reviews.llvm.org/D126039 --- .../SparseTensor/Transforms/CMakeLists.txt | 1 + .../SparseTensor/Transforms/CodegenUtils.cpp | 2 ++ .../SparseTensor/Transforms/CodegenUtils.h | 18 +++++++++-- .../Transforms/SparseTensorPasses.cpp | 4 ++- .../test/Dialect/SparseTensor/conversion.mlir | 32 +++++++++++++++++-- .../llvm-project-overlay/mlir/BUILD.bazel | 1 + 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt index 51d7bd4fa8728c..7e7ed55416360c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt @@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRSparseTensorTransforms LINK_LIBS PUBLIC MLIRArithmetic MLIRBufferization + MLIRComplex MLIRFunc MLIRIR MLIRLLVMIR diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 5eb0785baf9382..cf574fdabb7aeb 100644 --- 
a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -189,5 +189,7 @@ Value mlir::sparse_tensor::genIsNonzero(OpBuilder &builder, mlir::Location loc, if (tp.isIntOrIndex()) return builder.create(loc, arith::CmpIPredicate::ne, v, zero); + if (tp.dyn_cast()) + return builder.create(loc, v, zero); llvm_unreachable("Non-numeric type"); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 944605bab4b9fa..ba897f7f7c14be 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -14,6 +14,7 @@ #define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENUTILS_H_ #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/ExecutionEngine/SparseTensorUtils.h" #include "mlir/IR/Builders.h" @@ -102,16 +103,27 @@ Value genIsNonzero(OpBuilder &builder, Location loc, Value v); //===----------------------------------------------------------------------===// /// Generates a 0-valued constant of the given type. In addition to -/// the scalar types (`FloatType`, `IndexType`, `IntegerType`), this also -/// works for `RankedTensorType` and `VectorType` (for which it generates -/// a constant `DenseElementsAttr` of zeros). +/// the scalar types (`ComplexType`, `FloatType`, `IndexType`, `IntegerType`), +/// this also works for `RankedTensorType` and `VectorType` (for which it +/// generates a constant `DenseElementsAttr` of zeros).
inline Value constantZero(OpBuilder &builder, Location loc, Type tp) { + if (auto ctp = tp.dyn_cast()) { + auto zeroe = builder.getZeroAttr(ctp.getElementType()); + auto zeroa = builder.getArrayAttr({zeroe, zeroe}); + return builder.create(loc, tp, zeroa); + } return builder.create(loc, tp, builder.getZeroAttr(tp)); } /// Generates a 1-valued constant of the given type. This supports all /// the same types as `constantZero`. inline Value constantOne(OpBuilder &builder, Location loc, Type tp) { + if (auto ctp = tp.dyn_cast()) { + auto zeroe = builder.getZeroAttr(ctp.getElementType()); + auto onee = getOneAttr(builder, ctp.getElementType()); + auto zeroa = builder.getArrayAttr({onee, zeroe}); + return builder.create(loc, tp, zeroa); + } return builder.create(loc, tp, getOneAttr(builder, tp)); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index 2b957e6fbad608..8c1f296a39e98c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -9,6 +9,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -112,7 +113,8 @@ struct SparseTensorConversionPass // The following operations and dialects may be introduced by the // rewriting rules, and are therefore marked as legal. 
target.addLegalOp(); target .addLegalDialect) -> tensor) -> !llvm.ptr +// CHECK-SAME: %[[A:.*]]: tensor) -> !llvm.ptr { // CHECK-DAG: %[[EmptyCOO:.*]] = arith.constant 4 : i32 // CHECK-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[I0:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[U:.*]] = tensor.dim %[[A]], %[[C0]] : tensor @@ -191,8 +192,11 @@ func.func @sparse_nop_cast(%arg0: tensor<64xf32, #SparseVector>) -> tensor to memref // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[U]] step %[[C1]] { // CHECK: %[[E:.*]] = tensor.extract %[[A]][%[[I]]] : tensor -// CHECK: memref.store %[[I]], %[[M]][%[[C0]]] : memref<1xindex> -// CHECK: call @addEltI32(%[[C]], %[[E]], %[[T]], %[[Z]]) +// CHECK: %[[N:.*]] = arith.cmpi ne, %[[E]], %[[I0]] : i32 +// CHECK: scf.if %[[N]] { +// CHECK: memref.store %[[I]], %[[M]][%[[C0]]] : memref<1xindex> +// CHECK: call @addEltI32(%[[C]], %[[E]], %[[T]], %[[Z]]) +// CHECK: } // CHECK: } // CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOI32(%[[C]]) @@ -202,6 +206,28 @@ func.func @sparse_convert_1d(%arg0: tensor) -> tensor } +// CHECK-LABEL: func @sparse_convert_complex( +// CHECK-SAME: %[[A:.*]]: tensor<100xcomplex>) -> !llvm.ptr { +// CHECK-DAG: %[[CC:.*]] = complex.constant [0.000000e+00, 0.000000e+00] : complex +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C100:.*]] = arith.constant 100 : index +// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] { +// CHECK: %[[E:.*]] = tensor.extract %[[A]][%[[I]]] : tensor<100xcomplex> +// CHECK: %[[N:.*]] = complex.neq %[[E]], %[[CC]] : complex +// CHECK: scf.if %[[N]] { +// CHECK: memref.store %[[I]], %{{.*}}[%[[C0]]] : memref<1xindex> +// CHECK: call @addEltC64 +// CHECK: } +// CHECK: } +// CHECK: 
%[[T:.*]] = call @newSparseTensor +// CHECK: call @delSparseTensorCOOC64 +// CHECK: return %[[T]] : !llvm.ptr +func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100xcomplex, #SparseVector> { + %0 = sparse_tensor.convert %arg0 : tensor<100xcomplex> to tensor<100xcomplex, #SparseVector> + return %0 : tensor<100xcomplex, #SparseVector> +} + // CHECK-LABEL: func @sparse_convert_1d_ss( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 53d8467739ef7e..3bb09cb168c13c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2033,6 +2033,7 @@ cc_library( ":Affine", ":ArithmeticDialect", ":BufferizationDialect", + ":ComplexDialect", ":FuncDialect", ":FuncTransforms", ":IR", From 73bd60b8431087bc4d527de3f40ae0178e23b52f Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 20 May 2022 18:03:20 +0200 Subject: [PATCH 102/908] Adjust BUILD files for [MLIR][GPU] Add NvGpu mma.sync path to the VectorToGPU pass --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 3bb09cb168c13c..bc250d1ba6c0b8 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -7608,6 +7608,8 @@ cc_library( ":IR", ":LLVMDialect", ":MemRefDialect", + ":NVGPU", + ":NVVMDialect", ":Pass", ":SCFDialect", ":Support", @@ -8560,8 +8562,8 @@ cc_library( td_library( name = "MLProgramOpsTdFiles", srcs = [ - "include/mlir/Dialect/MLProgram/IR/MLProgramBase.td", "include/mlir/Dialect/MLProgram/IR/MLProgramAttributes.td", + "include/mlir/Dialect/MLProgram/IR/MLProgramBase.td", "include/mlir/Dialect/MLProgram/IR/MLProgramOps.td", ], 
includes = ["include"], From cf31db35a7350f73058681f377124acbcbe8220b Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 20 May 2022 18:08:29 +0200 Subject: [PATCH 103/908] Adjust BUILD files for [ifs] Switch to using OptTable --- utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index f10c76990dac09..e0ee148b87d033 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -2957,6 +2957,18 @@ cc_binary( ], ) +gentbl( + name = "IfsOptionsTableGen", + strip_include_prefix = "tools/llvm-ifs", + tbl_outs = [( + "-gen-opt-parser-defs", + "tools/llvm-ifs/Opts.inc", + )], + tblgen = ":llvm-tblgen", + td_file = "tools/llvm-ifs/Opts.td", + td_srcs = ["include/llvm/Option/OptParser.td"], +) + cc_binary( name = "llvm-ifs", srcs = glob([ @@ -2966,8 +2978,10 @@ cc_binary( copts = llvm_copts, stamp = 0, deps = [ + ":IfsOptionsTableGen", ":InterfaceStub", ":ObjectYAML", + ":Option", ":Support", ":TextAPI", ], From 30628b0eccf89c36dbd176e925c7fc5c3bfa519d Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 20 May 2022 17:45:45 +0200 Subject: [PATCH 104/908] Use the public clang::Builtin API in the unit test --- clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp | 5 ++--- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp b/clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp index b234ca6bcb0fe5..8fdb3d66d8cf83 100644 --- a/clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp +++ b/clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp @@ -1,4 +1,4 @@ -#include "../lib/Basic/BuiltinTargetFeatures.h" +#include "clang/Basic/Builtins.h" #include "gtest/gtest.h" using namespace llvm; @@ -11,8 +11,7 @@ TEST(CheckTargetFeaturesTest, 
checkBuiltinFeatures) { StringMap SM; for (StringRef F : Features) SM.insert(std::make_pair(F, true)); - clang::Builtin::TargetFeatures TF(SM); - return TF.hasRequiredFeatures(BuiltinFeatures); + return clang::Builtin::evaluateRequiredTargetFeatures(BuiltinFeatures, SM); }; // Make sure the basic function ',' and '|' works correctly ASSERT_FALSE(doCheck("A,B,C,D", "A")); diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index b9e8df560d294e..eb900106c21e6d 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -403,7 +403,6 @@ cc_library( ]), hdrs = glob([ "include/clang/Basic/*.h", - "lib/Basic/BuiltinTargetFeatures.h", ]), copts = [ "-DHAVE_VCS_VERSION_INC", From 9af56c676e40efa551e899675e902cbb3f0db0b6 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 19 May 2022 17:46:54 +0100 Subject: [PATCH 105/908] [AMDGPU] Mark SMEM cache invalidations as not reading memory This brings the MachineInstrs in line with the corresponding intrinsics which have side effects but do not access memory. It also matches how BUF cache invalidation instructions are defined. The lit test changes are just because the machine scheduler previously treated them like loads, and added an artificial scheduling edge from them to the exit SU, which caused them to be scheduled earlier. 
Differential Revision: https://reviews.llvm.org/D126074 --- llvm/lib/Target/AMDGPU/SMInstructions.td | 1 + llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll | 2 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll | 2 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index d21f3d9a6fccc7..ffbc0130ab936f 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -220,6 +220,7 @@ class SM_Time_Pseudo : SM_Pse class SM_Inval_Pseudo : SM_Pseudo< opName, (outs), (ins), "", [(node)]> { let hasSideEffects = 1; + let mayLoad = 0; let mayStore = 0; let has_sdst = 0; let has_sbase = 0; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll index e8a363adde7341..41f398f685f941 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -16,7 +16,7 @@ define amdgpu_kernel void @test_s_dcache_inv_vol() #0 { ; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait: ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_dcache_inv_vol +; GCN: s_dcache_inv_vol ; GCN: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.inv.vol() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll index 254a0fae3c3b73..9026d4831ad794 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -14,7 +14,7 @@ define amdgpu_kernel void @test_s_dcache_wb() #0 { ; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait: ; VI-NEXT: ; %bb.0: -; VI-NEXT: s_dcache_wb +; VI: s_dcache_wb ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void 
@test_s_dcache_wb_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.wb() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll index 929cd1c5f0bbdc..7f6e115e0e6956 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -14,7 +14,7 @@ define amdgpu_kernel void @test_s_dcache_wb_vol() #0 { ; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait: ; VI-NEXT: ; %bb.0: -; VI-NEXT: s_dcache_wb_vol +; VI: s_dcache_wb_vol ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.wb.vol() From 1f12718ccfd660f01ac0e444d4632cf8ce6b98e2 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu Date: Fri, 20 May 2022 10:12:14 -0400 Subject: [PATCH 106/908] [clang] Fixing arm-common, windows only and openmp header install targets https://reviews.llvm.org/D123498 contains a few errors resulting in incorrect target contents or mismatched target/list names. This patch fixes all the known errors. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D126002 --- clang/lib/Headers/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 08c5dccb3b77a5..8bf7dd570384fe 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -366,7 +366,7 @@ set_target_properties("clang-resource-headers" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${output_dir}") add_dependencies("clang-resource-headers" "core-resource-headers" - "arm-common-headers" + "arm-common-resource-headers" "arm-resource-headers" "aarch64-resource-headers" "cuda-resource-headers" @@ -387,7 +387,7 @@ add_dependencies("clang-resource-headers" # Core/common headers add_header_target("core-resource-headers" ${core_files}) -add_header_target("arm-common-headers" "${arm_common_files};${arm_common_generated_files}") +add_header_target("arm-common-resource-headers" "${arm_common_files};${arm_common_generated_files}") # Architecture/platform specific targets add_header_target("arm-resource-headers" "${arm_only_files};${arm_only_generated_files}") @@ -546,7 +546,7 @@ install( install( FILES ${openmp_wrapper_files} - DESTINATION ${header_install_dir} + DESTINATION ${header_install_dir}/openmp_wrappers EXCLUDE_FROM_ALL COMPONENT openmp-resource-headers) @@ -557,7 +557,7 @@ install( COMPONENT utility-resource-headers) install( - FILES ${windows_files} + FILES ${windows_only_files} DESTINATION ${header_install_dir} EXCLUDE_FROM_ALL COMPONENT windows-resource-headers) @@ -621,7 +621,7 @@ if (NOT LLVM_ENABLE_IDE) DEPENDS openmp-resource-headers COMPONENT openmp-resource-headers) add_llvm_install_targets(install-windows-resource-headers - DEPENDS window-resource-headers + DEPENDS windows-resource-headers COMPONENT windows-resource-headers) add_llvm_install_targets(install-utility-resource-headers DEPENDS utility-resource-headers From 
8d3894f67ebf475e4393abeab91736bf534ff8f8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 19 May 2022 21:19:12 -0700 Subject: [PATCH 107/908] [TypePromotion] Fix another case for sext vs zext in promoted constant. If the SafeWrap operation is a subtract, we negated the constant to treat the subtract as an addition. The sext was based on the operation being addition. So we really need to do (neg (sext (neg C))) when promoting the constant. This is equivalent to (sext C) for every value of C except the min signed value. For min signed value we need to do (zext C) instead. Fixes PR55490. Differential Revision: https://reviews.llvm.org/D125653 --- llvm/lib/CodeGen/TypePromotion.cpp | 5 ++++- llvm/test/Transforms/TypePromotion/ARM/icmps.ll | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 016feb0df2da12..ea1369587609eb 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -484,7 +484,10 @@ void IRPromoter::PromoteTree() { continue; if (auto *Const = dyn_cast(Op)) { - Constant *NewConst = (SafeWrap.contains(I) && i == 1) + // For subtract, we don't need to sext the constant. We only put it in + // SafeWrap because SafeWrap.size() is used elsewhere. + Constant *NewConst = (SafeWrap.contains(I) && i == 1 && + I->getOpcode() != Instruction::Sub) ? 
ConstantExpr::getSExt(Const, ExtTy) : ConstantExpr::getZExt(Const, ExtTy); I->setOperand(i, NewConst); diff --git a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll index 880ea18f438822..5caa3a65266fdb 100644 --- a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll @@ -361,3 +361,14 @@ define i32 @degenerateicmp() { %3 = select i1 %2, i32 1, i32 0 ret i32 %3 } + +define i1 @pr55490() { +; CHECK-LABEL: @pr55490( +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 10, 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = sub i4 -6, -8 + %2 = icmp ult i4 %1, 3 + ret i1 %2 +} From a1cf154dd476e021a4c7924e49fac85da01cecf8 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 20 May 2022 09:27:44 -0700 Subject: [PATCH 108/908] [lldb] Set correct register number for cpsr (GENERIC_REGNUM_FLAGS) Report the correct register number (GENERIC_REGNUM_FLAGS) for cpsr. This fixes TestLldbGdbServer.py on Apple Silicon. Differential revision: https://reviews.llvm.org/D126076 --- lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp index e065718c7df947..fa4ba5f67263ae 100644 --- a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp +++ b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp @@ -1643,7 +1643,7 @@ const DNBRegisterInfo DNBArchMachARM64::g_gpr_registers[] = { // used for // userland debugging. 
{e_regSetGPR, gpr_cpsr, "cpsr", "flags", Uint, Hex, 4, - GPR_OFFSET_NAME(cpsr), dwarf_elr_mode, dwarf_elr_mode, INVALID_NUB_REGNUM, + GPR_OFFSET_NAME(cpsr), dwarf_elr_mode, dwarf_elr_mode, GENERIC_REGNUM_FLAGS, debugserver_gpr_cpsr, NULL, NULL}, DEFINE_PSEUDO_GPR_IDX(0, w0), From 873a3e2c1d048f5d592f29d0ca09348afd2cbd0a Mon Sep 17 00:00:00 2001 From: Christopher Bate Date: Fri, 20 May 2022 10:44:31 -0600 Subject: [PATCH 109/908] [mlir] Add missing NVGPU link dependency to VectorToGPU The missing link dependency caused build failures in some configurations. --- mlir/lib/Conversion/VectorToGPU/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt index 778f2c42eebe5f..7d12e4781cb642 100644 --- a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt @@ -13,6 +13,7 @@ add_mlir_conversion_library(MLIRVectorToGPU MLIRGPUOps MLIRLLVMIR MLIRMemRef + MLIRNVGPU MLIRTransforms MLIRVector MLIRVectorUtils From 98d141481e0410bb830fb257be7099edfca6e17f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 20 May 2022 13:13:37 -0400 Subject: [PATCH 110/908] [gn build] (manually) port 480dcdc8975d8 (llvm-ifs OptTable) --- llvm/utils/gn/secondary/llvm/tools/llvm-ifs/BUILD.gn | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-ifs/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-ifs/BUILD.gn index b77712262143f1..77b4b05e277ea6 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-ifs/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-ifs/BUILD.gn @@ -1,8 +1,17 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("Opts") { + visibility = [ ":llvm-ifs" ] + args = [ "-gen-opt-parser-defs" ] +} + executable("llvm-ifs") { deps = [ + ":Opts", "//llvm/lib/InterfaceStub", "//llvm/lib/Object", "//llvm/lib/ObjectYAML", + "//llvm/lib/Option", "//llvm/lib/Support", "//llvm/lib/TextAPI", 
] From 923831ebc37c258268db57856f066980aa3eef18 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 20 May 2022 10:05:55 -0700 Subject: [PATCH 111/908] [RISCV] Add initial test coverage for LSR Establish the most basic possible test coverage for LSR transformation on RISCV. Original patch by eopXD (D123458), modified by me to cleanup/simplify tests. --- ...op-strength-reduce-add-cheaper-than-mul.ll | 95 ++++++++++++++++++ .../RISCV/loop-strength-reduce-loop-invar.ll | 96 +++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll create mode 100644 llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll new file mode 100644 index 00000000000000..d062709000f043 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs -mattr=+m | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs -mattr=+m | FileCheck %s -check-prefixes=RV64 + +; Test case: +; - Addition should be cheaper than multiplication + +; The following LLVM IR simulates: +; int8_t flag2[8193]; +; void test(int i) { +; int tmp = i * 2; +; if (i * 2 > 8192) return ; +; for (int j = 0; ; ++j) { +; int offset = j * i + tmp; +; flag2[offset] = 0; +; if (offset + i > 8192) break; +; } +; } + +; After LSR: +; int8_t flag2[8193]; +; void test(int i) { +; int j = i * 2; +; if (j > 8193) return ; +; do { +; flag2[j] = 0; +; j += i; +; } while (j < 8193); +; } + +@flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1] + +define void @test(i32 signext %i) nounwind { +; RV32-LABEL: test: +; RV32: # %bb.0: # %entry +; RV32-NEXT: 
slli a1, a0, 1 +; RV32-NEXT: lui a3, 2 +; RV32-NEXT: blt a3, a1, .LBB0_3 +; RV32-NEXT: # %bb.1: # %bb.preheader +; RV32-NEXT: lui a2, %hi(flags2) +; RV32-NEXT: addi a2, a2, %lo(flags2) +; RV32-NEXT: addi a3, a3, 1 +; RV32-NEXT: .LBB0_2: # %bb +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: add a4, a1, a2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: sb zero, 0(a4) +; RV32-NEXT: blt a1, a3, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %return +; RV32-NEXT: ret +; +; RV64-LABEL: test: +; RV64: # %bb.0: # %entry +; RV64-NEXT: slliw a1, a0, 1 +; RV64-NEXT: lui a4, 2 +; RV64-NEXT: blt a4, a1, .LBB0_3 +; RV64-NEXT: # %bb.1: # %bb.preheader +; RV64-NEXT: li a2, 0 +; RV64-NEXT: lui a3, %hi(flags2) +; RV64-NEXT: addi a3, a3, %lo(flags2) +; RV64-NEXT: addiw a4, a4, 1 +; RV64-NEXT: .LBB0_2: # %bb +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: mulw a5, a2, a0 +; RV64-NEXT: addw a5, a5, a1 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: srli a6, a6, 32 +; RV64-NEXT: add a6, a6, a3 +; RV64-NEXT: sb zero, 0(a6) +; RV64-NEXT: addw a5, a5, a0 +; RV64-NEXT: addiw a2, a2, 1 +; RV64-NEXT: blt a5, a4, .LBB0_2 +; RV64-NEXT: .LBB0_3: # %return +; RV64-NEXT: ret +entry: + %k_addr.012 = shl i32 %i, 1 + %tmp14 = icmp sgt i32 %k_addr.012, 8192 + %tmp. = shl i32 %i, 1 + br i1 %tmp14, label %return, label %bb + +bb: + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] + %tmp.15 = mul i32 %indvar, %i + %tmp.16 = add i32 %tmp.15, %tmp. 
+ %gep.upgrd.1 = zext i32 %tmp.16 to i64 + %tmp = getelementptr [8193 x i8], [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1 + store i8 0, i8* %tmp + %tmp.17 = add i32 %tmp.16, %i + %tmp.upgrd.2 = icmp sgt i32 %tmp.17, 8192 + %indvar.next = add i32 %indvar, 1 + br i1 %tmp.upgrd.2, label %return, label %bb + +return: + ret void +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll new file mode 100644 index 00000000000000..a288b4ea77787c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +; Test case: +; - `A[row]` is loop invariant and should be hoisted up to preheader +; FIXME: RV32 is working as expected, but RV64 doesn't + +; The following LLVM IR simulates: +; int A[16][16]; +; void test(int row, int N) { +; for (int i=0; i0; N--) { +; *(ptr-1) = 4; +; *(ptr) = 5; +; ++ptr; +; } +; } + +@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] + +define void @test(i32 signext %row, i32 signext %N.in) nounwind { +; RV32-LABEL: test: +; RV32: # %bb.0: # %entry +; RV32-NEXT: blez a1, .LBB0_3 +; RV32-NEXT: # %bb.1: # %cond_true.preheader +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: lui a2, %hi(A) +; RV32-NEXT: addi a2, a2, %lo(A) +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: addi a0, a0, 8 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li a3, 5 +; RV32-NEXT: .LBB0_2: # %cond_true +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: sw a2, -4(a0) +; RV32-NEXT: sw a3, 0(a0) +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: addi a0, a0, 4 +; RV32-NEXT: bnez a1, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %return +; RV32-NEXT: ret +; +; RV64-LABEL: test: +; RV64: # 
%bb.0: # %entry +; RV64-NEXT: blez a1, .LBB0_3 +; RV64-NEXT: # %bb.1: # %cond_true.preheader +; RV64-NEXT: li a4, 0 +; RV64-NEXT: lui a2, %hi(A) +; RV64-NEXT: addi a2, a2, %lo(A) +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: li a3, 5 +; RV64-NEXT: .LBB0_2: # %cond_true +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: addiw a5, a4, 1 +; RV64-NEXT: slli a6, a5, 2 +; RV64-NEXT: add a6, a0, a6 +; RV64-NEXT: sw a2, 0(a6) +; RV64-NEXT: addiw a4, a4, 2 +; RV64-NEXT: slli a4, a4, 2 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: sw a3, 0(a4) +; RV64-NEXT: mv a4, a5 +; RV64-NEXT: bne a5, a1, .LBB0_2 +; RV64-NEXT: .LBB0_3: # %return +; RV64-NEXT: ret +entry: + %N = bitcast i32 %N.in to i32 + %tmp5 = icmp sgt i32 %N.in, 0 + br i1 %tmp5, label %cond_true, label %return + +cond_true: + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] + %tmp2 = add i32 %indvar, 1 + %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 + store i32 4, i32* %tmp + %tmp5.upgrd.1 = add i32 %indvar, 2 + %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 + store i32 5, i32* %tmp7 + %indvar.next = add i32 %indvar, 1 + %exitcond = icmp eq i32 %indvar.next, %N + br i1 %exitcond, label %return, label %cond_true + +return: + ret void +} From f2df53b75071736009d68ba73e86edf7e25cebbd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 20 May 2022 10:23:12 -0700 Subject: [PATCH 112/908] [InstructionSimplify] Remove multiple 'break' after 'return'. 
NFC --- llvm/lib/Analysis/InstructionSimplify.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 1b16df02d04c84..5241c0c90d0056 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6167,7 +6167,6 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_fsub: { auto *FPI = cast(Call); @@ -6175,7 +6174,6 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_fmul: { auto *FPI = cast(Call); @@ -6183,7 +6181,6 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_fdiv: { auto *FPI = cast(Call); @@ -6191,7 +6188,6 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } case Intrinsic::experimental_constrained_frem: { auto *FPI = cast(Call); @@ -6199,7 +6195,6 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { FPI->getFastMathFlags(), Q, FPI->getExceptionBehavior().getValue(), FPI->getRoundingMode().getValue()); - break; } default: return nullptr; From 54e3bf5f37d67441dafbc66838a54a385293a2e1 Mon Sep 17 00:00:00 2001 From: Douglas Yung Date: Fri, 20 May 2022 10:21:24 -0700 Subject: [PATCH 113/908] Revert "[ConstantRange] Improve the implementation of binaryOr" This reverts commit 6990e7477d24ff585ae86549f5280f0be65422a6. 
This change was causing the test compiler-rt/test/fuzzer/merge_two_step.test to fail on our internal bot as well as other build bots such as https://lab.llvm.org/buildbot/#/builders/179/builds/3712. --- llvm/lib/IR/ConstantRange.cpp | 15 +++++----- llvm/unittests/IR/ConstantRangeTest.cpp | 38 ------------------------- 2 files changed, 8 insertions(+), 45 deletions(-) diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index be6386c571b596..c3915cee00476d 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -1410,13 +1410,14 @@ ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); - ConstantRange KnownBitsRange = - fromKnownBits(toKnownBits() | Other.toKnownBits(), false); - // Upper wrapped range. - ConstantRange UMaxUMinRange = - getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), - APInt::getZero(getBitWidth())); - return KnownBitsRange.intersectWith(UMaxUMinRange); + // Use APInt's implementation of OR for single element ranges. + if (isSingleElement() && Other.isSingleElement()) + return {*getSingleElement() | *Other.getSingleElement()}; + + // TODO: replace this with something less conservative + + APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); + return getNonEmpty(std::move(umax), APInt::getZero(getBitWidth())); } ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const { diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index 8a543b1f7df41f..ffd0f004c7575d 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -2560,44 +2560,6 @@ TEST_F(ConstantRangeTest, binaryAnd) { CheckSingleElementsOnly); } -TEST_F(ConstantRangeTest, binaryOr) { - // Single element ranges. 
- ConstantRange R16(APInt(8, 16)); - ConstantRange R20(APInt(8, 20)); - EXPECT_EQ(*R16.binaryOr(R16).getSingleElement(), APInt(8, 16)); - EXPECT_EQ(*R16.binaryOr(R20).getSingleElement(), APInt(8, 16 | 20)); - - ConstantRange R16_32(APInt(8, 16), APInt(8, 32)); - // 'Or' with a high bits mask. - // KnownBits estimate is important, otherwise the maximum included element - // would be 2^8 - 1. - ConstantRange R32(APInt(8, 32)); - ConstantRange R48_64(APInt(8, 48), APInt(8, 64)); - EXPECT_EQ(R16_32.binaryOr(R32), R48_64); - EXPECT_EQ(R32.binaryOr(R16_32), R48_64); - // 'Or' with a low bits mask. - ConstantRange R4(APInt(8, 4)); - ConstantRange R0_16(APInt(8, 0), APInt(8, 16)); - ConstantRange R4_16(APInt(8, 4), APInt(8, 16)); - EXPECT_EQ(R0_16.binaryOr(R4), R4_16); - EXPECT_EQ(R4.binaryOr(R0_16), R4_16); - - // Ranges with more than one element. Handled conservatively for now. - // UMaxUMin estimate is important, otherwise the lower bound would be zero. - ConstantRange R0_64(APInt(8, 0), APInt(8, 64)); - ConstantRange R5_32(APInt(8, 5), APInt(8, 32)); - ConstantRange R5_64(APInt(8, 5), APInt(8, 64)); - EXPECT_EQ(R0_64.binaryOr(R5_32), R5_64); - EXPECT_EQ(R5_32.binaryOr(R0_64), R5_64); - - TestBinaryOpExhaustive( - [](const ConstantRange &CR1, const ConstantRange &CR2) { - return CR1.binaryOr(CR2); - }, - [](const APInt &N1, const APInt &N2) { return N1 | N2; }, PreferSmallest, - CheckSingleElementsOnly); -} - TEST_F(ConstantRangeTest, binaryXor) { // Single element ranges. ConstantRange R16(APInt(8, 16)); From 3bd112c720dc614a59e3f34ebf9b45075037bfa0 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 20 May 2022 10:59:29 -0700 Subject: [PATCH 114/908] Update fuzzing test to comply with new optimisation. 
https://reviews.llvm.org/D125933 improved some of LLVM's handling of binary ORs, which meant we have one less conditional branch, because the 'if (Size > 5 && Data[5] == 'R')' and 'if (bits == 63)' branches are now correctly folded. --- compiler-rt/test/fuzzer/merge_two_step.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/fuzzer/merge_two_step.test b/compiler-rt/test/fuzzer/merge_two_step.test index fbbd2a52c1f7f9..3634ec62b19531 100644 --- a/compiler-rt/test/fuzzer/merge_two_step.test +++ b/compiler-rt/test/fuzzer/merge_two_step.test @@ -10,7 +10,7 @@ RUN: echo ..Z... > %t/T1/3 RUN: rm -f %t/MCF RUN: %run %t-FullCoverageSetTest -merge=1 -merge_control_file=%t/MCF %t/T0 %t/T1 2>&1 | FileCheck %s --check-prefix=CHECK1 CHECK1: MERGE-OUTER: 3 files, 0 in the initial corpus -CHECK1: MERGE-OUTER: 3 new files with {{.*}} new features added; 11 new coverage edges +CHECK1: MERGE-OUTER: 3 new files with {{.*}} new features added; 10 new coverage edges RUN: echo ...Z.. > %t/T2/1 RUN: echo ....E. 
> %t/T2/2 @@ -26,6 +26,6 @@ CHECK2: MERGE-OUTER: non-empty control file provided CHECK2: MERGE-OUTER: control file ok, 3 files total, first not processed file 3 CHECK2: MERGE-OUTER: starting merge from scratch, but reusing coverage information from the given control file CHECK2: MERGE-OUTER: 7 files, 0 in the initial corpus, 3 processed earlier -CHECK2: MERGE-INNER: using the control file +CHECK2: MERGE-INNER: using the control file CHECK2: MERGE-INNER: 4 total files; 0 processed earlier; will process 4 files now -CHECK2: MERGE-OUTER: 6 new files with {{.*}} new features added; 14 new coverage edges +CHECK2: MERGE-OUTER: 6 new files with {{.*}} new features added; 13 new coverage edges From f4570ce442b492d436a0e314c6ea14bf299af0d3 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Fri, 20 May 2022 11:24:24 -0700 Subject: [PATCH 115/908] Adapt C++ std::string dataformatter for D125496 https://reviews.llvm.org/D125496 changed the layout of std::string without updating the LLDB dataformatter. This patch adds code to recognize the new format. Differential Revision: https://reviews.llvm.org/D126080 --- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index f8052f62babd31..cd09c3e7d1e9bd 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -546,7 +546,7 @@ bool lldb_private::formatters::LibcxxContainerSummaryProvider( nullptr, nullptr, &valobj, false, false); } -// the field layout in a libc++ string (cap, side, data or data, size, cap) +/// The field layout in a libc++ string (cap, side, data or data, size, cap). 
enum LibcxxStringLayoutMode { eLibcxxStringLayoutModeCSD = 0, eLibcxxStringLayoutModeDSC = 1, @@ -626,6 +626,9 @@ ExtractLibcxxStringInfo(ValueObject &valobj) { return {}; ValueObjectSP location_sp = short_sp->GetChildAtIndex( (layout == eLibcxxStringLayoutModeDSC) ? 0 : 1, true); + // After D125496, there is a flat layout. + if (location_sp->GetName() == g_size_name) + location_sp = short_sp->GetChildAtIndex(3, true); if (using_bitmasks) size = (layout == eLibcxxStringLayoutModeDSC) ? size_mode_value From 80ac0b9bc8865e6cf6bdc9436405dae1a4ec0e38 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 20 May 2022 11:25:29 -0700 Subject: [PATCH 116/908] Fix up fuzzing test on Windows. 3bd112c720dc fixed the fuzzing test on Linux, which, after https://reviews.llvm.org/D125933, has one less branch. Turns out, on Windows, that it still has the extra branch. I'm guessing that's because exit() isn't known to be noreturn on Windows or something. Either way, just make the test more tolerant. --- compiler-rt/test/fuzzer/merge_two_step.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/fuzzer/merge_two_step.test b/compiler-rt/test/fuzzer/merge_two_step.test index 3634ec62b19531..001a35c8ae3dae 100644 --- a/compiler-rt/test/fuzzer/merge_two_step.test +++ b/compiler-rt/test/fuzzer/merge_two_step.test @@ -10,7 +10,7 @@ RUN: echo ..Z... > %t/T1/3 RUN: rm -f %t/MCF RUN: %run %t-FullCoverageSetTest -merge=1 -merge_control_file=%t/MCF %t/T0 %t/T1 2>&1 | FileCheck %s --check-prefix=CHECK1 CHECK1: MERGE-OUTER: 3 files, 0 in the initial corpus -CHECK1: MERGE-OUTER: 3 new files with {{.*}} new features added; 10 new coverage edges +CHECK1: MERGE-OUTER: 3 new files with {{.*}} new features added; {{.*}} new coverage edges RUN: echo ...Z.. > %t/T2/1 RUN: echo ....E. 
> %t/T2/2 @@ -28,4 +28,4 @@ CHECK2: MERGE-OUTER: starting merge from scratch, but reusing coverage informati CHECK2: MERGE-OUTER: 7 files, 0 in the initial corpus, 3 processed earlier CHECK2: MERGE-INNER: using the control file CHECK2: MERGE-INNER: 4 total files; 0 processed earlier; will process 4 files now -CHECK2: MERGE-OUTER: 6 new files with {{.*}} new features added; 13 new coverage edges +CHECK2: MERGE-OUTER: 6 new files with {{.*}} new features added; {{.*}} new coverage edges From 1fef69da0bfd51de916f0a869f97740c51211cc1 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 20 May 2022 20:29:47 +0200 Subject: [PATCH 117/908] Avoid uninitialized Diag.ID (which we pass but never read) --- clang-tools-extra/clangd/Diagnostics.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/Diagnostics.h b/clang-tools-extra/clangd/Diagnostics.h index 14627ea3d6a017..6004bb147361af 100644 --- a/clang-tools-extra/clangd/Diagnostics.h +++ b/clang-tools-extra/clangd/Diagnostics.h @@ -69,7 +69,7 @@ struct DiagBase { // Since File is only descriptive, we store a separate flag to distinguish // diags from the main file. bool InsideMainFile = false; - unsigned ID; // e.g. member of clang::diag, or clang-tidy assigned ID. + unsigned ID = 0; // e.g. member of clang::diag, or clang-tidy assigned ID. // Feature modules can make use of this field to propagate data from a // diagnostic to a CodeAction request. Each module should only append to the // list. From 9385a6d6eaa3f4caebdee2eb53b5bf4ca1f1b832 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Fri, 20 May 2022 11:36:55 -0700 Subject: [PATCH 118/908] Add some diagnostics to diagnose bot-only failures for TestIgnoredExceptions.py The test for commit bff4673b41781ec5bff6b96b52cf321d2271726c is failing on the GreenDragon bot but none of us can repro the failure locally. Adding some logging to the test failure to help diagnose the issue. 
--- .../API/macosx/ignore_exceptions/TestIgnoredExceptions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lldb/test/API/macosx/ignore_exceptions/TestIgnoredExceptions.py b/lldb/test/API/macosx/ignore_exceptions/TestIgnoredExceptions.py index 378ce7a7b6b014..288602199e2e46 100644 --- a/lldb/test/API/macosx/ignore_exceptions/TestIgnoredExceptions.py +++ b/lldb/test/API/macosx/ignore_exceptions/TestIgnoredExceptions.py @@ -42,6 +42,11 @@ def suspended_thread_test(self): # Now continue, and we should stop with a stop reason of SIGBUS: process.Continue() self.assertEqual(process.state, lldb.eStateStopped, "Stopped after continue to SIGBUS") + if thread.stop_reason == lldb.eStopReasonBreakpoint: + id = thread.GetStopReasonDataAtIndex(0) + name = thread.frame[0].name + self.fail("Hit breakpoint {0} in '{1}' rather than getting a SIGBUS".format(id, name)) + self.assertEqual(thread.stop_reason, lldb.eStopReasonSignal) self.assertEqual(thread.GetStopReasonDataAtIndex(0), 10, "Got a SIGBUS") From 9398caf399ae04d3f331c7d766ee9884f794d4f2 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Fri, 20 May 2022 18:39:21 +0000 Subject: [PATCH 119/908] Recommit "[ConstantRange] Improve the implementation of binaryOr" This recommits https://reviews.llvm.org/rG6990e7477d24ff585ae86549f5280f0be65422a6 as the problematic test has been updated in https://reviews.llvm.org/rG3bd112c720dc614a59e3f34ebf9b45075037bfa0. 
--- llvm/lib/IR/ConstantRange.cpp | 15 +++++----- llvm/unittests/IR/ConstantRangeTest.cpp | 38 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index c3915cee00476d..be6386c571b596 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -1410,14 +1410,13 @@ ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); - // Use APInt's implementation of OR for single element ranges. - if (isSingleElement() && Other.isSingleElement()) - return {*getSingleElement() | *Other.getSingleElement()}; - - // TODO: replace this with something less conservative - - APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); - return getNonEmpty(std::move(umax), APInt::getZero(getBitWidth())); + ConstantRange KnownBitsRange = + fromKnownBits(toKnownBits() | Other.toKnownBits(), false); + // Upper wrapped range. + ConstantRange UMaxUMinRange = + getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), + APInt::getZero(getBitWidth())); + return KnownBitsRange.intersectWith(UMaxUMinRange); } ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const { diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index ffd0f004c7575d..8a543b1f7df41f 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -2560,6 +2560,44 @@ TEST_F(ConstantRangeTest, binaryAnd) { CheckSingleElementsOnly); } +TEST_F(ConstantRangeTest, binaryOr) { + // Single element ranges. + ConstantRange R16(APInt(8, 16)); + ConstantRange R20(APInt(8, 20)); + EXPECT_EQ(*R16.binaryOr(R16).getSingleElement(), APInt(8, 16)); + EXPECT_EQ(*R16.binaryOr(R20).getSingleElement(), APInt(8, 16 | 20)); + + ConstantRange R16_32(APInt(8, 16), APInt(8, 32)); + // 'Or' with a high bits mask. 
+ // KnownBits estimate is important, otherwise the maximum included element + // would be 2^8 - 1. + ConstantRange R32(APInt(8, 32)); + ConstantRange R48_64(APInt(8, 48), APInt(8, 64)); + EXPECT_EQ(R16_32.binaryOr(R32), R48_64); + EXPECT_EQ(R32.binaryOr(R16_32), R48_64); + // 'Or' with a low bits mask. + ConstantRange R4(APInt(8, 4)); + ConstantRange R0_16(APInt(8, 0), APInt(8, 16)); + ConstantRange R4_16(APInt(8, 4), APInt(8, 16)); + EXPECT_EQ(R0_16.binaryOr(R4), R4_16); + EXPECT_EQ(R4.binaryOr(R0_16), R4_16); + + // Ranges with more than one element. Handled conservatively for now. + // UMaxUMin estimate is important, otherwise the lower bound would be zero. + ConstantRange R0_64(APInt(8, 0), APInt(8, 64)); + ConstantRange R5_32(APInt(8, 5), APInt(8, 32)); + ConstantRange R5_64(APInt(8, 5), APInt(8, 64)); + EXPECT_EQ(R0_64.binaryOr(R5_32), R5_64); + EXPECT_EQ(R5_32.binaryOr(R0_64), R5_64); + + TestBinaryOpExhaustive( + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.binaryOr(CR2); + }, + [](const APInt &N1, const APInt &N2) { return N1 | N2; }, PreferSmallest, + CheckSingleElementsOnly); +} + TEST_F(ConstantRangeTest, binaryXor) { // Single element ranges. ConstantRange R16(APInt(8, 16)); From d497129f9bfaeff9fd2a57d75186ef0966758ca4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 20 May 2022 12:02:26 -0700 Subject: [PATCH 120/908] [AArch64] Use proper instruction mnemonics for FPRs The FPR128 regs need MOVIv2d_ns and SVE regs need DUP_ZI_D. 
Differential Revision: https://reviews.llvm.org/D126083 --- .../Target/AArch64/AArch64FrameLowering.cpp | 7 +- .../CodeGen/AArch64/zero-call-used-regs.ll | 324 +++++++++--------- 2 files changed, 168 insertions(+), 163 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 399a4e19ce2f8f..52e782c7eff527 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -793,7 +793,12 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, // Zero out FP/vector registers. for (MCRegister Reg : FPRsToZero.set_bits()) - BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVID), Reg).addImm(0); + if (HasSVE) + BuildMI(MBB, MBBI, DL, TII.get(AArch64::DUP_ZI_D), Reg) + .addImm(0) + .addImm(0); + else + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVIv2d_ns), Reg).addImm(0); if (HasSVE) { for (MCRegister PReg : diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll index b9425a6b8018d0..c39089908713b9 100644 --- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll +++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,DEFAULT -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,DEFAULT +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE @result = dso_local global i32 0, align 4 @@ -146,14 +146,14 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; DEFAULT-NEXT: mov x7, #0 ; DEFAULT-NEXT: mov x8, #0 ; DEFAULT-NEXT: mov x18, #0 -; DEFAULT-NEXT: 
movi q0, #0000000000000000 -; DEFAULT-NEXT: movi q1, #0000000000000000 -; DEFAULT-NEXT: movi q2, #0000000000000000 -; DEFAULT-NEXT: movi q3, #0000000000000000 -; DEFAULT-NEXT: movi q4, #0000000000000000 -; DEFAULT-NEXT: movi q5, #0000000000000000 -; DEFAULT-NEXT: movi q6, #0000000000000000 -; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: movi v0.2d, #0000000000000000 +; DEFAULT-NEXT: movi v1.2d, #0000000000000000 +; DEFAULT-NEXT: movi v2.2d, #0000000000000000 +; DEFAULT-NEXT: movi v3.2d, #0000000000000000 +; DEFAULT-NEXT: movi v4.2d, #0000000000000000 +; DEFAULT-NEXT: movi v5.2d, #0000000000000000 +; DEFAULT-NEXT: movi v6.2d, #0000000000000000 +; DEFAULT-NEXT: movi v7.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; ; SVE-LABEL: all_arg: @@ -169,14 +169,14 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; SVE-NEXT: mov x7, #0 ; SVE-NEXT: mov x8, #0 ; SVE-NEXT: mov x18, #0 -; SVE-NEXT: movi z0, #0000000000000000 -; SVE-NEXT: movi z1, #0000000000000000 -; SVE-NEXT: movi z2, #0000000000000000 -; SVE-NEXT: movi z3, #0000000000000000 -; SVE-NEXT: movi z4, #0000000000000000 -; SVE-NEXT: movi z5, #0000000000000000 -; SVE-NEXT: movi z6, #0000000000000000 -; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: mov z0.d, #0 // =0x0 +; SVE-NEXT: mov z1.d, #0 // =0x0 +; SVE-NEXT: mov z2.d, #0 // =0x0 +; SVE-NEXT: mov z3.d, #0 // =0x0 +; SVE-NEXT: mov z4.d, #0 // =0x0 +; SVE-NEXT: mov z5.d, #0 // =0x0 +; SVE-NEXT: mov z6.d, #0 // =0x0 +; SVE-NEXT: mov z7.d, #0 // =0x0 ; SVE-NEXT: pfalse p0.b ; SVE-NEXT: pfalse p1.b ; SVE-NEXT: pfalse p2.b @@ -212,38 +212,38 @@ define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_ ; DEFAULT-NEXT: mov x16, #0 ; DEFAULT-NEXT: mov x17, #0 ; DEFAULT-NEXT: mov x18, #0 -; DEFAULT-NEXT: movi q0, #0000000000000000 -; DEFAULT-NEXT: movi q1, #0000000000000000 -; DEFAULT-NEXT: movi q2, #0000000000000000 -; DEFAULT-NEXT: movi q3, #0000000000000000 -; DEFAULT-NEXT: movi q4, #0000000000000000 
-; DEFAULT-NEXT: movi q5, #0000000000000000 -; DEFAULT-NEXT: movi q6, #0000000000000000 -; DEFAULT-NEXT: movi q7, #0000000000000000 -; DEFAULT-NEXT: movi q8, #0000000000000000 -; DEFAULT-NEXT: movi q9, #0000000000000000 -; DEFAULT-NEXT: movi q10, #0000000000000000 -; DEFAULT-NEXT: movi q11, #0000000000000000 -; DEFAULT-NEXT: movi q12, #0000000000000000 -; DEFAULT-NEXT: movi q13, #0000000000000000 -; DEFAULT-NEXT: movi q14, #0000000000000000 -; DEFAULT-NEXT: movi q15, #0000000000000000 -; DEFAULT-NEXT: movi q16, #0000000000000000 -; DEFAULT-NEXT: movi q17, #0000000000000000 -; DEFAULT-NEXT: movi q18, #0000000000000000 -; DEFAULT-NEXT: movi q19, #0000000000000000 -; DEFAULT-NEXT: movi q20, #0000000000000000 -; DEFAULT-NEXT: movi q21, #0000000000000000 -; DEFAULT-NEXT: movi q22, #0000000000000000 -; DEFAULT-NEXT: movi q23, #0000000000000000 -; DEFAULT-NEXT: movi q24, #0000000000000000 -; DEFAULT-NEXT: movi q25, #0000000000000000 -; DEFAULT-NEXT: movi q26, #0000000000000000 -; DEFAULT-NEXT: movi q27, #0000000000000000 -; DEFAULT-NEXT: movi q28, #0000000000000000 -; DEFAULT-NEXT: movi q29, #0000000000000000 -; DEFAULT-NEXT: movi q30, #0000000000000000 -; DEFAULT-NEXT: movi q31, #0000000000000000 +; DEFAULT-NEXT: movi v0.2d, #0000000000000000 +; DEFAULT-NEXT: movi v1.2d, #0000000000000000 +; DEFAULT-NEXT: movi v2.2d, #0000000000000000 +; DEFAULT-NEXT: movi v3.2d, #0000000000000000 +; DEFAULT-NEXT: movi v4.2d, #0000000000000000 +; DEFAULT-NEXT: movi v5.2d, #0000000000000000 +; DEFAULT-NEXT: movi v6.2d, #0000000000000000 +; DEFAULT-NEXT: movi v7.2d, #0000000000000000 +; DEFAULT-NEXT: movi v8.2d, #0000000000000000 +; DEFAULT-NEXT: movi v9.2d, #0000000000000000 +; DEFAULT-NEXT: movi v10.2d, #0000000000000000 +; DEFAULT-NEXT: movi v11.2d, #0000000000000000 +; DEFAULT-NEXT: movi v12.2d, #0000000000000000 +; DEFAULT-NEXT: movi v13.2d, #0000000000000000 +; DEFAULT-NEXT: movi v14.2d, #0000000000000000 +; DEFAULT-NEXT: movi v15.2d, #0000000000000000 +; DEFAULT-NEXT: movi v16.2d, 
#0000000000000000 +; DEFAULT-NEXT: movi v17.2d, #0000000000000000 +; DEFAULT-NEXT: movi v18.2d, #0000000000000000 +; DEFAULT-NEXT: movi v19.2d, #0000000000000000 +; DEFAULT-NEXT: movi v20.2d, #0000000000000000 +; DEFAULT-NEXT: movi v21.2d, #0000000000000000 +; DEFAULT-NEXT: movi v22.2d, #0000000000000000 +; DEFAULT-NEXT: movi v23.2d, #0000000000000000 +; DEFAULT-NEXT: movi v24.2d, #0000000000000000 +; DEFAULT-NEXT: movi v25.2d, #0000000000000000 +; DEFAULT-NEXT: movi v26.2d, #0000000000000000 +; DEFAULT-NEXT: movi v27.2d, #0000000000000000 +; DEFAULT-NEXT: movi v28.2d, #0000000000000000 +; DEFAULT-NEXT: movi v29.2d, #0000000000000000 +; DEFAULT-NEXT: movi v30.2d, #0000000000000000 +; DEFAULT-NEXT: movi v31.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; ; SVE-LABEL: all: @@ -268,38 +268,38 @@ define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_ ; SVE-NEXT: mov x16, #0 ; SVE-NEXT: mov x17, #0 ; SVE-NEXT: mov x18, #0 -; SVE-NEXT: movi z0, #0000000000000000 -; SVE-NEXT: movi z1, #0000000000000000 -; SVE-NEXT: movi z2, #0000000000000000 -; SVE-NEXT: movi z3, #0000000000000000 -; SVE-NEXT: movi z4, #0000000000000000 -; SVE-NEXT: movi z5, #0000000000000000 -; SVE-NEXT: movi z6, #0000000000000000 -; SVE-NEXT: movi z7, #0000000000000000 -; SVE-NEXT: movi z8, #0000000000000000 -; SVE-NEXT: movi z9, #0000000000000000 -; SVE-NEXT: movi z10, #0000000000000000 -; SVE-NEXT: movi z11, #0000000000000000 -; SVE-NEXT: movi z12, #0000000000000000 -; SVE-NEXT: movi z13, #0000000000000000 -; SVE-NEXT: movi z14, #0000000000000000 -; SVE-NEXT: movi z15, #0000000000000000 -; SVE-NEXT: movi z16, #0000000000000000 -; SVE-NEXT: movi z17, #0000000000000000 -; SVE-NEXT: movi z18, #0000000000000000 -; SVE-NEXT: movi z19, #0000000000000000 -; SVE-NEXT: movi z20, #0000000000000000 -; SVE-NEXT: movi z21, #0000000000000000 -; SVE-NEXT: movi z22, #0000000000000000 -; SVE-NEXT: movi z23, #0000000000000000 -; SVE-NEXT: movi z24, #0000000000000000 -; SVE-NEXT: movi z25, 
#0000000000000000 -; SVE-NEXT: movi z26, #0000000000000000 -; SVE-NEXT: movi z27, #0000000000000000 -; SVE-NEXT: movi z28, #0000000000000000 -; SVE-NEXT: movi z29, #0000000000000000 -; SVE-NEXT: movi z30, #0000000000000000 -; SVE-NEXT: movi z31, #0000000000000000 +; SVE-NEXT: mov z0.d, #0 // =0x0 +; SVE-NEXT: mov z1.d, #0 // =0x0 +; SVE-NEXT: mov z2.d, #0 // =0x0 +; SVE-NEXT: mov z3.d, #0 // =0x0 +; SVE-NEXT: mov z4.d, #0 // =0x0 +; SVE-NEXT: mov z5.d, #0 // =0x0 +; SVE-NEXT: mov z6.d, #0 // =0x0 +; SVE-NEXT: mov z7.d, #0 // =0x0 +; SVE-NEXT: mov z8.d, #0 // =0x0 +; SVE-NEXT: mov z9.d, #0 // =0x0 +; SVE-NEXT: mov z10.d, #0 // =0x0 +; SVE-NEXT: mov z11.d, #0 // =0x0 +; SVE-NEXT: mov z12.d, #0 // =0x0 +; SVE-NEXT: mov z13.d, #0 // =0x0 +; SVE-NEXT: mov z14.d, #0 // =0x0 +; SVE-NEXT: mov z15.d, #0 // =0x0 +; SVE-NEXT: mov z16.d, #0 // =0x0 +; SVE-NEXT: mov z17.d, #0 // =0x0 +; SVE-NEXT: mov z18.d, #0 // =0x0 +; SVE-NEXT: mov z19.d, #0 // =0x0 +; SVE-NEXT: mov z20.d, #0 // =0x0 +; SVE-NEXT: mov z21.d, #0 // =0x0 +; SVE-NEXT: mov z22.d, #0 // =0x0 +; SVE-NEXT: mov z23.d, #0 // =0x0 +; SVE-NEXT: mov z24.d, #0 // =0x0 +; SVE-NEXT: mov z25.d, #0 // =0x0 +; SVE-NEXT: mov z26.d, #0 // =0x0 +; SVE-NEXT: mov z27.d, #0 // =0x0 +; SVE-NEXT: mov z28.d, #0 // =0x0 +; SVE-NEXT: mov z29.d, #0 // =0x0 +; SVE-NEXT: mov z30.d, #0 // =0x0 +; SVE-NEXT: mov z31.d, #0 // =0x0 ; SVE-NEXT: pfalse p0.b ; SVE-NEXT: pfalse p1.b ; SVE-NEXT: pfalse p2.b @@ -368,14 +368,14 @@ define dso_local double @used_arg_float(double noundef %a, float noundef %b) loc ; DEFAULT: // %bb.0: // %entry ; DEFAULT-NEXT: fcvt d1, s1 ; DEFAULT-NEXT: fmul d0, d1, d0 -; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi v1.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; ; SVE-LABEL: used_arg_float: ; SVE: // %bb.0: // %entry ; SVE-NEXT: fcvt d1, s1 ; SVE-NEXT: fmul d0, d1, d0 -; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: mov z1.d, #0 // =0x0 ; SVE-NEXT: ret entry: @@ -389,14 +389,14 @@ define dso_local 
double @used_float(double noundef %a, float noundef %b) local_u ; DEFAULT: // %bb.0: // %entry ; DEFAULT-NEXT: fcvt d1, s1 ; DEFAULT-NEXT: fmul d0, d1, d0 -; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi v1.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; ; SVE-LABEL: used_float: ; SVE: // %bb.0: // %entry ; SVE-NEXT: fcvt d1, s1 ; SVE-NEXT: fmul d0, d1, d0 -; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: mov z1.d, #0 // =0x0 ; SVE-NEXT: ret entry: @@ -475,13 +475,13 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; DEFAULT-NEXT: mov x7, #0 ; DEFAULT-NEXT: mov x8, #0 ; DEFAULT-NEXT: mov x18, #0 -; DEFAULT-NEXT: movi q1, #0000000000000000 -; DEFAULT-NEXT: movi q2, #0000000000000000 -; DEFAULT-NEXT: movi q3, #0000000000000000 -; DEFAULT-NEXT: movi q4, #0000000000000000 -; DEFAULT-NEXT: movi q5, #0000000000000000 -; DEFAULT-NEXT: movi q6, #0000000000000000 -; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: movi v1.2d, #0000000000000000 +; DEFAULT-NEXT: movi v2.2d, #0000000000000000 +; DEFAULT-NEXT: movi v3.2d, #0000000000000000 +; DEFAULT-NEXT: movi v4.2d, #0000000000000000 +; DEFAULT-NEXT: movi v5.2d, #0000000000000000 +; DEFAULT-NEXT: movi v6.2d, #0000000000000000 +; DEFAULT-NEXT: movi v7.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; ; SVE-LABEL: all_arg_float: @@ -498,13 +498,13 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; SVE-NEXT: mov x7, #0 ; SVE-NEXT: mov x8, #0 ; SVE-NEXT: mov x18, #0 -; SVE-NEXT: movi z1, #0000000000000000 -; SVE-NEXT: movi z2, #0000000000000000 -; SVE-NEXT: movi z3, #0000000000000000 -; SVE-NEXT: movi z4, #0000000000000000 -; SVE-NEXT: movi z5, #0000000000000000 -; SVE-NEXT: movi z6, #0000000000000000 -; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: mov z1.d, #0 // =0x0 +; SVE-NEXT: mov z2.d, #0 // =0x0 +; SVE-NEXT: mov z3.d, #0 // =0x0 +; SVE-NEXT: mov z4.d, #0 // =0x0 +; SVE-NEXT: mov z5.d, #0 // =0x0 +; SVE-NEXT: mov z6.d, #0 // =0x0 
+; SVE-NEXT: mov z7.d, #0 // =0x0 ; SVE-NEXT: pfalse p0.b ; SVE-NEXT: pfalse p1.b ; SVE-NEXT: pfalse p2.b @@ -541,37 +541,37 @@ define dso_local double @all_float(double noundef %a, float noundef %b) local_un ; DEFAULT-NEXT: mov x16, #0 ; DEFAULT-NEXT: mov x17, #0 ; DEFAULT-NEXT: mov x18, #0 -; DEFAULT-NEXT: movi q1, #0000000000000000 -; DEFAULT-NEXT: movi q2, #0000000000000000 -; DEFAULT-NEXT: movi q3, #0000000000000000 -; DEFAULT-NEXT: movi q4, #0000000000000000 -; DEFAULT-NEXT: movi q5, #0000000000000000 -; DEFAULT-NEXT: movi q6, #0000000000000000 -; DEFAULT-NEXT: movi q7, #0000000000000000 -; DEFAULT-NEXT: movi q8, #0000000000000000 -; DEFAULT-NEXT: movi q9, #0000000000000000 -; DEFAULT-NEXT: movi q10, #0000000000000000 -; DEFAULT-NEXT: movi q11, #0000000000000000 -; DEFAULT-NEXT: movi q12, #0000000000000000 -; DEFAULT-NEXT: movi q13, #0000000000000000 -; DEFAULT-NEXT: movi q14, #0000000000000000 -; DEFAULT-NEXT: movi q15, #0000000000000000 -; DEFAULT-NEXT: movi q16, #0000000000000000 -; DEFAULT-NEXT: movi q17, #0000000000000000 -; DEFAULT-NEXT: movi q18, #0000000000000000 -; DEFAULT-NEXT: movi q19, #0000000000000000 -; DEFAULT-NEXT: movi q20, #0000000000000000 -; DEFAULT-NEXT: movi q21, #0000000000000000 -; DEFAULT-NEXT: movi q22, #0000000000000000 -; DEFAULT-NEXT: movi q23, #0000000000000000 -; DEFAULT-NEXT: movi q24, #0000000000000000 -; DEFAULT-NEXT: movi q25, #0000000000000000 -; DEFAULT-NEXT: movi q26, #0000000000000000 -; DEFAULT-NEXT: movi q27, #0000000000000000 -; DEFAULT-NEXT: movi q28, #0000000000000000 -; DEFAULT-NEXT: movi q29, #0000000000000000 -; DEFAULT-NEXT: movi q30, #0000000000000000 -; DEFAULT-NEXT: movi q31, #0000000000000000 +; DEFAULT-NEXT: movi v1.2d, #0000000000000000 +; DEFAULT-NEXT: movi v2.2d, #0000000000000000 +; DEFAULT-NEXT: movi v3.2d, #0000000000000000 +; DEFAULT-NEXT: movi v4.2d, #0000000000000000 +; DEFAULT-NEXT: movi v5.2d, #0000000000000000 +; DEFAULT-NEXT: movi v6.2d, #0000000000000000 +; DEFAULT-NEXT: movi v7.2d, 
#0000000000000000 +; DEFAULT-NEXT: movi v8.2d, #0000000000000000 +; DEFAULT-NEXT: movi v9.2d, #0000000000000000 +; DEFAULT-NEXT: movi v10.2d, #0000000000000000 +; DEFAULT-NEXT: movi v11.2d, #0000000000000000 +; DEFAULT-NEXT: movi v12.2d, #0000000000000000 +; DEFAULT-NEXT: movi v13.2d, #0000000000000000 +; DEFAULT-NEXT: movi v14.2d, #0000000000000000 +; DEFAULT-NEXT: movi v15.2d, #0000000000000000 +; DEFAULT-NEXT: movi v16.2d, #0000000000000000 +; DEFAULT-NEXT: movi v17.2d, #0000000000000000 +; DEFAULT-NEXT: movi v18.2d, #0000000000000000 +; DEFAULT-NEXT: movi v19.2d, #0000000000000000 +; DEFAULT-NEXT: movi v20.2d, #0000000000000000 +; DEFAULT-NEXT: movi v21.2d, #0000000000000000 +; DEFAULT-NEXT: movi v22.2d, #0000000000000000 +; DEFAULT-NEXT: movi v23.2d, #0000000000000000 +; DEFAULT-NEXT: movi v24.2d, #0000000000000000 +; DEFAULT-NEXT: movi v25.2d, #0000000000000000 +; DEFAULT-NEXT: movi v26.2d, #0000000000000000 +; DEFAULT-NEXT: movi v27.2d, #0000000000000000 +; DEFAULT-NEXT: movi v28.2d, #0000000000000000 +; DEFAULT-NEXT: movi v29.2d, #0000000000000000 +; DEFAULT-NEXT: movi v30.2d, #0000000000000000 +; DEFAULT-NEXT: movi v31.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; ; SVE-LABEL: all_float: @@ -597,37 +597,37 @@ define dso_local double @all_float(double noundef %a, float noundef %b) local_un ; SVE-NEXT: mov x16, #0 ; SVE-NEXT: mov x17, #0 ; SVE-NEXT: mov x18, #0 -; SVE-NEXT: movi z1, #0000000000000000 -; SVE-NEXT: movi z2, #0000000000000000 -; SVE-NEXT: movi z3, #0000000000000000 -; SVE-NEXT: movi z4, #0000000000000000 -; SVE-NEXT: movi z5, #0000000000000000 -; SVE-NEXT: movi z6, #0000000000000000 -; SVE-NEXT: movi z7, #0000000000000000 -; SVE-NEXT: movi z8, #0000000000000000 -; SVE-NEXT: movi z9, #0000000000000000 -; SVE-NEXT: movi z10, #0000000000000000 -; SVE-NEXT: movi z11, #0000000000000000 -; SVE-NEXT: movi z12, #0000000000000000 -; SVE-NEXT: movi z13, #0000000000000000 -; SVE-NEXT: movi z14, #0000000000000000 -; SVE-NEXT: movi z15, #0000000000000000 -; 
SVE-NEXT: movi z16, #0000000000000000 -; SVE-NEXT: movi z17, #0000000000000000 -; SVE-NEXT: movi z18, #0000000000000000 -; SVE-NEXT: movi z19, #0000000000000000 -; SVE-NEXT: movi z20, #0000000000000000 -; SVE-NEXT: movi z21, #0000000000000000 -; SVE-NEXT: movi z22, #0000000000000000 -; SVE-NEXT: movi z23, #0000000000000000 -; SVE-NEXT: movi z24, #0000000000000000 -; SVE-NEXT: movi z25, #0000000000000000 -; SVE-NEXT: movi z26, #0000000000000000 -; SVE-NEXT: movi z27, #0000000000000000 -; SVE-NEXT: movi z28, #0000000000000000 -; SVE-NEXT: movi z29, #0000000000000000 -; SVE-NEXT: movi z30, #0000000000000000 -; SVE-NEXT: movi z31, #0000000000000000 +; SVE-NEXT: mov z1.d, #0 // =0x0 +; SVE-NEXT: mov z2.d, #0 // =0x0 +; SVE-NEXT: mov z3.d, #0 // =0x0 +; SVE-NEXT: mov z4.d, #0 // =0x0 +; SVE-NEXT: mov z5.d, #0 // =0x0 +; SVE-NEXT: mov z6.d, #0 // =0x0 +; SVE-NEXT: mov z7.d, #0 // =0x0 +; SVE-NEXT: mov z8.d, #0 // =0x0 +; SVE-NEXT: mov z9.d, #0 // =0x0 +; SVE-NEXT: mov z10.d, #0 // =0x0 +; SVE-NEXT: mov z11.d, #0 // =0x0 +; SVE-NEXT: mov z12.d, #0 // =0x0 +; SVE-NEXT: mov z13.d, #0 // =0x0 +; SVE-NEXT: mov z14.d, #0 // =0x0 +; SVE-NEXT: mov z15.d, #0 // =0x0 +; SVE-NEXT: mov z16.d, #0 // =0x0 +; SVE-NEXT: mov z17.d, #0 // =0x0 +; SVE-NEXT: mov z18.d, #0 // =0x0 +; SVE-NEXT: mov z19.d, #0 // =0x0 +; SVE-NEXT: mov z20.d, #0 // =0x0 +; SVE-NEXT: mov z21.d, #0 // =0x0 +; SVE-NEXT: mov z22.d, #0 // =0x0 +; SVE-NEXT: mov z23.d, #0 // =0x0 +; SVE-NEXT: mov z24.d, #0 // =0x0 +; SVE-NEXT: mov z25.d, #0 // =0x0 +; SVE-NEXT: mov z26.d, #0 // =0x0 +; SVE-NEXT: mov z27.d, #0 // =0x0 +; SVE-NEXT: mov z28.d, #0 // =0x0 +; SVE-NEXT: mov z29.d, #0 // =0x0 +; SVE-NEXT: mov z30.d, #0 // =0x0 +; SVE-NEXT: mov z31.d, #0 // =0x0 ; SVE-NEXT: pfalse p0.b ; SVE-NEXT: pfalse p1.b ; SVE-NEXT: pfalse p2.b From 9886046289fa63d8acf7e50cef59122d4b6ccb1f Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Mon, 16 May 2022 13:01:28 -0700 Subject: [PATCH 121/908] [CodeView] Combine variable def ranges that are 
continuous. It saves about 1.13% size for chrome.dll.pdb on chrome official build. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D125721 --- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 56 +++++++-------- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 70 +++++++++++++------ llvm/test/DebugInfo/COFF/pieces.ll | 6 +- 3 files changed, 77 insertions(+), 55 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index ece4167c791c04..94166922cbac72 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1242,9 +1242,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End); } -CodeViewDebug::LocalVarDefRange +CodeViewDebug::LocalVarDef CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { - LocalVarDefRange DR; + LocalVarDef DR; DR.InMemory = -1; DR.DataOffset = Offset; assert(DR.DataOffset == Offset && "truncation"); @@ -1296,19 +1296,19 @@ void CodeViewDebug::collectVariableInfoFromMFTable( "Frame offsets with a scalable component are not supported"); // Calculate the label ranges. - LocalVarDefRange DefRange = + LocalVarDef DefRange = createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset); + LocalVariable Var; + Var.DIVar = VI.Var; + for (const InsnRange &Range : Scope->getRanges()) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); End = End ? End : Asm->getFunctionEnd(); - DefRange.Ranges.emplace_back(Begin, End); + Var.DefRanges[DefRange].emplace_back(Begin, End); } - LocalVariable Var; - Var.DIVar = VI.Var; - Var.DefRanges.emplace_back(std::move(DefRange)); if (Deref) Var.UseReferenceType = true; @@ -1367,24 +1367,18 @@ void CodeViewDebug::calculateRanges( // We can only handle a register or an offseted load of a register. 
if (Location->Register == 0 || Location->LoadChain.size() > 1) continue; - { - LocalVarDefRange DR; - DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); - DR.InMemory = !Location->LoadChain.empty(); - DR.DataOffset = - !Location->LoadChain.empty() ? Location->LoadChain.back() : 0; - if (Location->FragmentInfo) { - DR.IsSubfield = true; - DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8; - } else { - DR.IsSubfield = false; - DR.StructOffset = 0; - } - if (Var.DefRanges.empty() || - Var.DefRanges.back().isDifferentLocation(DR)) { - Var.DefRanges.emplace_back(std::move(DR)); - } + LocalVarDef DR; + DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); + DR.InMemory = !Location->LoadChain.empty(); + DR.DataOffset = + !Location->LoadChain.empty() ? Location->LoadChain.back() : 0; + if (Location->FragmentInfo) { + DR.IsSubfield = true; + DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8; + } else { + DR.IsSubfield = false; + DR.StructOffset = 0; } // Compute the label range. @@ -1401,7 +1395,7 @@ void CodeViewDebug::calculateRanges( // If the last range end is our begin, just extend the last range. // Otherwise make a new range. SmallVectorImpl> &R = - Var.DefRanges.back().Ranges; + Var.DefRanges[DR]; if (!R.empty() && R.back().second == Begin) R.back().second = End; else @@ -2814,7 +2808,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, // records and on disk formats are described in SymbolRecords.h. BytePrefix // should be big enough to hold all forms without memory allocation. 
SmallString<20> BytePrefix; - for (const LocalVarDefRange &DefRange : Var.DefRanges) { + for (const auto &Pair : Var.DefRanges) { + LocalVarDef DefRange = Pair.first; + const auto &Ranges = Pair.second; BytePrefix.clear(); if (DefRange.InMemory) { int Offset = DefRange.DataOffset; @@ -2838,7 +2834,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, : (EncFP == FI.EncodedLocalFramePtrReg))) { DefRangeFramePointerRelHeader DRHdr; DRHdr.Offset = Offset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } else { uint16_t RegRelFlags = 0; if (DefRange.IsSubfield) { @@ -2850,7 +2846,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = Reg; DRHdr.Flags = RegRelFlags; DRHdr.BasePointerOffset = Offset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); @@ -2859,12 +2855,12 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DefRange.StructOffset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } else { DefRangeRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } } } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index d1fc3cdccb207f..16f0082723ed0e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -50,18 +50,8 @@ class MachineFunction; /// Collects and handles line tables information in a CodeView format. 
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { - MCStreamer &OS; - BumpPtrAllocator Allocator; - codeview::GlobalTypeTableBuilder TypeTable; - - /// Whether to emit type record hashes into .debug$H. - bool EmitDebugGlobalHashes = false; - - /// The codeview CPU type used by the translation unit. - codeview::CPUType TheCPU; - - /// Represents the most general definition range. - struct LocalVarDefRange { +public: + struct LocalVarDef { /// Indicates that variable data is stored in memory relative to the /// specified register. int InMemory : 1; @@ -79,23 +69,40 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// location containing the data. uint16_t CVRegister; - /// Compares all location fields. This includes all fields except the label - /// ranges. - bool isDifferentLocation(LocalVarDefRange &O) { - return InMemory != O.InMemory || DataOffset != O.DataOffset || - IsSubfield != O.IsSubfield || StructOffset != O.StructOffset || - CVRegister != O.CVRegister; + uint64_t static toOpaqueValue(const LocalVarDef DR) { + uint64_t Val = 0; + std::memcpy(&Val, &DR, sizeof(Val)); + return Val; } - SmallVector, 1> Ranges; + LocalVarDef static createFromOpaqueValue(uint64_t Val) { + LocalVarDef DR; + std::memcpy(&DR, &Val, sizeof(Val)); + return DR; + } }; - static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset); + static_assert(sizeof(uint64_t) == sizeof(LocalVarDef), ""); + +private: + MCStreamer &OS; + BumpPtrAllocator Allocator; + codeview::GlobalTypeTableBuilder TypeTable; + + /// Whether to emit type record hashes into .debug$H. + bool EmitDebugGlobalHashes = false; + + /// The codeview CPU type used by the translation unit. + codeview::CPUType TheCPU; + + static LocalVarDef createDefRangeMem(uint16_t CVRegister, int Offset); /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific. 
struct LocalVariable { const DILocalVariable *DIVar = nullptr; - SmallVector DefRanges; + MapVector, 1>> + DefRanges; bool UseReferenceType = false; }; @@ -493,6 +500,27 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void beginInstruction(const MachineInstr *MI) override; }; +template <> struct DenseMapInfo { + + static inline CodeViewDebug::LocalVarDef getEmptyKey() { + return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL); + } + + static inline CodeViewDebug::LocalVarDef getTombstoneKey() { + return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL - 1ULL); + } + + static unsigned getHashValue(const CodeViewDebug::LocalVarDef &DR) { + return CodeViewDebug::LocalVarDef::toOpaqueValue(DR) * 37ULL; + } + + static bool isEqual(const CodeViewDebug::LocalVarDef &LHS, + const CodeViewDebug::LocalVarDef &RHS) { + return CodeViewDebug::LocalVarDef::toOpaqueValue(LHS) == + CodeViewDebug::LocalVarDef::toOpaqueValue(RHS); + } +}; + } // end namespace llvm #endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H diff --git a/llvm/test/DebugInfo/COFF/pieces.ll b/llvm/test/DebugInfo/COFF/pieces.ll index 76c330a996b2f5..54e266c3d5975d 100644 --- a/llvm/test/DebugInfo/COFF/pieces.ll +++ b/llvm/test/DebugInfo/COFF/pieces.ll @@ -114,10 +114,8 @@ ; ASM: .asciz "loop_csr" # Function name ; ASM: .short 4414 # Record kind: S_LOCAL ; ASM: .asciz "o" -; ASM: .cv_def_range [[oy_ox_start]] [[ox_start]], subfield_reg, 24, 0 -; ASM: .cv_def_range [[oy_ox_start]] [[oy_start]], subfield_reg, 23, 4 -; ASM: .cv_def_range [[ox_start]] [[loopskip_start]], subfield_reg, 24, 0 -; ASM: .cv_def_range [[oy_start]] [[loopskip_start]], subfield_reg, 23, 4 +; ASM: .cv_def_range [[oy_ox_start]] [[loopskip_start]], subfield_reg, 24, 0 +; ASM: .cv_def_range [[oy_ox_start]] [[loopskip_start]], subfield_reg, 23, 4 ; OBJ-LABEL: GlobalProcIdSym { From 665bfbb98daa0e40a2e6cecfbbfd6949980d8f2f Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 20 May 2022 21:12:39 
+0200 Subject: [PATCH 122/908] Reland "[clang-tidy] modernize-deprecated-headers check should respect extern "C" blocks"" This partially reverts commit e8cae487022c2216182ae1ec24f248f287a614b7. Changes since that commit: - Use `SourceManager::isBeforeInTranslationUnit` instead of the fancy decomposed decl logarithmic search. - Add a test for including a system header containing a deprecated include. - Add `REQUIRES: system-linux` clause to the test. Reviewed By: LegalizeAdulthood, whisperity Differential Revision: https://reviews.llvm.org/D125209 --- .../modernize/DeprecatedHeadersCheck.cpp | 113 ++++++++++++++---- .../modernize/DeprecatedHeadersCheck.h | 16 ++- clang-tools-extra/docs/ReleaseNotes.rst | 5 + .../modernize-deprecated-headers/mylib.h | 1 + .../mysystemlib.h | 1 + .../modernize-deprecated-headers-extern-c.cpp | 66 ++++++++++ 6 files changed, 180 insertions(+), 22 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mylib.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mysystemlib.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp index 0b44bdaafe23ed..ee78747d397ff0 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp @@ -7,23 +7,27 @@ //===----------------------------------------------------------------------===// #include "DeprecatedHeadersCheck.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" +#include #include +using IncludeMarker = + 
clang::tidy::modernize::DeprecatedHeadersCheck::IncludeMarker; namespace clang { namespace tidy { namespace modernize { - namespace { + class IncludeModernizePPCallbacks : public PPCallbacks { public: - explicit IncludeModernizePPCallbacks(ClangTidyCheck &Check, - LangOptions LangOpts); + explicit IncludeModernizePPCallbacks( + std::vector &IncludesToBeProcessed, LangOptions LangOpts); void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, @@ -33,22 +37,95 @@ class IncludeModernizePPCallbacks : public PPCallbacks { SrcMgr::CharacteristicKind FileType) override; private: - ClangTidyCheck &Check; + std::vector &IncludesToBeProcessed; LangOptions LangOpts; llvm::StringMap CStyledHeaderToCxx; llvm::StringSet<> DeleteHeaders; }; + +class ExternCRefutationVisitor + : public RecursiveASTVisitor { + std::vector &IncludesToBeProcessed; + const SourceManager &SM; + +public: + ExternCRefutationVisitor(std::vector &IncludesToBeProcessed, + SourceManager &SM) + : IncludesToBeProcessed(IncludesToBeProcessed), SM(SM) {} + bool shouldWalkTypesOfTypeLocs() const { return false; } + bool shouldVisitLambdaBody() const { return false; } + + bool VisitLinkageSpecDecl(LinkageSpecDecl *LinkSpecDecl) const { + if (LinkSpecDecl->getLanguage() != LinkageSpecDecl::lang_c || + !LinkSpecDecl->hasBraces()) + return true; + + auto ExternCBlockBegin = LinkSpecDecl->getBeginLoc(); + auto ExternCBlockEnd = LinkSpecDecl->getEndLoc(); + auto IsWrapped = [=, &SM = SM](const IncludeMarker &Marker) -> bool { + return SM.isBeforeInTranslationUnit(ExternCBlockBegin, Marker.DiagLoc) && + SM.isBeforeInTranslationUnit(Marker.DiagLoc, ExternCBlockEnd); + }; + + llvm::erase_if(IncludesToBeProcessed, IsWrapped); + return true; + } +}; } // namespace +DeprecatedHeadersCheck::DeprecatedHeadersCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void DeprecatedHeadersCheck::registerPPCallbacks( const SourceManager &SM, 
Preprocessor *PP, Preprocessor *ModuleExpanderPP) { - PP->addPPCallbacks( - ::std::make_unique(*this, getLangOpts())); + PP->addPPCallbacks(std::make_unique( + IncludesToBeProcessed, getLangOpts())); +} +void DeprecatedHeadersCheck::registerMatchers( + ast_matchers::MatchFinder *Finder) { + // Even though the checker operates on a "preprocessor" level, we still need + // to act on a "TranslationUnit" to acquire the AST where we can walk each + // Decl and look for `extern "C"` blocks where we will suppress the report we + // collected during the preprocessing phase. + // The `onStartOfTranslationUnit()` won't suffice, since we need some handle + // to the `ASTContext`. + Finder->addMatcher(ast_matchers::translationUnitDecl().bind("TU"), this); +} + +void DeprecatedHeadersCheck::onEndOfTranslationUnit() { + IncludesToBeProcessed.clear(); +} + +void DeprecatedHeadersCheck::check( + const ast_matchers::MatchFinder::MatchResult &Result) { + SourceManager &SM = Result.Context->getSourceManager(); + + // Suppress includes wrapped by `extern "C" { ... }` blocks. + ExternCRefutationVisitor Visitor(IncludesToBeProcessed, SM); + Visitor.TraverseAST(*Result.Context); + + // Emit all the remaining reports. 
+ for (const IncludeMarker &Marker : IncludesToBeProcessed) { + if (Marker.Replacement.empty()) { + diag(Marker.DiagLoc, + "including '%0' has no effect in C++; consider removing it") + << Marker.FileName + << FixItHint::CreateRemoval(Marker.ReplacementRange); + } else { + diag(Marker.DiagLoc, "inclusion of deprecated C++ header " + "'%0'; consider using '%1' instead") + << Marker.FileName << Marker.Replacement + << FixItHint::CreateReplacement( + Marker.ReplacementRange, + (llvm::Twine("<") + Marker.Replacement + ">").str()); + } + } } -IncludeModernizePPCallbacks::IncludeModernizePPCallbacks(ClangTidyCheck &Check, - LangOptions LangOpts) - : Check(Check), LangOpts(LangOpts) { +IncludeModernizePPCallbacks::IncludeModernizePPCallbacks( + std::vector &IncludesToBeProcessed, LangOptions LangOpts) + : IncludesToBeProcessed(IncludesToBeProcessed), LangOpts(LangOpts) { for (const auto &KeyValue : std::vector>( {{"assert.h", "cassert"}, @@ -101,19 +178,15 @@ void IncludeModernizePPCallbacks::InclusionDirective( // 1. Insert std prefix for every such symbol occurrence. // 2. Insert `using namespace std;` to the beginning of TU. // 3. Do nothing and let the user deal with the migration himself. 
+ SourceLocation DiagLoc = FilenameRange.getBegin(); if (CStyledHeaderToCxx.count(FileName) != 0) { - std::string Replacement = - (llvm::Twine("<") + CStyledHeaderToCxx[FileName] + ">").str(); - Check.diag(FilenameRange.getBegin(), "inclusion of deprecated C++ header " - "'%0'; consider using '%1' instead") - << FileName << CStyledHeaderToCxx[FileName] - << FixItHint::CreateReplacement(FilenameRange.getAsRange(), - Replacement); + IncludesToBeProcessed.push_back( + IncludeMarker{CStyledHeaderToCxx[FileName], FileName, + FilenameRange.getAsRange(), DiagLoc}); } else if (DeleteHeaders.count(FileName) != 0) { - Check.diag(FilenameRange.getBegin(), - "including '%0' has no effect in C++; consider removing it") - << FileName << FixItHint::CreateRemoval( - SourceRange(HashLoc, FilenameRange.getEnd())); + IncludesToBeProcessed.push_back( + IncludeMarker{std::string{}, FileName, + SourceRange{HashLoc, FilenameRange.getEnd()}, DiagLoc}); } } diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h index 113b0ca182e0fe..2af626d42129bd 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h @@ -34,13 +34,25 @@ namespace modernize { /// http://clang.llvm.org/extra/clang-tidy/checks/modernize-deprecated-headers.html class DeprecatedHeadersCheck : public ClangTidyCheck { public: - DeprecatedHeadersCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + DeprecatedHeadersCheck(StringRef Name, ClangTidyContext *Context); bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { return LangOpts.CPlusPlus; } void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void onEndOfTranslationUnit() override; + void check(const 
ast_matchers::MatchFinder::MatchResult &Result) override; + + struct IncludeMarker { + std::string Replacement; + StringRef FileName; + SourceRange ReplacementRange; + SourceLocation DiagLoc; + }; + +private: + std::vector IncludesToBeProcessed; }; } // namespace modernize diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 00124b0cad75dc..806e5d10a5fdc1 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -170,6 +170,11 @@ Changes in existing checks ` involving assignments in conditions. This fixes `Issue 35853 `_. +- Fixed a false positive in :doc:`modernize-deprecated-headers + ` involving including + C header files from C++ files wrapped by ``extern "C" { ... }`` blocks. + Such includes will be ignored by now. + - Improved :doc:`performance-inefficient-vector-operation ` to work when the vector is a member of a structure. diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mylib.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mylib.h new file mode 100644 index 00000000000000..f5711e7692e70b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mylib.h @@ -0,0 +1 @@ +#include diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mysystemlib.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mysystemlib.h new file mode 100644 index 00000000000000..f5711e7692e70b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/modernize-deprecated-headers/mysystemlib.h @@ -0,0 +1 @@ +#include diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp new file mode 100644 index 00000000000000..458c329de7743f --- /dev/null +++ 
b/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp @@ -0,0 +1,66 @@ + +// Copy the 'mylib.h' to a directory under the build directory. This is +// required, since the relative order of the emitted diagnostics depends on the +// absolute file paths which is sorted by clang-tidy prior emitting. +// +// RUN: mkdir -p %t/sys && mkdir -p %t/usr \ +// RUN: && cp %S/Inputs/modernize-deprecated-headers/mysystemlib.h %t/sys/mysystemlib.h \ +// RUN: && cp %S/Inputs/modernize-deprecated-headers/mylib.h %t/usr/mylib.h + +// RUN: %check_clang_tidy -std=c++11 %s modernize-deprecated-headers %t \ +// RUN: --header-filter='.*' --system-headers \ +// RUN: -- -I %t/usr -isystem %t/sys -isystem %S/Inputs/modernize-deprecated-headers + +// REQUIRES: system-linux + +#define EXTERN_C extern "C" + +extern "C++" { +// We should still have the warnings here. +#include +// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: including 'stdbool.h' has no effect in C++; consider removing it [modernize-deprecated-headers] +} + +#include +// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] + +#include +// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: including 'stdbool.h' has no effect in C++; consider removing it [modernize-deprecated-headers] + +#include // FIXME: We should have no warning into system headers. 
+// CHECK-MESSAGES: mysystemlib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] + +#include +// CHECK-MESSAGES: mylib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] + +namespace wrapping { +extern "C" { +#include // no-warning +#include // no-warning +#include // no-warning +} +} // namespace wrapping + +extern "C" { +namespace wrapped { +#include // no-warning +#include // no-warning +#include // no-warning +} // namespace wrapped +} + +namespace wrapping { +extern "C" { +namespace wrapped { +#include // no-warning +#include // no-warning +#include // no-warning +} // namespace wrapped +} +} // namespace wrapping + +EXTERN_C { +#include // no-warning +#include // no-warning +#include // no-warning +} From 20ec4161d7c9e146c844fadf5af2a44afb9e7a60 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 20 May 2022 15:37:55 -0400 Subject: [PATCH 123/908] [Libomptarget] Add branch prediction intrinsic to state check Summary: We usually used the `OMP_LIKELY` and `OMP_UNLIKELY` macros to add branch prediction intrinsics to help the optimizer ignore unlikely loops. This wasn't applied to this one loop so add that in. 
--- openmp/libomptarget/DeviceRTL/src/State.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp index f299aa0a4a27f1..c8910635278ff2 100644 --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -289,7 +289,7 @@ uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) { TeamState.ICVState.LevelVar == 0)) return TeamState.ICVState.*Var; uint32_t TId = mapping::getThreadIdInBlock(); - if (!ThreadStates[TId]) { + if (OMP_UNLIKELY(!ThreadStates[TId])) { ThreadStates[TId] = reinterpret_cast(memory::allocGlobal( sizeof(ThreadStateTy), "ICV modification outside data environment")); ASSERT(ThreadStates[TId] != nullptr && "Nullptr returned by malloc!"); From 0606467ea122da5cb23a588e1222a4140445734a Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 20 May 2022 21:41:25 +0200 Subject: [PATCH 124/908] [clang-tidy] Introduce the CheckHeaderFile option to modernize-deprecated-headers Unfortunately, we must restrict the checker to warn for deprecated headers only if the header is included directly from a C++ source file. For header files, we cannot know if the project has a C source file that also directly/indirectly includes the offending header file otherwise. Thus, it's better to be on the safe side and suppress those reports. One can opt in to the old behavior, emitting diagnostics into header files, by explicitly setting CheckHeaderFile=true, in which case nothing will be changed. 
Reviewed By: LegalizeAdulthood Differential Revision: https://reviews.llvm.org/D125769 --- .../modernize/DeprecatedHeadersCheck.cpp | 27 ++++++++++++--- .../modernize/DeprecatedHeadersCheck.h | 2 ++ clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ .../checks/modernize-deprecated-headers.rst | 34 +++++++++++++++++++ .../modernize-deprecated-headers-extern-c.cpp | 17 +++++++--- 5 files changed, 74 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp index ee78747d397ff0..72508a99e4a1a3 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp @@ -27,7 +27,8 @@ namespace { class IncludeModernizePPCallbacks : public PPCallbacks { public: explicit IncludeModernizePPCallbacks( - std::vector &IncludesToBeProcessed, LangOptions LangOpts); + std::vector &IncludesToBeProcessed, LangOptions LangOpts, + const SourceManager &SM, bool CheckHeaderFile); void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, @@ -41,6 +42,8 @@ class IncludeModernizePPCallbacks : public PPCallbacks { LangOptions LangOpts; llvm::StringMap CStyledHeaderToCxx; llvm::StringSet<> DeleteHeaders; + const SourceManager &SM; + bool CheckHeaderFile; }; class ExternCRefutationVisitor @@ -75,12 +78,18 @@ class ExternCRefutationVisitor DeprecatedHeadersCheck::DeprecatedHeadersCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + : ClangTidyCheck(Name, Context), + CheckHeaderFile(Options.get("CheckHeaderFile", false)) {} + +void DeprecatedHeadersCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "CheckHeaderFile", CheckHeaderFile); +} void DeprecatedHeadersCheck::registerPPCallbacks( const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { 
PP->addPPCallbacks(std::make_unique( - IncludesToBeProcessed, getLangOpts())); + IncludesToBeProcessed, getLangOpts(), PP->getSourceManager(), + CheckHeaderFile)); } void DeprecatedHeadersCheck::registerMatchers( ast_matchers::MatchFinder *Finder) { @@ -124,8 +133,10 @@ void DeprecatedHeadersCheck::check( } IncludeModernizePPCallbacks::IncludeModernizePPCallbacks( - std::vector &IncludesToBeProcessed, LangOptions LangOpts) - : IncludesToBeProcessed(IncludesToBeProcessed), LangOpts(LangOpts) { + std::vector &IncludesToBeProcessed, LangOptions LangOpts, + const SourceManager &SM, bool CheckHeaderFile) + : IncludesToBeProcessed(IncludesToBeProcessed), LangOpts(LangOpts), SM(SM), + CheckHeaderFile(CheckHeaderFile) { for (const auto &KeyValue : std::vector>( {{"assert.h", "cassert"}, @@ -171,6 +182,12 @@ void IncludeModernizePPCallbacks::InclusionDirective( bool IsAngled, CharSourceRange FilenameRange, Optional File, StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) { + + // If we don't want to warn for non-main file reports and this is one, skip + // it. + if (!CheckHeaderFile && !SM.isInMainFile(HashLoc)) + return; + // FIXME: Take care of library symbols from the global namespace. 
// // Reasonable options for the check: diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h index 2af626d42129bd..e5023c3f47ed5e 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h @@ -38,6 +38,7 @@ class DeprecatedHeadersCheck : public ClangTidyCheck { bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { return LangOpts.CPlusPlus; } + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) override; void registerMatchers(ast_matchers::MatchFinder *Finder) override; @@ -53,6 +54,7 @@ class DeprecatedHeadersCheck : public ClangTidyCheck { private: std::vector IncludesToBeProcessed; + bool CheckHeaderFile; }; } // namespace modernize diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 806e5d10a5fdc1..41a9723ffeac8b 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -174,6 +174,10 @@ Changes in existing checks ` involving including C header files from C++ files wrapped by ``extern "C" { ... }`` blocks. Such includes will be ignored by now. + By default now it doesn't warn for including deprecated headers from header + files, since that header file might be used from C source files. Pass the + ``CheckHeaderFile=true`` option to restore these warnings if the header files + of the project are only included by C++ source files. 
- Improved :doc:`performance-inefficient-vector-operation ` to work when diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-deprecated-headers.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-deprecated-headers.rst index 9cdd1d44f6012d..974a56abd97dd2 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize-deprecated-headers.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-deprecated-headers.rst @@ -10,6 +10,17 @@ Standard [depr.c.headers] section. This check replaces C standard library headers with their C++ alternatives and removes redundant ones. +.. code-block:: c++ + + // C++ source file... + #include + #include + + // becomes + + #include + // No 'stdbool.h' here. + Important note: the Standard doesn't guarantee that the C++ headers declare all the same functions in the global namespace. The check in its current form can break the code that uses library symbols from the global namespace. @@ -47,3 +58,26 @@ These headers don't have effect in C++: * `` * `` * `` + +The checker ignores `include` directives within `extern "C" { ... }` blocks, +since a library might want to expose some API for C and C++ libraries. + +.. code-block:: c++ + + // C++ source file... + extern "C" { + #include // Left intact. + #include // Left intact. + } + +Options +------- + +.. option:: CheckHeaderFile + + `clang-tidy` cannot know if the header file included by the currently + analyzed C++ source file is not included by any other C source files. + Hence, to omit false-positives and wrong fixit-hints, we ignore emitting + reports into header files. One can set this option to `true` if they know + that the header files in the project are only used by C++ source file. + Default is `false`. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp index 458c329de7743f..98d66ce00ceb26 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp @@ -8,6 +8,13 @@ // RUN: && cp %S/Inputs/modernize-deprecated-headers/mylib.h %t/usr/mylib.h // RUN: %check_clang_tidy -std=c++11 %s modernize-deprecated-headers %t \ +// RUN: -check-suffixes=DEFAULT \ +// RUN: --header-filter='.*' --system-headers \ +// RUN: -- -I %t/usr -isystem %t/sys -isystem %S/Inputs/modernize-deprecated-headers + +// RUN: %check_clang_tidy -std=c++11 %s modernize-deprecated-headers %t \ +// RUN: -check-suffixes=DEFAULT,CHECK-HEADER-FILE \ +// RUN: -config="{CheckOptions: [{key: modernize-deprecated-headers.CheckHeaderFile, value: 'true'}]}" \ // RUN: --header-filter='.*' --system-headers \ // RUN: -- -I %t/usr -isystem %t/sys -isystem %S/Inputs/modernize-deprecated-headers @@ -18,20 +25,20 @@ extern "C++" { // We should still have the warnings here. 
#include -// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: including 'stdbool.h' has no effect in C++; consider removing it [modernize-deprecated-headers] +// CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:10: warning: including 'stdbool.h' has no effect in C++; consider removing it [modernize-deprecated-headers] } #include -// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] +// CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] #include -// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: including 'stdbool.h' has no effect in C++; consider removing it [modernize-deprecated-headers] +// CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:10: warning: including 'stdbool.h' has no effect in C++; consider removing it [modernize-deprecated-headers] #include // FIXME: We should have no warning into system headers. -// CHECK-MESSAGES: mysystemlib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] +// CHECK-MESSAGES-CHECK-HEADER-FILE: mysystemlib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] #include -// CHECK-MESSAGES: mylib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] +// CHECK-MESSAGES-CHECK-HEADER-FILE: mylib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] namespace wrapping { extern "C" { From 6fa82e344c291cf0e1134b357b1f09fd2dcd3f4f Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 20 May 2022 21:41:25 +0200 Subject: [PATCH 125/908] [clang-tidy] modernize-deprecated-headers should ignore system headers The end-user has no way of 'fixing' bugs in the system library 
anyway. Let's suppress these as well. Reviewed By: LegalizeAdulthood Differential Revision: https://reviews.llvm.org/D125770 --- .../clang-tidy/modernize/DeprecatedHeadersCheck.cpp | 4 ++++ .../checkers/modernize-deprecated-headers-extern-c.cpp | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp index 72508a99e4a1a3..64e70ad746c59a 100644 --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.cpp @@ -188,6 +188,10 @@ void IncludeModernizePPCallbacks::InclusionDirective( if (!CheckHeaderFile && !SM.isInMainFile(HashLoc)) return; + // Ignore system headers. + if (SM.isInSystemHeader(HashLoc)) + return; + // FIXME: Take care of library symbols from the global namespace. // // Reasonable options for the check: diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp index 98d66ce00ceb26..1193c2ad0a9965 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-deprecated-headers-extern-c.cpp @@ -34,8 +34,7 @@ extern "C++" { #include // CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:10: warning: including 'stdbool.h' has no effect in C++; consider removing it [modernize-deprecated-headers] -#include // FIXME: We should have no warning into system headers. -// CHECK-MESSAGES-CHECK-HEADER-FILE: mysystemlib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] +#include // no-warning: Don't warn into system headers. 
#include // CHECK-MESSAGES-CHECK-HEADER-FILE: mylib.h:1:10: warning: inclusion of deprecated C++ header 'assert.h'; consider using 'cassert' instead [modernize-deprecated-headers] From b517d679dd69a30ed84e9782a2e902cf4284ebc7 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 20 May 2022 13:06:44 -0700 Subject: [PATCH 126/908] Mark new TSan test as unsupported on PPC. Notably fails under PPC. For now, just exclude it. More details in the original Phabricator review, https://reviews.llvm.org/D110552. --- compiler-rt/test/tsan/lock_free_stack.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler-rt/test/tsan/lock_free_stack.cpp b/compiler-rt/test/tsan/lock_free_stack.cpp index 5477adcfdf9767..3ae6ced95b800e 100755 --- a/compiler-rt/test/tsan/lock_free_stack.cpp +++ b/compiler-rt/test/tsan/lock_free_stack.cpp @@ -1,5 +1,11 @@ // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s // RUN: %clangxx_tsan -O1 %s -DRACE -o %t && %deflake %run %t | FileCheck %s --check-prefix=CHECK-RACE + +// Found in the post-submit testing under PPC (documented in +// https://reviews.llvm.org/D110552), this test fails under PowerPC. Should be +// investigated at some point. +// UNSUPPORTED: ppc + #include "test.h" const int kThreadCount = 4; From ade5b55af5747413dab8dd57896532fdee19cdf5 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Fri, 20 May 2022 16:03:20 -0400 Subject: [PATCH 127/908] Add a page to track C defect report status We currently have a page for tracking defects against the C++ standard, but we don't have the same information for the C standard. This starts us down the path of being able to track that in a way our users can see. There are *a lot* of entries marked as "Unknown". As we validate Clang's behavior for a given DR by adding a test case for it, we can slowly begin to improve this page over time. 
This page is now linked from the existing C status page, which was updated slightly as a result of these changes as well. Note, unlike with the C++ defect report page, this content is not auto- generated from a source document and is not automatically updated from test comments. It may be worthwhile to automate the updates based on our test coverage, but that can happen later. --- clang/www/c_dr_status.html | 2620 ++++++++++++++++++++++++++++++++++++ clang/www/c_status.html | 8 +- 2 files changed, 2624 insertions(+), 4 deletions(-) create mode 100644 clang/www/c_dr_status.html diff --git a/clang/www/c_dr_status.html b/clang/www/c_dr_status.html new file mode 100644 index 00000000000000..b92cf60604955f --- /dev/null +++ b/clang/www/c_dr_status.html @@ -0,0 +1,2620 @@ + + + + + Clang - C Defect Report Status + + + + + + + + +

+ + +

C Defect Report Support in Clang

+ + +

C defect report implementation status

+ +

This page tracks which C defect reports are implemented within Clang.

+ +

The implementation status for defect reports against the C Standard is +currently under investigation. Any defect report whose status in Clang is +currently unknown will be marked in magenta.

+ +

The LLVM bug tracker uses +the "c", "c99", "c11", "c17", and "c2x" labels to track known bugs with Clang's language +conformance.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NumberStatusIssue titleAvailable in Clang?
1C89Do functions return values by copying?Unknown
2NADSubclause 6.8.3.2: Semantics of #Unknown
3NADSubclause 6.1.8: Preprocessing numbersUnknown
4NADAre multiple definitions of unused identifiers with external linkage permitted?Unknown
5C89May a conforming implementation define and recognize a pragma which would change the semantics of the language?Unknown
6C89It is unclear how the strtoul function behaves when presented with a subject sequence that begins with a minus signN/A
7NADAre declarations of the form struct-or-union identifier ; permitted after the identifier tag has already been declared?Unknown
8NADCan a conforming C compiler perform dead-store elimination?Unknown
9C89Use of typedef names in parameter declarationsUnknown
10NADIs a typedef to an incomplete type legal?Unknown
11C89Merging of declarations for linked identifierUnknown
12NADQuestions on the validity of various expressionsUnknown
13C89Compatible and composite function typesUnknown
14C89Issues with setjmp and fscanf descriptionsN/A
15NADWhat is the promoted type of a plain int bit-field?Unknown
16C89What does static storage duration do when zero for the type is not all zero bits?Unknown
17C8939 unrelated questions about C89Unknown
18NADHow does fscanf behave in the presence of multibyte characters?N/A
19NADDefinition of the term "printing character" and isgraph()N/A
20NADIs a compiler which allows the Relaxed Ref/Def linkage model to be considered a conforming compiler?Unknown
21C89What is the result of: printf("%#.4o", 345);?N/A
22C89What is the result of: strtod("100ergs", &ptr);?N/A
23NADwhat is the result of strtod("0.0e99999", &ptr);?N/A
24NADIn subclause 7.10.1.4 The strtod function: What does '"C" locale' mean?N/A
25NADWhat is meant by 'representable floating-point value?'Unknown
26NADCan a strictly conforming program contain a string literal with '$' or '@'?Unknown
27C89Can there be characters in the character set that are not in the required source character set?Unknown
28NADDo object access rules apply to dynamically allocated objects?Unknown
29NADDo two types have to have the same tag to be compatible?Unknown
30NADCan 'sin(DBL_MAX)' result in 'errno' being set to 'EDOM'?N/A
31NADCan constant expressions overflow?Unknown
32NADMust implementations diagnose extensions to the constant evaluation rules?Unknown
33NADConformance questions around 'shall' violations outside of constraints sectionsUnknown
34C89External declarations in different scopesUnknown
35NADQuestions about definition of functions without a prototypeUnknown
36NADMay floating-point constants be represented with more precision than implied by its type?Unknown
37NADQuestions about multibyte characters and UnicodeUnknown
38NADQuestions about argument substitution during macro expansionUnknown
39NADQuestions about the "C" localeUnknown
40NAD9 unrelated questions about C89Unknown
41NADDo characters defined in 5.2.1 impact 7.3.1?N/A
42NADOn the behavior of library functions and overlapping objectsN/A
43C89On the definition of the NULL macroUnknown
44NADOn the result of the offsetof macroUnknown
45NADIs the behavior of freopen defined when the file is invalid?N/A
46NADUse of typedef names in parameter declarationsUnknown
47NADQuestions about declaration conformanceUnknown
48NADClarifications on the abort() functionN/A
49C89Can strxfrm() output more characters than were input?N/A
50NADDo wide string literals implicitly include <stddef.h>?Unknown
51NADQuestion on pointer arithmeticUnknown
52C89Editorial correctionsUnknown
53C89Accessing a pointer to a function with a prototype through a pointer to pointer to function without a prototypeUnknown
54C89Can the string handling functions have a length of 0?N/A
55C89Signal handler macros should have distinct valuesN/A
56NADFloating-point representation precision requirementsUnknown
57NADIs there an integral type for every pointer?Unknown
58NADIs there a limit on the number of digits processed by scanf and strtod?N/A
59NADDo types have to be completed?Unknown
60C89Array initialization from a string literalUnknown
61NADWhitespace in scanf format stringN/A
62NADCan the rename function be defined to fail?N/A
63DupFloating-point representation precision requirementsDuplicate of 56
64NADNull pointer constantsUnknown
65C89Questions on localesN/A
66NADAnother question on localesUnknown
67NADInteger and integral type confusionUnknown
68NAD'char' and signed vs unsigned integer typesUnknown
69NADQuestions about the representation of integer typesUnknown
70NADInterchangeability of function argumentsUnknown
71C89Enumerated typesUnknown
72NADDefinition of object and pointer arithmeticUnknown
73NADDefinition of object and array accessUnknown
74NADAlignment and structure paddingUnknown
75NADAlignment of allocated memoryN/A
76OpenPointers to the end of arraysNot resolved
77NADStability of addressesUnknown
78NADUniqueness of addressesUnknown
79NADConstancy of system library function addressesN/A
80C89Merging of string constantsUnknown
81NADLeft shift operatorUnknown
82C89Multiple varargsUnknown
83C89Use of library functionsN/A
84NADIncomplete type in function declarationUnknown
85C89Returning from mainUnknown
86NADObject-like macros in system headersUnknown
87NADOrder of evaluationUnknown
88NADCompatibility of incomplete typesUnknown
89C89Multiple definitions of macrosUnknown
90NADMultibyte characters in formatsN/A
91NADMultibyte encodingsUnknown
92DupPartial initialization of stringsDuplicate of 60
93C89Reservation of identifiersUnknown
94NADANSI/ISO C Defect report #rfg1Unknown
95NADANSI/ISO C Defect report #rfg2Unknown
96NADANSI/ISO C Defect report #rfg3Unknown
97DupANSI/ISO C Defect report #rfg4Duplicate of 40
98NADANSI/ISO C Defect report #rfg5Unknown
99NADANSI/ISO C Defect report #rfg6Unknown
100DupANSI/ISO C Defect report #rfg7Duplicate of 1
101C89ANSI/ISO C Defect report #rfg8Unknown
102NADANSI/ISO C Defect report #rfg9Unknown
103NADANSI/ISO C Defect report #rfg10Unknown
104DupANSI/ISO C Defect report #rfg11Duplicate of 84
105DupANSI/ISO C Defect report #rfg12Duplicate of 17
106NADANSI/ISO C Defect report #rfg13Unknown
107NADANSI/ISO C Defect report #rfg14N/A
108NADANSI/ISO C Defect report #rfg15Unknown
109NADANSI/ISO C Defect report #rfg16Unknown
110DupFormal parameters having array-of-non-object typesDuplicate of 47
111NADConversion of pointer-to-qualified type values to type (void*) valuesUnknown
112NADNull pointer constants and relational comparisonsUnknown
113NADReturn expressions in functions declared to return qualified voidUnknown
114NADInitialization of multi-dimensional char array objectsUnknown
115NADMember declarators as declaratorsUnknown
116NADImplicit unary & applied to register arraysUnknown
117NADAbstract semantics, sequence points, and expression evaluationUnknown
118C89Completion point for enumerated typesUnknown
119NADInitialization of multi-dimensional array objectsUnknown
120NADSemantics of assignment to (and initialization of) bit-fieldsUnknown
121NADConversions of pointer values to integral typesUnknown
122DupConversion/widening of bit-fieldsDuplicate of 15
123NAD'Type categories' and qualified typesUnknown
124C89Casts to 'a void type' versus casts to 'the void type'Unknown
125NADUsing things declared as 'extern (qualified) void'Unknown
126NADWhat does 'synonym' mean with respect to typedef names?Unknown
127DupComposite type of an enumerated type and an integral typeDuplicate of 13
128NADEditorial issue relating to tag declarations in type specifiersUnknown
129NADTags and name spacesUnknown
130NADGuarantees when writing text to a streamN/A
131C89const member qualification and assignmentUnknown
132DupCan undefined behavior occur at translation time, or only at run time?Duplicate of 109
133NADUndefined behavior not previously listed in subclause G2Unknown
134NADWhat is an 'error number' for strerror?N/A
135NADCan the size argument to 'fwrite' be zero?N/A
136NAD'mktime' and time gapsN/A
137NAD'printf' and negative floating point valuesN/A
138C89Is there an allocated storage duration?Unknown
139C89Compatibility of complete and incomplete typesUnknown
140NADBehavior of 'setvbuf'N/A
141NADWhat is the meaning of EOF?N/A
142C89Reservation of macro namesUnknown
143C89'fopen' modesN/A
144C89Preprocessing of preprocessing directivesUnknown
145C89Constant expressionsUnknown
146C89Nugatory constraintUnknown
147C89Sequence points in library functionsUnknown
148NADDefining library functionsUnknown
149C89The term "variable"Unknown
150C89Initialization of a char array from a string literalUnknown
151C89Behavior of 'printf' and flagsN/A
152NADCan you 'longjmp' out of a signal handler?N/A
153DupCan 'f()' be considered a call to a function-like macro with one empty argument?Duplicate of 3
154NADConsistency of implementation-defined valuesUnknown
155C89Zero-sized allocationsN/A
156C89Closed streamsN/A
157C89Legitimacy of type synonymsUnknown
158C89Null pointer conversionsUnknown
159C89Consistency of the C Standard Defects exist in the way the Standard refers to itselfUnknown
160C89Reservation of identifiersUnknown
161NADDetails of reserved symbolsUnknown
162C89'gmtime' and 'localtime'N/A
163C89Undeclared identifiersUnknown
164NADBad declarationsUnknown
165C89Tags and incomplete typesUnknown
166OpenMeaning of lvalueNot resolved
167OpenConsistency of the C Standard (Defect Report UK 015)Not resolved
168OpenConsistency of the C Standard (Defect Report UK 016)Not resolved
169C89TrigraphsUnknown
170C89Operators and punctuatorsUnknown
171OpenRanges of integral typesNot resolved
172OpenRelational and equality operatorsNot resolved
173OpenLine numbersNot resolved
174OpenImplicit conversionsNot resolved
175OpenCorrection to Technical Corrigendum 1Not resolved
176OpenDiagnostics for #errorNot resolved
177OpenPreprocessing directivesNot resolved
178OpenConformance with array members and allocationsNot resolved
201NADInteger types longer than longUnknown
202C99Change return type of certain <fenv.h> functionsN/A
203C99C locale conflict with ISO/IEC 9945-2N/A
204C99size_t and ptrdiff_t as a long long typeUnknown
205NADNew keyword __at_leastYes
206NADDefault argument conversion of float _ComplexUnknown
207C99Handling of imaginary typesUnknown
208C99Ambiguity in initializationUnknown
209C99Problem implementing INTN_C macrosN/A
210C99'fprintf' %a and %A conversions recommended practiceN/A
211C99Accuracy of decimal string to/from "binary" (non-decimal) floating-point conversionsUnknown
212NADBinding of multibyte conversion state objectsN/A
213C99Lacuna in 'mbrtowc'N/A
214NAD'atexit' function registrationN/A
215C99Equality operatorsUnknown
216C99Source character encodingsUnknown
217NAD'asctime' limitsN/A
218C99Signs of non-numeric floating point valuesUnknown
219NADEffective typesUnknown
220C99Definition of "decimal integer"N/A
221NADLacuna in pointer arithmeticUnknown
222C99Partially initialized structuresUnknown
223C99'FP_FAST_FMAF' and 'FP_FAST_FMAL' should be integer constantN/A
224C99fpclassify return is not definedN/A
225C99strtod, strtof and strtold expected form of the subject sequenceN/A
226NADstrftime referencesYes
227NADstrftime %U, %V, and %W conversion specifiersN/A
228C99wmemcmp declaration in Annex BN/A
229C99localeconv() *_sep_by_space table entries issuesN/A
230C99Enumerated type rankUnknown
231NADSemantics of text-line and non-directiveUnknown
232C99Typo in Annex IYes
233C99%g, %G precision specificationN/A
234C99Miscellaneous TyposYes
235C99"C" locale collating behaviour not definedN/A
236NADThe interpretation of type based aliasing rule when applied to union objects or allocated objectsUnknown
237NADDeclarations using [static]Unknown
238C99Descriptions of fma() overflow and underflow errors are missingN/A
239C99Annex F nexttoward description is inconsistent with 7.12.11.4 and F.9.8.3N/A
240C99lrint, llrint, lround, llround, and ilogb descriptions are not consistent for unrepresentable resultsN/A
241C99Make the base standard and Annex F consistent for pow(0, <0)N/A
242C99Make the base standard and Annex F consistent for logb(0)N/A
243C99Make the base standard and Annex F consistent for fmod(), remainder(), and remquo() for a zero divisorN/A
244C99tgamma(zero or negative integer) should be considered a pole errorN/A
245C99Missing paragraph numbersYes
246NADCompletion of declaratorsUnknown
247C99Are values a form of behaviour?Unknown
248C99limits are required for optional typesUnknown
249C99Lacuna applying C89:TC1 to C99Unknown
250C99Non-directives within macro argumentsUnknown
251C99Are struct fred and union fred the same type?Unknown
252NADIncomplete argument types when calling non-prototyped functionsUnknown
253NAD"overriding" in designated initializersUnknown
254NADmbtowc and partial charactersN/A
255NADNon-prototyped function calls and argument mismatchesUnknown
256NADMultiple inclusion of headersN/A
257NADCommon initial sequences and related issues with unionsUnknown
258NADOrdering of "defined" and macro replacementUnknown
259NADMacro invocations with no argumentsUnknown
260NADIndeterminate values and identical representationsUnknown
261NADConstant expressionsUnknown
262C99Maximum size of bit fieldsUnknown
263C99All-zero bits representationsUnknown
264NADGraphic charactersN/A
265C99Preprocessor arithmeticUnknown
266NADOverflow of sizeofUnknown
267C99Typos in 5.1.2.3, 7.24.4.4.5, 7.24.6.1, 7.24.6.1N/A
268C99Jumps into iteration statementsUnknown
269C99Lacunae in exact-width integer typesN/A
270C99wint_t is not the promoted version of wchar_tN/A
271NADLacuna in iswctype and towctransN/A
272C99Type categoryUnknown
273C99Meaning of __STDC_ISO_10646__Unknown
274C99Meaning of "character" in <string.h> functionsN/A
275C99Bitwise-OR of nothingN/A
276C99Orientation of perrorN/A
277NADDeclarations within iteration statementsUnknown
278C99Lacuna in character encodingsUnknown
279C99Wide character code values for members of the basic character setUnknown
280NADstruct tm, member tm_isdst, and mktime() in <time.h>N/A
281C99CLOCKS_PER_SEC should not be a constant expressionN/A
282C99Flexible array members & struct paddingUnknown
283C99Accessing a non-current union member ("type punning")Unknown
284NADDoes <math.h> define INT_MIN and INT_MAX?N/A
285C99Conversion of an imaginary type to _BoolUnknown
286C99Correctly rounded and rounding direction/modeN/A
287DupFloating-point status flags and sequence pointsDuplicate of 87
288NADDeficiency on multibyte conversionsN/A
289C99Function prototype with [restrict]Unknown
290C99FLT_EVAL_METHOD and extra precision and/or rangeUnknown
291C99Corrections to requirements on inexact floating-point exceptionsUnknown
292C99Use of the word variableUnknown
293C99Typo in Standard - double complex instead of complex in an exampleYes
294NADTechnical question on C99 restrict keywordUnknown
295C99Incomplete types for function parametersUnknown
296C99Is exp(INFINITY) overflow? A range error? A divide-by-zero exception? INFINITY without any errors?N/A
297C99May FE_* floating-point exception flags have bits in common?N/A
298C99Validity of constant in unsigned long long rangeUnknown
299C99Is cabs() a type-generic macro?N/A
300NADTranslation-time expression evaluationUnknown
301NADMeaning of FE_* macros in <fenv.h>Unknown
302C996.10.2p5: Adding underscore to portable include file name character setUnknown
303C996.10p2: Breaking up the very long sentence describing preprocessing directiveUnknown
304C99Clarifying illegal tokens in #if directivesUnknown
305C996.10.1p3: Clarifying handling of keywords in #if directivesUnknown
306C996.10.3p9: Clarifying that rescanning applies to object-like macrosUnknown
307C996.10.3p10: Clarifying arguments vs. parametersUnknown
308C99Clarify that source files et al. need not be "files"Yes
309C99Clarifying trigraph substitutionUnknown
310C99Add non-corner case example of trigraphsYes
311C99Definition of variably modified typesUnknown
312C99Meaning of "known constant size"Yes
313NADIncomplete arrays of VLAsUnknown
314NADCross-translation-unit tagged type compatibilityUnknown
315C99Implementation-defined bit-field typesUnknown
316NADUnprototyped function typesUnknown
317NADFunction definitions with empty parenthesesUnknown
318C99(double)0.1f with FLT_EVAL_METHOD being 2Unknown
319NADprintf("%a", 1.0) and trailing zerosN/A
320C99Scope of variably modified typeUnknown
321C99Wide character code values for members of the basic character setUnknown
322C99Problem with TC2 Change #67 (Add perror to the list defining byte input/output functions)N/A
323C99Potential problems with TC2 #34, #35, and #36N/A
324C99Tokenization obscuritiesUnknown
325NADstrerror()N/A
326C99asctime()N/A
327C99Italicize definition of variable length array type, add forward referencesYes
328C99String literals in compound literal initializationUnknown
329C99Math functions and directed roundingN/A
330C99Externally visible exceptional conditionsN/A
331NADpermit FE_DIVBYZERO when errno says EDOMN/A
332C99gets is generally unsafeN/A
333C99Missing Predefined Macro NameUnknown
334OpenMissing semantics of comparison macrosNot resolved
335NAD_Bool bit-fieldsUnknown
336C99What does TMP_MAX actually indicate?N/A
337C99stdio.h macro definition problemsN/A
338C11C99 seems to exclude indeterminate value from being an uninitialized registerUnknown
339DupVariably modified compound literalDuplicate of 328
340C99Composite types for variable-length arraysUnknown
341C99[*] in abstract declaratorsUnknown
342DupVLAs and conditional expressionsDuplicate of 340
343C99Initializing qualified wchar_t arraysUnknown
344C99Casts in preprocessor conditional expressionsUnknown
345C11Where does parameter scope start?Unknown
400C11realloc with size zero problemsUnknown
401C11"happens before" can not be cyclicUnknown
402C11Memory model coherence is not aligned with C++11Unknown
403C11malloc() and free() in the memory modelN/A
404C11Joke fragment remains in a footnoteYes
405C11The mutex specificationN/A
406C11Visible sequences of side effects are redundantUnknown
407C11Memory ordering of atomicsUnknown
408NADShould locks provide intra-thread synchronizationN/A
409C11f(inf) is inf being a range errorN/A
410C11ilogb inconsistent with lrint, lroundN/A
411C11#elifUnknown
412C11#elifUnknown
413NADInitializationUnknown
414C11Typos in 6.27 Threads <threads.h>N/A
415C11Missing divide by zero entry in Annex JYes
416C11tss_t destruction unspecifiedN/A
417C11Annex J not updated with necessary aligned_alloc entriesYes
418NADPossible defect report: fmod(0.,NaN) and fmod(NaN,infinity)N/A
419C11Generic FunctionsUnknown
420NADSyntax error in specification of for-statementYes
421NADInitialization of atomic_flagN/A
422NADInitialization of atomic typesN/A
423C11Defect Report relative to n1570: underspecification for qualified rvaluesClang 15
424DupUnderspecification of tss_tDuplicate of 416
425NADNo specification for the access to variables with temporary lifetimeUnknown
426C11G.5.1: -yv and -x/v are ambiguousN/A
427NADFunction Parameter and Return Value AssignmentsUnknown
428C11Runtime-constraint issue with sprintf family of routines in Annex KN/A
429C11Should gets_s discard next input line when (s == NULL) ?N/A
430C11getenv_s, maxsize should be allowed to be zeroN/A
431C11atomic_compare_exchange: What does it mean to say two structs compare equal?Unknown
432C11Possible defect report: Is 0.0 required to be a representable value?Unknown
433C11Issue with constraints for wide character function arguments involving RSIZE_MAXN/A
434C11Possible defect report: Missing constraint w.r.t. AtomicYes
435NADPossible defect report: Missing constraint w.r.t. ImaginaryYes
436C11Request for interpretation of C11 6.8.5#6Unknown
437C11clock overflow problemsN/A
438C11ungetc / ungetwc and file position after discarding push back problemsN/A
439C11Issues with the definition of "full expression"Unknown
440NADFloating-point issues in C11 from PDTS 18661-1 UK review, Issue 1Unknown
441C11Floating-point issues in C11 from PDTS 18661-1 UK review, Issue 2N/A
442NADFloating-point issues in C11 from PDTS 18661-1 UK review, Issue 3Unknown
443NADFloating-point issues in C11 from PDTS 18661-1 UK review, Issue 4Unknown
444C11Issues with alignment in C11, part 1Unknown
445C11Issues with alignment in C11, part 2Unknown
446NADUse byte instead of character for memcmp, memcpyN/A
447C11Boolean from complexUnknown
448C11What are the semantics of a # non-directive?Yes
449NADWhat is the value of TSS_DTOR_ITERATIONS for implementations with no maximum?N/A
450C11tmpnam_s clears s[0] when maxsize > RSIZE_MAXN/A
451NADInstability of uninitialized automatic variablesUnknown
452C11Effective Type in Loop InvariantUnknown
453C11Atomic flag type and operationsN/A
454NADATOMIC_VAR_INIT (issues 3 and 4)Yes
455NADATOMIC_VAR_INIT issue 5Yes
456DupCompile time definition of UINTN_C(value)Duplicate of 209
457C11The ctime_s function in Annex K defined incorrectlyN/A
458C11ATOMIC_XXX_LOCK_FREE macros not constant expressionsN/A
459C11atomic_load missing const qualifierUnknown
460C11aligned_alloc underspecifiedN/A
461NADProblems with references to objects in signal handlersN/A
462C11Clarifying objects accessed in signal handlersN/A
463NADLeft-shifting into the sign bitUnknown
464C11Clarifying the Behavior of the #line DirectiveUnknown
465C11Fixing an inconsistency in atomic_is_lock_freeUnknown
466NADScope of a for loop control declarationUnknown
467C11Maximum representable finite description vs mathUnknown
468C11strncpy_s clobbers buffer past nullN/A
469NADLock ownership vs. thread terminationN/A
470C11mtx_trylock should be allowed to fail spuriouslyN/A
471C11Complex math functions cacosh and ctanhN/A
472C11Introduction to complex arithmetic in 7.3.1p3 wrong due to CMPLXN/A
473C11"A range error occurs if x is too large." is misleadingN/A
474NADNOTE 1 Clarification for atomic_compare_exchangeN/A
475C11Misleading Atomic library references to atomic typesYes
476C11volatile semantics for lvaluesUnknown
477C11nan should take a string argumentN/A
478NADValid uses of the main functionUnknown
479DupUnclear specification of mtx_trylock on non-recursive mutexesDuplicate of 269
480C11cnd_wait and cnd_timewait should allow spurious wake-upsN/A
481C11Controlling expression of _Generic primary expressionUnknown
482NADMacro invocation split over many filesUnknown
483NAD__LINE__ and __FILE__ in macro replacement listUnknown
484NADinvalid characters in strcoll()N/A
485C11Problem with the specification of ATOMIC_VAR_INITYes
486NADInconsistent specification for arithmetic on atomic objectsUnknown
487C11timespec vs. tmN/A
488C11c16rtomb() on wide characters encoded as multiple char16_tN/A
489NADInteger Constant ExpressionUnknown
490NADUnwritten Assumptions About if-thenYes
491C11Concern with Keywords that Match Reserved IdentifiersUnknown
492NADNamed Child struct-union with no MemberUnknown
493DupMutex Initialization UnderspecifiedDuplicate of 469
494C11Part 1: Alignment specifier expression evaluationUnknown
495C11Part 2: Atomic specifier expression evaluationNot resolved
496NADoffsetof questionsUnknown
497C11"white-space character" defined in two placesN/A
498C11mblen, mbtowc, and wctomb thread-safetyN/A
499C17Anonymous structure in union behaviorUnknown
500C17Ambiguous specification for FLT_EVAL_METHODUnknown
501C17Can DECIMAL_DIG be larger than necessary?N/A
502NADFlexible array member in an anonymous structUnknown
503NADHexadecimal floating-point and strtodUnknown
+ +
+ + + diff --git a/clang/www/c_status.html b/clang/www/c_status.html index e02538828342be..1d23ce7a97b9d5 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -68,12 +68,12 @@

C Support in Clang

will be marked in magenta.

The Clang community is continually striving to improve C standards -compliance between releases. We implement the resolution for defect -reports, but we do not currently track our DR status (help with -tracking DR status is appreciated).

+compliance between releases by submitting and tracking +C Defect Reports and implementing resolutions as +they become available.

The LLVM bug tracker uses -the "c", "c11", "c18", and "c2x" labels to track known bugs with Clang's language +the "c", "c99", "c11", "c17", and "c2x" labels to track known bugs with Clang's language conformance.

C89 implementation status

From acec07005e038ab2891f235ae60ba2f0236bb952 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 19 May 2022 16:52:58 -0500 Subject: [PATCH 128/908] [OpenMP] Fix partial unrolling off-by-one. Even though the comment description is ".unroll_inner.iv < NumIterations", the code emitted a BO_LE ('<=') operator for the inner loop that is to be unrolled. This lead to one additional copy of the body code in a partially unrolled. It only manifests when the unrolled loop is consumed by another loop-associated construct. Fix by using the BO_LT operator instead. The condition for the outer loop and the corresponding code for tiling correctly used BO_LT already. Fixes #55236 --- clang/lib/Sema/SemaOpenMP.cpp | 4 ++-- .../OpenMP/irbuilder_unroll_partial_factor_for_collapse.c | 4 ++-- .../irbuilder_unroll_partial_heuristic_for_collapse.c | 4 ++-- clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp | 4 ++-- clang/test/OpenMP/unroll_codegen_for_partial.cpp | 4 ++-- clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp | 4 ++-- clang/test/OpenMP/unroll_codegen_tile_for.cpp | 4 ++-- clang/test/OpenMP/unroll_codegen_unroll_for.cpp | 8 ++++---- clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp | 8 ++++---- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index e4305ba8c1718a..42c3dbc181911e 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14483,11 +14483,11 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, if (!EndOfTile.isUsable()) return StmtError(); ExprResult InnerCond1 = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), - BO_LE, MakeInnerRef(), EndOfTile.get()); + BO_LT, MakeInnerRef(), EndOfTile.get()); if (!InnerCond1.isUsable()) return StmtError(); ExprResult InnerCond2 = - BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LE, MakeInnerRef(), + BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeInnerRef(), 
MakeNumIterations()); if (!InnerCond2.isUsable()) return StmtError(); diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c index 0bfed911077bf3..94d7a14a0bb873 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c @@ -106,12 +106,12 @@ // CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 // CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4 -// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]] +// CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]] // CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[LAND_RHS]]: // CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8 +// CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8 // CHECK-NEXT: br label %[[LAND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c index 1a2bd117bf98c6..c44b2b32026944 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c @@ -114,12 +114,12 @@ double sind(double); // CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 // CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 2 -// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]] +// CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]] // CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] 
// CHECK-EMPTY: // CHECK-NEXT: [[LAND_RHS]]: // CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8 +// CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8 // CHECK-NEXT: br label %[[LAND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp index cafda811d0d0c5..693cbf851b991f 100644 --- a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp +++ b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp @@ -176,14 +176,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP39:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // IR-NEXT: %[[TMP40:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4 // IR-NEXT: %[[ADD50:.+]] = add i32 %[[TMP40]], 2 -// IR-NEXT: %[[CMP51:.+]] = icmp ule i32 %[[TMP39]], %[[ADD50]] +// IR-NEXT: %[[CMP51:.+]] = icmp ult i32 %[[TMP39]], %[[ADD50]] // IR-NEXT: br i1 %[[CMP51]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP41:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // IR-NEXT: %[[TMP42:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4 // IR-NEXT: %[[ADD52:.+]] = add i32 %[[TMP42]], 1 -// IR-NEXT: %[[CMP53:.+]] = icmp ule i32 %[[TMP41]], %[[ADD52]] +// IR-NEXT: %[[CMP53:.+]] = icmp ult i32 %[[TMP41]], %[[ADD52]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_for_partial.cpp b/clang/test/OpenMP/unroll_codegen_for_partial.cpp index 759c436da58317..e1cb7a3db6b80f 100644 --- a/clang/test/OpenMP/unroll_codegen_for_partial.cpp +++ b/clang/test/OpenMP/unroll_codegen_for_partial.cpp @@ -114,14 +114,14 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP21:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP22:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4 // IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP22]], 2 -// IR-NEXT: %[[CMP18:.+]] = icmp ule i32 %[[TMP21]], %[[ADD17]] +// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP21]], %[[ADD17]] // IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP24]], 1 -// IR-NEXT: %[[CMP20:.+]] = icmp ule i32 %[[TMP23]], %[[ADD19]] +// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 %[[TMP23]], %[[ADD19]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp index 914b53252b9c14..1ad3108fcd17df 100644 --- a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp +++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp @@ -143,14 +143,14 @@ extern "C" void func(int start, int end, int step) { // IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4 // IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP27]], 7 -// IR-NEXT: %[[CMP18:.+]] = icmp ule i32 %[[TMP26]], %[[ADD17]] +// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP26]], %[[ADD17]] // IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP28:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP29]], 1 -// IR-NEXT: %[[CMP20:.+]] = icmp ule i32 %[[TMP28]], %[[ADD19]] +// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 
%[[TMP28]], %[[ADD19]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_tile_for.cpp b/clang/test/OpenMP/unroll_codegen_tile_for.cpp index 2a31df2326e65a..f7611f5b35e30e 100644 --- a/clang/test/OpenMP/unroll_codegen_tile_for.cpp +++ b/clang/test/OpenMP/unroll_codegen_tile_for.cpp @@ -162,14 +162,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4 // IR-NEXT: %[[ADD36:.+]] = add i32 %[[TMP32]], 2 -// IR-NEXT: %[[CMP37:.+]] = icmp ule i32 %[[TMP31]], %[[ADD36]] +// IR-NEXT: %[[CMP37:.+]] = icmp ult i32 %[[TMP31]], %[[ADD36]] // IR-NEXT: br i1 %[[CMP37]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD38:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: %[[CMP39:.+]] = icmp ule i32 %[[TMP33]], %[[ADD38]] +// IR-NEXT: %[[CMP39:.+]] = icmp ult i32 %[[TMP33]], %[[ADD38]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp index 5157d4f7ae5b89..ebb79d0c409aa4 100644 --- a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp +++ b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp @@ -129,14 +129,14 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4 // IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 2 -// IR-NEXT: %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]] +// IR-NEXT: %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]] // IR-NEXT: br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4 // IR-NEXT: %[[ADD25:.+]] = add i32 %[[TMP27]], 1 -// IR-NEXT: %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]] +// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: @@ -156,14 +156,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4 // IR-NEXT: %[[ADD30:.+]] = add i32 %[[TMP32]], 2 -// IR-NEXT: %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]] +// IR-NEXT: %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]] // IR-NEXT: br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS32]]: // IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD33:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]] +// IR-NEXT: %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]] // IR-NEXT: br label %[[LAND_END35]] // IR-EMPTY: // IR-NEXT: [[LAND_END35]]: diff --git a/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp index add7ecaea01d55..06196259b6acaa 100644 --- 
a/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp +++ b/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp @@ -129,14 +129,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4 // IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 2 -// IR-NEXT: %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]] +// IR-NEXT: %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]] // IR-NEXT: br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4 // IR-NEXT: %[[ADD25:.+]] = add i32 %[[TMP27]], 1 -// IR-NEXT: %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]] +// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: @@ -156,14 +156,14 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4 // IR-NEXT: %[[ADD30:.+]] = add i32 %[[TMP32]], 2 -// IR-NEXT: %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]] +// IR-NEXT: %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]] // IR-NEXT: br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS32]]: // IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD33:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]] +// IR-NEXT: %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]] // IR-NEXT: br label %[[LAND_END35]] // IR-EMPTY: // IR-NEXT: [[LAND_END35]]: From bd62b70b515bcbded99ea2debe1d1f54acbb38cd Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 20 May 2022 13:56:25 -0700 Subject: [PATCH 129/908] Add 'ppc' as a target (for both 32- and 64-bit ppc). Needed for a TSan test that won't pass on PPC. Relevant information is in https://reviews.llvm.org/D110552. 
--- compiler-rt/test/lit.common.cfg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 00a3adafc01c00..5922fca2fa928b 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -207,6 +207,9 @@ def get_path_from_clang(args, allow_failure): config.available_features.add(config.host_os.lower()) +if config.target_triple.startswith("ppc"): + config.available_features.add("ppc") + if re.match(r'^x86_64.*-linux', config.target_triple): config.available_features.add("x86_64-linux") From a3c3482ceb529206b0ae4e7782e5496da5e0879d Mon Sep 17 00:00:00 2001 From: Will Hawkins Date: Fri, 20 May 2022 13:50:42 -0700 Subject: [PATCH 130/908] [lldb] Consider binary as module of last resort When setting an address breakpoint using a non-section address in lldb before having ever run the program, the binary itself is not considered a module. As a result, the breakpoint is unresolved (and never gets resolved subsequently). This patch changes that behavior: as a last resort, the binary is considered as a module when resolving a non-section address breakpoint. 
Differential revision: https://reviews.llvm.org/D124731 --- .../Breakpoint/BreakpointResolverAddress.cpp | 27 +++++++++----- lldb/source/Commands/Options.td | 6 ++-- .../breakpoint/set/address-nomodule/Makefile | 3 ++ .../TestBreakpointAddressNoModule.py | 36 +++++++++++++++++++ .../set/address-nomodule/inferior.c | 6 ++++ 5 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile create mode 100644 lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py create mode 100644 lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c diff --git a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp index c173fc0c2a7a6e..9b6b6d29cbce85 100644 --- a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp @@ -121,16 +121,27 @@ Searcher::CallbackReturn BreakpointResolverAddress::SearchCallback( if (filter.AddressPasses(m_addr)) { if (breakpoint.GetNumLocations() == 0) { - // If the address is just an offset, and we're given a module, see if we - // can find the appropriate module loaded in the binary, and fix up - // m_addr to use that. - if (!m_addr.IsSectionOffset() && m_module_filespec) { + // If the address is just an offset ... + if (!m_addr.IsSectionOffset()) { + ModuleSP containing_module_sp = nullptr; Target &target = breakpoint.GetTarget(); - ModuleSpec module_spec(m_module_filespec); - ModuleSP module_sp = target.GetImages().FindFirstModule(module_spec); - if (module_sp) { + if (m_module_filespec) { + // ... and we're given a module, see if we can find the + // appropriate module loaded in the binary, and fix up + // m_addr to use that. + ModuleSpec module_spec(m_module_filespec); + containing_module_sp = + target.GetImages().FindFirstModule(module_spec); + } else { + // ... 
and we're not given a module, see if the offset is + // somewhere in the executable module. If it is, then we'll + // fix up m_addr to use that. + containing_module_sp = target.GetExecutableModule(); + } + if (containing_module_sp) { Address tmp_address; - if (module_sp->ResolveFileAddress(m_addr.GetOffset(), tmp_address)) + if (containing_module_sp->ResolveFileAddress(m_addr.GetOffset(), + tmp_address)) m_addr = tmp_address; } } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index c326f8a3207488..284437887a0b94 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -128,13 +128,15 @@ let Command = "breakpoint set" in { Arg<"AddressOrExpression">, Required, Desc<"Set the breakpoint at the specified address. If the address maps " "uniquely to a particular binary, then the address will be converted to " - "a \"file\"address, so that the breakpoint will track that binary+offset " + "a \"file\" address, so that the breakpoint will track that binary+offset " "no matter where the binary eventually loads. Alternately, if you also " "specify the module - with the -s option - then the address will be " "treated as a file address in that module, and resolved accordingly. " "Again, this will allow lldb to track that offset on subsequent reloads. " "The module need not have been loaded at the time you specify this " - "breakpoint, and will get resolved when the module is loaded.">; + "breakpoint, and will get resolved when the module is loaded. If no " + "module is specified, the binary being debugged is considered as a " + "fallback.">; def breakpoint_set_name : Option<"name", "n">, Group<3>, Arg<"FunctionName">, Completion<"Symbol">, Required, Desc<"Set the breakpoint by function name. 
Can be repeated multiple times " diff --git a/lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile b/lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile new file mode 100644 index 00000000000000..7c82a2dde6c2e7 --- /dev/null +++ b/lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := inferior.c + +include Makefile.rules diff --git a/lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py b/lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py new file mode 100644 index 00000000000000..3e3550d6947085 --- /dev/null +++ b/lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py @@ -0,0 +1,36 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def get_address_from_symbol(self, symbol): + target = lldbutil.run_to_breakpoint_make_target(self, "a.out", True) + bp = target.BreakpointCreateByName(symbol, None) + address = bp.GetLocationAtIndex(0).GetAddress().GetFileAddress() + return address + + def test_set_address_no_module(self): + self.build() + + main_address = self.get_address_from_symbol("main") + + target = lldbutil.run_to_breakpoint_make_target(self) + debugger = target.GetDebugger() + + debugger.HandleCommand(f"break set -a {main_address:#x}") + self.assertEquals(target.GetNumBreakpoints(), 1) + + bp = target.GetBreakpointAtIndex(0) + self.assertIsNotNone(bp) + + _, _, thread, _ = lldbutil.run_to_breakpoint_do_run(self, target, bp) + self.assertGreaterEqual(thread.GetNumFrames(), 1) + + thread_pc = thread.GetFrameAtIndex(0).GetPCAddress() + self.assertNotEqual(thread_pc, None) + + self.assertEquals(main_address, thread_pc.GetFileAddress()) diff --git a/lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c 
b/lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c new file mode 100644 index 00000000000000..b3af1438927c14 --- /dev/null +++ b/lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c @@ -0,0 +1,6 @@ +int function(int a) { return a; } + +int main() { + int f = function(10); + return f; +} From d252d9231c4a5008a69c6791db498aaf95bda8e7 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 20 May 2022 14:04:09 -0700 Subject: [PATCH 131/908] [lldb] Fix spurious assertion in PrintCommandOutput When the string passed to PrintCommandOutput doesn't end with a newline, `written` will exceed `size` and result in an lldbassert. After 8e776bb660dda6c51ce7ca6cea641db1f47aa9cf we don't really need written anymore and we can check whether `str` is empty instead. This patch simplifies the code and removes the assert that's no longer relevant. Differential revision: https://reviews.llvm.org/D126081 --- lldb/source/Interpreter/CommandInterpreter.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 01a2e3f950aa77..e3d2aa8ebd37f4 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2989,22 +2989,18 @@ void CommandInterpreter::PrintCommandOutput(IOHandler &io_handler, lldb::StreamFileSP stream = is_stdout ? 
io_handler.GetOutputStreamFileSP() : io_handler.GetErrorStreamFileSP(); // Split the output into lines and poll for interrupt requests - size_t size = str.size(); - while (size > 0 && !WasInterrupted()) { + while (!str.empty() && !WasInterrupted()) { llvm::StringRef line; - size_t written = 0; std::tie(line, str) = str.split('\n'); { std::lock_guard guard(io_handler.GetOutputMutex()); - written += stream->Write(line.data(), line.size()); - written += stream->Write("\n", 1); + stream->Write(line.data(), line.size()); + stream->Write("\n", 1); } - lldbassert(size >= written); - size -= written; } std::lock_guard guard(io_handler.GetOutputMutex()); - if (size > 0) + if (!str.empty()) stream->Printf("\n... Interrupted.\n"); stream->Flush(); } From de066267254acb97424bea008195d47de689aca4 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 20 May 2022 14:21:58 -0700 Subject: [PATCH 132/908] PPC+TSan whack-a-mole, round 3. More details in https://reviews.llvm.org/D110552. Last try until I revert the whole shenanigans. 
--- compiler-rt/test/lit.common.cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 5922fca2fa928b..62a73dd3139686 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -207,7 +207,7 @@ def get_path_from_clang(args, allow_failure): config.available_features.add(config.host_os.lower()) -if config.target_triple.startswith("ppc"): +if config.target_triple.startswith("ppc") or config.target_triple.startswith("powerpc"): config.available_features.add("ppc") if re.match(r'^x86_64.*-linux', config.target_triple): From ff60a0a364b49f1b52883b032f719548685081bd Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 20 May 2022 17:23:34 -0400 Subject: [PATCH 133/908] [LLVM] Add a check if should cast atomic operations to integer type Currently for atomic load, store, and rmw instructions, as long as the operand is floating-point value, they are casted to integer. Nowadays many targets can actually support part of atomic operations with floating-point operands. For example, NVPTX supports atomic load and store of floating-point values. This patch adds a series interface functions `shouldCastAtomicXXXInIR`, and the default implementations are same as what we currently do. Later for targets can have their specialization. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D125652 --- llvm/include/llvm/CodeGen/TargetLowering.h | 30 ++++++++++++++++++++++ llvm/lib/CodeGen/AtomicExpandPass.cpp | 10 +++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 0c619a41f7e8bc..5fc8757c55febe 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -248,6 +248,8 @@ class TargetLoweringBase { /// w.r.t. what they should expand to. 
enum class AtomicExpansionKind { None, // Don't expand the instruction. + CastToInteger, // Cast the atomic instruction to another type, e.g. from + // floating-point to integer type. LLSC, // Expand the instruction into loadlinked/storeconditional; used // by ARM/AArch64. LLOnly, // Expand the (load) instruction into just a load-linked, which has @@ -2045,6 +2047,14 @@ class TargetLoweringBase { return AtomicExpansionKind::None; } + /// Returns how the given (atomic) load should be cast by the IR-level + /// AtomicExpand pass. + virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const { + if (LI->getType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; + } + /// Returns how the given (atomic) store should be expanded by the IR-level /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try /// to use an atomicrmw xchg. @@ -2052,6 +2062,15 @@ class TargetLoweringBase { return AtomicExpansionKind::None; } + /// Returns how the given (atomic) store should be cast by the IR-level + /// AtomicExpand pass into. For instance AtomicExpansionKind::CastToInteger + /// will try to cast the operands to integer values. + virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { + if (SI->getValueOperand()->getType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; + } + /// Returns how the given atomic cmpxchg should be expanded by the IR-level /// AtomicExpand pass. virtual AtomicExpansionKind @@ -2066,6 +2085,17 @@ class TargetLoweringBase { AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } + /// Returns how the given atomic atomicrmw should be cast by the IR-level + /// AtomicExpand pass. 
+ virtual AtomicExpansionKind + shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const { + if (RMWI->getOperation() == AtomicRMWInst::Xchg && + RMWI->getValOperand()->getType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + + return AtomicExpansionKind::None; + } + /// On some platforms, an AtomicRMW that never actually modifies the value /// (such as fetch_add of 0) can be turned into a fence followed by an /// atomic load. This may sound useless, but it makes it possible for the diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index fc1f832cf99e50..2f4eb1f21fbf21 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -254,7 +254,8 @@ bool AtomicExpand::runOnFunction(Function &F) { } if (LI) { - if (LI->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicLoadInIR(LI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. LI = convertAtomicLoadToIntegerType(LI); @@ -264,7 +265,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange |= tryExpandAtomicLoad(LI); } else if (SI) { - if (SI->getValueOperand()->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicStoreInIR(SI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. 
SI = convertAtomicStoreToIntegerType(SI); @@ -285,8 +287,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange = true; } else { AtomicRMWInst::BinOp Op = RMWI->getOperation(); - if (Op == AtomicRMWInst::Xchg && - RMWI->getValOperand()->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicRMWIInIR(RMWI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. RMWI = convertAtomicXchgToIntegerType(RMWI); From ecf5b780538ecb643462fcb522440ea65d82483c Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 20 May 2022 17:25:14 -0400 Subject: [PATCH 134/908] [NVPTX] Enable AtomicExpandPass for NVPTX This patch enables `AtomicExpandPass` for NVPTX. Depend on D125652. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D125639 --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 63 +++++++ llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 11 ++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 + llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll | 165 +++++++++++++++++++ 4 files changed, 241 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 3c357a440a3248..f4afab300faa41 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -589,6 +589,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // Now deduce the information based on the above mentioned // actions computeRegisterProperties(STI.getRegisterInfo()); + + setMinCmpXchgSizeInBits(32); } const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -5125,6 +5127,67 @@ void NVPTXTargetLowering::ReplaceNodeResults( } } +NVPTXTargetLowering::AtomicExpansionKind +NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + Type *Ty = 
AI->getValOperand()->getType(); + + if (AI->isFloatingPointOperation()) { + if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { + if (Ty->isFloatTy()) + return AtomicExpansionKind::None; + if (Ty->isDoubleTy() && STI.hasAtomAddF64()) + return AtomicExpansionKind::None; + } + return AtomicExpansionKind::CmpXChg; + } + + assert(Ty->isIntegerTy() && "Ty should be integer at this point"); + auto ITy = cast(Ty); + + switch (AI->getOperation()) { + default: + return AtomicExpansionKind::CmpXChg; + case AtomicRMWInst::BinOp::And: + case AtomicRMWInst::BinOp::Or: + case AtomicRMWInst::BinOp::Xor: + case AtomicRMWInst::BinOp::Xchg: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomBitwise64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + case AtomicRMWInst::BinOp::Add: + case AtomicRMWInst::BinOp::Sub: + case AtomicRMWInst::BinOp::Max: + case AtomicRMWInst::BinOp::Min: + case AtomicRMWInst::BinOp::UMax: + case AtomicRMWInst::BinOp::UMin: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomMinMax64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + } + + return AtomicExpansionKind::CmpXChg; +} + // Pin NVPTXTargetObjectFile's vtables to this file. 
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 18a697deacb44d..fb09f99a019d0e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -561,6 +561,17 @@ class NVPTXTargetLowering : public TargetLowering { // instruction, so we say that ctlz is cheap to speculate. bool isCheapToSpeculateCtlz() const override { return true; } + AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { + return AtomicExpansionKind::None; + } + + AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { + return AtomicExpansionKind::None; + } + + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + private: const NVPTXSubtarget &STI; // cache the subtarget here SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7285bbe63d8e32..597b8af176a2a4 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -330,6 +330,8 @@ void NVPTXPassConfig::addIRPasses() { addStraightLineScalarOptimizationPasses(); } + addPass(createAtomicExpandPass()); + // === LSR and other generic IR passes === TargetPassConfig::addIRPasses(); // EarlyCSE is not always strong enough to clean up what LSR produces. 
For diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll new file mode 100644 index 00000000000000..a2512dfbf58111 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll @@ -0,0 +1,165 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefixes=ALL,SM30 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=ALL,SM60 +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %} + +; CHECK-LABEL: fadd_double +define void @fadd_double(ptr %0, double %1) { +entry: + ; SM30: atom.cas.b64 + ; SM60: atom.add.f64 + %2 = atomicrmw fadd ptr %0, double %1 monotonic, align 8 + ret void +} + +; CHECK-LABEL: fadd_float +define void @fadd_float(ptr %0, float %1) { +entry: + ; ALL: atom.add.f32 + %2 = atomicrmw fadd ptr %0, float %1 monotonic, align 4 + ret void +} + +; CHECK-LABEL: bitwise_i32 +define void @bitwise_i32(ptr %0, i32 %1) { +entry: + ; ALL: atom.and.b32 + %2 = atomicrmw and ptr %0, i32 %1 monotonic, align 4 + ; ALL: atom.or.b32 + %3 = atomicrmw or ptr %0, i32 %1 monotonic, align 4 + ; ALL: atom.xor.b32 + %4 = atomicrmw xor ptr %0, i32 %1 monotonic, align 4 + ; ALL: atom.exch.b32 + %5 = atomicrmw xchg ptr %0, i32 %1 monotonic, align 4 + ret void +} + +; CHECK-LABEL: bitwise_i64 +define void @bitwise_i64(ptr %0, i64 %1) { +entry: + ; SM30: atom.cas.b64 + ; SM60: atom.and.b64 + %2 = atomicrmw and ptr %0, i64 %1 monotonic, align 8 + ; SM30: atom.cas.b64 + ; SM60: atom.or.b64 + %3 = atomicrmw or ptr %0, i64 %1 monotonic, align 8 + ; SM30: atom.cas.b64 + ; SM60: atom.xor.b64 + %4 = atomicrmw xor ptr %0, i64 %1 monotonic, align 8 + ; SM30: atom.cas.b64 + ; SM60: atom.exch.b64 + %5 = atomicrmw xchg ptr %0, i64 %1 monotonic, align 8 + ret void +} + +; CHECK-LABEL: minmax_i32 +define void @minmax_i32(ptr %0, i32 %1) { +entry: + ; ALL: atom.min.s32 + %2 = atomicrmw min ptr %0, i32 %1 
monotonic, align 4 + ; ALL: atom.max.s32 + %3 = atomicrmw max ptr %0, i32 %1 monotonic, align 4 + ; ALL: atom.min.u32 + %4 = atomicrmw umin ptr %0, i32 %1 monotonic, align 4 + ; ALL: atom.max.u32 + %5 = atomicrmw umax ptr %0, i32 %1 monotonic, align 4 + ret void +} + +; CHECK-LABEL: minmax_i64 +define void @minmax_i64(ptr %0, i64 %1) { +entry: + ; SM30: atom.cas.b64 + ; SM60: atom.min.s64 + %2 = atomicrmw min ptr %0, i64 %1 monotonic, align 8 + ; SM30: atom.cas.b64 + ; SM60: atom.max.s64 + %3 = atomicrmw max ptr %0, i64 %1 monotonic, align 8 + ; SM30: atom.cas.b64 + ; SM60: atom.min.u64 + %4 = atomicrmw umin ptr %0, i64 %1 monotonic, align 8 + ; SM30: atom.cas.b64 + ; SM60: atom.max.u64 + %5 = atomicrmw umax ptr %0, i64 %1 monotonic, align 8 + ret void +} + +; CHECK-LABEL: bitwise_i8 +define void @bitwise_i8(ptr %0, i8 %1) { +entry: + ; ALL: atom.and.b32 + %2 = atomicrmw and ptr %0, i8 %1 monotonic, align 1 + ; ALL: atom.or.b32 + %3 = atomicrmw or ptr %0, i8 %1 monotonic, align 1 + ; ALL: atom.xor.b32 + %4 = atomicrmw xor ptr %0, i8 %1 monotonic, align 1 + ; ALL: atom.cas.b32 + %5 = atomicrmw xchg ptr %0, i8 %1 monotonic, align 1 + ret void +} + +; CHECK-LABEL: minmax_i8 +define void @minmax_i8(ptr %0, i8 %1) { +entry: + ; ALL: atom.cas.b32 + %2 = atomicrmw min ptr %0, i8 %1 monotonic, align 1 + ; ALL: atom.cas.b32 + %3 = atomicrmw max ptr %0, i8 %1 monotonic, align 1 + ; ALL: atom.cas.b32 + %4 = atomicrmw umin ptr %0, i8 %1 monotonic, align 1 + ; ALL: atom.cas.b32 + %5 = atomicrmw umax ptr %0, i8 %1 monotonic, align 1 + ret void +} + +; CHECK-LABEL: bitwise_i16 +define void @bitwise_i16(ptr %0, i16 %1) { +entry: + ; ALL: atom.and.b32 + %2 = atomicrmw and ptr %0, i16 %1 monotonic, align 2 + ; ALL: atom.or.b32 + %3 = atomicrmw or ptr %0, i16 %1 monotonic, align 2 + ; ALL: atom.xor.b32 + %4 = atomicrmw xor ptr %0, i16 %1 monotonic, align 2 + ; ALL: atom.cas.b32 + %5 = atomicrmw xchg ptr %0, i16 %1 monotonic, align 2 + ret void +} + +; CHECK-LABEL: minmax_i16 +define 
void @minmax_i16(ptr %0, i16 %1) { +entry: + ; ALL: atom.cas.b32 + %2 = atomicrmw min ptr %0, i16 %1 monotonic, align 2 + ; ALL: atom.cas.b32 + %3 = atomicrmw max ptr %0, i16 %1 monotonic, align 2 + ; ALL: atom.cas.b32 + %4 = atomicrmw umin ptr %0, i16 %1 monotonic, align 2 + ; ALL: atom.cas.b32 + %5 = atomicrmw umax ptr %0, i16 %1 monotonic, align 2 + ret void +} + +; TODO: We might still want to test other types, such as i128. Currently the +; backend doesn't support them. Atomic expand only supports expansion to cas of +; the same bitwidth, which means even after expansion, the back end still +; doesn't support the instruction. Here we still put the tests. Remove the +; comment once we have proper support, either from atomic expand or backend. + +; define void @bitwise_i128(ptr %0, i128 %1) { +; entry: +; %2 = atomicrmw and ptr %0, i128 %1 monotonic, align 16 +; %3 = atomicrmw or ptr %0, i128 %1 monotonic, align 16 +; %4 = atomicrmw xor ptr %0, i128 %1 monotonic, align 16 +; %5 = atomicrmw xchg ptr %0, i128 %1 monotonic, align 16 +; ret void +; } + +; define void @minmax_i128(ptr %0, i128 %1) { +; entry: +; %2 = atomicrmw min ptr %0, i128 %1 monotonic, align 16 +; %3 = atomicrmw max ptr %0, i128 %1 monotonic, align 16 +; %4 = atomicrmw umin ptr %0, i128 %1 monotonic, align 16 +; %5 = atomicrmw umax ptr %0, i128 %1 monotonic, align 16 +; ret void +; } From d390035b46907f1c421d312bdd33f44ad7580415 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Thu, 19 May 2022 15:18:50 -0700 Subject: [PATCH 135/908] [mlir][sparse] Support more complex operations. Add complex operations abs, neg, sin, log1p, sub and div. Add test cases. 
Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126027 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 6 + .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 56 +++++- .../SparseTensor/CPU/sparse_complex_ops.mlir | 179 ++++++++++++++++++ 3 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index ceee28466449c1..039cc5ddefac50 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -31,14 +31,18 @@ enum Kind { kIndex, // Unary operations. kAbsF, + kAbsC, kCeilF, kFloorF, kSqrtF, kExpm1F, kLog1pF, + kLog1pC, kSinF, + kSinC, kTanhF, kNegF, + kNegC, kNegI, kTruncF, kExtF, @@ -60,12 +64,14 @@ enum Kind { kMulC, kMulI, kDivF, + kDivC, // complex kDivS, // signed kDivU, // unsigned kAddF, kAddC, kAddI, kSubF, + kSubC, kSubI, kAndI, kOrI, diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 9a589965a5280a..3cf7dc60d33c33 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -37,14 +37,18 @@ TensorExp::TensorExp(Kind k, unsigned x, unsigned y, Value v, Operation *o) index = x; break; case kAbsF: + case kAbsC: case kCeilF: case kFloorF: case kSqrtF: case kExpm1F: case kLog1pF: + case kLog1pC: case kSinF: + case kSinC: case kTanhF: case kNegF: + case kNegC: case kNegI: case kCIm: case kCRe: @@ -151,6 +155,8 @@ unsigned Merger::takeDisj(Kind kind, unsigned s0, unsigned s1, Operation *op) { // TODO: move this if-else logic into buildLattices if (kind == kSubF) s1 = mapSet(kNegF, s1); + else if (kind == kSubC) + s1 = mapSet(kNegC, s1); else if (kind == kSubI) s1 = mapSet(kNegI, s1); // Followed by all in s1. 
@@ -274,14 +280,18 @@ bool Merger::isSingleCondition(unsigned t, unsigned e) const { case kTensor: return tensorExps[e].tensor == t; case kAbsF: + case kAbsC: case kCeilF: case kFloorF: case kSqrtF: case kExpm1F: case kLog1pF: + case kLog1pC: case kSinF: + case kSinC: case kTanhF: case kNegF: + case kNegC: case kNegI: case kTruncF: case kExtF: @@ -298,6 +308,7 @@ bool Merger::isSingleCondition(unsigned t, unsigned e) const { case kBitCast: return isSingleCondition(t, tensorExps[e].children.e0); case kDivF: // note: x / c only + case kDivC: case kDivS: case kDivU: assert(!maybeZero(tensorExps[e].children.e1)); @@ -342,6 +353,7 @@ static const char *kindToOpSymbol(Kind kind) { case kIndex: return "index"; case kAbsF: + case kAbsC: return "abs"; case kCeilF: return "ceil"; @@ -352,13 +364,15 @@ static const char *kindToOpSymbol(Kind kind) { case kExpm1F: return "expm1"; case kLog1pF: + case kLog1pC: return "log1p"; case kSinF: + case kSinC: return "sin"; case kTanhF: return "tanh"; case kNegF: - return "-"; + case kNegC: case kNegI: return "-"; case kTruncF: @@ -386,6 +400,7 @@ static const char *kindToOpSymbol(Kind kind) { case kMulI: return "*"; case kDivF: + case kDivC: case kDivS: case kDivU: return "/"; @@ -394,6 +409,7 @@ static const char *kindToOpSymbol(Kind kind) { case kAddI: return "+"; case kSubF: + case kSubC: case kSubI: return "-"; case kAndI: @@ -533,6 +549,7 @@ unsigned Merger::buildLattices(unsigned e, unsigned i) { return s; } case kAbsF: + case kAbsC: case kCeilF: case kCIm: case kCRe: @@ -540,9 +557,12 @@ unsigned Merger::buildLattices(unsigned e, unsigned i) { case kSqrtF: case kExpm1F: case kLog1pF: + case kLog1pC: case kSinF: + case kSinC: case kTanhF: case kNegF: + case kNegC: case kNegI: case kTruncF: case kExtF: @@ -607,6 +627,7 @@ unsigned Merger::buildLattices(unsigned e, unsigned i) { buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i)); case kDivF: + case kDivC: case kDivS: case kDivU: // A division 
is tricky, since 0/0, 0/c, c/0 all have @@ -630,6 +651,7 @@ unsigned Merger::buildLattices(unsigned e, unsigned i) { case kAddC: case kAddI: case kSubF: + case kSubC: case kSubI: case kOrI: case kXorI: @@ -696,6 +718,11 @@ Optional Merger::buildTensorExpFromLinalg(linalg::GenericOp op) { /// Only returns false if we are certain this is a nonzero. bool Merger::maybeZero(unsigned e) const { if (tensorExps[e].kind == kInvariant) { + if (auto c = tensorExps[e].val.getDefiningOp()) { + ArrayAttr arrayAttr = c.getValue(); + return arrayAttr[0].cast().getValue().isZero() && + arrayAttr[0].cast().getValue().isZero(); + } if (auto c = tensorExps[e].val.getDefiningOp()) return c.value() == 0; if (auto c = tensorExps[e].val.getDefiningOp()) @@ -750,6 +777,8 @@ Optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { unsigned e = x.getValue(); if (isa(def)) return addExp(kAbsF, e); + if (isa(def)) + return addExp(kAbsC, e); if (isa(def)) return addExp(kCeilF, e); if (isa(def)) @@ -760,12 +789,18 @@ Optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { return addExp(kExpm1F, e); if (isa(def)) return addExp(kLog1pF, e); + if (isa(def)) + return addExp(kLog1pC, e); if (isa(def)) return addExp(kSinF, e); + if (isa(def)) + return addExp(kSinC, e); if (isa(def)) return addExp(kTanhF, e); if (isa(def)) return addExp(kNegF, e); // no negi in std + if (isa(def)) + return addExp(kNegC, e); if (isa(def)) return addExp(kTruncF, e, v); if (isa(def)) @@ -813,6 +848,8 @@ Optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { return addExp(kMulI, e0, e1); if (isa(def) && !maybeZero(e1)) return addExp(kDivF, e0, e1); + if (isa(def) && !maybeZero(e1)) + return addExp(kDivC, e0, e1); if (isa(def) && !maybeZero(e1)) return addExp(kDivS, e0, e1); if (isa(def) && !maybeZero(e1)) @@ -825,6 +862,8 @@ Optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { return addExp(kAddI, e0, e1); if (isa(def)) return addExp(kSubF, e0, e1); + if (isa(def)) + return 
addExp(kSubC, e0, e1); if (isa(def)) return addExp(kSubI, e0, e1); if (isa(def)) @@ -902,6 +941,11 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, unsigned e, // Unary ops. case kAbsF: return rewriter.create(loc, v0); + case kAbsC: { + auto type = v0.getType().template cast(); + auto eltType = type.getElementType().template cast(); + return rewriter.create(loc, eltType, v0); + } case kCeilF: return rewriter.create(loc, v0); case kFloorF: @@ -912,12 +956,18 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, unsigned e, return rewriter.create(loc, v0); case kLog1pF: return rewriter.create(loc, v0); + case kLog1pC: + return rewriter.create(loc, v0); case kSinF: return rewriter.create(loc, v0); + case kSinC: + return rewriter.create(loc, v0); case kTanhF: return rewriter.create(loc, v0); case kNegF: return rewriter.create(loc, v0); + case kNegC: + return rewriter.create(loc, v0); case kNegI: // no negi in std return rewriter.create( loc, @@ -964,6 +1014,8 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, unsigned e, return rewriter.create(loc, v0, v1); case kDivF: return rewriter.create(loc, v0, v1); + case kDivC: + return rewriter.create(loc, v0, v1); case kDivS: return rewriter.create(loc, v0, v1); case kDivU: @@ -976,6 +1028,8 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, unsigned e, return rewriter.create(loc, v0, v1); case kSubF: return rewriter.create(loc, v0, v1); + case kSubC: + return rewriter.create(loc, v0, v1); case kSubI: return rewriter.create(loc, v0, v1); case kAndI: diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir new file mode 100644 index 00000000000000..74c30e6f5ff839 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir @@ -0,0 +1,179 @@ +// RUN: mlir-opt %s --sparse-compiler | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry 
-entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}> + +#trait_op1 = { + indexing_maps = [ + affine_map<(i) -> (i)>, // a (in) + affine_map<(i) -> (i)> // x (out) + ], + iterator_types = ["parallel"], + doc = "x(i) = OP a(i)" +} + +#trait_op2 = { + indexing_maps = [ + affine_map<(i) -> (i)>, // a (in) + affine_map<(i) -> (i)>, // b (in) + affine_map<(i) -> (i)> // x (out) + ], + iterator_types = ["parallel"], + doc = "x(i) = a(i) OP b(i)" +} + +module { + func.func @cops(%arga: tensor, #SparseVector>, + %argb: tensor, #SparseVector>) + -> tensor, #SparseVector> { + %c0 = arith.constant 0 : index + %d = tensor.dim %arga, %c0 : tensor, #SparseVector> + %xv = sparse_tensor.init [%d] : tensor, #SparseVector> + %0 = linalg.generic #trait_op2 + ins(%arga, %argb: tensor, #SparseVector>, + tensor, #SparseVector>) + outs(%xv: tensor, #SparseVector>) { + ^bb(%a: complex, %b: complex, %x: complex): + %1 = complex.neg %b : complex + %2 = complex.sub %a, %1 : complex + linalg.yield %2 : complex + } -> tensor, #SparseVector> + return %0 : tensor, #SparseVector> + } + + func.func @csin(%arga: tensor, #SparseVector>) + -> tensor, #SparseVector> { + %c0 = arith.constant 0 : index + %d = tensor.dim %arga, %c0 : tensor, #SparseVector> + %xv = sparse_tensor.init [%d] : tensor, #SparseVector> + %0 = linalg.generic #trait_op1 + ins(%arga: tensor, #SparseVector>) + outs(%xv: tensor, #SparseVector>) { + ^bb(%a: complex, %x: complex): + %1 = complex.sin %a : complex + linalg.yield %1 : complex + } -> tensor, #SparseVector> + return %0 : tensor, #SparseVector> + } + + func.func @cdiv(%arga: tensor, #SparseVector>) + -> tensor, #SparseVector> { + %c0 = arith.constant 0 : index + %d = tensor.dim %arga, %c0 : tensor, #SparseVector> + %xv = sparse_tensor.init [%d] : tensor, #SparseVector> + %c = complex.constant [2.0 : f64, 0.0 : f64] : 
complex + %0 = linalg.generic #trait_op1 + ins(%arga: tensor, #SparseVector>) + outs(%xv: tensor, #SparseVector>) { + ^bb(%a: complex, %x: complex): + %1 = complex.div %a, %c : complex + linalg.yield %1 : complex + } -> tensor, #SparseVector> + return %0 : tensor, #SparseVector> + } + + func.func @cabs(%arga: tensor, #SparseVector>) + -> tensor { + %c0 = arith.constant 0 : index + %d = tensor.dim %arga, %c0 : tensor, #SparseVector> + %xv = sparse_tensor.init [%d] : tensor + %0 = linalg.generic #trait_op1 + ins(%arga: tensor, #SparseVector>) + outs(%xv: tensor) { + ^bb(%a: complex, %x: f64): + %1 = complex.abs %a : complex + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + func.func @dumpc(%arg0: tensor, #SparseVector>, %d: index) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %mem = sparse_tensor.values %arg0 : tensor, #SparseVector> to memref> + scf.for %i = %c0 to %d step %c1 { + %v = memref.load %mem[%i] : memref> + %real = complex.re %v : complex + %imag = complex.im %v : complex + vector.print %real : f64 + vector.print %imag : f64 + } + return + } + + func.func @dumpf(%arg0: tensor) { + %c0 = arith.constant 0 : index + %d0 = arith.constant 0.0 : f64 + %values = sparse_tensor.values %arg0 : tensor to memref + %0 = vector.transfer_read %values[%c0], %d0: memref, vector<3xf64> + vector.print %0 : vector<3xf64> + return + } + + // Driver method to call and verify complex kernels. + func.func @entry() { + // Setup sparse vectors. + %v1 = arith.constant sparse< + [ [0], [28], [31] ], + [ (-5.13, 2.0), (3.0, 4.0), (5.0, 6.0) ] > : tensor<32xcomplex> + %v2 = arith.constant sparse< + [ [1], [28], [31] ], + [ (1.0, 0.0), (-2.0, 0.0), (3.0, 0.0) ] > : tensor<32xcomplex> + %sv1 = sparse_tensor.convert %v1 : tensor<32xcomplex> to tensor, #SparseVector> + %sv2 = sparse_tensor.convert %v2 : tensor<32xcomplex> to tensor, #SparseVector> + + // Call sparse vector kernels. 
+ %0 = call @cops(%sv1, %sv2) + : (tensor, #SparseVector>, + tensor, #SparseVector>) -> tensor, #SparseVector> + %1 = call @csin(%sv1) + : (tensor, #SparseVector>) -> tensor, #SparseVector> + %2 = call @cdiv(%sv1) + : (tensor, #SparseVector>) -> tensor, #SparseVector> + %3 = call @cabs(%sv1) + : (tensor, #SparseVector>) -> tensor + + // + // Verify the results. + // + %d3 = arith.constant 3 : index + %d4 = arith.constant 4 : index + // CHECK: -5.13 + // CHECK-NEXT: 2 + // CHECK-NEXT: 1 + // CHECK-NEXT: 0 + // CHECK-NEXT: 1 + // CHECK-NEXT: 4 + // CHECK-NEXT: 8 + // CHECK-NEXT: 6 + call @dumpc(%0, %d4) : (tensor, #SparseVector>, index) -> () + // CHECK-NEXT: 3.43887 + // CHECK-NEXT: 1.47097 + // CHECK-NEXT: 3.85374 + // CHECK-NEXT: -27.0168 + // CHECK-NEXT: -193.43 + // CHECK-NEXT: 57.2184 + call @dumpc(%1, %d3) : (tensor, #SparseVector>, index) -> () + // CHECK-NEXT: -2.565 + // CHECK-NEXT: 1 + // CHECK-NEXT: 1.5 + // CHECK-NEXT: 2 + // CHECK-NEXT: 2.5 + // CHECK-NEXT: 3 + call @dumpc(%2, %d3) : (tensor, #SparseVector>, index) -> () + // CHECK-NEXT: ( 5.50608, 5, 7.81025 ) + call @dumpf(%3) : (tensor) -> () + + // Release the resources. + sparse_tensor.release %sv1 : tensor, #SparseVector> + sparse_tensor.release %sv2 : tensor, #SparseVector> + sparse_tensor.release %0 : tensor, #SparseVector> + sparse_tensor.release %1 : tensor, #SparseVector> + sparse_tensor.release %2 : tensor, #SparseVector> + sparse_tensor.release %3 : tensor + return + } +} From 190b0f42cf559af7299c53ff010b55bc9b4f5f8c Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 20 May 2022 21:39:16 +0000 Subject: [PATCH 136/908] [lld-macho] Stop crash when emitting personalities with -dead_strip The symbol was tripping an assertion in getVA() because it was not marked as used. Per the comment above that symbol's creation, dead stripping has already occurred, so marking this symbol as used is accurate. 
Fixes https://github.com/llvm/llvm-project/issues/55565 Differential revision: https://reviews.llvm.org/D126072 --- lld/MachO/UnwindInfoSection.cpp | 1 + lld/test/MachO/weak-definition-gc.s | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index c0850af09388f1..7aa0e8253c768f 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -309,6 +309,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { /*includeInSymtab=*/true, /*isThumb=*/false, /*isReferencedDynamically=*/false, /*noDeadStrip=*/false); + s->used = true; in.got->addEntry(s); } r.referent = s; diff --git a/lld/test/MachO/weak-definition-gc.s b/lld/test/MachO/weak-definition-gc.s index e1d5cb44ddb0e9..15c3b88c25461a 100644 --- a/lld/test/MachO/weak-definition-gc.s +++ b/lld/test/MachO/weak-definition-gc.s @@ -66,15 +66,19 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/weak-aligned-1.s -o %t/weak-aligned-1.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/weak-aligned-2.s -o %t/weak-aligned-2.o # RUN: %lld -o %t/out -lSystem %t/weak-aligned-1.o %t/weak-aligned-2.o -# RUN: llvm-objdump --syms --section=__const --full-contents %t/out | FileCheck --check-prefix=ALIGN %s -# ALIGN: SYMBOL TABLE: -# ALIGN-DAG: [[#%x, ADDR:]] l O __DATA_CONST,__const _weak1 -# ALIGN-DAG: {{0*}}[[#ADDR+ 0x4]] l O __DATA_CONST,__const _weak3 -# ALIGN-DAG: {{0*}}[[#ADDR+ 0x8]] l O __DATA_CONST,__const _weak2 -# ALIGN-DAG: {{0*}}[[#ADDR+0x10]] g O __DATA_CONST,__const _aligned -# ALIGN: Contents of section __DATA_CONST,__const: -# ALIGN-NEXT: {{0*}}[[#ADDR]] 11111111 33333333 22222222 00000000 -# ALIGN-NEXT: {{0*}}[[#ADDR+0x10]] 81818181 81818181 82828282 82828282 +# RUN: llvm-objdump --syms --section=__const --full-contents %t/out | FileCheck --check-prefixes=ALIGN,ALIGN2 %s +# RUN: %lld -o %t/out -lSystem %t/weak-aligned-1.o 
%t/weak-aligned-2.o -dead_strip +# RUN: llvm-objdump --syms --section=__const --full-contents %t/out | FileCheck --check-prefixes=ALIGN,ALIGN3 %s +# ALIGN: SYMBOL TABLE: +# ALIGN-DAG: [[#%x, ADDR:]] l O __DATA_CONST,__const _weak1 +# ALIGN2-DAG: {{0*}}[[#ADDR+ 0x4]] l O __DATA_CONST,__const _weak3 +# ALIGN3-DAG: {{0*}}[[#ADDR+ 0x4]] l O __DATA_CONST,__const _weak2 +# ALIGN2-DAG: {{0*}}[[#ADDR+ 0x8]] l O __DATA_CONST,__const _weak2 +# ALIGN-DAG: {{0*}}[[#ADDR+0x10]] g O __DATA_CONST,__const _aligned +# ALIGN: Contents of section __DATA_CONST,__const: +# ALIGN2-NEXT: {{0*}}[[#ADDR]] 11111111 33333333 22222222 00000000 +# ALIGN3-NEXT: {{0*}}[[#ADDR]] 11111111 22222222 00000000 00000000 +# ALIGN-NEXT: {{0*}}[[#ADDR+0x10]] 81818181 81818181 82828282 82828282 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/weak-def.s -o %t/weak-def.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/strong-def.s -o %t/strong-def.o From b369762beb70dfef22c7e793aed79b94d7dc0757 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Fri, 20 May 2022 15:14:26 -0700 Subject: [PATCH 137/908] Convert the test file for TestIgnoredExceptions.py to the mach_vm API. The previous version of this test used mprotect, and that seemed to be flaky on older systems. I converted the test to use the underlying mach_vm APIs. The test only runs on Darwin anyway, so this is not a real limitation, and I'm hoping the lower-level APIs work more consistently. 
--- lldb/test/API/macosx/ignore_exceptions/main.c | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lldb/test/API/macosx/ignore_exceptions/main.c b/lldb/test/API/macosx/ignore_exceptions/main.c index 7b89dbf88152bc..682c5f23627e0a 100644 --- a/lldb/test/API/macosx/ignore_exceptions/main.c +++ b/lldb/test/API/macosx/ignore_exceptions/main.c @@ -3,25 +3,34 @@ #include #include #include +#include +#include -int g_ints[] = {10, 20, 30, 40, 50, 60}; +int *g_int_ptr = NULL; +size_t g_size = 10*sizeof(int); void saction_handler(int signo, siginfo_t info, void *baton) { - printf("Got into handler.\n"); - mprotect(g_ints, sizeof(g_ints), PROT_READ|PROT_WRITE); // stop here in the signal handler - g_ints[0] = 20; + printf("Got into handler.\n"); // stop here in the signal handler + kern_return_t success + = mach_vm_protect(mach_task_self(), g_int_ptr, + g_size, 0, VM_PROT_READ|VM_PROT_WRITE); + g_int_ptr[1] = 20; } int main() { - mprotect(g_ints, 10*sizeof(int) , PROT_NONE); + kern_return_t vm_result = vm_allocate(mach_task_self(), &g_int_ptr, g_size, VM_FLAGS_ANYWHERE); + for (int i = 0; i < 10; i++) + g_int_ptr[i] = i * 10; + + vm_result = mach_vm_protect(mach_task_self(), g_int_ptr, g_size, 0, VM_PROT_NONE); struct sigaction my_action; sigemptyset(&my_action.sa_mask); my_action.sa_handler = (void (*)(int)) saction_handler; my_action.sa_flags = SA_SIGINFO; sigaction(SIGBUS, &my_action, NULL); // Stop here to get things going. - int local_value = g_ints[1]; + int local_value = g_int_ptr[1]; return local_value; // Break here to make sure we got past the signal handler } From 59726668f1dcb32883de4cf027fe3585ca384cf5 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 20 May 2022 15:16:10 -0700 Subject: [PATCH 138/908] [WebAssembly] Strip TLS when "atomics" is not enabled With f3b4f99007cdcb3306484c9a39d31addc20aaa69, the exclusive source of truth for whether threads are supported is the -matomics flag. 
Accordingly, strip TLS flags when -matomics is not specified, even if bulk-memory is specified and it would theoretically be supportable. This allows the backend to compile TLS variables when -mbulk-memory is enabled but threads are not enabled. Differential Revision: https://reviews.llvm.org/D125730 --- .../WebAssembly/WebAssemblyTargetMachine.cpp | 7 ++++--- .../CodeGen/WebAssembly/target-features-tls.ll | 14 ++++++++++---- .../CodeGen/WebAssembly/tls-general-dynamic.ll | 10 +++++----- llvm/test/CodeGen/WebAssembly/tls-local-exec.ll | 6 +++--- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 03124781ea4e60..93dd5466fdd82e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -203,11 +203,12 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { bool StrippedAtomics = false; bool StrippedTLS = false; - if (!Features[WebAssembly::FeatureAtomics]) + if (!Features[WebAssembly::FeatureAtomics]) { StrippedAtomics = stripAtomics(M); - - if (!Features[WebAssembly::FeatureBulkMemory]) StrippedTLS = stripThreadLocals(M); + } else if (!Features[WebAssembly::FeatureBulkMemory]) { + StrippedTLS |= stripThreadLocals(M); + } if (StrippedAtomics && !StrippedTLS) stripThreadLocals(M); diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index ae5bf4cea9f6fa..57d14053f33426 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-bulk-memory | FileCheck %s --check-prefixes NO-BULK-MEM -; RUN: llc < %s -mattr=+bulk-memory | FileCheck %s --check-prefixes BULK-MEM +; RUN: llc < %s -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes NO-BULK-MEM +; RUN: llc < %s 
-mattr=+bulk-memory,atomics | FileCheck %s --check-prefixes BULK-MEM ; Test that the target features section contains -atomics or +atomics ; for modules that have thread local storage in their source. @@ -10,7 +10,10 @@ target triple = "wasm32-unknown-unknown" ; -bulk-memory ; NO-BULK-MEM-LABEL: .custom_section.target_features,"",@ -; NO-BULK-MEM-NEXT: .int8 1 +; NO-BULK-MEM-NEXT: .int8 2 +; NO-BULK-MEM-NEXT: .int8 43 +; NO-BULK-MEM-NEXT: .int8 7 +; NO-BULK-MEM-NEXT: .ascii "atomics" ; NO-BULK-MEM-NEXT: .int8 45 ; NO-BULK-MEM-NEXT: .int8 10 ; NO-BULK-MEM-NEXT: .ascii "shared-mem" @@ -18,7 +21,10 @@ target triple = "wasm32-unknown-unknown" ; +bulk-memory ; BULK-MEM-LABEL: .custom_section.target_features,"",@ -; BULK-MEM-NEXT: .int8 1 +; BULK-MEM-NEXT: .int8 2 +; BULK-MEM-NEXT: .int8 43 +; BULK-MEM-NEXT: .int8 7 +; BULK-MEM-NEXT: .ascii "atomics" ; BULK-MEM-NEXT: .int8 43 ; BULK-MEM-NEXT: .int8 11 ; BULK-MEM-NEXT: .ascii "bulk-memory" diff --git a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll index b6ede2bb97fe2b..214c2585800453 100644 --- a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll +++ b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll @@ -1,8 +1,8 @@ -; RUN: not --crash llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory 2>&1 | FileCheck %s --check-prefix=ERROR -; RUN: not --crash llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory -fast-isel 2>&1 | FileCheck %s --check-prefix=ERROR -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory --mtriple wasm32-unknown-emscripten | FileCheck %s --check-prefixes=CHECK,TLS -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory --mtriple wasm32-unknown-emscripten -fast-isel | FileCheck %s 
--check-prefixes=CHECK,TLS -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory | FileCheck %s --check-prefixes=CHECK,NO-TLS +; RUN: not --crash llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: not --crash llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics --mtriple wasm32-unknown-emscripten | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics --mtriple wasm32-unknown-emscripten -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes=CHECK,NO-TLS target triple = "wasm32-unknown-unknown" ; ERROR: LLVM ERROR: only -ftls-model=local-exec is supported for now on non-Emscripten OSes: variable tls diff --git a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll index 583f15a4fabdec..95f139386cb7fb 100644 --- a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll +++ b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS -; RUN: llc < %s -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory | FileCheck %s --check-prefixes=CHECK,NO-TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes=CHECK,NO-TLS target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: address_of_tls: From 362b4066f0c6178d639db41517549e13b3610036 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 20 May 2022 15:26:31 -0700 Subject: [PATCH 139/908] [ObjCARC] Drop nullary clang.arc.attachedcall bundles in autoupgrade. In certain use-cases, these can be emitted by old compilers, but the operand is now always required. These are only used for optimizations, so it's safe to drop them if they happen to have the now-invalid format. The semantically-required call is already a separate instruction. 
Differential Revision: https://reviews.llvm.org/D123811 --- llvm/include/llvm/IR/AutoUpgrade.h | 7 ++++++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 12 +++++++++ llvm/lib/IR/AutoUpgrade.cpp | 12 +++++++++ .../upgrade-arc-attachedcall-bundle.ll | 23 ++++++++++++++++++ .../upgrade-arc-attachedcall-bundle.ll.bc | Bin 0 -> 1364 bytes 5 files changed, 54 insertions(+) create mode 100644 llvm/test/Bitcode/upgrade-arc-attachedcall-bundle.ll create mode 100644 llvm/test/Bitcode/upgrade-arc-attachedcall-bundle.ll.bc diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h index bcece46f89e02c..96d20a7aefb1a8 100644 --- a/llvm/include/llvm/IR/AutoUpgrade.h +++ b/llvm/include/llvm/IR/AutoUpgrade.h @@ -14,6 +14,7 @@ #define LLVM_IR_AUTOUPGRADE_H #include "llvm/ADT/StringRef.h" +#include <vector> namespace llvm { class AttrBuilder; @@ -28,6 +29,9 @@ namespace llvm { class Type; class Value; + template <typename T> class OperandBundleDefT; + using OperandBundleDef = OperandBundleDefT<Value *>; + /// This is a more granular function that simply checks an intrinsic function /// for upgrading, and returns true if it requires upgrading. It may return /// null in NewFn if the all calls to the original intrinsic function @@ -99,6 +103,9 @@ namespace llvm { /// Upgrade attributes that changed format or kind. void UpgradeAttributes(AttrBuilder &B); + /// Upgrade operand bundles (without knowing about their user instruction). + void UpgradeOperandBundles(std::vector<OperandBundleDef> &OperandBundles); + } // End llvm namespace #endif diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 9d49a96522b29f..cc5892959d0ad8 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5138,6 +5138,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } } + // Upgrade the bundles if needed. 
+ if (!OperandBundles.empty()) + UpgradeOperandBundles(OperandBundles); + I = InvokeInst::Create(FTy, Callee, NormalBB, UnwindBB, Ops, OperandBundles); ResTypeID = getContainedTypeID(FTyID); @@ -5235,6 +5239,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } } + // Upgrade the bundles if needed. + if (!OperandBundles.empty()) + UpgradeOperandBundles(OperandBundles); + I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args, OperandBundles); ResTypeID = getContainedTypeID(FTyID); @@ -5846,6 +5854,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } } + // Upgrade the bundles if needed. + if (!OperandBundles.empty()) + UpgradeOperandBundles(OperandBundles); + I = CallInst::Create(FTy, Callee, Args, OperandBundles); ResTypeID = getContainedTypeID(FTyID); OperandBundles.clear(); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index fbd08777d8180c..8028371cb6e876 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -4675,3 +4675,15 @@ void llvm::UpgradeAttributes(AttrBuilder &B) { B.addAttribute(Attribute::NullPointerIsValid); } } + +void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) { + + // clang.arc.attachedcall bundles are now required to have an operand. + // If they don't, it's okay to drop them entirely: when there is an operand, + // the "attachedcall" is meaningful and required, but without an operand, + // it's just a marker NOP. Dropping it merely prevents an optimization. + erase_if(Bundles, [&](OperandBundleDef &OBD) { + return OBD.getTag() == "clang.arc.attachedcall" && + OBD.inputs().empty(); + }); +} diff --git a/llvm/test/Bitcode/upgrade-arc-attachedcall-bundle.ll b/llvm/test/Bitcode/upgrade-arc-attachedcall-bundle.ll new file mode 100644 index 00000000000000..7b012abf808502 --- /dev/null +++ b/llvm/test/Bitcode/upgrade-arc-attachedcall-bundle.ll @@ -0,0 +1,23 @@ +; Test that nullary clang.arc.attachedcall operand bundles are "upgraded". 
+ +; RUN: llvm-dis %s.bc -o - | FileCheck %s +; RUN: verify-uselistorder %s.bc + +define i8* @invalid() { +; CHECK-LABEL: define i8* @invalid() { +; CHECK-NEXT: %tmp0 = call i8* @foo(){{$}} +; CHECK-NEXT: ret i8* %tmp0 + %tmp0 = call i8* @foo() [ "clang.arc.attachedcall"() ] + ret i8* %tmp0 +} + +define i8* @valid() { +; CHECK-LABEL: define i8* @valid() { +; CHECK-NEXT: %tmp0 = call i8* @foo() [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ] +; CHECK-NEXT: ret i8* %tmp0 + %tmp0 = call i8* @foo() [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ] + ret i8* %tmp0 +} + +declare i8* @foo() +declare i8* @llvm.objc.retainAutoreleasedReturnValue(i8*) diff --git a/llvm/test/Bitcode/upgrade-arc-attachedcall-bundle.ll.bc b/llvm/test/Bitcode/upgrade-arc-attachedcall-bundle.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..bd47cfc1bebcb4a1e8ecccb99d8a25b248dc5f48 GIT binary patch literal 1364 zcmY*ZacC1~6#uT5=1v>GyJ^wX-o-mxS=Np2l2lEWWHdP^k##O^V|3tHE+(0ES$j>> zHm%!8nz*Z&L6pKW=)i*i_+JP8Ls%JYuXC9Mak33zp(Jb6PNECb4PnCeU39`8-1~Ts z_r3Rfzwf>Cic4#ITLEYW00a#}=WFkL6Z!1#h1c5)od+;$GvEyXbYTFrH1VJb#cW3( zk?J_>Dd-OAX{+NLmND#UCfiK9Gr=VNN|X0oi#J1CTkWLr)y-D#v`=2;I}U7Bvb5)1 zQ>Z|G)yjznr@8Mjt2blpGI!-HAI%><%dHkr-qRcas2s5FCO2`xgMfd&Gf(q-kZOC@ zaOy%X;0+z+Izv<7i;iBhkM2Zyc9KR695DibqkHYs<$sV6y$I+b~E^>yO2*GL5`X1qOVhty*9I$r5I`nD;bVed4kw`JYDgI=Ydr;akFdQQCZGD**Hv zt;Xl*vyn+=wZcSNG554+&!|~%l9)&kBl%&&EkNC+;e#Z!8N8;IW%lYE6A_vlVd?fF z?kO>NfHx2MV5(486Qh1GN-f^pMkX;tO(SgK&{td-4AW_D!axrH+XhM zGDU>b&c(Fzn&?=-U5jZ~26qH1)&x9Qm}9DQOtGO$5_5%TR#WU1N&jmF59Z`@a)<$`w3>!0P}2J(!X^oZ7-_MdC`psd9>oHiq2_ds@ZQhqfV7^goh+1%|tAp zI=W87+7i6!hxdh!A>t(NR=gmzEs`74_&U`WA*s z0-+j41okU!zg&j30Cl&luXEH*fx5|2fBE4c4c7&T%==~iZ)I5Z!-qo0uQiETNwGI7 zJ8C?8i)UAP7QNsGzbnFh>0A`umvF~S+EMIZM6ZvUokTc&%D7Aba1qTik7lf8bIgq1 zj+pG1^C(_LEIF>JQ@5DQb6Yk%+GwG)k2%@ivmSuA{fKI+su51z_QMDB$rLeRZcIW# zL0xD(^vkoVeF1Y`NV{j$rVr3iF{KJq;+U$W>5Y_WMPjbaF@?wAu^-U-=n)p8=(>=0 
z&Y}Up-RILTGz>Xa+XaaOU=upG>6S~LK-NeDaw^3|X{ZB@r8?N#R}{5nMIvzkY(b|M zn+oVU^g1vV;K=q2j{Pr|w>-Yv@+o+_yu;|Omc|O{-5D^`WUN1jECAj`KagGM+=|cq zuGs^FV^VY=)KHP3p=fmM!|tKriDTWda9kP~^o+)bV&Q058VQHq2**cbgKtaG(Xgeb zTj& Date: Sat, 21 May 2022 07:45:07 +0900 Subject: [PATCH 140/908] SystemZAsmPrinter.cpp: Fix a warning. [-Wunused-variable] This could be rewritten as `!ZFL->hasFP(*MF) || FrameReg < 16` but I thought better to clarify it is `AllocaReg`. --- llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 3142abb2b08c36..e34f522ab5505f 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -970,6 +970,7 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { uint8_t FrameReg = TRI->getEncodingValue(TRI->getFrameRegister(*MF)); uint8_t AllocaReg = ZFL->hasFP(*MF) ? FrameReg : 0; assert(AllocaReg < 16 && "Can't have alloca register larger than 15"); + (void)AllocaReg; // Build FPR save area offset. 
uint32_t FrameAndFPROffset = 0; From 705f048cbbfd51569097f235b5a08563ce40509a Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 20 May 2022 14:59:23 -0700 Subject: [PATCH 141/908] [mlir] MemRefToLLVM: convert memref.view operations for empty memrefs Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D126094 --- .../Conversion/MemRefToLLVM/MemRefToLLVM.cpp | 9 +++++--- .../MemRefToLLVM/memref-to-llvm.mlir | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp index 579f2da92fa41d..3dfd591b0fca85 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -1848,6 +1848,12 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { return viewOp.emitWarning("cannot cast to non-strided shape"), failure(); assert(offset == 0 && "expected offset to be 0"); + // Target memref must be contiguous in memory (innermost stride is 1), or + // empty (special case when at least one of the memref dimensions is 0). + if (!strides.empty() && (strides.back() != 1 && strides.back() != 0)) + return viewOp.emitWarning("cannot cast to non-contiguous shape"), + failure(); + // Create the descriptor. MemRefDescriptor sourceMemRef(adaptor.source()); auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy); @@ -1884,9 +1890,6 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { return rewriter.replaceOp(viewOp, {targetMemRef}), success(); // Fields 4 and 5: Update sizes and strides. - if (strides.back() != 1) - return viewOp.emitWarning("cannot cast to non-contiguous shape"), - failure(); Value stride = nullptr, nextSize = nullptr; for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) { // Update size. 
diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir index 84fcea403227ea..4b0dc3b7568397 100644 --- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir @@ -89,6 +89,29 @@ func.func @view(%arg0 : index, %arg1 : index, %arg2 : index) { // ----- +// CHECK-LABEL: func @view_empty_memref( +// CHECK: %[[ARG0:.*]]: index, +// CHECK: %[[ARG1:.*]]: memref<0xi8>) +func.func @view_empty_memref(%offset: index, %mem: memref<0xi8>) { + + // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(0 : index) : i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(4 : index) : i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(0 : index) : i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(0 : index) : i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(0 : index) : i64 + // CHECK: = llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> + %0 = memref.view %mem[%offset][] : memref<0xi8> to memref<0x4xf32> + + return +} + +// ----- + // CHECK-LABEL: func @subview( // CHECK: %[[MEM:.*]]: memref<{{.*}}>, // CHECK: %[[ARG0f:[a-zA-Z0-9]*]]: index, From f1d197f1a804f470b1025a7a9eeabeecfcbaeb0d Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Sat, 21 May 2022 00:11:50 +0000 Subject: [PATCH 142/908] Revert "Mark new TSan test as unsupported on PPC." This reverts commit b517d679dd69a30ed84e9782a2e902cf4284ebc7. 
--- compiler-rt/test/tsan/lock_free_stack.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/compiler-rt/test/tsan/lock_free_stack.cpp b/compiler-rt/test/tsan/lock_free_stack.cpp index 3ae6ced95b800e..5477adcfdf9767 100755 --- a/compiler-rt/test/tsan/lock_free_stack.cpp +++ b/compiler-rt/test/tsan/lock_free_stack.cpp @@ -1,11 +1,5 @@ // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s // RUN: %clangxx_tsan -O1 %s -DRACE -o %t && %deflake %run %t | FileCheck %s --check-prefix=CHECK-RACE - -// Found in the post-submit testing under PPC (documented in -// https://reviews.llvm.org/D110552), this test fails under PowerPC. Should be -// investigated at some point. -// UNSUPPORTED: ppc - #include "test.h" const int kThreadCount = 4; From d6a3c8ca1831879c60104db9efb39715b4206acc Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Sat, 21 May 2022 00:12:06 +0000 Subject: [PATCH 143/908] Revert "tsan: add lock free stack pattern test" This reverts commit 5deca650fdba22a20e24b397e905ad0abba6f9a9. --- compiler-rt/test/tsan/lock_free_stack.cpp | 247 ---------------------- 1 file changed, 247 deletions(-) delete mode 100755 compiler-rt/test/tsan/lock_free_stack.cpp diff --git a/compiler-rt/test/tsan/lock_free_stack.cpp b/compiler-rt/test/tsan/lock_free_stack.cpp deleted file mode 100755 index 5477adcfdf9767..00000000000000 --- a/compiler-rt/test/tsan/lock_free_stack.cpp +++ /dev/null @@ -1,247 +0,0 @@ -// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_tsan -O1 %s -DRACE -o %t && %deflake %run %t | FileCheck %s --check-prefix=CHECK-RACE -#include "test.h" - -const int kThreadCount = 4; -#if RACE -const int kTestCount = 16; -#else -const int kTestCount = 9; -#endif - -template <typename F> F for_each_mo(int mo, F f) { - f(mo); - return f; -} - -template <typename... Rest> -auto for_each_mo(int mo, Rest... 
rest) -> decltype(for_each_mo(rest...)) { - auto f = for_each_mo(rest...); - f(mo); - return f; -} - -void LockFreeStackImpl(int test, bool main_thread, int mo2, int mo4) { - struct Node { - int data; - Node *next; - }; - static Node *heads[kTestCount]{}; - auto head = heads + test; - - auto concurrent_push = [head](Node *new_head, int mo1, int mo2, int mo3) { - auto expected = __atomic_load_n(head, mo1); - do { - new_head->next = expected; - } while (!__atomic_compare_exchange_n(head, &expected, new_head, - /*weak*/ true, mo2, mo3)); - }; - - auto concurrent_grab_all = [head](int mo4) { - volatile int sink{}; - (void)sink; - - auto h = __atomic_exchange_n(head, nullptr, mo4); - while (h) { - sink = ++h->data; - auto next = h->next; - delete h; - h = next; - } - }; - - if (main_thread) { - concurrent_grab_all(mo4); - } else { - int i = 0; - // We have 15 combinations of mo1 and mo3. Since we have two race reports - // for each combination (the first report is for 'data' and the second - // report for 'next'), there are 30 race reports in total that should match - // to "CHECK-RACE-COUNT{-number_of_reports}" below - for_each_mo( - __ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __ATOMIC_SEQ_CST, [&](int mo1) { - for_each_mo(__ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __ATOMIC_RELEASE, - __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST, [&](int mo3) { - concurrent_push(new Node{i++}, mo1, mo2, mo3); - }); - }); - } -} - -void LockFreeStack(int test, bool main_thread, int mo2, int mo4) { - barrier_wait(&barrier); - if (main_thread) { - // We need to call LockFreeStackImpl second time after the barrier - // to guarantee at least one grab_all and cleanup. - // However, it is better to have one instantiation of LockFreeStackImpl - // on the main thread to merge the call stacks and prevent double race - // reports. Therefore, we use two interation for loop and skip the barrier - // on the second iteration. 
- for (int i = 0; i < 2; ++i) { - LockFreeStackImpl(test, main_thread, mo2, mo4); - if (i == 0) { - barrier_wait(&barrier); - } - } - } else { - LockFreeStackImpl(test, main_thread, mo2, mo4); - barrier_wait(&barrier); - } -} - -void Test(bool main_thread) { - for (int test = 0; test < kTestCount; test++) { - if (main_thread) { - fprintf(stderr, "Test %d\n", test); - } - switch (test) { -#if RACE - case 0: - LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - break; - case 1: - LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_ACQUIRE); - break; - case 2: - LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_RELEASE); - break; - case 3: - LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_ACQ_REL); - break; - case 4: - LockFreeStack(test, main_thread, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST); - break; - case 5: - LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); - break; - case 6: - LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); - break; - case 7: - LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_RELEASE); - break; - case 8: - LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_ACQ_REL); - break; - case 9: - LockFreeStack(test, main_thread, __ATOMIC_ACQUIRE, __ATOMIC_SEQ_CST); - break; - case 10: - LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_RELAXED); - break; - case 11: - LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_RELEASE); - break; - case 12: - LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); - break; - case 13: - LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_RELEASE); - break; - case 14: - LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); - break; - case 15: - LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_RELEASE); - break; -#else - case 0: - LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE); - break; - case 1: - LockFreeStack(test, 
main_thread, __ATOMIC_RELEASE, __ATOMIC_ACQ_REL); - break; - case 2: - LockFreeStack(test, main_thread, __ATOMIC_RELEASE, __ATOMIC_SEQ_CST); - break; - case 3: - LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); - break; - case 4: - LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_ACQ_REL); - break; - case 5: - LockFreeStack(test, main_thread, __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST); - break; - case 6: - LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); - break; - case 7: - LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL); - break; - case 8: - LockFreeStack(test, main_thread, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); - break; -#endif - } - } -} - -void *Thread(void *p) { - Test(false); - return 0; -} - -int main() { - barrier_init(&barrier, kThreadCount); - pthread_t t[kThreadCount - 1]; - for (int i = 0; i < kThreadCount - 1; ++i) - pthread_create(t + i, 0, Thread, (void *)(uintptr_t)(i + 1)); - Test(true); - for (int i = 0; i < kThreadCount - 1; ++i) - pthread_join(t[i], 0); -} - -// No race tests -// CHECK-NOT: ThreadSanitizer: data race - -// Race tests -// 30 is the number of race reports for 15 possible combinations of mo1 and mo3 -// CHECK-RACE: Test 0 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 1 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 2 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 3 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 4 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 5 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data 
race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 6 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 7 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 8 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 9 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 10 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 11 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 12 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 13 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 14 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE: Test 15 -// CHECK-RACE-COUNT-30: SUMMARY: ThreadSanitizer: data race -// CHECK-RACE-NOT: SUMMARY: ThreadSanitizer: data race From 4f6ac969267d6380568b56fc1f7f27a1b0dd4a39 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 20 May 2022 16:16:36 -0700 Subject: [PATCH 144/908] [lld][WebAssembly] Add TLS test to lld/test/wasm/data-segments.ll. 
NFC Differential Revision: https://reviews.llvm.org/D126104 --- lld/test/wasm/data-segments.ll | 48 ++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll index be526de9d21312..25b45982fe8bc0 100644 --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -45,6 +45,8 @@ @e = private constant [9 x i8] c"constant\00", align 1 @f = private constant i8 43, align 4 +@g = thread_local global i32 99, align 4 + ; ERROR: 'bulk-memory' feature must be used in order to use shared memory ; ACTIVE-LABEL: - Type: CODE @@ -77,7 +79,7 @@ ; PASSIVE-LABEL: - Type: START ; PASSIVE-NEXT: StartFunction: 2 ; PASSIVE-LABEL: - Type: DATACOUNT -; PASSIVE-NEXT: Count: 2 +; PASSIVE-NEXT: Count: 3 ; PASSIVE-LABEL: - Type: CODE ; PASSIVE-NEXT: Functions: ; PASSIVE-NEXT: - Index: 0 @@ -85,7 +87,7 @@ ; PASSIVE-NEXT: Body: 0B ; PASSIVE-NEXT: - Index: 1 ; PASSIVE-NEXT: Locals: [] -; PASSIVE-NEXT: Body: 0B +; PASSIVE-NEXT: Body: {{.*}} ; PASSIVE-NEXT: - Index: 2 ; PASSIVE-NEXT: Locals: [] ; PASSIVE-NEXT: Body: {{.*}} @@ -93,8 +95,11 @@ ; PASSIVE-NEXT: Segments: ; PASSIVE-NEXT: - SectionOffset: 3 ; PASSIVE-NEXT: InitFlags: 1 +; PASSIVE-NEXT: Content: '63000000' +; PASSIVE-NEXT: - SectionOffset: 9 +; PASSIVE-NEXT: InitFlags: 1 ; PASSIVE-NEXT: Content: 636F6E7374616E74000000002B -; PASSIVE-NEXT: - SectionOffset: 18 +; PASSIVE-NEXT: - SectionOffset: 24 ; PASSIVE-NEXT: InitFlags: 1 ; PASSIVE-NEXT: Content: 68656C6C6F00676F6F646279650000002A000000 ; PASSIVE-NEXT: - Type: CUSTOM @@ -110,7 +115,7 @@ ; PASSIVE-PIC: - Type: START ; PASSIVE-PIC-NEXT: StartFunction: 2 ; PASSIVE-PIC-NEXT: - Type: DATACOUNT -; PASSIVE-PIC-NEXT: Count: 2 +; PASSIVE-PIC-NEXT: Count: 3 ; PASSIVE-PIC-NEXT: - Type: CODE ; PASSIVE-PIC-NEXT: Functions: ; PASSIVE-PIC-NEXT: - Index: 0 @@ -118,7 +123,7 @@ ; PASSIVE-PIC-NEXT: Body: 0B ; PASSIVE-PIC-NEXT: - Index: 1 ; PASSIVE-PIC-NEXT: Locals: [] -; 
PASSIVE-PIC-NEXT: Body: 0B +; PASSIVE-PIC-NEXT: Body: {{.*}} ; PASSIVE-PIC-NEXT: - Index: 2 ; PASSIVE-PIC-NEXT: Locals: ; PASSIVE32-PIC-NEXT: - Type: I32 @@ -132,8 +137,11 @@ ; PASSIVE-PIC-NEXT: Segments: ; PASSIVE-PIC-NEXT: - SectionOffset: 3 ; PASSIVE-PIC-NEXT: InitFlags: 1 +; PASSIVE-PIC-NEXT: Content: '63000000' +; PASSIVE-PIC-NEXT: - SectionOffset: 9 +; PASSIVE-PIC-NEXT: InitFlags: 1 ; PASSIVE-PIC-NEXT: Content: 636F6E7374616E74000000002B -; PASSIVE-PIC-NEXT: - SectionOffset: 18 +; PASSIVE-PIC-NEXT: - SectionOffset: 24 ; PASSIVE-PIC-NEXT: InitFlags: 1 ; PASSIVE-PIC-NEXT: Content: 68656C6C6F00676F6F646279650000002A000000 ; PASSIVE-PIC-NEXT: - Type: CUSTOM @@ -152,7 +160,7 @@ ; PIC-DIS: .local [[PTR]] ; PIC-DIS-NEXT: global.get 1 -; PIC-DIS-NEXT: [[PTR]].const 10036 +; PIC-DIS-NEXT: [[PTR]].const 10040 ; PIC-DIS-NEXT: [[PTR]].add ; PIC-DIS-NEXT: local.set 0 @@ -160,7 +168,7 @@ ; DIS-NEXT: block ; DIS-NEXT: block -; NOPIC-DIS-NEXT: [[PTR]].const 11060 +; NOPIC-DIS-NEXT: [[PTR]].const 11064 ; PIC-DIS-NEXT: local.get 0 ; DIS-NEXT: i32.const 0 @@ -170,25 +178,25 @@ ; DIS-NEXT: # 2: down to label0 ; DIS-NEXT: end -; NOPIC-DIS-NEXT: [[PTR]].const 1024 -; PIC-DIS-NEXT: [[PTR]].const 0 +; NOPIC-DIS-NEXT: [[PTR]].const 1028 +; PIC-DIS-NEXT: [[PTR]].const 4 ; PIC-DIS-NEXT: global.get 1 ; PIC-DIS-NEXT: [[PTR]].add ; DIS-NEXT: i32.const 0 ; DIS-NEXT: i32.const 13 -; DIS-NEXT: memory.init 0, 0 +; DIS-NEXT: memory.init 1, 0 -; NOPIC-DIS-NEXT: [[PTR]].const 1040 -; PIC-DIS-NEXT: [[PTR]].const 16 +; NOPIC-DIS-NEXT: [[PTR]].const 1044 +; PIC-DIS-NEXT: [[PTR]].const 20 ; PIC-DIS-NEXT: global.get 1 ; PIC-DIS-NEXT: [[PTR]].add ; DIS-NEXT: i32.const 0 ; DIS-NEXT: i32.const 20 -; DIS-NEXT: memory.init 1, 0 -; NOPIC-DIS-NEXT: [[PTR]].const 1060 -; PIC-DIS-NEXT: [[PTR]].const 36 +; DIS-NEXT: memory.init 2, 0 +; NOPIC-DIS-NEXT: [[PTR]].const 1064 +; PIC-DIS-NEXT: [[PTR]].const 40 ; PIC-DIS-NEXT: global.get 1 ; PIC-DIS-NEXT: [[PTR]].add ; DIS-NEXT: i32.const 0 @@ -197,13 +205,13 @@ ; 
PIC-DIS-NEXT: call 3 -; NOPIC-DIS-NEXT: [[PTR]].const 11060 +; NOPIC-DIS-NEXT: [[PTR]].const 11064 ; PIC-DIS-NEXT: local.get 0 ; DIS-NEXT: i32.const 2 ; DIS-NEXT: i32.atomic.store 0 -; NOPIC-DIS-NEXT: [[PTR]].const 11060 +; NOPIC-DIS-NEXT: [[PTR]].const 11064 ; PIC-DIS-NEXT: local.get 0 ; DIS-NEXT: i32.const -1 @@ -212,7 +220,7 @@ ; DIS-NEXT: br 1 # 1: down to label1 ; DIS-NEXT: end -; NOPIC-DIS-NEXT: [[PTR]].const 11060 +; NOPIC-DIS-NEXT: [[PTR]].const 11064 ; PIC-DIS-NEXT: local.get 0 ; DIS-NEXT: i32.const 1 @@ -220,6 +228,6 @@ ; DIS-NEXT: memory.atomic.wait32 0 ; DIS-NEXT: drop ; DIS-NEXT: end -; DIS-NEXT: data.drop 0 ; DIS-NEXT: data.drop 1 +; DIS-NEXT: data.drop 2 ; DIS-NEXT: end From ffdbecccafdf96c37921e3e74bb436aa169faefa Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 21 May 2022 02:31:07 +0200 Subject: [PATCH 145/908] [mlir][bufferization] Add bufferization.alloc_tensor op This change adds a new op `alloc_tensor` to the bufferization dialect. During bufferization, this op is always lowered to a buffer allocation (unless it is "eliminated" by a pre-processing pass). It is useful to have such an op in tensor land, because it allows users to model tensor SSA use-def chains (which drive bufferization decisions) and because tensor SSA use-def chains can be analyzed by One-Shot Bufferize, while memref values cannot. This change also replaces all uses of linalg.init_tensor in bufferization-related code with bufferization.alloc_tensor. linalg.init_tensor and bufferization.alloc_tensor are similar, but the purpose of the former one is just to carry a shape. It does not indicate a memory allocation. linalg.init_tensor is not suitable for modelling SSA use-def chains for bufferization purposes, because linalg.init_tensor is marked as not having side effects (in contrast to alloc_tensor). As such, it is legal to move linalg.init_tensor ops around/CSE them/etc. 
This is not desirable for alloc_tensor; it represents an explicit buffer allocation while still in tensor land and such allocations should not suddenly disappear or get moved around when running the canonicalizer/CSE/etc. BEGIN_PUBLIC No public commit message needed for presubmit. END_PUBLIC Differential Revision: https://reviews.llvm.org/D126003 --- .../Dialect/Bufferization/IR/Bufferization.h | 1 + .../Bufferization/IR/BufferizationBase.td | 4 +- .../Bufferization/IR/BufferizationOps.td | 116 ++++++++ .../Transforms/AllocTensorElimination.h | 48 ++++ .../Dialect/Bufferization/Transforms/Passes.h | 7 + .../Bufferization/Transforms/Passes.td | 17 ++ .../mlir/Dialect/Linalg/IR/LinalgOps.td | 11 +- mlir/include/mlir/Dialect/Linalg/Passes.h | 5 +- mlir/include/mlir/Dialect/Linalg/Passes.td | 14 +- .../Transforms/BufferizableOpInterfaceImpl.h | 32 --- .../Bufferization/IR/BufferizationDialect.cpp | 1 + .../Bufferization/IR/BufferizationOps.cpp | 162 +++++++++++ .../Dialect/Bufferization/IR/CMakeLists.txt | 1 + .../Transforms/AllocTensorElimination.cpp | 272 ++++++++++++++++++ .../Bufferization/Transforms/Bufferize.cpp | 22 ++ .../Bufferization/Transforms/CMakeLists.txt | 2 + .../Transforms/OneShotAnalysis.cpp | 4 +- .../Transforms/OneShotModuleBufferize.cpp | 2 +- .../BufferizableOpInterfaceImpl.cpp | 248 ---------------- .../Dialect/Linalg/Transforms/CMakeLists.txt | 2 +- .../Transforms/InitTensorElimination.cpp | 50 ---- .../Transforms/InitTensorToAllocTensor.cpp | 55 ++++ mlir/python/mlir/dialects/BufferizationOps.td | 15 + .../mlir/dialects/_bufferization_ops_ext.py | 51 ++++ ...t-bufferize-alloc-tensor-elimination.mlir} | 18 +- ...ne-shot-bufferize-allow-return-allocs.mlir | 2 +- .../one-shot-bufferize-partial.mlir | 4 +- .../Transforms/one-shot-bufferize.mlir | 6 +- ...-module-bufferize-allow-return-allocs.mlir | 2 +- .../one-shot-module-bufferize-analysis.mlir | 22 +- .../one-shot-module-bufferize-invalid.mlir | 10 +- 
.../Transforms/one-shot-module-bufferize.mlir | 10 +- .../Dialect/Bufferization/canonicalize.mlir | 13 + mlir/test/Dialect/Bufferization/invalid.mlir | 26 ++ ...alysis-2fill-extract-matmul-all-perms.mlir | 50 ++-- ...rize-analysis-init-tensor-elimination.mlir | 6 +- .../Dialect/Linalg/one-shot-bufferize.mlir | 12 +- .../SCF/one-shot-bufferize-analysis.mlir | 2 +- mlir/test/Dialect/SCF/one-shot-bufferize.mlir | 6 +- .../Dialect/Tensor/one-shot-bufferize.mlir | 4 +- .../Linalg/CPU/test-one-shot-bufferize.mlir | 10 +- .../Dialect/Linalg/CPU/test-padtensor.mlir | 3 +- .../llvm-project-overlay/mlir/BUILD.bazel | 4 + 43 files changed, 920 insertions(+), 432 deletions(-) create mode 100644 mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h create mode 100644 mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp delete mode 100644 mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp create mode 100644 mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp create mode 100644 mlir/python/mlir/dialects/BufferizationOps.td create mode 100644 mlir/python/mlir/dialects/_bufferization_ops_ext.py rename mlir/test/Dialect/{Linalg/one-shot-bufferize-init-tensor-elimination.mlir => Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir} (86%) create mode 100644 mlir/test/Dialect/Bufferization/invalid.mlir diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h index 7b58452652c177..e68e4502e6c553 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h @@ -12,6 +12,7 @@ #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Interfaces/CopyOpInterface.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" 
//===----------------------------------------------------------------------===// // Bufferization Dialect diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td index 104abadd7cdea9..638bd42eb8e1e7 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td @@ -25,7 +25,9 @@ def Bufferization_Dialect : Dialect { found in [bufferization](/docs/Bufferization/) and [buffer deallocation](/docs/BufferDeallocationInternals/). }]; - let dependentDialects = ["memref::MemRefDialect", "tensor::TensorDialect"]; + let dependentDialects = [ + "AffineDialect", "memref::MemRefDialect", "tensor::TensorDialect" + ]; let extraClassDeclaration = [{ /// An attribute that can override writability of buffers of tensor function diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td index 804a8bf4f7f577..38a4d39d0ce2db 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -12,12 +12,128 @@ include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizationBase.td" +include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/CopyOpInterface.td" class Bufferization_Op traits = []> : Op; +//===----------------------------------------------------------------------===// +// AllocTensorOp +//===----------------------------------------------------------------------===// + +def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor", + [BufferizableOpInterface, + DeclareOpInterfaceMethods]> { + let summary = "buffer allocation in tensor land"; + + let description 
= [{ + `bufferization.alloc_tensor` is an operation that bufferizes to a buffer + allocation of a given shape. The shape could be dynamic or static. + Reading from the result of an `alloc_tensor` op yields an undefined value. + + `alloc_tensor` is a helper op for bufferization. It marks the beginning of + a new tensor SSA use-def chain and is used to control in-place bufferization + decisions during One-Shot Bufferize. + }]; + + let arguments = + (ins Variadic:$sizes, I64ArrayAttr:$static_sizes); + + let results = (outs AnyTensor:$result); + + let assemblyFormat = [{ + custom($sizes, $static_sizes) attr-dict + `:` type($result) + }]; + + let extraClassDeclaration = [{ + LogicalResult bufferize(RewriterBase &rewriter, BufferizationState &state); + + bool isMemoryWrite(OpResult opResult, const AnalysisState &state) const { + // AllocTensorOps allocate but do not write. + return false; + } + + static StringRef getStaticSizesAttrName() { + return "static_sizes"; + } + + RankedTensorType getType() { + return getResult().getType().cast(); + } + + // Infer the shape of the result tensor given the static shapes + // and element type of the result tensor. + static Type inferResultType(ArrayRef staticSizes, Type elementType, + Attribute encoding = {}); + + // Return true if the size of the tensor is dynamic at `idx` + bool isDynamicSize(unsigned idx) { + APInt v = *(static_sizes().getAsValueRange().begin() + idx); + return ShapedType::isDynamic(v.getSExtValue()); + } + + // Assert that the size of the result tensor is static at `idx` + // and return the shape. + int64_t getStaticSize(unsigned idx) { + assert(!isDynamicSize(idx) && "expected static size"); + APInt v = *(static_sizes(). + template getAsValueRange().begin() + idx); + return v.getSExtValue(); + } + + // Return the argument position that contains the dynamic size of + // the tensor at dimension `idx`. Asserts that the shape is + // dynamic at that `idx`. 
+ unsigned getIndexOfDynamicSize(unsigned idx) { + assert(isDynamicSize(idx) && "expected dynamic size"); + return std::count_if( + static_sizes().getValue().begin(), + static_sizes().getValue().begin() + idx, + [&](Attribute attr) { + return ShapedType::isDynamic(attr.cast().getInt()); + }); + } + + // Return both static and dynamic sizes as a list of `OpFoldResult`. + SmallVector getMixedSizes(); + + // Return the Value of the dynamic size of the tensor at dimension + // `idx`. Asserts that the shape is dynamic at that `idx. + Value getDynamicSize(unsigned idx) { + return getOperand(getIndexOfDynamicSize(idx)); + } + }]; + + let builders = [ + OpBuilder<(ins "ValueRange":$shape, + "ArrayRef":$staticShape, "Type":$elementType), + [{ + build($_builder, $_state, + AllocTensorOp::inferResultType(staticShape, elementType), + shape, $_builder.getI64ArrayAttr(staticShape)); + }]>, + OpBuilder<(ins "ValueRange":$shape, "Type":$elementType), + [{ + SmallVector staticShape( + shape.size(), ShapedType::kDynamicSize); + build($_builder, $_state, shape, staticShape, elementType); + }]>, + OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType), + [{ + build($_builder, $_state, ValueRange{}, staticShape, elementType); + }]>, + OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType, + CArg<"ArrayRef", "{}">:$attrs)> + ]; + + let hasCanonicalizer = 1; + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h new file mode 100644 index 00000000000000..c74c7b75f37c5a --- /dev/null +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h @@ -0,0 +1,48 @@ +//===- AllocTensorElimination.h - alloc_tensor op 
elimination -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H +#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H + +#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" + +namespace mlir { +namespace bufferization { + +/// A function that matches anchor OpOperands for AllocTensorOp elimination. +/// If an OpOperand is matched, the function should populate the SmallVector +/// with all values that are needed during `RewriteFn` to produce the +/// replacement value. +using AnchorMatchFn = std::function &)>; + +/// A function that rewrites matched anchors. +using RewriteFn = std::function; + +/// Try to eliminate AllocTensorOps inside `op`. +/// +/// * `rewriteFunc` generates the replacement for the AllocTensorOp. +/// * Only AllocTensorOps that are anchored on a matching OpOperand as per +/// `anchorMatchFunc` are considered. "Anchored" means that there is a path +/// on the reverse SSA use-def chain, starting from the OpOperand and always +/// following the aliasing OpOperand, that eventually ends at a single +/// AllocTensorOp. +LogicalResult eliminateAllocTensors(RewriterBase &rewriter, Operation *op, + bufferization::AnalysisState &state, + AnchorMatchFn anchorMatchFunc, + RewriteFn rewriteFunc); + +/// Try to eliminate AllocTensorOps inside `op` that are anchored on an +/// InsertSliceOp, i.e., if it is eventually inserted into another tensor +/// (and some other conditions are met). 
+LogicalResult insertSliceAnchoredAllocTensorEliminationStep( + RewriterBase &rewriter, Operation *op, bufferization::AnalysisState &state); + +} // namespace bufferization +} // namespace mlir + +#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index dd39cd528bfafd..1d2068edc84675 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -64,6 +64,13 @@ createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024, std::unique_ptr createPromoteBuffersToStackPass(std::function isSmallAlloc); +/// Create a pass that tries to eliminate alloc_tensor ops that are anchored on +/// insert_slice ops. +std::unique_ptr createAllocTensorEliminationPass(); + +/// Create a pass that bufferizes ops from the bufferization dialect. +std::unique_ptr createBufferizationBufferizePass(); + //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td index f9fc5a4ef1a0c5..22ddda8bd8ca8e 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -149,6 +149,11 @@ def FinalizingBufferize : Pass<"finalizing-bufferize", "func::FuncOp"> { let constructor = "mlir::bufferization::createFinalizingBufferizePass()"; } +def BufferizationBufferize : Pass<"bufferization-bufferize", "func::FuncOp"> { + let summary = "Bufferize the `bufferization` dialect"; + let constructor = "mlir::bufferization::createBufferizationBufferizePass()"; +} + def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> { let summary = 
"One-Shot Bufferize"; let description = [{ @@ -309,4 +314,16 @@ def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "func::FuncOp"> { ]; } +def AllocTensorElimination : Pass<"eliminate-alloc-tensors"> { + let summary = "Try to eliminate all alloc_tensor ops."; + let description = [{ + This pass tries to eliminate all insert_slice op-anchored alloc_tensor ops. + I.e., when a value that is equivalent to an alloc_tensor op is inserted into + another tensor, this pass tries to rewrite the IR in such a way that the + destination tensor of the insert_slice op is used directly instead of the + alloc_tensor result. + }]; + let constructor = "mlir::bufferization::createAllocTensorEliminationPass()"; +} + #endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index be6f8c323cbe79..536ccf0f4aa9ef 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -27,11 +27,16 @@ class Linalg_Op traits = []> : def Linalg_InitTensorOp : Linalg_Op<"init_tensor", [NoSideEffect, DeclareOpInterfaceMethods]> { - let summary = "operation to define a tensor of particular value"; + let summary = "operation to define a tensor of particular shape"; let description = [{ - `linalg.init_tensor` is an operation that materializes a tensor of - a given shape. The shape could be dynamic or static. + `linalg.init_tensor` is an operation that defines a tensor of a particular + shape. The shape could be dynamic or static. The contents of the tensor are + unspecified and the only purpose of the op result is to materialize the + specified shape in IR and make it available to other transformations. + + Note: This op can be lowered to a `bufferization.alloc_tensor`, at which + point it turns into an explicit buffer allocation. 
}]; let arguments = diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index 93d01a738dd880..f2e6547a9d3387 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -62,9 +62,8 @@ createConvertLinalgToParallelLoopsPass(); std::unique_ptr> createConvertLinalgToAffineLoopsPass(); -/// Create a pass that tries to eliminate init_tensor ops that are anchored on -/// insert_slice ops. -std::unique_ptr createLinalgInitTensorEliminationPass(); +/// Create a pass that rewrites init_tensor to alloc_tensor. +std::unique_ptr createLinalgInitTensorToAllocTensorPass(); /// Create a pass to convert Linalg operations which work on tensors to use /// buffers instead. diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index da48c651597e56..f7d064c6803c68 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -24,16 +24,14 @@ def ConvertElementwiseToLinalg : Pass<"convert-elementwise-to-linalg", ""> { let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"]; } -def LinalgInitTensorElimination : Pass<"linalg-eliminate-init-tensors"> { - let summary = "Try to eliminate all init_tensor ops."; +def LinalgInitTensorToAllocTensor : Pass<"linalg-init-tensor-to-alloc-tensor"> { + let summary = "Replace all init_tensor ops by alloc_tensor ops."; let description = [{ - This pass tries to eliminate all insert_slice op-anchored init_tensor ops. - I.e., when a value that is aliasing with an init_tensor op is inserted into - another tensor, this pass tries to rewrite the IR in such a way that the - destination tensor of the insert_slice op is used directly instead of the - init_tensor result. + init_tensor ops return a tensor of unspecified contents whose only purpose + is to carry the tensor shape.
This pass converts such ops to + bufferization.alloc_tensor ops, which bufferize to buffer allocations. }]; - let constructor = "mlir::createLinalgInitTensorEliminationPass()"; + let constructor = "mlir::createLinalgInitTensorToAllocTensorPass()"; } def LinalgFoldUnitExtentDims : Pass<"linalg-fold-unit-extent-dims", ""> { diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h index 8fc406ceef80c3..3afe2cd0d32f87 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h @@ -9,43 +9,11 @@ #ifndef MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H #define MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H -#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" - namespace mlir { class DialectRegistry; namespace linalg { - -/// A function that matches anchor OpOperands for InitTensorOp elimination. -/// If an OpOperand is matched, the function should populate the SmallVector -/// with all values that are needed during `RewriteFn` to produce the -/// replacement value. -using AnchorMatchFn = std::function &)>; - -/// A function that rewrites matched anchors. -using RewriteFn = std::function; - -/// Try to eliminate InitTensorOps inside `op`. -/// -/// * `rewriteFunc` generates the replacement for the InitTensorOp. -/// * Only InitTensorOps that are anchored on a matching OpOperand as per -/// `anchorMatchFunc` are considered. "Anchored" means that there is a path -/// on the reverse SSA use-def chain, starting from the OpOperand and always -/// following the aliasing OpOperand, that eventually ends at a single -/// InitTensorOp. 
-LogicalResult eliminateInitTensors(RewriterBase &rewriter, Operation *op, - bufferization::AnalysisState &state, - AnchorMatchFn anchorMatchFunc, - RewriteFn rewriteFunc); - -/// Try to eliminate InitTensorOps inside `op` that are anchored on an -/// InsertSliceOp, i.e., if it is eventually inserted into another tensor -/// (and some other conditions are met). -LogicalResult insertSliceAnchoredInitTensorEliminationStep( - RewriterBase &rewriter, Operation *op, bufferization::AnalysisState &state); - void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); - } // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp index a84e2d1675314d..a1d880f3ba0ca0 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp index 6509a8ebd22d91..00f0a74f73924a 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" @@ -127,6 +128,167 @@ LogicalResult mlir::bufferization::foldToMemrefToTensorPair( return success(); } 
+//===----------------------------------------------------------------------===// +// AllocTensorOp +//===----------------------------------------------------------------------===// + +LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter, + BufferizationState &state) { + // Nothing to do for dead AllocTensorOps. + if (getOperation()->getUses().empty()) + return success(); + + FailureOr alloc = state.createAlloc(rewriter, getLoc(), getResult()); + if (failed(alloc)) + return failure(); + replaceOpWithBufferizedValues(rewriter, getOperation(), *alloc); + return success(); +} + +void AllocTensorOp::build(OpBuilder &b, OperationState &result, + ArrayRef sizes, Type elementType, + ArrayRef attrs) { + SmallVector dynamicSizes; + SmallVector staticSizes; + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + auto resultType = RankedTensorType ::get(staticSizes, elementType); + build(b, result, resultType, dynamicSizes, b.getI64ArrayAttr(staticSizes)); + result.addAttributes(attrs); +} + +LogicalResult AllocTensorOp::verify() { + RankedTensorType resultType = getType(); + SmallVector staticSizes = llvm::to_vector<4>(llvm::map_range( + static_sizes().cast(), + [](Attribute a) -> int64_t { return a.cast().getInt(); })); + + if (failed(verifyListOfOperandsOrIntegers( + *this, "sizes", resultType.getRank(), static_sizes(), sizes(), + ShapedType::isDynamic))) + return failure(); + + if (static_sizes().size() != static_cast(resultType.getRank())) + return emitError("expected ") << resultType.getRank() << " sizes values"; + + Type expectedType = AllocTensorOp::inferResultType( + staticSizes, resultType.getElementType(), resultType.getEncoding()); + if (resultType != expectedType) { + return emitError("specified type ") + << resultType << " does not match the inferred type " + << expectedType; + } + return success(); +} + +Type AllocTensorOp::inferResultType(ArrayRef staticSizes, + Type elementType, Attribute encoding) { + return 
RankedTensorType::get(staticSizes, elementType, encoding); +} + +SmallVector AllocTensorOp::getMixedSizes() { + SmallVector mixedSizes; + mixedSizes.reserve(getType().getRank()); + unsigned dynamicValIndex = 0; + for (Attribute attr : static_sizes()) { + auto intAttr = attr.cast(); + if (!ShapedType::isDynamic(intAttr.getInt())) { + mixedSizes.push_back(intAttr); + continue; + } + mixedSizes.push_back(sizes()[dynamicValIndex++]); + } + return mixedSizes; +} + +namespace { +/// Change the type of the result of a `bufferization.alloc_tensor` by making +/// the result type statically sized along dimension that in the original +/// operation where defined as dynamic, but the size was defined using a +/// `constant` op. For example: +/// +/// %c5 = arith.constant 5: index +/// %0 = bufferization.alloc_tensor [%arg0, %c5] : tensor +/// +/// to +/// +/// %0 = bufferization.alloc_tensor [%arg0, 5] : tensor +struct ReplaceStaticShapeDims : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(AllocTensorOp op, + PatternRewriter &rewriter) const override { + SmallVector dynamicSizes; + SmallVector staticSizes; + for (unsigned i = 0, e = op.getType().getRank(); i != e; ++i) { + // If the size is already static, nothing to do. + if (!op.isDynamicSize(i)) { + staticSizes.push_back(op.getStaticSize(i)); + continue; + } + + // If the size is dynamic but defined using a `constant` op, get the + // constant value to find the static size to use. + unsigned operandNum = op.getIndexOfDynamicSize(i); + Value sizeOperand = op.getOperand(operandNum); + if (auto constantIndexOp = + sizeOperand.getDefiningOp()) { + staticSizes.push_back(constantIndexOp.value()); + continue; + } + + // Fallback case. Keep the size dynamic. 
+ dynamicSizes.push_back(sizeOperand); + staticSizes.push_back(ShapedType::kDynamicSize); + } + RankedTensorType newType = + RankedTensorType::get(staticSizes, op.getType().getElementType()); + if (newType == op.getType()) + return failure(); + auto newOp = + rewriter.create(op.getLoc(), newType, dynamicSizes, + rewriter.getI64ArrayAttr(staticSizes)); + rewriter.replaceOpWithNewOp(op, op.getType(), newOp); + return success(); + } +}; + +struct FoldDimOfAllocTensorOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::DimOp dimOp, + PatternRewriter &rewriter) const override { + Optional maybeConstantIndex = dimOp.getConstantIndex(); + auto allocTensorOp = dimOp.source().getDefiningOp(); + if (!allocTensorOp || !maybeConstantIndex) + return failure(); + if (!allocTensorOp.isDynamicSize(*maybeConstantIndex)) + return failure(); + rewriter.replaceOp(dimOp, + allocTensorOp.getDynamicSize(*maybeConstantIndex)); + return success(); + } +}; +} // namespace + +void AllocTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *ctx) { + results.add(ctx); +} + +LogicalResult AllocTensorOp::reifyResultShapes( + OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) { + auto shapes = llvm::to_vector<4>(llvm::map_range( + llvm::seq(0, getType().getRank()), [&](int64_t dim) -> Value { + if (isDynamicSize(dim)) + return getDynamicSize(dim); + return builder.create(getLoc(), + getStaticSize(dim)); + })); + reifiedReturnShapes.emplace_back(std::move(shapes)); + return success(); +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt index 302f6a409a48ae..31d608ac042587 100644 --- a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt +++ 
b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt @@ -12,6 +12,7 @@ add_mlir_dialect_library(MLIRBufferization MLIRBufferizationOpsIncGen LINK_LIBS PUBLIC + MLIRAffine MLIRDialect MLIRFunc MLIRIR diff --git a/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp new file mode 100644 index 00000000000000..9eab0fc257c190 --- /dev/null +++ b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp @@ -0,0 +1,272 @@ +//===- AllocTensorElimination.cpp - alloc_tensor op elimination -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" + +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h" +#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" +#include "mlir/Dialect/Bufferization/Transforms/Passes.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/Dominance.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; +using namespace mlir::bufferization; + +/// Return true if all `neededValues` are in scope at the given +/// `insertionPoint`. 
+static bool +neededValuesDominateInsertionPoint(const DominanceInfo &domInfo, + Operation *insertionPoint, + const SmallVector &neededValues) { + for (Value val : neededValues) { + if (auto bbArg = val.dyn_cast()) { + Block *owner = bbArg.getOwner(); + if (!owner->findAncestorOpInBlock(*insertionPoint)) + return false; + } else { + auto opResult = val.cast(); + if (!domInfo.dominates(opResult.getOwner(), insertionPoint)) + return false; + } + } + return true; +} + +/// Return true if the given `insertionPoint` dominates all uses of +/// `allocTensorOp`. +static bool insertionPointDominatesUses(const DominanceInfo &domInfo, + Operation *insertionPoint, + Operation *allocTensorOp) { + for (Operation *user : allocTensorOp->getUsers()) + if (!domInfo.dominates(insertionPoint, user)) + return false; + return true; +} + +/// Find a valid insertion point for a replacement of `allocTensorOp`, assuming +/// that the replacement may use any value from `neededValues`. +static Operation * +findValidInsertionPoint(Operation *allocTensorOp, + const SmallVector &neededValues) { + DominanceInfo domInfo; + + // Gather all possible insertion points: the location of `allocTensorOp` and + // right after the definition of each value in `neededValues`. + SmallVector insertionPointCandidates; + insertionPointCandidates.push_back(allocTensorOp); + for (Value val : neededValues) { + // Note: The anchor op is using all of `neededValues`, so: + // * in case of a block argument: There must be at least one op in the block + // (the anchor op or one of its parents). + // * in case of an OpResult: There must be at least one op right after the + // defining op (the anchor op or one of its + // parents). + if (auto bbArg = val.dyn_cast()) { + insertionPointCandidates.push_back( + &bbArg.getOwner()->getOperations().front()); + } else { + insertionPointCandidates.push_back(val.getDefiningOp()->getNextNode()); + } + } + + // Select first matching insertion point. 
+ for (Operation *insertionPoint : insertionPointCandidates) { + // Check if all needed values are in scope. + if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint, + neededValues)) + continue; + // Check if the insertion point is before all uses. + if (!insertionPointDominatesUses(domInfo, insertionPoint, allocTensorOp)) + continue; + return insertionPoint; + } + + // No suitable insertion point was found. + return nullptr; +} + +/// Try to eliminate AllocTensorOps inside `op`. An AllocTensorOp is replaced +/// with the result of `rewriteFunc` if it is anchored on a matching +/// OpOperand. "Anchored" means that there is a path on the reverse SSA use-def +/// chain, starting from the OpOperand and always following the aliasing +/// OpOperand, that eventually ends at a single AllocTensorOp. +LogicalResult mlir::bufferization::eliminateAllocTensors( + RewriterBase &rewriter, Operation *op, AnalysisState &state, + AnchorMatchFn anchorMatchFunc, RewriteFn rewriteFunc) { + OpBuilder::InsertionGuard g(rewriter); + + WalkResult status = op->walk([&](Operation *op) { + for (OpOperand &operand : op->getOpOperands()) { + // Skip operands that do not bufferize inplace. + if (!state.isInPlace(operand)) + continue; + // All values that are needed to create the replacement op. + SmallVector neededValues; + // Is this a matching OpOperand? + if (!anchorMatchFunc(operand, neededValues)) + continue; + SetVector maybeAllocTensor = + state.findValueInReverseUseDefChain(operand.get(), [&](Value val) { + // Continue traversal until this function returns true. + OpResult opResult = val.dyn_cast(); + if (!opResult) + return true; + SmallVector opOperands = + state.getAliasingOpOperand(opResult); + if (!llvm::all_of(opOperands, [&](OpOperand *operand) { + return state.isInPlace(*operand); + })) + return true; + // Only equivalent tensors are supported at the moment. 
+ // TODO: Support cases such as extract_slice(alloc_tensor) + return !llvm::all_of(opOperands, [&](OpOperand *operand) { + return state.areEquivalentBufferizedValues(operand->get(), + opResult); + }); + }); + + // Replace only if the reverse use-def chain ends at exactly one + // AllocTensorOp. + if (maybeAllocTensor.size() != 1 || + !maybeAllocTensor.front().getDefiningOp()) + return WalkResult::skip(); + Value allocTensor = maybeAllocTensor.front(); + + // Find a suitable insertion point. + Operation *insertionPoint = + findValidInsertionPoint(allocTensor.getDefiningOp(), neededValues); + if (!insertionPoint) + continue; + + // Create a replacement for the AllocTensorOp. + rewriter.setInsertionPoint(insertionPoint); + Value replacement = rewriteFunc(rewriter, allocTensor.getLoc(), operand); + if (!replacement) + continue; + + // Replace the AllocTensorOp. + rewriter.replaceOp(allocTensor.getDefiningOp(), replacement); + } + + // Advance to the next operation. + return WalkResult::advance(); + }); + + return failure(status.wasInterrupted()); +} + +/// Try to eliminate AllocTensorOps inside `op`. An AllocTensorOp can be +/// eliminated if it is eventually inserted into another tensor (and some other +/// conditions are met). +/// +/// E.g.: +/// %0 = bufferization.alloc_tensor +/// %1 = linalg.fill(%cst, %0) {inplace = [true]} +/// %2 = tensor.insert_slice %1 into %t[10][20][1] +/// +/// AllocTensorOp elimination will try to fill %t inplace instead of filling a +/// new allocation %0 and inserting it into %t. This is done by replacing the +/// AllocTensorOp with: +/// +/// %0 = tensor.extract_slice %t[10][20][1] +/// +/// The analysis looks for matching ExtractSliceOp/InsertSliceOp pairs and lets +/// those bufferize inplace in the absence of other conflicts.
+/// +/// Starting from an InsertSliceOp, an AllocTensorOp at the end of the insert +/// source's reverse use-def chain is eliminated if: +/// * On the reverse use-def chain path from the InsertSliceOp to the +/// AllocTensorOp, all ops were decided to bufferize inplace and the buffer +/// relation is "equivalent" (TODO: can be relaxed if needed). +/// * The reverse use-def chain has exactly one end, which is the AllocTensorOp. +LogicalResult +mlir::bufferization::insertSliceAnchoredAllocTensorEliminationStep( + RewriterBase &rewriter, Operation *op, AnalysisState &state) { + return eliminateAllocTensors( + rewriter, op, state, + /*anchorMatchFunc=*/ + [&](OpOperand &operand, SmallVector &neededValues) { + auto insertSliceOp = + dyn_cast(operand.getOwner()); + if (!insertSliceOp) + return false; + if (&operand != &insertSliceOp->getOpOperand(0) /*source*/) + return false; + + // Collect all values that are needed to construct the replacement op. + neededValues.append(insertSliceOp.offsets().begin(), + insertSliceOp.offsets().end()); + neededValues.append(insertSliceOp.sizes().begin(), + insertSliceOp.sizes().end()); + neededValues.append(insertSliceOp.strides().begin(), + insertSliceOp.strides().end()); + neededValues.push_back(insertSliceOp.dest()); + + return true; + }, + /*rewriteFunc=*/ + [](OpBuilder &b, Location loc, OpOperand &operand) { + auto insertOp = cast(operand.getOwner()); + // Expand offsets, sizes and strides to the full rank to handle the + // rank-reducing case. 
+ SmallVector mixedOffsets = insertOp.getMixedOffsets(); + SmallVector mixedSizes = insertOp.getMixedSizes(); + SmallVector mixedStrides = insertOp.getMixedStrides(); + OffsetSizeAndStrideOpInterface::expandToRank( + insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides, + [&](Value target, int64_t dim) -> OpFoldResult { + auto shapedType = target.getType().cast(); + if (shapedType.isDynamicDim(dim)) + return b.create(loc, target, dim).result(); + return b.getIndexAttr(shapedType.getDimSize(dim)); + }); + auto t = tensor::ExtractSliceOp::inferRankReducedResultType( + insertOp.getSourceType().getRank(), + insertOp.dest().getType().cast(), mixedOffsets, + mixedSizes, mixedStrides); + auto extractOp = b.create( + loc, t, insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides); + return extractOp.result(); + }); +} + +namespace { +struct AllocTensorElimination + : public AllocTensorEliminationBase { + AllocTensorElimination() = default; + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry + .insert(); + } +}; +} // namespace + +void AllocTensorElimination::runOnOperation() { + Operation *op = getOperation(); + OneShotBufferizationOptions options; + OneShotAnalysisState state(op, options); + if (failed(analyzeOp(op, state))) { + signalPassFailure(); + return; + } + + IRRewriter rewriter(op->getContext()); + if (failed(bufferization::insertSliceAnchoredAllocTensorEliminationStep( + rewriter, op, state))) + signalPassFailure(); +} + +std::unique_ptr mlir::bufferization::createAllocTensorEliminationPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 3bde5a59550e0a..0ef56d2a5ab4dd 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -237,6 +237,28 @@ struct OneShotBufferizePass }; } // namespace +namespace { 
+struct BufferizationBufferizePass + : public BufferizationBufferizeBase { + void runOnOperation() override { + BufferizationOptions options = getPartialBufferizationOptions(); + options.allowDialectInFilter(); + + if (failed(bufferizeOp(getOperation(), options))) + signalPassFailure(); + } + + void getDependentDialects(DialectRegistry ®istry) const override { + registry + .insert(); + } +}; +} // namespace + +std::unique_ptr mlir::bufferization::createBufferizationBufferizePass() { + return std::make_unique(); +} + std::unique_ptr mlir::bufferization::createOneShotBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt index cf61738e0a9b5c..189b25e98cb7ba 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRBufferizationTransforms + AllocTensorElimination.cpp Bufferize.cpp BufferDeallocation.cpp BufferOptimizations.cpp @@ -22,5 +23,6 @@ add_mlir_dialect_library(MLIRBufferizationTransforms MLIRIR MLIRMemRef MLIRPass + MLIRTensor MLIRTransforms ) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp index 704a5164c543f9..dd43a51b16526b 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp @@ -379,7 +379,7 @@ getCommonEnclosingRepetitiveRegion(ArrayRef values) { /// Return `true` if the given tensor value is a memory write. Most values are /// tensor writes, but ops that define a tensor SSA value without specifying its -/// contents (e.g., init_tensor) are not. +/// contents (e.g., alloc_tensor) are not. 
static bool isMemoryWrite(Value value, const AnalysisState &state) { auto opResult = value.dyn_cast(); if (!opResult) @@ -855,7 +855,7 @@ annotateOpsWithBufferizationMarkers(Operation *op, /// %1 = scf.if %c -> (tensor) { /// scf.yield %0 : tensor /// } else { -/// %t = linalg.init_tensor : tensor +/// %t = bufferization.alloc_tensor : tensor /// scf.yield %t : tensor /// } /// ``` diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index 805467be2088c1..377bc58b2d53f3 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -32,7 +32,7 @@ // Example: `foo` fails bufferization because %0 is not equivalent to any bbArg. // ``` // func @foo() -> tensor { -// %0 = linalg.init_tensor [...] : tensor +// %0 = bufferization.alloc_tensor [...] : tensor // return %0 : tensor // } // ``` diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp index 11fea3adb76c08..a924e8c8c2640a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp @@ -12,7 +12,6 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/Dialect.h" -#include "mlir/IR/Dominance.h" #include "mlir/IR/Operation.h" using namespace mlir; @@ -219,32 +218,6 @@ struct LinalgOpInterface } }; -struct InitTensorOpInterface - : public BufferizableOpInterface::ExternalModel { - bool isMemoryWrite(Operation *op, OpResult opResult, - const AnalysisState &state) const { - // InitTensorOps allocate but do not write.
- return false; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - BufferizationState &state) const { - auto initTensorOp = cast(op); - - // The InitTensorOp may have been eliminated. - if (initTensorOp->getUses().empty()) - return success(); - - FailureOr alloc = state.createAlloc(rewriter, initTensorOp->getLoc(), - initTensorOp.result()); - if (failed(alloc)) - return failure(); - replaceOpWithBufferizedValues(rewriter, op, *alloc); - return success(); - } -}; - /// Helper structure that iterates over all LinalgOps in `OpTys` and registers /// the `BufferizableOpInterface` with each of them. template @@ -256,230 +229,9 @@ struct LinalgOpInterfaceHelper { }; } // namespace -/// Return true if all `neededValues` are in scope at the given -/// `insertionPoint`. -static bool -neededValuesDominateInsertionPoint(const DominanceInfo &domInfo, - Operation *insertionPoint, - const SmallVector &neededValues) { - for (Value val : neededValues) { - if (auto bbArg = val.dyn_cast()) { - Block *owner = bbArg.getOwner(); - if (!owner->findAncestorOpInBlock(*insertionPoint)) - return false; - } else { - auto opResult = val.cast(); - if (!domInfo.dominates(opResult.getOwner(), insertionPoint)) - return false; - } - } - return true; -} - -/// Return true if the given `insertionPoint` dominates all uses of -/// `initTensorOp`. -static bool insertionPointDominatesUses(const DominanceInfo &domInfo, - Operation *insertionPoint, - Operation *initTensorOp) { - for (Operation *user : initTensorOp->getUsers()) - if (!domInfo.dominates(insertionPoint, user)) - return false; - return true; -} - -/// Find a valid insertion point for a replacement of `initTensorOp`, assuming -/// that the replacement may use any value from `neededValues`. 
-static Operation * -findValidInsertionPoint(Operation *initTensorOp, - const SmallVector &neededValues) { - DominanceInfo domInfo; - - // Gather all possible insertion points: the location of `initTensorOp` and - // right after the definition of each value in `neededValues`. - SmallVector insertionPointCandidates; - insertionPointCandidates.push_back(initTensorOp); - for (Value val : neededValues) { - // Note: The anchor op is using all of `neededValues`, so: - // * in case of a block argument: There must be at least one op in the block - // (the anchor op or one of its parents). - // * in case of an OpResult: There must be at least one op right after the - // defining op (the anchor op or one of its - // parents). - if (auto bbArg = val.dyn_cast()) { - insertionPointCandidates.push_back( - &bbArg.getOwner()->getOperations().front()); - } else { - insertionPointCandidates.push_back(val.getDefiningOp()->getNextNode()); - } - } - - // Select first matching insertion point. - for (Operation *insertionPoint : insertionPointCandidates) { - // Check if all needed values are in scope. - if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint, - neededValues)) - continue; - // Check if the insertion point is before all uses. - if (!insertionPointDominatesUses(domInfo, insertionPoint, initTensorOp)) - continue; - return insertionPoint; - } - - // No suitable insertion point was found. - return nullptr; -} - -/// Try to eliminate InitTensorOps inside `op`. An InitTensorOp is replaced -/// with the the result of `rewriteFunc` if it is anchored on a matching -/// OpOperand. "Anchored" means that there is a path on the reverse SSA use-def -/// chain, starting from the OpOperand and always following the aliasing -/// OpOperand, that eventually ends at a single InitTensorOp. 
-LogicalResult mlir::linalg::eliminateInitTensors(RewriterBase &rewriter, - Operation *op, - AnalysisState &state, - AnchorMatchFn anchorMatchFunc, - RewriteFn rewriteFunc) { - OpBuilder::InsertionGuard g(rewriter); - - WalkResult status = op->walk([&](Operation *op) { - for (OpOperand &operand : op->getOpOperands()) { - // Skip operands that do not bufferize inplace. - if (!state.isInPlace(operand)) - continue; - // All values that are needed to create the replacement op. - SmallVector neededValues; - // Is this a matching OpOperand? - if (!anchorMatchFunc(operand, neededValues)) - continue; - SetVector maybeInitTensor = - state.findValueInReverseUseDefChain(operand.get(), [&](Value val) { - // Continue traversal until this function returns true. - OpResult opResult = val.dyn_cast(); - if (!opResult) - return true; - SmallVector opOperands = - state.getAliasingOpOperand(opResult); - if (!llvm::all_of(opOperands, [&](OpOperand *operand) { - return state.isInPlace(*operand); - })) - return true; - // Only equivalent tensors are supported at the moment. - // TODO: Support cases such as extract_slice(init_tensor) - return !llvm::all_of(opOperands, [&](OpOperand *operand) { - return state.areEquivalentBufferizedValues(operand->get(), - opResult); - }); - }); - - // Replace only if the reverse use-def chain ends at exactly one - // InitTensorOp. - if (maybeInitTensor.size() != 1 || - !maybeInitTensor.front().getDefiningOp()) - return WalkResult::skip(); - Value initTensor = maybeInitTensor.front(); - - // Find a suitable insertion point. - Operation *insertionPoint = - findValidInsertionPoint(initTensor.getDefiningOp(), neededValues); - if (!insertionPoint) - continue; - - // Create a replacement for the InitTensorOp. - rewriter.setInsertionPoint(insertionPoint); - Value replacement = rewriteFunc(rewriter, initTensor.getLoc(), operand); - if (!replacement) - continue; - - // Replace the InitTensorOp. 
- rewriter.replaceOp(initTensor.getDefiningOp(), replacement); - } - - // Advance to the next operation. - return WalkResult::advance(); - }); - - return failure(status.wasInterrupted()); -} - -/// Try to eliminate InitTensorOps inside `op`. An InitTensorOp can be -/// eliminated if it is eventually inserted into another tensor (and some other -/// conditions are met). -/// -/// E.g.: -/// %0 = linalg.init_tensor -/// %1 = linalg.fill(%cst, %0) {inplace = [true]} -/// %2 = tensor.insert_slice %1 into %t[10][20][1] -/// -/// InitTensorOp elimination will try to fill %t inplace instead of filling a -/// new allocation %0 and inserting it into %t. This is done by replacing the -/// InitTensorOp with: -/// -/// %0 = tensor.extract_slice %t[10][20][1] -/// -/// The analysis looks for matching ExtractSliceOp/InsertSliceOp pairs and lets -/// those bufferize inplace in the absence of other conflicts. -/// -/// Starting from an InsertSliceOp, an InitTensorOp at the end of the insert -/// source's reverse use-def chain is eliminated if: -/// * On the reverse use-def chain path from the InsertSliceOp to the -/// InitTensorOp, all ops were decided to bufferize inplace and the buffer -/// relation is "equivalent" (TODO: can be relaxed if needed). -/// * The reverse use-def chain has exactly one end, which is the InitTensorOp. -LogicalResult mlir::linalg::insertSliceAnchoredInitTensorEliminationStep( - RewriterBase &rewriter, Operation *op, AnalysisState &state) { - return eliminateInitTensors( - rewriter, op, state, - /*anchorMatchFunc=*/ - [&](OpOperand &operand, SmallVector &neededValues) { - auto insertSliceOp = - dyn_cast(operand.getOwner()); - if (!insertSliceOp) - return false; - if (&operand != &insertSliceOp->getOpOperand(0) /*source*/) - return false; - - // Collect all values that are needed to construct the replacement op. 
- neededValues.append(insertSliceOp.offsets().begin(), - insertSliceOp.offsets().end()); - neededValues.append(insertSliceOp.sizes().begin(), - insertSliceOp.sizes().end()); - neededValues.append(insertSliceOp.strides().begin(), - insertSliceOp.strides().end()); - neededValues.push_back(insertSliceOp.dest()); - - return true; - }, - /*rewriteFunc=*/ - [](OpBuilder &b, Location loc, OpOperand &operand) { - auto insertOp = cast(operand.getOwner()); - // Expand offsets, sizes and strides to the full rank to handle the - // rank-reducing case. - SmallVector mixedOffsets = insertOp.getMixedOffsets(); - SmallVector mixedSizes = insertOp.getMixedSizes(); - SmallVector mixedStrides = insertOp.getMixedStrides(); - OffsetSizeAndStrideOpInterface::expandToRank( - insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides, - [&](Value target, int64_t dim) -> OpFoldResult { - auto shapedType = target.getType().cast(); - if (shapedType.isDynamicDim(dim)) - return b.create(loc, target, dim).result(); - return b.getIndexAttr(shapedType.getDimSize(dim)); - }); - auto t = tensor::ExtractSliceOp::inferRankReducedResultType( - insertOp.getSourceType().getRank(), - insertOp.dest().getType().cast(), mixedOffsets, - mixedSizes, mixedStrides); - auto extractOp = b.create( - loc, t, insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides); - return extractOp.result(); - }); -} - void mlir::linalg::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { - linalg::InitTensorOp::attachInterface(*ctx); - // Register all Linalg structured ops. `LinalgOp` is an interface and it is // not possible to attach an external interface to an existing interface. // Therefore, attach the `BufferizableOpInterface` to all ops one-by-one. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 29553919736882..0c536d53f1abee 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -13,7 +13,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms Generalization.cpp Hoisting.cpp HoistPadding.cpp - InitTensorElimination.cpp + InitTensorToAllocTensor.cpp InlineScalarOperands.cpp Interchange.cpp Loops.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp deleted file mode 100644 index f48f9c83e2cd5f..00000000000000 --- a/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===- ComprehensiveBufferize.cpp - Single pass bufferization -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PassDetail.h" - -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" -#include "mlir/Dialect/Linalg/Passes.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; -using namespace mlir::bufferization; -using namespace mlir::linalg; - -namespace { -struct LinalgInitTensorElimination - : public LinalgInitTensorEliminationBase { - LinalgInitTensorElimination() = default; - - void runOnOperation() override; - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } -}; -} // namespace - -void LinalgInitTensorElimination::runOnOperation() { - Operation *op = getOperation(); - OneShotBufferizationOptions options; - OneShotAnalysisState state(op, options); - if (failed(analyzeOp(op, state))) { - signalPassFailure(); - return; - } - - IRRewriter rewriter(op->getContext()); - if (failed(insertSliceAnchoredInitTensorEliminationStep(rewriter, op, state))) - signalPassFailure(); -} - -std::unique_ptr mlir::createLinalgInitTensorEliminationPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp new file mode 100644 index 00000000000000..e1d74e99242e3c --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp @@ -0,0 +1,55 @@ +//===- InitTensorToAllocTensor.cpp - Lower init_tensor to alloc_tensor ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" + +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +using namespace mlir; +using namespace mlir::bufferization; +using namespace mlir::linalg; + +namespace { +struct InitTensorLoweringPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(InitTensorOp op, + PatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, op.getMixedSizes(), op.getType().getElementType()); + return success(); + } +}; + +struct LinalgInitTensorToAllocTensor + : public LinalgInitTensorToAllocTensorBase { + LinalgInitTensorToAllocTensor() = default; + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry &registry) const override { + registry + .insert(); + } +}; +} // namespace + +void LinalgInitTensorToAllocTensor::runOnOperation() { + Operation *op = getOperation(); + RewritePatternSet patterns(op->getContext()); + patterns.insert(op->getContext()); + if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + signalPassFailure(); +} + +std::unique_ptr mlir::createLinalgInitTensorToAllocTensorPass() { + return std::make_unique(); +} diff --git a/mlir/python/mlir/dialects/BufferizationOps.td b/mlir/python/mlir/dialects/BufferizationOps.td new file mode 100644 index 00000000000000..c5170cee3534cd --- /dev/null +++ b/mlir/python/mlir/dialects/BufferizationOps.td @@ -0,0 +1,15 @@ +//===-- BufferizationOps.td - Entry point for BufferizationOps bindings ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef PYTHON_BINDINGS_BUFFERIZATION_OPS +#define PYTHON_BINDINGS_BUFFERIZATION_OPS + +include "mlir/Bindings/Python/Attributes.td" +include "mlir/Dialect/Bufferization/IR/BufferizationOps.td" + +#endif diff --git a/mlir/python/mlir/dialects/_bufferization_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_ops_ext.py new file mode 100644 index 00000000000000..2414c8b7476be2 --- /dev/null +++ b/mlir/python/mlir/dialects/_bufferization_ops_ext.py @@ -0,0 +1,51 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +try: + from typing import Sequence, Union + from ..ir import * + from ._ods_common import get_default_loc_context as _get_default_loc_context + + from typing import Any, List, Union +except ImportError as e: + raise RuntimeError("Error loading imports from extension module") from e + + +class AllocTensorOp: + """Extends the bufferization.alloc_tensor op.""" + + def __init__(self, + sizes: Union[Sequence[int], Sequence[Value]], + element_type: Type, + *, + loc=None, + ip=None): + """Constructs an `alloc_tensor` with either static or dynamic sizes.""" + context = get_default_loc_context(loc) + operands = [] + attributes = {} + # TODO: Refactor the AllocTensorOp to take an element type attribute and + # then use normal result type inference, unifying the Python and C++ side + # with a standard mechanism (versus stashing that in builders). + if sizes and isinstance(sizes[0], Value): + # Dynamic sizes. + operands.extend(sizes) + static_size_ints = [-1] * len(sizes) + result_type = RankedTensorType.get(static_size_ints, element_type) + else: + # Static sizes. 
+ result_type = RankedTensorType.get(sizes, element_type) + static_size_ints = sizes + + i64_type = IntegerType.get_signless(64) + attributes["static_sizes"] = ArrayAttr.get( + [IntegerAttr.get(i64_type, s) for s in static_size_ints], + context=context) + op = self.build_generic( + results=[result_type], + operands=operands, + attributes=attributes, + loc=loc, + ip=ip) + OpView.__init__(self, op) diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir similarity index 86% rename from mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir rename to mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir index 053efe87288375..76c835f04e5f57 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-eliminate-init-tensors -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s +// RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s // CHECK: func @buffer_forwarding_conflict( // CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref @@ -14,11 +14,11 @@ func.func @buffer_forwarding_conflict( // This allocs the whole dim to allow for a full clone of t. // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM]]) - // init_tensor itself does not alloc but forwards to the **second** - // insert_slice. InitTensorOp replaces the init_tensor with an out-of-place + // alloc_tensor itself does not alloc but forwards to the **second** + // insert_slice. AllocTensorOp replaces the alloc_tensor with an out-of-place // extract_slice. 
// CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - %a = linalg.init_tensor[%sz] : tensor + %a = bufferization.alloc_tensor[%sz] : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor @@ -47,10 +47,10 @@ func.func @buffer_forwarding_no_conflict( { %f0 = arith.constant 0.0: f32 - // init_tensor itself does not alloc but forwards to the insert_slice. - // InitTensorOp replaces the init_tensor with an inplace extract_slice. + // alloc_tensor itself does not alloc but forwards to the insert_slice. + // AllocTensorOp elimination replaces the alloc_tensor with an inplace extract_slice. // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] - %a = linalg.init_tensor[%sz] : tensor + %a = bufferization.alloc_tensor[%sz] : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor @@ -71,7 +71,7 @@ func.func @insertion_point_inside_loop(%t : tensor, %sz : index) -> (tens %c5 = arith.constant 5 : index // CHECK-NOT: memref.alloc - %blank = linalg.init_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor [5] : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { @@ -102,7 +102,7 @@ func.func @insertion_point_outside_loop(%t : tensor, %sz : index, // CHECK-NOT: memref.alloc // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1] - %blank = linalg.init_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor [5] : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir index ae097ca47b428f..56c75fefe3ce2d 100644 ---
a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir @@ -16,7 +16,7 @@ func.func @buffer_not_deallocated(%t : tensor, %c : i1) -> tensor // CHECK-NOT: dealloc // CHECK: scf.yield %[[casted]] %sz = "test.some_op"() : () -> (index) - %0 = linalg.init_tensor[%sz] : tensor + %0 = bufferization.alloc_tensor[%sz] : tensor scf.yield %0 : tensor } else { // CHECK: } else { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir index 4717881f522c43..06de89c0d8ef85 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -138,11 +138,11 @@ func.func @unknown_op_may_read(%v: vector<5xf32>) %idx = arith.constant 0 : index %cst = arith.constant 5.0 : f32 - // One alloc for the init_tensor, another one because the transfer_write + // One alloc for the alloc_tensor, another one because the transfer_write // bufferizes out-of-place. 
// CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32> // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> - %t1 = linalg.init_tensor [10] : tensor<10xf32> + %t1 = bufferization.alloc_tensor [10] : tensor<10xf32> // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]] // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir index f9ea4dce5294d3..74382aef3fa72e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -44,7 +44,7 @@ func.func @return_tensor(%A : tensor, %v : vector<4xf32>) -> (tensor) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -97,7 +97,7 @@ func.func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index) // CHECK-LABEL: func @copy_deallocated( func.func @copy_deallocated() -> tensor<10xf32> { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]] // CHECK: memref.dealloc %[[alloc]] // CHECK: return %[[alloc_tensor]] @@ -111,7 +111,7 @@ func.func @copy_deallocated() -> tensor<10xf32> { func.func @select_different_tensors(%t: tensor, %sz: index, %c: i1) -> tensor { // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : memref // CHECK-DAG: %[[alloc:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref - %0 = linalg.init_tensor [%sz] : tensor + %0 = bufferization.alloc_tensor [%sz] : tensor // A cast must be inserted because %t and %0 have different memref types. 
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref to memref diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir index 888d8a10e135ea..60b30cee6f6a7d 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -17,7 +17,7 @@ // CHECK: %[[alloc:.*]] = memref.alloc // CHECK: return %[[alloc]] func.func @create_tensor() -> tensor<10xf32> { - %0 = linalg.init_tensor [10] : tensor<10xf32> + %0 = bufferization.alloc_tensor [10] : tensor<10xf32> return %0 : tensor<10xf32> } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir index 71547701088591..3bade5c3bf062d 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -682,7 +682,7 @@ func.func @matmul_on_tensors( %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -720,7 +720,7 @@ func.func @matmul_on_tensors( %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -1246,19 +1246,19 @@ func.func @write_to_same_tensor_in_loop_out_of_place( // ----- -// CHECK-LABEL: func 
@write_to_same_init_tensor_in_place( -func.func @write_to_same_init_tensor_in_place( +// CHECK-LABEL: func @write_to_same_alloc_tensor_in_place( +func.func @write_to_same_alloc_tensor_in_place( %A : tensor {linalg.inplaceable = true}, %lb : index, %ub : index, %step : index, %sz: index, %sz2: index) -> (tensor) { - %B = linalg.init_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor [%sz2] : tensor // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 - // %B is written multiple times inside a loop, but it is an init_tensor. + // %B is written multiple times inside a loop, but it is an alloc_tensor. // CHECK: tensor.insert // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]} %B2 = tensor.insert %i3 into %B[%i] : tensor @@ -1274,13 +1274,13 @@ func.func @write_to_same_init_tensor_in_place( // ----- -// CHECK-LABEL: func @write_to_same_init_tensor_out_of_place( -func.func @write_to_same_init_tensor_out_of_place( +// CHECK-LABEL: func @write_to_same_alloc_tensor_out_of_place( +func.func @write_to_same_alloc_tensor_out_of_place( %A : tensor {linalg.inplaceable = true}, %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32) -> (tensor) { - %B = linalg.init_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor [%sz2] : tensor %C = tensor.insert %f into %B[%lb] : tensor // CHECK: scf.for {{.*}} { @@ -1288,8 +1288,8 @@ func.func @write_to_same_init_tensor_out_of_place( %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 // %C is written multiple times inside a loop. Even though %C aliases with - // an init_tensor, out-of-bounds bufferization is necessary because there is - // another alias (%C) outside of the loop. + // an alloc_tensor, out-of-place bufferization is necessary because there + // is another alias (%C) outside of the loop.
// CHECK: tensor.insert // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} %B2 = tensor.insert %i3 into %C[%i] : tensor diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir index 88a128921c6ecc..33983d19b9bca6 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -60,7 +60,7 @@ func.func @scf_if_not_aliasing( scf.yield %t1 : tensor } else { // This buffer aliases. - %t2 = linalg.init_tensor [%idx] : tensor + %t2 = bufferization.alloc_tensor [%idx] : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %t2 : tensor } @@ -221,7 +221,7 @@ func.func @unknown_op(%A : tensor<4xf32>) -> tensor<4xf32> func.func @mini_test_case1() -> tensor<10x20xf32> { %f0 = arith.constant 0.0 : f32 - %t = linalg.init_tensor [10, 20] : tensor<10x20xf32> + %t = bufferization.alloc_tensor [10, 20] : tensor<10x20xf32> %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %r : tensor<10x20xf32> @@ -274,7 +274,7 @@ func.func @call_to_unknown_tensor_returning_func(%t : tensor) { // ----- func.func @foo(%t : tensor<5xf32>) -> (tensor<5xf32>) { - %0 = linalg.init_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor [5] : tensor<5xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %0 : tensor<5xf32> } @@ -291,7 +291,7 @@ func.func @call_to_func_returning_non_equiv_tensor(%t : tensor<5xf32>) { func.func @destination_passing_style_dominance_test_1(%cst : f32, %idx : index, %idx2 : index) -> f32 { %0 = scf.execute_region -> tensor { - %1 = linalg.init_tensor [%idx] : tensor 
+ %1 = bufferization.alloc_tensor [%idx] : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %1 : tensor } @@ -304,7 +304,7 @@ func.func @destination_passing_style_dominance_test_1(%cst : f32, %idx : index, func.func @destination_passing_style_dominance_test_2(%cst : f32, %idx : index, %idx2 : index) -> f32 { - %1 = linalg.init_tensor [%idx] : tensor + %1 = bufferization.alloc_tensor [%idx] : tensor %0 = scf.execute_region -> tensor { // This YieldOp is in destination-passing style, thus no error. diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index d0316464eaae4c..f6ba5c9f3f9a3e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -70,7 +70,7 @@ func.func private @external_func_with_return_val(tensor<4xi32>) -> f32 // CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map2a]]> { func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>) { - %t = linalg.init_tensor [20, 10] : tensor<20x10xf32> + %t = bufferization.alloc_tensor [20, 10] : tensor<20x10xf32> %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1] : tensor<20x10xf32> to tensor<2x?xf32> return %0 : tensor<2x?xf32> @@ -120,7 +120,7 @@ func.func @main(%t: tensor {bufferization.writable = false}) -> (f32) { // CHECK-LABEL: func @func_without_tensor_args func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -456,9 +456,9 @@ func.func @main() { // CHECK-DAG: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cB:.*]] = 
memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cC:.*]] = memref.cast %[[C]] : memref to memref - %A = linalg.init_tensor [64] : tensor<64xf32> - %B = linalg.init_tensor [64] : tensor<64xf32> - %C = linalg.init_tensor [] : tensor + %A = bufferization.alloc_tensor [64] : tensor<64xf32> + %B = bufferization.alloc_tensor [64] : tensor<64xf32> + %C = bufferization.alloc_tensor [] : tensor // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>) // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>) diff --git a/mlir/test/Dialect/Bufferization/canonicalize.mlir b/mlir/test/Dialect/Bufferization/canonicalize.mlir index 89d4bb5988ae26..949024369836d1 100644 --- a/mlir/test/Dialect/Bufferization/canonicalize.mlir +++ b/mlir/test/Dialect/Bufferization/canonicalize.mlir @@ -243,3 +243,16 @@ func.func @load_from_buffer_cast(%arg0: index, %arg1: index, // CHECK: %[[RES:.*]] = tensor.extract %[[TENSOR]][%[[IDX0]], %[[IDX1]]] // CHECK-NOT: memref.load // CHECK: return %[[RES]] : f32 + + +// ----- + +func.func @alloc_tensor_canonicalize() -> (tensor<4x5x?xf32>) { + %c6 = arith.constant 6 : index + %0 = bufferization.alloc_tensor [4, 5, %c6] : tensor<4x5x?xf32> + return %0 : tensor<4x5x?xf32> +} +// CHECK: func @alloc_tensor_canonicalize +// CHECK: %[[T0:.+]] = bufferization.alloc_tensor [4, 5, 6] : tensor<4x5x6xf32> +// CHECK: %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32> +// CHECK: return %[[T1]] diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir new file mode 100644 index 00000000000000..f461f8a6e76772 --- /dev/null +++ b/mlir/test/Dialect/Bufferization/invalid.mlir @@ -0,0 +1,26 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +func.func @alloc_tensor_err(%arg0 : index, %arg1 : index) +{ + // expected-error @+1 {{specified type 'tensor<4x?x?x5xf32>' does not match the inferred type 
'tensor<4x5x?x?xf32>'}} + %1 = bufferization.alloc_tensor [4, 5, %arg0, %arg1] : tensor<4x?x?x5xf32> + return +} + +// ----- + +func.func @alloc_tensor_err(%arg0 : index) +{ + // expected-error @+1 {{expected 4 sizes values}} + %1 = bufferization.alloc_tensor [4, 5, %arg0] : tensor<4x?x?x5xf32> + return +} + +// ----- + +func.func @alloc_tensor_err(%arg0 : index) +{ + // expected-error @+1 {{expected 2 dynamic sizes values}} + %1 = "bufferization.alloc_tensor"(%arg0) {static_sizes = [4, -1, -1, 5]} : (index) -> tensor<4x?x?x5xf32> + return +} diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir index b48a8af13ededd..b5e2ee8e1e1351 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only bufferize-function-boundaries" -split-input-file | FileCheck %s -/// All combinations of matmul(fill(extract(init_tensor)), fill(extract(%init_tensor)), %arg2) +/// All combinations of matmul(fill(extract(alloc_tensor)), fill(extract(%alloc_tensor)), %arg2) /// These should all be inplaceable except the first op. 
// ----- @@ -15,7 +15,7 @@ func.func @fill_extract_matmul_1234( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -42,7 +42,7 @@ func.func @fill_extract_matmul_1243( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -69,7 +69,7 @@ func.func @fill_extract_matmul_1324( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -96,7 +96,7 @@ func.func @fill_extract_matmul_1342( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -123,7 +123,7 @@ func.func @fill_extract_matmul_1423( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : 
tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -150,7 +150,7 @@ func.func @fill_extract_matmul_1432( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -177,7 +177,7 @@ func.func @fill_extract_matmul_2134( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -204,7 +204,7 @@ func.func @fill_extract_matmul_2143( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -231,7 +231,7 @@ func.func @fill_extract_matmul_2314( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> 
tensor<256x256xf32> @@ -258,7 +258,7 @@ func.func @fill_extract_matmul_2341( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -285,7 +285,7 @@ func.func @fill_extract_matmul_2413( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -312,7 +312,7 @@ func.func @fill_extract_matmul_2431( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -339,7 +339,7 @@ func.func @fill_extract_matmul_3124( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -366,7 +366,7 @@ func.func @fill_extract_matmul_3142( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor 
[256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -392,7 +392,7 @@ func.func @fill_extract_matmul_3214( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -419,7 +419,7 @@ func.func @fill_extract_matmul_3241( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -446,7 +446,7 @@ func.func @fill_extract_matmul_3412( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -473,7 +473,7 @@ func.func @fill_extract_matmul_3421( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> 
to tensor<256x16xf32> @@ -500,7 +500,7 @@ func.func @fill_extract_matmul_4123( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -527,7 +527,7 @@ func.func @fill_extract_matmul_4132( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -554,7 +554,7 @@ func.func @fill_extract_matmul_4213( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -581,7 +581,7 @@ func.func @fill_extract_matmul_4231( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -608,7 +608,7 @@ func.func @fill_extract_matmul_4312( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor 
[256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -635,7 +635,7 @@ func.func @fill_extract_matmul_4321( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir index 512407907aa3cb..088b8c708540f2 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-eliminate-init-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s //===----------------------------------------------------------------------===// // InitTensorOp elimination @@ -10,7 +10,7 @@ func.func @buffer_forwarding_conflict(%arg0: tensor {bufferization.writab // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"] // Instead of allocating, share buffer with some inplace bufferization? 
- %0 = linalg.init_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor [%arg1] : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] @@ -37,7 +37,7 @@ func.func @buffer_forwarding_no_conflict(%arg0: tensor {bufferization.wri // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"] // Instead of allocating, share buffer with some inplace bufferization? - %0 = linalg.init_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor [%arg1] : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 77011e9878d599..72023f62d7b08e 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -339,7 +339,7 @@ func.func @op_is_reading_but_following_ops_are_not( // ----- //===----------------------------------------------------------------------===// -// InitTensorOp elimination would produce SSA violations for the example below. +// AllocTensorOp elimination would produce SSA violations for the example below. 
//===----------------------------------------------------------------------===// func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x18x32xf32>) @@ -347,9 +347,9 @@ func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index %c8 = arith.constant 8 : index - %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> + %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> %3 = scf.for %arg3 = %c0 to %c32 step %c8 iter_args(%arg4 = %1) -> (tensor) { %4 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg3) %5 = tensor.insert_slice %2 into %arg4[%4,0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : @@ -361,8 +361,8 @@ func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x // ----- -// CHECK-LABEL: func @do_not_copy_init_tensors( -func.func @do_not_copy_init_tensors(%f1: f32, %f2: f32, %idx: index) +// CHECK-LABEL: func @do_not_copy_alloc_tensors( +func.func @do_not_copy_alloc_tensors(%f1: f32, %f2: f32, %idx: index) -> (tensor<5xf32>, tensor<5xf32>) { // CHECK: memref.alloc @@ -370,7 +370,7 @@ func.func @do_not_copy_init_tensors(%f1: f32, %f2: f32, %idx: index) // CHECK-NOT: copy // CHECK: memref.store // CHECK: memref.store - %0 = linalg.init_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor [5] : tensor<5xf32> %1 = tensor.insert %f1 into %0[%idx] : tensor<5xf32> %2 = tensor.insert %f2 into %0[%idx] : tensor<5xf32> return %1, %2 : tensor<5xf32>, tensor<5xf32> diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir index 10e6b289f8ed0f..c427db7d9fbece 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ 
-583,7 +583,7 @@ func.func @write_to_same_tensor_in_loop_in_place( { // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - %B = linalg.init_tensor [%sz] : tensor + %B = bufferization.alloc_tensor [%sz] : tensor %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 // The tensor.insert is in-place because the %B is defined inside the loop. diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir index 4f38fb2ce93d78..4ff9c9ea326fed 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -220,7 +220,7 @@ func.func @scf_if_non_equiv_yields( // CHECK: return %[[r]] func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 { %r = scf.execute_region -> (tensor) { - %t2 = linalg.init_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor [%i] : tensor scf.yield %t2 : tensor } %f = tensor.extract %r[%j] : tensor @@ -261,7 +261,7 @@ func.func @scf_for_yield_non_equivalent( // CHECK-SAME: %[[t:.*]]: memref, %lb : index, %ub : index, %step : index) -> tensor { %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor { - %t2 = linalg.init_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor [%i] : tensor scf.yield %t2 : tensor } diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir index 382e4246b0f58d..7a98300a1e66f9 100644 --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -179,9 +179,9 @@ func.func @rank_reducing( %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index - %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> 
+ %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor) { %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7) %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir index 58bc2d053f56e8..731f7e5c5c0ff6 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -16,7 +16,7 @@ func.func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: ten %c0 = arith.constant 0 : index %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor %1 = affine.apply #map0(%c0, %c64)[%c2] - %2 = linalg.init_tensor [%1, 2] : tensor + %2 = bufferization.alloc_tensor [%1, 2] : tensor %3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32> @@ -33,7 +33,7 @@ func.func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: ten // call @printMemrefF32(%B) : (tensor<*xf32>) -> () %4 = affine.apply #map0(%c0, %c64)[%c2] - %5 = linalg.init_tensor [%4, 2] : tensor + %5 = bufferization.alloc_tensor [%4, 2] : tensor %6 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %5) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg0[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32> @@ -80,9 +80,9 @@ func.func @main() { %v1 = arith.constant 1.0 : f32 %v2 = arith.constant 2.0 : f32 - %A = linalg.init_tensor [64] : tensor<64xf32> - %B = linalg.init_tensor [64] : tensor<64xf32> - %C = linalg.init_tensor [] : tensor + %A = bufferization.alloc_tensor [64] : tensor<64xf32> + %B = bufferization.alloc_tensor 
[64] : tensor<64xf32> + %C = bufferization.alloc_tensor [] : tensor %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir index 92bd97186761d2..1c092bd9945762 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \ -// RUN: -linalg-bufferize -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: -linalg-init-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \ +// RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \ // RUN: -finalizing-bufferize -buffer-deallocation \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index bc250d1ba6c0b8..00fec62b2ac6c5 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -8692,6 +8692,7 @@ td_library( deps = [ ":AllocationOpInterfaceTdFiles", ":CopyOpInterfaceTdFiles", + ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", ], @@ -8755,6 +8756,7 @@ cc_library( ], includes = ["include"], deps = [ + ":Affine", ":AllocationOpInterface", ":ArithmeticDialect", ":BufferizableOpInterfaceIncGen", @@ -8763,6 +8765,7 @@ cc_library( ":CopyOpInterface", ":FuncDialect", ":IR", + ":InferTypeOpInterface", ":MemRefDialect", ":Support", ":TensorDialect", @@ -8809,6 +8812,7 @@ 
cc_library( ":LoopLikeInterface", ":MemRefDialect", ":Pass", + ":TensorDialect", ":Transforms", "//llvm:Support", ], From 441f691b37486730a18068c139f34a50cba666bd Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 20 May 2022 16:58:19 -0700 Subject: [PATCH 146/908] [ORC] Remove unused Error variable. This was left in by accident, and caused unchecked-error assertions during program termination in BuildingAJIT-Ch4. --- llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index eb555b4ddb8b6a..48aaab96e71ff4 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -88,7 +88,6 @@ EPCTrampolinePool::EPCTrampolinePool(EPCIndirectionUtils &EPCIU) } Error EPCTrampolinePool::deallocatePool() { - Error Err = Error::success(); std::promise DeallocResultP; auto DeallocResultF = DeallocResultP.get_future(); From 7581f138d54a117a3119a80001f4e0467d7d7b0c Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 20 May 2022 17:12:05 -0700 Subject: [PATCH 147/908] [ORC][examples] Add missing EPCIndirectionUtils::cleanup call. Clients are required to make this call prior to destroying the object. Adding the missing call fixes an assertion that was triggering at program termination time in the LLJITWithExecutorProcessControl example.
--- .../LLJITWithExecutorProcessControl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/examples/OrcV2Examples/LLJITWithExecutorProcessControl/LLJITWithExecutorProcessControl.cpp b/llvm/examples/OrcV2Examples/LLJITWithExecutorProcessControl/LLJITWithExecutorProcessControl.cpp index 634d74d9b80ed0..04c2c2e9545e37 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithExecutorProcessControl/LLJITWithExecutorProcessControl.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithExecutorProcessControl/LLJITWithExecutorProcessControl.cpp @@ -194,5 +194,8 @@ int main(int argc, char *argv[]) { outs() << "---Result---\n" << "entry(" << argc << ") = " << Result << "\n"; + // Destroy the EPCIndirectionUtils utility. + ExitOnErr(EPCIU->cleanup()); + return 0; } From 55287840fe3f0d175ca649f0b665ccc0c184eb5d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 20 May 2022 17:31:31 -0700 Subject: [PATCH 148/908] [ORC][examples] Fix off-by-one error when handling null terminators. LLVMCreateMemoryBufferWithMemoryRange checks for a null terminator after the given range, so we need to pass the length of the module string (excluding the null terminator). 
--- .../OrcV2CBindingsLazy/OrcV2CBindingsLazy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c b/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c index 0f4f979127e853..fd90671bbb3448 100644 --- a/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c +++ b/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c @@ -127,8 +127,8 @@ int main(int argc, char *argv[]) { LLVMErrorRef Err; LLVMOrcThreadSafeModuleRef FooTSM; - if ((Err = - parseExampleModule(FooMod, sizeof(FooMod), "foo-mod", &FooTSM))) { + if ((Err = parseExampleModule(FooMod, sizeof(FooMod) - 1, "foo-mod", + &FooTSM))) { MainResult = handleError(Err); goto jit_cleanup; } @@ -142,8 +142,8 @@ int main(int argc, char *argv[]) { } LLVMOrcThreadSafeModuleRef BarTSM; - if ((Err = - parseExampleModule(BarMod, sizeof(BarMod), "bar-mod", &BarTSM))) { + if ((Err = parseExampleModule(BarMod, sizeof(BarMod) - 1, "bar-mod", + &BarTSM))) { MainResult = handleError(Err); goto jit_cleanup; } @@ -155,7 +155,7 @@ int main(int argc, char *argv[]) { } LLVMOrcThreadSafeModuleRef MainTSM; - if ((Err = parseExampleModule(MainMod, sizeof(MainMod), "main-mod", + if ((Err = parseExampleModule(MainMod, sizeof(MainMod) - 1, "main-mod", &MainTSM))) { MainResult = handleError(Err); goto jit_cleanup; From 3b91657c7bc1e1fcf3f8374f71c238f432050947 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Wed, 20 Apr 2022 20:17:07 -0700 Subject: [PATCH 149/908] [mlir][LLVMIR] Add support for translating from some simple LLVM instructions Add support for translating from llvm::Select, llvm::FNeg, and llvm::Unreachable. This patch also cleans up (NFC) the opcode map for simple instructions and adds `// clang-format off/on` comments to prevent those lines from being churned by clang-format between commits. 
Differential Revision: https://reviews.llvm.org/D125817 --- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 80 ++++++++++++++------ mlir/test/Target/LLVMIR/Import/basic.ll | 19 +++++ 2 files changed, 74 insertions(+), 25 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 840da68dafbbdf..ab69c6a0027513 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -581,46 +581,73 @@ static StringRef lookupOperationNameFromOpcode(unsigned opcode) { #define INST(llvm_n, mlir_n) \ { llvm::Instruction::llvm_n, LLVM::mlir_n##Op::getOperationName() } static const DenseMap opcMap = { - // Ret is handled specially. + // clang-format off + INST(Ret, Return), // Br is handled specially. // Switch is handled specially. // FIXME: indirectbr - // FIXME: invoke + // Invoke is handled specially. INST(Resume, Resume), - // FIXME: unreachable + INST(Unreachable, Unreachable), // FIXME: cleanupret // FIXME: catchret // FIXME: catchswitch // FIXME: callbr - // FIXME: fneg - INST(Add, Add), INST(FAdd, FAdd), INST(Sub, Sub), INST(FSub, FSub), - INST(Mul, Mul), INST(FMul, FMul), INST(UDiv, UDiv), INST(SDiv, SDiv), - INST(FDiv, FDiv), INST(URem, URem), INST(SRem, SRem), INST(FRem, FRem), - INST(Shl, Shl), INST(LShr, LShr), INST(AShr, AShr), INST(And, And), - INST(Or, Or), INST(Xor, XOr), INST(Alloca, Alloca), INST(Load, Load), + INST(FNeg, FNeg), + INST(Add, Add), + INST(FAdd, FAdd), + INST(Sub, Sub), + INST(FSub, FSub), + INST(Mul, Mul), + INST(FMul, FMul), + INST(UDiv, UDiv), + INST(SDiv, SDiv), + INST(FDiv, FDiv), + INST(URem, URem), + INST(SRem, SRem), + INST(FRem, FRem), + INST(Shl, Shl), + INST(LShr, LShr), + INST(AShr, AShr), + INST(And, And), + INST(Or, Or), + INST(Xor, XOr), + INST(ExtractElement, ExtractElement), + INST(InsertElement, InsertElement), + // ShuffleVector is handled specially. + // ExtractValue is handled specially. 
+ // InsertValue is handled specially. + INST(Alloca, Alloca), + INST(Load, Load), INST(Store, Store), - // Getelementptr is handled specially. - INST(Ret, Return), INST(Fence, Fence), + INST(Fence, Fence), // FIXME: atomiccmpxchg // FIXME: atomicrmw - INST(Trunc, Trunc), INST(ZExt, ZExt), INST(SExt, SExt), - INST(FPToUI, FPToUI), INST(FPToSI, FPToSI), INST(UIToFP, UIToFP), - INST(SIToFP, SIToFP), INST(FPTrunc, FPTrunc), INST(FPExt, FPExt), - INST(PtrToInt, PtrToInt), INST(IntToPtr, IntToPtr), - INST(BitCast, Bitcast), INST(AddrSpaceCast, AddrSpaceCast), - // FIXME: cleanuppad - // FIXME: catchpad + // Getelementptr is handled specially. + INST(Trunc, Trunc), + INST(ZExt, ZExt), + INST(SExt, SExt), + INST(FPToUI, FPToUI), + INST(FPToSI, FPToSI), + INST(UIToFP, UIToFP), + INST(SIToFP, SIToFP), + INST(FPTrunc, FPTrunc), + INST(FPExt, FPExt), + INST(PtrToInt, PtrToInt), + INST(IntToPtr, IntToPtr), + INST(BitCast, Bitcast), + INST(AddrSpaceCast, AddrSpaceCast), // ICmp is handled specially. // FCmp is handled specially. // PHI is handled specially. - INST(Freeze, Freeze), INST(Call, Call), - // FIXME: select + INST(Select, Select), + INST(Freeze, Freeze), + INST(Call, Call), // FIXME: vaarg - INST(ExtractElement, ExtractElement), INST(InsertElement, InsertElement), - // ShuffleVector is handled specially. - // InsertValue is handled specially. - // ExtractValue is handled specially. 
// FIXME: landingpad + // FIXME: catchpad + // FIXME: cleanuppad + // clang-format on }; #undef INST @@ -776,7 +803,10 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) { case llvm::Instruction::Freeze: case llvm::Instruction::BitCast: case llvm::Instruction::ExtractElement: - case llvm::Instruction::InsertElement: { + case llvm::Instruction::InsertElement: + case llvm::Instruction::Select: + case llvm::Instruction::FNeg: + case llvm::Instruction::Unreachable: { OperationState state(loc, lookupOperationNameFromOpcode(inst->getOpcode())); SmallVector ops; ops.reserve(inst->getNumOperands()); diff --git a/mlir/test/Target/LLVMIR/Import/basic.ll b/mlir/test/Target/LLVMIR/Import/basic.ll index 05c09cebcaa318..6b74e7da00c6cf 100644 --- a/mlir/test/Target/LLVMIR/Import/basic.ll +++ b/mlir/test/Target/LLVMIR/Import/basic.ll @@ -278,6 +278,10 @@ define void @FPArithmetic(float %a, float %b, double %c, double %d) { %10 = frem float %a, %b ; CHECK: %[[a13:[0-9]+]] = llvm.frem %arg2, %arg3 : f64 %11 = frem double %c, %d + ; CHECK: %{{.+}} = llvm.fneg %{{.+}} : f32 + %12 = fneg float %a + ; CHECK: %{{.+}} = llvm.fneg %{{.+}} : f64 + %13 = fneg double %c ret void } @@ -605,3 +609,18 @@ define <4 x half> @insert_element(<4 x half>* %vec, half %v, i32 %idx) { ; CHECK: llvm.return %[[V1]] ret <4 x half> %r } + +; Select +; CHECK-LABEL: llvm.func @select_inst +define void @select_inst(i32 %arg0, i32 %arg1, i1 %pred) { + ; CHECK: %{{.+}} = llvm.select %{{.+}}, %{{.+}}, %{{.+}} : i1, i32 + %1 = select i1 %pred, i32 %arg0, i32 %arg1 + ret void +} + +; Unreachable +; CHECK-LABEL: llvm.func @unreachable_inst +define void @unreachable_inst() { + ; CHECK: llvm.unreachable + unreachable +} From f088b99eac74e5ab44fd8a576cd5dc28d7042029 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Thu, 21 Apr 2022 09:45:42 -0700 Subject: [PATCH 150/908] [mlir][LLVMIR] Use the correct way to determine if it's a scalable vector One of the ShuffleVectorOp::build functions checks if the 
incoming vector operand is a scalable vector by casting its type to mlir::VectorType first. However, in some cases the operand is not necessarily mlir::VectorType (e.g. it might be an LLVMVectorType). This patch fixes this issue by using the dedicated `LLVM::isScalableVectorType` function to determine if the incoming vector is a scalable vector or not. Differential Revision: https://reviews.llvm.org/D125818 --- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 6 +++--- .../LLVMIR/Import/incorrect-scalable-vector-check.ll | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/Import/incorrect-scalable-vector-check.ll diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index ca28fdf64ad7fe..958142b03d2456 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -2049,9 +2049,9 @@ void LLVM::ShuffleVectorOp::build(OpBuilder &b, OperationState &result, Value v1, Value v2, ArrayAttr mask, ArrayRef attrs) { auto containerType = v1.getType(); - auto vType = LLVM::getVectorType( - LLVM::getVectorElementType(containerType), mask.size(), - containerType.cast().isScalable()); + auto vType = LLVM::getVectorType(LLVM::getVectorElementType(containerType), + mask.size(), + LLVM::isScalableVectorType(containerType)); build(b, result, vType, v1, v2, mask); result.addAttributes(attrs); } diff --git a/mlir/test/Target/LLVMIR/Import/incorrect-scalable-vector-check.ll b/mlir/test/Target/LLVMIR/Import/incorrect-scalable-vector-check.ll new file mode 100644 index 00000000000000..03ddee2da3eeb6 --- /dev/null +++ b/mlir/test/Target/LLVMIR/Import/incorrect-scalable-vector-check.ll @@ -0,0 +1,8 @@ +; RUN: mlir-translate --import-llvm %s | FileCheck %s + +; CHECK: llvm.func @shufflevector_crash +define void @shufflevector_crash(<2 x i32*> %arg0) { + ; CHECK: llvm.shufflevector %{{.+}}, %{{.+}} [1 : i32, 0 : i32] : !llvm.vec<2 x ptr>, !llvm.vec<2 x ptr> 
+ %1 = shufflevector <2 x i32*> %arg0, <2 x i32*> undef, <2 x i32> + ret void +} From 66875dbcc0f86e7318063328367f92b205c187e2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 20 May 2022 23:15:42 -0700 Subject: [PATCH 151/908] [LegalizeTypes] Use SmallDenseMap::count instead of SmallDenseMap::find. NFC It's more readable and more efficient. --- .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6159dbc1ec72fe..0a439e5dd6287b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -83,7 +83,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { auto ResId = ValueToIdMap.lookup(Res); unsigned Mapped = 0; - if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) { + if (ResId && ReplacedValues.count(ResId)) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. 
for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); @@ -105,23 +105,23 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { assert(NewVal.getNode()->getNodeId() != NewNode && "ReplacedValues maps to a new node!"); } - if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end()) + if (ResId && PromotedIntegers.count(ResId)) Mapped |= 2; - if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end()) + if (ResId && SoftenedFloats.count(ResId)) Mapped |= 4; - if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end()) + if (ResId && ScalarizedVectors.count(ResId)) Mapped |= 8; - if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end()) + if (ResId && ExpandedIntegers.count(ResId)) Mapped |= 16; - if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end()) + if (ResId && ExpandedFloats.count(ResId)) Mapped |= 32; - if (ResId && SplitVectors.find(ResId) != SplitVectors.end()) + if (ResId && SplitVectors.count(ResId)) Mapped |= 64; - if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end()) + if (ResId && WidenedVectors.count(ResId)) Mapped |= 128; - if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end()) + if (ResId && PromotedFloats.count(ResId)) Mapped |= 256; - if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end()) + if (ResId && SoftPromotedHalfs.count(ResId)) Mapped |= 512; if (Node.getNodeId() != Processed) { From 003b95acf2ba3e42a65099e23b9716989699a1f1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 21 May 2022 00:00:01 -0700 Subject: [PATCH 152/908] [LegalizeTypes] Remove double map lookup in DAGTypeLegalizer::PerformExpensiveChecks. NFC Remove repeated checks for ResId being 0. 
--- .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 77 ++++++++++--------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 0a439e5dd6287b..7b7df0b4f62893 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -83,46 +83,49 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { auto ResId = ValueToIdMap.lookup(Res); unsigned Mapped = 0; - if (ResId && ReplacedValues.count(ResId)) { - Mapped |= 1; - // Check that remapped values are only used by nodes marked NewNode. - for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); - UI != UE; ++UI) - if (UI.getUse().getResNo() == i) - assert(UI->getNodeId() == NewNode && - "Remapped value has non-trivial use!"); - - // Check that the final result of applying ReplacedValues is not - // marked NewNode. - auto NewValId = ReplacedValues[ResId]; - auto I = ReplacedValues.find(NewValId); - while (I != ReplacedValues.end()) { - NewValId = I->second; + if (ResId) { + auto I = ReplacedValues.find(ResId); + if (I != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. 
+ auto NewValId = I->second; I = ReplacedValues.find(NewValId); + while (I != ReplacedValues.end()) { + NewValId = I->second; + I = ReplacedValues.find(NewValId); + } + SDValue NewVal = getSDValue(NewValId); + (void)NewVal; + assert(NewVal.getNode()->getNodeId() != NewNode && + "ReplacedValues maps to a new node!"); } - SDValue NewVal = getSDValue(NewValId); - (void)NewVal; - assert(NewVal.getNode()->getNodeId() != NewNode && - "ReplacedValues maps to a new node!"); + if (PromotedIntegers.count(ResId)) + Mapped |= 2; + if (SoftenedFloats.count(ResId)) + Mapped |= 4; + if (ScalarizedVectors.count(ResId)) + Mapped |= 8; + if (ExpandedIntegers.count(ResId)) + Mapped |= 16; + if (ExpandedFloats.count(ResId)) + Mapped |= 32; + if (SplitVectors.count(ResId)) + Mapped |= 64; + if (WidenedVectors.count(ResId)) + Mapped |= 128; + if (PromotedFloats.count(ResId)) + Mapped |= 256; + if (SoftPromotedHalfs.count(ResId)) + Mapped |= 512; } - if (ResId && PromotedIntegers.count(ResId)) - Mapped |= 2; - if (ResId && SoftenedFloats.count(ResId)) - Mapped |= 4; - if (ResId && ScalarizedVectors.count(ResId)) - Mapped |= 8; - if (ResId && ExpandedIntegers.count(ResId)) - Mapped |= 16; - if (ResId && ExpandedFloats.count(ResId)) - Mapped |= 32; - if (ResId && SplitVectors.count(ResId)) - Mapped |= 64; - if (ResId && WidenedVectors.count(ResId)) - Mapped |= 128; - if (ResId && PromotedFloats.count(ResId)) - Mapped |= 256; - if (ResId && SoftPromotedHalfs.count(ResId)) - Mapped |= 512; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes From 86fd1c139fb81f17a1d58ba5200df414b5eaebb1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 21 May 2022 10:58:57 +0200 Subject: [PATCH 153/908] [bufferization] Simplify code. NFCI. 
--- .../Bufferization/Transforms/OneShotModuleBufferize.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index 377bc58b2d53f3..aa6672860ec287 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -342,10 +342,8 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, return callOp->emitError() << "expected a CallOp"; func::FuncOp calledFunction = getCalledFunction(callOp); assert(calledFunction && "could not retrieved called func::FuncOp"); - auto it = callerMap.try_emplace(calledFunction, DenseSet{}); - it.first->getSecond().insert(callOp); - if (calledBy[calledFunction].count(funcOp) == 0) { - calledBy[calledFunction].insert(funcOp); + callerMap[calledFunction].insert(callOp); + if (calledBy[calledFunction].insert(funcOp).second) { numberCallOpsContainedInFuncOp[funcOp]++; } return WalkResult::advance(); From 216f546c846ca69005de193f4d1eea78e0efb2c2 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Sun, 10 Apr 2022 13:38:27 +0100 Subject: [PATCH 154/908] [SVE] Refactor lowering for fixed length MGATHER/MSCATTER. Lower fixed length MGATHER/MSCATTER operations to scalable vector equivalents, which are then lowered to SVE specific nodes. This two stage process is in preparation for making scalable vector MGATHER/MSCATTER operations legal. 
Differential Revision: https://reviews.llvm.org/D125192 --- .../Target/AArch64/AArch64ISelLowering.cpp | 167 ++++++++++-------- 1 file changed, 98 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e3fa268732b308..f8b71ee6428777 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4696,6 +4696,56 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, MGT->getMemOperand(), IndexType, ExtType); } + // Lower fixed length gather to a scalable equivalent. + if (VT.isFixedLengthVector()) { + assert(Subtarget->useSVEForFixedLengthVectors() && + "Cannot lower when not using SVE for fixed vectors!"); + + // NOTE: Handle floating-point as if integer then bitcast the result. + EVT DataVT = VT.changeVectorElementTypeToInteger(); + MemVT = MemVT.changeVectorElementTypeToInteger(); + + // Find the smallest integer fixed length vector we can use for the gather. + EVT PromotedVT = VT.changeVectorElementType(MVT::i32); + if (DataVT.getVectorElementType() == MVT::i64 || + Index.getValueType().getVectorElementType() == MVT::i64 || + Mask.getValueType().getVectorElementType() == MVT::i64) + PromotedVT = VT.changeVectorElementType(MVT::i64); + + // Promote vector operands except for passthrough, which we know is either + // undef or zero, and thus best constructed directly. + unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index); + Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask); + + // A promoted result type forces the need for an extending load. + if (PromotedVT != DataVT && ExtType == ISD::NON_EXTLOAD) + ExtType = ISD::EXTLOAD; + + EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT); + + // Convert fixed length vector operands to scalable. 
+ MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType()); + Index = convertToScalableVector(DAG, ContainerVT, Index); + Mask = convertFixedMaskToScalableVector(Mask, DAG); + PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT) + : DAG.getConstant(0, DL, ContainerVT); + + // Emit equivalent scalable vector gather. + SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; + SDValue Load = + DAG.getMaskedGather(DAG.getVTList(ContainerVT, MVT::Other), MemVT, DL, + Ops, MGT->getMemOperand(), IndexType, ExtType); + + // Extract fixed length data then convert to the required result type. + SDValue Result = convertFromScalableVector(DAG, PromotedVT, Load); + Result = DAG.getNode(ISD::TRUNCATE, DL, DataVT, Result); + if (VT.isFloatingPoint()) + Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); + + return DAG.getMergeValues({Result, Load.getValue(1)}, DL); + } + bool IdxNeedsExtend = getGatherScatterIndexIsExtended(Index) || Index.getSimpleValueType().getVectorElementType() == MVT::i32; @@ -4703,26 +4753,8 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, EVT IndexVT = Index.getSimpleValueType(); SDValue InputVT = DAG.getValueType(MemVT); - bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector(); - - if (IsFixedLength) { - assert(Subtarget->useSVEForFixedLengthVectors() && - "Cannot lower when not using SVE for fixed vectors"); - if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); - } else { - MemVT = getContainerForFixedLengthVector(DAG, MemVT); - IndexVT = MemVT.changeTypeToInteger(); - } - InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); - Mask = DAG.getNode( - ISD::SIGN_EXTEND, DL, - VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask); - } - // Handle FP data by using an integer gather and casting the result. 
- if (VT.isFloatingPoint() && !IsFixedLength) + if (VT.isFloatingPoint()) InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other); @@ -4737,25 +4769,11 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, if (ExtType == ISD::SEXTLOAD) Opcode = getSignExtendedGatherOpcode(Opcode); - if (IsFixedLength) { - if (Index.getSimpleValueType().isFixedLengthVector()) - Index = convertToScalableVector(DAG, IndexVT, Index); - if (BasePtr.getSimpleValueType().isFixedLengthVector()) - BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr); - Mask = convertFixedMaskToScalableVector(Mask, DAG); - } - SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT}; SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops); Chain = Result.getValue(1); - if (IsFixedLength) { - Result = convertFromScalableVector( - DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()), - Result); - Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result); - Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); - } else if (VT.isFloatingPoint()) + if (VT.isFloatingPoint()) Result = getSVESafeBitCast(VT, Result, DAG); return DAG.getMergeValues({Result, Chain}, DL); @@ -4775,6 +4793,7 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, EVT VT = StoreVal.getValueType(); EVT MemVT = MSC->getMemoryVT(); ISD::MemIndexType IndexType = MSC->getIndexType(); + bool Truncating = MSC->isTruncatingStore(); bool IsScaled = MSC->isIndexScaled(); bool IsSigned = MSC->isIndexSigned(); @@ -4791,42 +4810,60 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops, - MSC->getMemOperand(), IndexType, - MSC->isTruncatingStore()); + MSC->getMemOperand(), IndexType, Truncating); + } + + // Lower fixed length scatter to a scalable equivalent. 
+ if (VT.isFixedLengthVector()) { + assert(Subtarget->useSVEForFixedLengthVectors() && + "Cannot lower when not using SVE for fixed vectors!"); + + // Once bitcast we treat floating-point scatters as if integer. + if (VT.isFloatingPoint()) { + VT = VT.changeVectorElementTypeToInteger(); + MemVT = MemVT.changeVectorElementTypeToInteger(); + StoreVal = DAG.getNode(ISD::BITCAST, DL, VT, StoreVal); + } + + // Find the smallest integer fixed length vector we can use for the scatter. + EVT PromotedVT = VT.changeVectorElementType(MVT::i32); + if (VT.getVectorElementType() == MVT::i64 || + Index.getValueType().getVectorElementType() == MVT::i64 || + Mask.getValueType().getVectorElementType() == MVT::i64) + PromotedVT = VT.changeVectorElementType(MVT::i64); + + // Promote vector operands. + unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index); + Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask); + StoreVal = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, StoreVal); + + // A promoted value type forces the need for a truncating store. + if (PromotedVT != VT) + Truncating = true; + + EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT); + + // Convert fixed length vector operands to scalable. + MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType()); + Index = convertToScalableVector(DAG, ContainerVT, Index); + Mask = convertFixedMaskToScalableVector(Mask, DAG); + StoreVal = convertToScalableVector(DAG, ContainerVT, StoreVal); + + // Emit equivalent scalable vector scatter. 
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; + return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops, + MSC->getMemOperand(), IndexType, Truncating); } bool NeedsExtend = getGatherScatterIndexIsExtended(Index) || Index.getSimpleValueType().getVectorElementType() == MVT::i32; - EVT IndexVT = Index.getSimpleValueType(); SDVTList VTs = DAG.getVTList(MVT::Other); SDValue InputVT = DAG.getValueType(MemVT); - bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector(); - - if (IsFixedLength) { - assert(Subtarget->useSVEForFixedLengthVectors() && - "Cannot lower when not using SVE for fixed vectors"); - if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); - } else { - MemVT = getContainerForFixedLengthVector(DAG, MemVT); - IndexVT = MemVT.changeTypeToInteger(); - } - InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); - - StoreVal = - DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal); - StoreVal = DAG.getNode( - ISD::ANY_EXTEND, DL, - VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal); - StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal); - Mask = DAG.getNode( - ISD::SIGN_EXTEND, DL, - VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask); - } else if (VT.isFloatingPoint()) { + if (VT.isFloatingPoint()) { // Handle FP data by casting the data so an integer scatter can be used. 
EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount()); StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG); @@ -4840,14 +4877,6 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, selectGatherScatterAddrMode(BasePtr, Index, IsScaled, MemVT, Opcode, /*isGather=*/false, DAG); - if (IsFixedLength) { - if (Index.getSimpleValueType().isFixedLengthVector()) - Index = convertToScalableVector(DAG, IndexVT, Index); - if (BasePtr.getSimpleValueType().isFixedLengthVector()) - BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr); - Mask = convertFixedMaskToScalableVector(Mask, DAG); - } - SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT}; return DAG.getNode(Opcode, DL, VTs, Ops); } From 6f0ca6fd2305be860057a1b2c21c6708f4ffdb38 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 21 May 2022 11:19:29 +0200 Subject: [PATCH 155/908] [JumpThreading] Insert freeze when unfolding select JumpThreading may convert selects into branch instructions, in which case the condition needs to be frozen (as branch on poison is immediate undefined behavior, unlike select on poison). The necessary code for this is already in place, this just enables the option. 
Differential Revision: https://reviews.llvm.org/D125869 --- .../llvm/Transforms/Scalar/JumpThreading.h | 2 +- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 4 +-- .../Transforms/JumpThreading/codesize-loop.ll | 24 ++++++++------ llvm/test/Transforms/JumpThreading/select.ll | 32 +++++++++---------- 4 files changed, 33 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 87f33c1f9ec9f2..07db880a6d74bf 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -95,7 +95,7 @@ class JumpThreadingPass : public PassInfoMixin { bool InsertFreezeWhenUnfoldingSelect; public: - JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1); + JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = true, int T = -1); // Glue for old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 06dd9ff0a75a15..5b1f703ffef0b2 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -106,7 +106,7 @@ static cl::opt PrintLVIAfterJumpThreading( static cl::opt JumpThreadingFreezeSelectCond( "jump-threading-freeze-select-cond", - cl::desc("Freeze the condition when unfolding select"), cl::init(false), + cl::desc("Freeze the condition when unfolding select"), cl::init(true), cl::Hidden); static cl::opt ThreadAcrossLoopHeaders( @@ -138,7 +138,7 @@ namespace { public: static char ID; // Pass identification - JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1) + JumpThreading(bool InsertFreezeWhenUnfoldingSelect = true, int T = -1) : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) { initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); } diff --git 
a/llvm/test/Transforms/JumpThreading/codesize-loop.ll b/llvm/test/Transforms/JumpThreading/codesize-loop.ll index 68d6a99d86debd..3556debfed858b 100644 --- a/llvm/test/Transforms/JumpThreading/codesize-loop.ll +++ b/llvm/test/Transforms/JumpThreading/codesize-loop.ll @@ -19,7 +19,8 @@ define i32 @test_minsize(i32 %argc, i8** nocapture readonly %argv) local_unnamed ; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; DEFAULT-NEXT: [[TMP3:%.*]] = mul i32 [[COND]], [[TMP2]] ; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[COND]], 0 -; DEFAULT-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]] +; DEFAULT-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]] +; DEFAULT-NEXT: br i1 [[COND_FR]], label [[TMP5:%.*]], label [[TMP6:%.*]] ; DEFAULT: 5: ; DEFAULT-NEXT: br label [[TMP6]] ; DEFAULT: 6: @@ -40,14 +41,15 @@ define i32 @test_minsize(i32 %argc, i8** nocapture readonly %argv) local_unnamed ; OVERIDE-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; OVERIDE-NEXT: [[TMP3:%.*]] = mul i32 [[CALL]], [[TMP2]] ; OVERIDE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[CALL]], 0 -; OVERIDE-NEXT: br i1 [[TMP4]], label [[COND_END_THREAD]], label [[TMP6:%.*]] +; OVERIDE-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]] +; OVERIDE-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP6:%.*]] ; OVERIDE: cond.end.thread: ; OVERIDE-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[COND_END]] ], [ 205962976, [[ENTRY:%.*]] ] -; OVERIDE-NEXT: [[COND2:%.*]] = phi i32 [ [[CALL]], [[COND_END]] ], [ 46, [[ENTRY]] ] +; OVERIDE-NEXT: [[COND3:%.*]] = phi i32 [ [[CALL]], [[COND_END]] ], [ 46, [[ENTRY]] ] ; OVERIDE-NEXT: br label [[TMP6]] ; OVERIDE: 6: ; OVERIDE-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP5]], [[COND_END_THREAD]] ], [ [[TMP3]], [[COND_END]] ] -; OVERIDE-NEXT: [[TMP8:%.*]] = phi i32 [ [[COND2]], [[COND_END_THREAD]] ], [ 0, [[COND_END]] ] +; OVERIDE-NEXT: [[TMP8:%.*]] = phi i32 [ [[COND3]], [[COND_END_THREAD]] ], [ 0, [[COND_END]] ] ; OVERIDE-NEXT: [[TMP9:%.*]] = mul i32 [[TMP7]], [[TMP8]] ; 
OVERIDE-NEXT: [[CALL33:%.*]] = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 [[TMP9]]) ; OVERIDE-NEXT: ret i32 0 @@ -87,14 +89,15 @@ define i32 @test_optsize(i32 %argc, i8** nocapture readonly %argv) local_unnamed ; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; DEFAULT-NEXT: [[TMP3:%.*]] = mul i32 [[CALL]], [[TMP2]] ; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[CALL]], 0 -; DEFAULT-NEXT: br i1 [[TMP4]], label [[COND_END_THREAD]], label [[TMP6:%.*]] +; DEFAULT-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]] +; DEFAULT-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP6:%.*]] ; DEFAULT: cond.end.thread: ; DEFAULT-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[COND_END]] ], [ 205962976, [[ENTRY:%.*]] ] -; DEFAULT-NEXT: [[COND2:%.*]] = phi i32 [ [[CALL]], [[COND_END]] ], [ 46, [[ENTRY]] ] +; DEFAULT-NEXT: [[COND3:%.*]] = phi i32 [ [[CALL]], [[COND_END]] ], [ 46, [[ENTRY]] ] ; DEFAULT-NEXT: br label [[TMP6]] ; DEFAULT: 6: ; DEFAULT-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP5]], [[COND_END_THREAD]] ], [ [[TMP3]], [[COND_END]] ] -; DEFAULT-NEXT: [[TMP8:%.*]] = phi i32 [ [[COND2]], [[COND_END_THREAD]] ], [ 0, [[COND_END]] ] +; DEFAULT-NEXT: [[TMP8:%.*]] = phi i32 [ [[COND3]], [[COND_END_THREAD]] ], [ 0, [[COND_END]] ] ; DEFAULT-NEXT: [[TMP9:%.*]] = mul i32 [[TMP7]], [[TMP8]] ; DEFAULT-NEXT: [[CALL33:%.*]] = tail call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 [[TMP9]]) ; DEFAULT-NEXT: ret i32 0 @@ -111,14 +114,15 @@ define i32 @test_optsize(i32 %argc, i8** nocapture readonly %argv) local_unnamed ; OVERIDE-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]] ; OVERIDE-NEXT: [[TMP3:%.*]] = mul i32 [[CALL]], [[TMP2]] ; OVERIDE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[CALL]], 0 -; OVERIDE-NEXT: br i1 [[TMP4]], label [[COND_END_THREAD]], label [[TMP6:%.*]] +; OVERIDE-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]] +; OVERIDE-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP6:%.*]] ; OVERIDE: cond.end.thread: ; OVERIDE-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[COND_END]] ], [ 205962976, [[ENTRY:%.*]] ] -; OVERIDE-NEXT: [[COND2:%.*]] = phi i32 [ [[CALL]], [[COND_END]] ], [ 46, [[ENTRY]] ] +; OVERIDE-NEXT: [[COND3:%.*]] = phi i32 [ [[CALL]], [[COND_END]] ], [ 46, [[ENTRY]] ] ; OVERIDE-NEXT: br label [[TMP6]] ; OVERIDE: 6: ; OVERIDE-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP5]], [[COND_END_THREAD]] ], [ [[TMP3]], [[COND_END]] ] -; OVERIDE-NEXT: [[TMP8:%.*]] = phi i32 [ [[COND2]], [[COND_END_THREAD]] ], [ 0, [[COND_END]] ] +; OVERIDE-NEXT: [[TMP8:%.*]] = phi i32 [ [[COND3]], [[COND_END_THREAD]] ], [ 0, [[COND_END]] ] ; OVERIDE-NEXT: [[TMP9:%.*]] = mul i32 [[TMP7]], [[TMP8]] ; OVERIDE-NEXT: [[CALL33:%.*]] = tail call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 [[TMP9]]) ; OVERIDE-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/JumpThreading/select.ll b/llvm/test/Transforms/JumpThreading/select.ll index 392cdd386ac1f2..4c50be4f013094 100644 --- a/llvm/test/Transforms/JumpThreading/select.ll +++ b/llvm/test/Transforms/JumpThreading/select.ll @@ -175,7 +175,8 @@ define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind { ; CHECK: L0: ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[VAL:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VAL_PHI]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[L1:%.*]], label [[TMP0:%.*]] +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[L1:%.*]], label [[TMP0:%.*]] ; CHECK: 0: ; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ] ; CHECK-NEXT: switch i8 [[TMP1]], label [[L3:%.*]] [ @@ -361,22 +362,23 @@ define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) noun ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] -; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD3:%.*]], label [[COND_FALSE_I:%.*]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]] ; CHECK: cond.false.i: ; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] ; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]] ; CHECK: cond.false.6.i: ; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD3]], label [[COND_FALSE_10_I:%.*]] +; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]] ; CHECK: cond.false.10.i: ; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] ; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]] ; CHECK: .exit: ; 
CHECK-NEXT: [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] -; CHECK-NEXT: br i1 [[PHITMP]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD3]] +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[PHITMP]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]] ; CHECK: .exit.thread: -; CHECK-NEXT: br label [[DOTEXIT_THREAD3]] -; CHECK: .exit.thread3: +; CHECK-NEXT: br label [[DOTEXIT_THREAD4]] +; CHECK: .exit.thread4: ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ] ; CHECK-NEXT: ret i32 [[TMP0]] ; @@ -416,21 +418,22 @@ define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) noun ; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]] ; CHECK: cond.false.i: ; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] -; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_6_I:%.*]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]] ; CHECK: cond.false.6.i: ; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] ; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]] ; CHECK: cond.false.10.i: ; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] -; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD4]], label [[DOTEXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]] ; CHECK: .exit: ; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 ; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1 -; CHECK-NEXT: br i1 [[LNOT_I18]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]] +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]] ; CHECK: .exit.thread: -; CHECK-NEXT: br label 
[[DOTEXIT_THREAD4]] -; CHECK: .exit.thread4: +; CHECK-NEXT: br label [[DOTEXIT_THREAD5]] +; CHECK: .exit.thread5: ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ] ; CHECK-NEXT: ret i32 [[TMP0]] ; @@ -625,16 +628,13 @@ sw.default: ; preds = %if.end, %sw.bb9 br label %for.cond } -; FIXME: This is an invalid transform. If %b is false and %x is poison, -; then the select produces poison (the result of the program is poison). -; But with this transform, we may be branching on poison, and that is UB. - define i32 @TryToUnfoldSelectInCurrBB(i1 %b, i1 %ui, i32 %s, i1 %x) { ; CHECK-LABEL: @TryToUnfoldSelectInCurrBB( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF_END_THREAD:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: br i1 [[X:%.*]], label [[TMP0:%.*]], label [[IF_END_THREAD]] +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[X:%.*]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP0:%.*]], label [[IF_END_THREAD]] ; CHECK: 0: ; CHECK-NEXT: br label [[IF_END_THREAD]] ; CHECK: if.end.thread: From f09a50cd974c55c2c20674077126b04b27697051 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 20 May 2022 22:12:45 +0100 Subject: [PATCH 156/908] [AMDGPU] Regenerate permute.ll test checks for future patch --- llvm/test/CodeGen/AMDGPU/permute.ll | 266 +++++++++++++++++++++++----- 1 file changed, 217 insertions(+), 49 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/permute.ll b/llvm/test/CodeGen/AMDGPU/permute.ll index 26af6f4d0f2bc1..b0b684e770ffc3 100644 --- a/llvm/test/CodeGen/AMDGPU/permute.ll +++ b/llvm/test/CodeGen/AMDGPU/permute.ll @@ -1,9 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; GCN-LABEL: {{^}}lsh8_or_and: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400 -; GCN: v_perm_b32 
v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: lsh8_or_and: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x6050400 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -15,10 +28,22 @@ bb: ret void } -; GCN-LABEL: {{^}}lsr24_or_and: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: lsr24_or_and: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, s0, v2, v3 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -30,10 +55,23 @@ bb: ret void } -; GCN-LABEL: {{^}}and_or_lsr24: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_lsr24(i32 
addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: and_or_lsr24: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 +; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -46,10 +84,22 @@ bb: ret void } -; GCN-LABEL: {{^}}and_or_and: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: and_or_and: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x7020500 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -61,9 +111,21 @@ bb: ret void } -; GCN-LABEL: {{^}}lsh8_or_lsr24: -; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24 define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: lsh8_or_lsr24: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_alignbit_b32 v2, v2, s0, 24 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -75,10 +137,22 @@ bb: ret void } -; GCN-LABEL: {{^}}lsh16_or_lsr24: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: lsh16_or_lsr24: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x5040c03 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -90,10 +164,22 @@ bb: ret void } -; GCN-LABEL: {{^}}and_xor_and: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: and_xor_and: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: 
v_mov_b32_e32 v3, 0x7020104 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -105,13 +191,25 @@ bb: ret void } -; GCN-LABEL: {{^}}and_or_or_and: -; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00 -; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000 -; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}} -; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]] ; FIXME here should have been "v_perm_b32" with 0xffff0500 mask. define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: and_or_or_and: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_and_b32 s0, s0, 0xff00 +; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2 +; GCN-NEXT: v_or_b32_e32 v2, s0, v2 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -124,10 +222,22 @@ bb: ret void } -; GCN-LABEL: {{^}}and_or_and_shl: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: and_or_and_shl: +; GCN: ; 
%bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x50c0c00 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -140,10 +250,22 @@ bb: ret void } -; GCN-LABEL: {{^}}or_and_or: -; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: or_and_or: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v2, v[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -155,16 +277,31 @@ bb: ret void } -; GCN-LABEL: {{^}}known_ffff0500: -; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 -; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00 -; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000 -; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]] -; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]] -; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}} -; GCN: store_dword v[{{[0-9:]+}}], 
[[RES]]{{$}} ; FIXME here should have been "v_perm_b32" with 0xffff0500 mask. define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: known_ffff0500: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v4, v[0:1] +; GCN-NEXT: s_bitset1_b32 s0, 15 +; GCN-NEXT: s_and_b32 s0, s0, 0xff00 +; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_or_b32_e32 v4, 4, v4 +; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4 +; GCN-NEXT: v_or_b32_e32 v4, s0, v4 +; GCN-NEXT: flat_store_dword v[0:1], v4 +; GCN-NEXT: flat_store_dword v[2:3], v5 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -181,12 +318,27 @@ bb: ret void } -; GCN-LABEL: {{^}}known_050c0c00: -; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00 -; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}} -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] -; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: known_050c0c00: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v5, 0x50c0c00 +; GCN-NEXT: v_mov_b32_e32 v6, 4 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v4, v[0:1] +; GCN-NEXT: 
s_or_b32 s0, s0, 4 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_perm_b32 v4, v4, s0, v5 +; GCN-NEXT: flat_store_dword v[0:1], v4 +; GCN-NEXT: flat_store_dword v[2:3], v6 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id @@ -202,12 +354,28 @@ bb: ret void } -; GCN-LABEL: {{^}}known_ffff8004: -; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500 -; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] -; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +; GCN-LABEL: known_ffff8004: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500 +; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: flat_load_dword v4, v[0:1] +; GCN-NEXT: s_or_b32 s0, s0, 4 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_or_b32_e32 v4, 0x8000, v4 +; GCN-NEXT: v_perm_b32 v4, v4, s0, v5 +; GCN-NEXT: flat_store_dword v[0:1], v4 +; GCN-NEXT: flat_store_dword v[2:3], v6 +; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id From 8ba1421432924a49ce884461f87970c6513a298f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Sat, 21 May 2022 12:26:22 +0200 Subject: [PATCH 157/908] [lldb] fix 'command container' help text --- lldb/source/Commands/CommandObjectCommands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index 4ea81082a9aed0..2334759ea0f473 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -2099,8 +2099,8 @@ class CommandObjectCommandContainer : public CommandObjectMultiword { : CommandObjectMultiword( interpreter, "command container", "Commands for adding container commands to lldb. " - "Container commands are containers for other commands. You can" - "add nested container commands by specifying a command path, but " + "Container commands are containers for other commands. You can " + "add nested container commands by specifying a command path, " "but you can't add commands into the built-in command hierarchy.", "command container []") { LoadSubCommand("add", CommandObjectSP(new CommandObjectCommandsContainerAdd( From c312f025940d79b858166b80c50526558864d54e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 21 May 2022 12:58:39 +0200 Subject: [PATCH 158/908] [STLExtras] Make indexed_accessor_range operator== compatible with C++20 This would be ambiguous with itself when C++20 tries to look up the reversed form. I didn't find a use in LLVM, but MLIR does a lot of comparisons of ranges of different types. --- llvm/include/llvm/ADT/STLExtras.h | 16 +++++++++------- llvm/unittests/Support/IndexedAccessorTest.cpp | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index b186489295b417..0f713ede8b9e22 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1181,13 +1181,15 @@ class indexed_accessor_range_base { } /// Compare this range with another. 
- template bool operator==(const OtherT &other) const { - return size() == - static_cast(std::distance(other.begin(), other.end())) && - std::equal(begin(), end(), other.begin()); - } - template bool operator!=(const OtherT &other) const { - return !(*this == other); + template + friend bool operator==(const indexed_accessor_range_base &lhs, + const OtherT &rhs) { + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); + } + template + friend bool operator!=(const indexed_accessor_range_base &lhs, + const OtherT &rhs) { + return !(lhs == rhs); } /// Return the size of this range. diff --git a/llvm/unittests/Support/IndexedAccessorTest.cpp b/llvm/unittests/Support/IndexedAccessorTest.cpp index 501d7a6ea2ec09..02b565634e2a9d 100644 --- a/llvm/unittests/Support/IndexedAccessorTest.cpp +++ b/llvm/unittests/Support/IndexedAccessorTest.cpp @@ -46,4 +46,18 @@ TEST(AccessorRange, SliceTest) { compareData(range.slice(2, 3), data.slice(2, 3)); compareData(range.slice(0, 5), data.slice(0, 5)); } + +TEST(AccessorRange, EqualTest) { + int32_t rawData1[] = {0, 1, 2, 3, 4}; + uint64_t rawData2[] = {0, 1, 2, 3, 4}; + + ArrayIndexedAccessorRange range1(rawData1, /*start=*/0, + /*numElements=*/5); + ArrayIndexedAccessorRange range2(rawData2, /*start=*/0, + /*numElements=*/5); + EXPECT_TRUE(range1 == range2); + EXPECT_FALSE(range1 != range2); + EXPECT_TRUE(range2 == range1); + EXPECT_FALSE(range2 != range1); +} } // end anonymous namespace From 295d032762ad284068c72cc1904680a4db5e80d3 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 21 May 2022 13:25:24 +0200 Subject: [PATCH 159/908] [mlir] Move diagnostic handlers instead of copying This also allows using unique_ptr instead of shared_ptr for the CAPI user data. NFCI. 
--- mlir/include/mlir/IR/Diagnostics.h | 4 ++-- mlir/lib/CAPI/IR/Diagnostics.cpp | 7 ++++--- mlir/lib/IR/Diagnostics.cpp | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/IR/Diagnostics.h b/mlir/include/mlir/IR/Diagnostics.h index dd75aeea48424c..82bd79390e6108 100644 --- a/mlir/include/mlir/IR/Diagnostics.h +++ b/mlir/include/mlir/IR/Diagnostics.h @@ -417,7 +417,7 @@ class DiagnosticEngine { /// The handler type for MLIR diagnostics. This function takes a diagnostic as /// input, and returns success if the handler has fully processed this /// diagnostic. Returns failure otherwise. - using HandlerTy = std::function; + using HandlerTy = llvm::unique_function; /// A handle to a specific registered handler object. using HandlerID = uint64_t; @@ -427,7 +427,7 @@ class DiagnosticEngine { /// handlers will process diagnostics first. This function returns a unique /// identifier for the registered handler, which can be used to unregister /// this handler at a later time. - HandlerID registerHandler(const HandlerTy &handler); + HandlerID registerHandler(HandlerTy handler); /// Set the diagnostic handler with a function that returns void. 
This is a /// convenient wrapper for handlers that always completely process the given diff --git a/mlir/lib/CAPI/IR/Diagnostics.cpp b/mlir/lib/CAPI/IR/Diagnostics.cpp index 40639c7ba31b71..4a13ae57611d8e 100644 --- a/mlir/lib/CAPI/IR/Diagnostics.cpp +++ b/mlir/lib/CAPI/IR/Diagnostics.cpp @@ -59,11 +59,12 @@ MlirDiagnosticHandlerID mlirContextAttachDiagnosticHandler( assert(handler && "unexpected null diagnostic handler"); if (deleteUserData == nullptr) deleteUserData = deleteUserDataNoop; - std::shared_ptr sharedUserData(userData, deleteUserData); DiagnosticEngine::HandlerID id = unwrap(context)->getDiagEngine().registerHandler( - [handler, sharedUserData](Diagnostic &diagnostic) { - return unwrap(handler(wrap(diagnostic), sharedUserData.get())); + [handler, + ownedUserData = std::unique_ptr( + userData, deleteUserData)](Diagnostic &diagnostic) { + return unwrap(handler(wrap(diagnostic), ownedUserData.get())); }); return static_cast(id); } diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index 975f6943fc673d..98e4d40461c36b 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -272,10 +272,10 @@ DiagnosticEngine::~DiagnosticEngine() = default; /// Register a new handler for diagnostics to the engine. This function returns /// a unique identifier for the registered handler, which can be used to /// unregister this handler at a later time. 
-auto DiagnosticEngine::registerHandler(const HandlerTy &handler) -> HandlerID { +auto DiagnosticEngine::registerHandler(HandlerTy handler) -> HandlerID { llvm::sys::SmartScopedLock lock(impl->mutex); auto uniqueID = impl->uniqueHandlerId++; - impl->handlers.insert({uniqueID, handler}); + impl->handlers.insert({uniqueID, std::move(handler)}); return uniqueID; } From c8b675eaa1de1d4edebd0292ca6de3180c687127 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 21 May 2022 15:39:08 +0200 Subject: [PATCH 160/908] [SCEV] Use umin_seq for BECount of multi-exit loops When computing the BECount for multi-exit loops, we need to combine individual exit counts using umin_seq rather than umin. This is because an earlier exit may exit on the first iteration, in which case later exit expressions will not be evaluated and could be poisonous. We cannot propagate potential poison values from later exits. In particular, this avoids the introduction of "branch on poison" UB when optimizing multi-exit loops. Differential Revision: https://reviews.llvm.org/D124910 --- llvm/lib/Analysis/ScalarEvolution.cpp | 5 +- .../IndVarSimplify/ARM/code-size.ll | 130 ++++++++++-------- .../IndVarSimplify/loop-predication.ll | 81 ++++++----- .../IndVarSimplify/post-inc-range.ll | 22 +-- ...ple-unreachable-exits-for-vectorization.ll | 85 ++++++------ 5 files changed, 174 insertions(+), 149 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 57d5706b1496d7..c8193732ea16d5 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8165,7 +8165,10 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, "Predicate should be always true!"); } - return SE->getUMinFromMismatchedTypes(Ops); + // If an earlier exit exits on the first iteration (exit count zero), then + // a later poison exit count should not propagate into the result. 
This are + // exactly the semantics provided by umin_seq. + return SE->getUMinFromMismatchedTypes(Ops, /* Sequential */ true); } /// Get the exact not taken count for this loop exit. diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll index 0d79d325276e77..f3f7084893c7fc 100644 --- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll +++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -135,13 +135,14 @@ define i32 @test1(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -161,13 +162,14 @@ define i32 @test1(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: 
[[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -216,13 +218,14 @@ define i32 @test2(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8M-LABEL: @test2( ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -241,13 +244,14 @@ define i32 @test2(i32* %array, i32 %length, i32 
%n) #0 { ; CHECK-V8A-LABEL: @test2( ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -294,16 +298,16 @@ define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %l ; CHECK-V8M-LABEL: @two_range_checks( ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[UMIN]], [[UMIN1]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: 
[[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -325,16 +329,16 @@ define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %l ; CHECK-V8A-LABEL: @two_range_checks( ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[UMIN]], [[UMIN1]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -390,17 +394,16 @@ define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 ; CHECK-V8M-NEXT: 
loop.preheader: ; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) ; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) -; CHECK-V8M-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN1]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN2]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -426,17 +429,16 @@ define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) ; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) -; CHECK-V8A-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call 
i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN1]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN2]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -499,17 +501,19 @@ exit: ; preds = %guarded, %entry define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) #0 { ; CHECK-V8M-LABEL: @distinct_checks( ; CHECK-V8M-NEXT: loop.preheader: -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = freeze i32 [[LENGTH_2:%.*]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], 
i32 [[LENGTH_1:%.*]]) +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -518,7 +522,7 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt2: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -535,17 +539,19 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; ; CHECK-V8A-LABEL: @distinct_checks( ; CHECK-V8A-NEXT: loop.preheader: -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; 
CHECK-V8A-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = freeze i32 [[LENGTH_2:%.*]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -554,7 +560,7 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt2: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -612,13 +618,14 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; 
CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -627,7 +634,7 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8M-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt2: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -646,13 +653,14 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 
@llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -661,7 +669,7 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8A-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt2: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -723,15 +731,16 @@ define i32 @different_ivs(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8M-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) -; CHECK-V8M-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-V8M-NEXT: 
[[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-V8M-NEXT: [[TMP3:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], [[UMIN]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP4]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -751,15 +760,16 @@ define i32 @different_ivs(i32* %array, i32 %length, i32 %n) #0 { ; CHECK-V8A-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) -; CHECK-V8A-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-V8A-NEXT: [[TMP3:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], [[UMIN]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; 
CHECK-V8A-NEXT: br i1 [[TMP4]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 diff --git a/llvm/test/Transforms/IndVarSimplify/loop-predication.ll b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll index 6dd7e1b07b9391..aad72b7afa7b03 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-predication.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll @@ -9,13 +9,14 @@ define i32 @test1(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -67,7 +68,7 @@ define i32 @neg_store(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label 
[[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -124,7 +125,7 @@ define i32 @neg_implicit_exit(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -175,13 +176,14 @@ define i32 @test2(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -229,16 +231,16 @@ define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %l ; CHECK-LABEL: @two_range_checks( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMIN:%.*]] = 
call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) -; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[UMIN]], [[UMIN1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -294,17 +296,16 @@ define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) ; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) -; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) -; CHECK-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[UMIN2:%.*]] = call i32 
@llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN1]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -367,17 +368,19 @@ exit: ; preds = %guarded, %entry define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { ; CHECK-LABEL: @distinct_checks( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i32 [[LENGTH_2:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 
[[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -386,7 +389,7 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %le ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -444,13 +447,14 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -459,7 +463,7 @@ 
define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -520,7 +524,7 @@ define i32 @provably_taken(i32* %array, i32* %length.ptr) { ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 false, label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 false, label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -569,7 +573,7 @@ define i32 @unconditional_latch(i32* %a, i32 %length) { ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 false, label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 false, label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -601,7 +605,7 @@ define i32 @unconditional_latch_with_side_effect(i32* %a, i32 %length) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED:%.*]] ], [ 400, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof 
[[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -637,15 +641,16 @@ define i32 @different_ivs(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -707,7 +712,7 @@ define i32 @different_ivs2(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[GUARDED]] ], [ [[J_START]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[J]], [[LENGTH]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; 
CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -771,14 +776,14 @@ define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %length2, i32 %n) ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC]], [[LOOP]] ] ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 [[RESULT]] ; CHECK: guarded: ; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH2:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 diff --git a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll index 3402d69078ad3d..8ee770d96f8b02 100644 --- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll +++ b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll @@ -16,16 +16,17 @@ define void @test(i32* %base, i32 %limit, i32 %start) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], -1 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], [[START]] -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP0]], [[UMIN]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = freeze i32 [[TMP3]] +; CHECK-NEXT: 
[[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP4]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP0]], [[UMIN]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], [[UMIN]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 [[TMP4]], label [[CONTINUE:%.*]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[CONTINUE:%.*]], label [[FOR_END:%.*]] ; CHECK: continue: ; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: -; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -67,16 +68,17 @@ define void @test_false_edge(i32* %base, i32 %limit, i32 %start) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], -1 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], [[START]] -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP0]], [[UMIN]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = freeze i32 [[TMP3]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP4]], i32 [[TMP0]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP0]], [[UMIN]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], [[UMIN]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_END:%.*]], label [[CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[CONTINUE:%.*]] ; CHECK: continue: ; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: -; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: diff --git 
a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll index f71034c05c67fc..53b78be49c5c4c 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll @@ -10,6 +10,7 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) { ; CHECK-LABEL: @sum_2_at_with_int_conversion( ; CHECK-NEXT: at_with_int_conversion.exit12.peel: +; CHECK-NEXT: [[N_FR:%.*]] = freeze i64 [[N:%.*]] ; CHECK-NEXT: [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8 ; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1 @@ -22,20 +23,21 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) { ; CHECK-NEXT: [[START_I2_PEEL:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8 ; CHECK-NEXT: [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8 ; CHECK-NEXT: [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64 -; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL]] to i64 +; CHECK-NEXT: [[END_I4_PEEL_FR:%.*]] = freeze i64* [[END_I4_PEEL]] +; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL_FR]] to i64 ; CHECK-NEXT: [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]] ; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, i64* [[START_I]], align 4 ; CHECK-NEXT: [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4 ; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]] -; CHECK-NEXT: [[C_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp sgt i64 [[N_FR]], 0 ; CHECK-NEXT: br i1 [[C_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; 
CHECK: loop.preheader: -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I7_PEEL]], i64 [[SUB_I]]) -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[UMIN16:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[TMP0]]) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N_FR]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[SUB_I7_PEEL]]) +; CHECK-NEXT: [[UMIN16:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I]]) ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN16]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER22:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER21:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 @@ -47,38 +49,38 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI18:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 +; CHECK-NEXT: 
[[WIDE_LOAD18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 +; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 +; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[WIDE_LOAD19]], [[VEC_PHI18]] -; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[WIDE_LOAD20]] -; CHECK-NEXT: [[TMP16]] = add <2 x i64> [[TMP14]], [[WIDE_LOAD21]] +; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[WIDE_LOAD18]], [[VEC_PHI17]] +; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[WIDE_LOAD19]] +; CHECK-NEXT: [[TMP16]] = add <2 x i64> [[TMP14]], [[WIDE_LOAD20]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) -; CHECK-NEXT: br label [[LOOP_PREHEADER22]] +; CHECK-NEXT: br label [[LOOP_PREHEADER21]] ; CHECK: loop.preheader21: ; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; 
CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER22]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12]] ], [ [[SUM_PH]], [[LOOP_PREHEADER22]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER21]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12]] ], [ [[SUM_PH]], [[LOOP_PREHEADER21]] ] ; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]] ; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]] ; CHECK: error.i: @@ -98,7 +100,7 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) { ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LV_I]], [[SUM]] ; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD]], [[LV_I10]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[IV]], [[N]] +; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[IV]], [[N_FR]] ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[AT_WITH_INT_CONVERSION_EXIT12_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT12]] ] @@ -125,6 +127,7 @@ exit: define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) { ; CHECK-LABEL: @sum_3_at_with_int_conversion( ; CHECK-NEXT: at_with_int_conversion.exit24.peel: +; CHECK-NEXT: [[N_FR:%.*]] = freeze i64 [[N:%.*]] ; CHECK-NEXT: [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8 ; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1 @@ -140,27 +143,29 @@ define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) { ; CHECK-NEXT: [[START_I2_PEEL:%.*]] = load i64*, 
i64** [[GEP_START_I1]], align 8 ; CHECK-NEXT: [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8 ; CHECK-NEXT: [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64 -; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL]] to i64 +; CHECK-NEXT: [[END_I4_PEEL_FR:%.*]] = freeze i64* [[END_I4_PEEL]] +; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL_FR]] to i64 ; CHECK-NEXT: [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]] ; CHECK-NEXT: [[START_I14_PEEL:%.*]] = load i64*, i64** [[GEP_START_I13]], align 8 ; CHECK-NEXT: [[END_I16_PEEL:%.*]] = load i64*, i64** [[GEP_END_I15]], align 8 ; CHECK-NEXT: [[START_INT_I17_PEEL:%.*]] = ptrtoint i64* [[START_I14_PEEL]] to i64 -; CHECK-NEXT: [[END_INT_I18_PEEL:%.*]] = ptrtoint i64* [[END_I16_PEEL]] to i64 +; CHECK-NEXT: [[END_I16_PEEL_FR:%.*]] = freeze i64* [[END_I16_PEEL]] +; CHECK-NEXT: [[END_INT_I18_PEEL:%.*]] = ptrtoint i64* [[END_I16_PEEL_FR]] to i64 ; CHECK-NEXT: [[SUB_I19_PEEL:%.*]] = sub i64 [[END_INT_I18_PEEL]], [[START_INT_I17_PEEL]] ; CHECK-NEXT: [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4 ; CHECK-NEXT: [[LV_I22_PEEL:%.*]] = load i64, i64* [[START_I14_PEEL]], align 4 ; CHECK-NEXT: [[ADD_2_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]] ; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[ADD_2_PEEL]], [[LV_I22_PEEL]] -; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp sgt i64 [[N_FR]], 0 ; CHECK-NEXT: br i1 [[COND_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I19_PEEL]], i64 [[SUB_I7_PEEL]]) -; CHECK-NEXT: [[UMIN28:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I]]) -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[UMIN29:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN28]], i64 [[TMP0]]) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N_FR]], -1 +; CHECK-NEXT: 
[[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[SUB_I19_PEEL]]) +; CHECK-NEXT: [[UMIN28:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I7_PEEL]]) +; CHECK-NEXT: [[UMIN29:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN28]], i64 [[SUB_I]]) ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN29]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER37:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER36:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 @@ -172,46 +177,46 @@ define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI31:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI30:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 +; CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to 
<2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 +; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 +; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, i64* [[START_I14_PEEL]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64* [[TMP13]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <2 x i64>, <2 x i64>* [[TMP14]], align 4 +; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i64>, <2 x i64>* [[TMP14]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, i64* [[TMP13]], i64 2 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64* [[TMP15]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD36:%.*]] = load <2 x i64>, <2 x i64>* [[TMP16]], align 4 +; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <2 x i64>, <2 x i64>* [[TMP16]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[WIDE_LOAD32]], [[VEC_PHI31]] -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[TMP17]], [[WIDE_LOAD33]] -; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i64> [[TMP18]], [[WIDE_LOAD34]] -; CHECK-NEXT: [[TMP21]] = add <2 x i64> [[TMP19]], [[WIDE_LOAD35]] -; CHECK-NEXT: [[TMP22]] = add <2 x i64> [[TMP20]], [[WIDE_LOAD36]] +; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[WIDE_LOAD31]], [[VEC_PHI30]] +; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[TMP17]], [[WIDE_LOAD32]] +; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i64> [[TMP18]], [[WIDE_LOAD33]] +; CHECK-NEXT: [[TMP21]] = add <2 x i64> [[TMP19]], [[WIDE_LOAD34]] +; CHECK-NEXT: [[TMP22]] = add <2 x i64> [[TMP20]], [[WIDE_LOAD35]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP22]], [[TMP21]] ; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) -; CHECK-NEXT: br label [[LOOP_PREHEADER37]] +; CHECK-NEXT: br label [[LOOP_PREHEADER36]] ; CHECK: loop.preheader36: ; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER37]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24]] ], [ [[SUM_PH]], [[LOOP_PREHEADER37]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER36]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24]] ], [ [[SUM_PH]], [[LOOP_PREHEADER36]] ] ; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]] ; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]] ; CHECK: error.i: @@ -240,7 +245,7 @@ define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) { ; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[ADD_1]], [[LV_I10]] ; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD_2]], [[LV_I22]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[IV]], [[N]] +; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[IV]], [[N_FR]] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], 
[[AT_WITH_INT_CONVERSION_EXIT24_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT24]] ] From a86cfaea549799c1083b21f664489f131937c148 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 21 May 2022 15:28:24 +0100 Subject: [PATCH 161/908] [ARM] Add register-mask for tail returns The TC_RETURN/TCRETURNdi under Arm does not currently add the register-mask operand when tail folding, which leads to the register (like LR) not being 'used' by the return. This changes the code to unconditionally set the register mask on the call, as opposed to skipping it for tail calls. I don't believe this will currently alter any codegen, but should glue things together better post-frame lowering. It matches the AArch64 code better. Differential Revision: https://reviews.llvm.org/D125906 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 32 +++++++++---------- llvm/test/CodeGen/ARM/dbg-tcreturn.ll | 2 +- llvm/test/DebugInfo/ARM/instr-ref-tcreturn.ll | 2 +- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 88b4f2a04bb92d..d3e1b92c3b957b 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2786,25 +2786,23 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
- if (!isTailCall) { - const uint32_t *Mask; - const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); - if (isThisReturn) { - // For 'this' returns, use the R0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(MF, CallConv); - if (!Mask) { - // Set isThisReturn to false if the calling convention is not one that - // allows 'returned' to be modeled in this way, so LowerCallResult does - // not try to pass 'this' straight through - isThisReturn = false; - Mask = ARI->getCallPreservedMask(MF, CallConv); - } - } else + const uint32_t *Mask; + const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable + Mask = ARI->getThisReturnPreservedMask(MF, CallConv); + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; Mask = ARI->getCallPreservedMask(MF, CallConv); + } + } else + Mask = ARI->getCallPreservedMask(MF, CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - } + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); diff --git a/llvm/test/CodeGen/ARM/dbg-tcreturn.ll b/llvm/test/CodeGen/ARM/dbg-tcreturn.ll index 037fda116f38ec..1402e1206d39b5 100644 --- a/llvm/test/CodeGen/ARM/dbg-tcreturn.ll +++ b/llvm/test/CodeGen/ARM/dbg-tcreturn.ll @@ -12,7 +12,7 @@ target triple = "thumbv7-apple-ios7.0.0" ; CHECK-NEXT: $r0 = COPY %0 ; CHECK-NEXT: $r1 = COPY %1 ; CHECK-NEXT: DBG_VALUE $noreg, $noreg, !13, !DIExpression(), debug-location !16 -; CHECK-NEXT: TCRETURNdi &__divsi3, 0, implicit $sp, implicit $r0, implicit $r1 +; CHECK-NEXT: TCRETURNdi &__divsi3, 0, csr_ios, implicit $sp, implicit $r0, implicit $r1 
define i32 @test(i32 %a1, i32 %a2) !dbg !5 { entry: diff --git a/llvm/test/DebugInfo/ARM/instr-ref-tcreturn.ll b/llvm/test/DebugInfo/ARM/instr-ref-tcreturn.ll index f86e35061dc229..eeab02e59c06c2 100644 --- a/llvm/test/DebugInfo/ARM/instr-ref-tcreturn.ll +++ b/llvm/test/DebugInfo/ARM/instr-ref-tcreturn.ll @@ -26,7 +26,7 @@ target triple = "thumbv7-apple-ios7.0.0" ; CHECK: $r0 = COPY %0 ; CHECK-NEXT: $r1 = COPY %1 ; CHECK-NEXT: DBG_INSTR_REF 1, 0 -; CHECK-NEXT: TCRETURNdi &__divsi3, 0, implicit $sp, implicit $r0, implicit $r1 +; CHECK-NEXT: TCRETURNdi &__divsi3, 0, csr_ios, implicit $sp, implicit $r0, implicit $r1 declare i1 @ext() From a84896f2706697abb9178fedfad7e6e2a9f8da01 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 21 May 2022 16:01:33 +0100 Subject: [PATCH 162/908] [LV] Precommit test for PR55167. Test for #55167. --- .../pr55167-fold-tail-live-out.ll | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll new file mode 100644 index 00000000000000..f1bbe90c613b83 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s + +define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { +; CHECK-LABEL: @test( +; CHECK-NEXT: bb: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x 
i1> poison, i1 [[C_1:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT3]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 35902, i32 0>, [[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 6, [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[VEC_PHI]], <i32 10, i32 10> +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 20, i32 20> +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], <i32 1, i32 1> +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], <i1 true, i1 true> +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], <i1 true, i1 true> +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> <i32 9, i32 9>, <2 x i32> [[BROADCAST_SPLAT6]] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> <i32 9, i32 9>, <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI8:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP1]], <2 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[PREDPHI9]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP4]], <2 x i32>
[[PREDPHI8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI9]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 176, 176 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[PREDPHI7]], i32 1 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 182, [[MIDDLE_BLOCK]] ], [ 6, [[BB:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 35902, [[BB]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[V_2:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[P_2:%.*]], [[LOOP_LATCH]] ] +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[BODY_1:%.*]] +; CHECK: body.1: +; CHECK-NEXT: [[V_2_ADD:%.*]] = add i32 [[V_2]], 10 +; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[BODY_2:%.*]] +; CHECK: body.2: +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[V_2_ADD]], 20 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], 1 +; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[XOR]] +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[P_1:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ 9, [[BODY_1]] ], [ 9, [[BODY_2]] ] +; CHECK-NEXT: [[P_2]] = phi i32 [ [[V_2]], [[LOOP_HEADER]] ], [ [[V_2_ADD]], [[BODY_1]] ], [ [[ADD_2]], [[BODY_2]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], 181 +; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[E_1:%.*]] = phi i32 [ 
[[P_1]], [[LOOP_LATCH]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[E_2:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_1]], [[E_2]] +; CHECK-NEXT: ret i32 [[RES]] +; +bb: + br label %loop.header + +loop.header: + %iv = phi i32 [ 6, %bb ], [ %iv.next, %loop.latch ] + %v.2 = phi i32 [ 35902, %bb ], [ %p.2, %loop.latch ] + br i1 %c.1, label %loop.latch, label %body.1 + +body.1: + %v.2.add = add i32 %v.2, 10 + br i1 %c.2, label %loop.latch, label %body.2 + +body.2: ; preds = %bb11 + %add.1 = add i32 %v.2.add, 20 + %xor = xor i32 %a, 1 + %add.2 = add i32 %add.1, %xor + br label %loop.latch + +loop.latch: + %p.1 = phi i32 [ %iv, %loop.header ], [ 9, %body.1 ], [ 9, %body.2] + %p.2 = phi i32 [ %v.2, %loop.header ], [ %v.2.add, %body.1 ], [ %add.2, %body.2 ] + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp ult i32 %iv, 181 + br i1 %ec, label %loop.header, label %exit + +exit: + %e.1 = phi i32 [ %p.1, %loop.latch ] + %e.2 = phi i32 [ %p.2, %loop.latch ] + %res = add i32 %e.1, %e.2 + ret i32 %res +} From 3bebec6592a4dd8a1fb330382fc8dd4162c6bf44 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 21 May 2022 16:01:38 +0100 Subject: [PATCH 163/908] [VPlan] Model first exit values using VPLiveOut. This patch introduces a new VPLiveOut subclass of VPUser to model exit values explicitly. The initial version handles exit values that are neither part of induction or reduction chains nor first order recurrence phis. 
Fixes #51366, #54867, #55167, #55459 Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D123537 --- .../Transforms/Vectorize/LoopVectorize.cpp | 98 ++++++++++--------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 25 +++++ llvm/lib/Transforms/Vectorize/VPlan.h | 48 +++++++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 41 +------- llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + .../Transforms/Vectorize/VPlanVerifier.cpp | 7 ++ .../AArch64/scalable-avoid-scalarization.ll | 2 +- .../X86/invariant-load-gather.ll | 4 +- ...6-sunk-instruction-used-outside-of-loop.ll | 71 ++++++++++++++ .../LoopVectorize/extract-last-veclane.ll | 4 +- ...-order-recurrence-sink-replicate-region.ll | 2 + .../instruction-only-used-outside-of-loop.ll | 8 +- .../LoopVectorize/iv_outside_user.ll | 2 +- .../optimal-epilog-vectorization-liveout.ll | 4 +- .../pr55167-fold-tail-live-out.ll | 42 ++++---- .../LoopVectorize/vplan-printing.ll | 6 ++ 16 files changed, 245 insertions(+), 120 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 335940337b4de9..725899535c7dc6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -567,7 +567,8 @@ class InnerLoopVectorizer { /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, BasicBlock *VectorHeader); + BasicBlock *MiddleBlock, BasicBlock *VectorHeader, + VPlan &Plan); /// Handle all cross-iteration phis in the header. 
void fixCrossIterationPHIs(VPTransformState &State); @@ -584,13 +585,6 @@ class InnerLoopVectorizer { void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, VPTransformState &State); - /// Fixup the LCSSA phi nodes in the unique exit block. This simply - /// means we need to add the appropriate incoming value from the middle - /// block as exiting edges from the scalar epilogue loop (if present) are - /// already in place, and we exit the vector loop exclusively to the middle - /// block. - void fixLCSSAPHIs(VPTransformState &State); - /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. void sinkScalarOperands(Instruction *PredInst); @@ -3335,7 +3329,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, BasicBlock *MiddleBlock, - BasicBlock *VectorHeader) { + BasicBlock *VectorHeader, VPlan &Plan) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate // value (the value that feeds into the phi from the loop latch). @@ -3395,8 +3389,10 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, // In this case, if IV1 has an external use, we need to avoid adding both // "last value of IV1" and "penultimate value of IV2". So, verify that we // don't already have an incoming value for the middle block. 
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1) + if (PHI->getBasicBlockIndex(MiddleBlock) == -1) { PHI->addIncoming(I.second, MiddleBlock); + Plan.removeLiveOut(PHI); + } } } @@ -3700,20 +3696,30 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitBasicBlock(); Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); - // If we inserted an edge from the middle block to the unique exit block, - // update uses outside the loop (phis) to account for the newly inserted - // edge. - if (!Cost->requiresScalarEpilogue(VF)) { + if (Cost->requiresScalarEpilogue(VF)) { + // No edge from the middle block to the unique exit block has been inserted + // and there is nothing to fix from vector loop; phis should have incoming + // from scalar loop only. + Plan.clearLiveOuts(); + } else { + // If we inserted an edge from the middle block to the unique exit block, + // update uses outside the loop (phis) to account for the newly inserted + // edge. + // Fix-up external users of the induction variables. for (auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()), IVEndValues[Entry.first], LoopMiddleBlock, - VectorLoop->getHeader()); - - fixLCSSAPHIs(State); + VectorLoop->getHeader(), Plan); } + // Fix LCSSA phis not already fixed earlier. Extracts may need to be generated + // in the exit block, so update the builder. + State.Builder.SetInsertPoint(State.CFG.ExitBB->getFirstNonPHI()); + for (auto &KV : Plan.getLiveOuts()) + KV.second->fixPhi(Plan, State); + for (Instruction *PI : PredicatedInstructions) sinkScalarOperands(&*PI); @@ -3862,8 +3868,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence( // and thus no phis which needed updated. 
if (!Cost->requiresScalarEpilogue(VF)) for (PHINode &LCSSAPhi : LoopExitBlock->phis()) - if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi)) + if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi)) { LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); + State.Plan->removeLiveOut(&LCSSAPhi); + } } void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, @@ -4046,8 +4054,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // fixFirstOrderRecurrence for a more complete explaination of the logic. if (!Cost->requiresScalarEpilogue(VF)) for (PHINode &LCSSAPhi : LoopExitBlock->phis()) - if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) + if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) { LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); + State.Plan->removeLiveOut(&LCSSAPhi); + } // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. @@ -4092,35 +4102,6 @@ void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, } } -void InnerLoopVectorizer::fixLCSSAPHIs(VPTransformState &State) { - for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { - if (LCSSAPhi.getBasicBlockIndex(LoopMiddleBlock) != -1) - // Some phis were already hand updated by the reduction and recurrence - // code above, leave them alone. - continue; - - auto *IncomingValue = LCSSAPhi.getIncomingValue(0); - // Non-instruction incoming values will have only one value. - - VPLane Lane = VPLane::getFirstLane(); - if (isa(IncomingValue) && - !Cost->isUniformAfterVectorization(cast(IncomingValue), - VF)) - Lane = VPLane::getLastLaneForVF(VF); - - // Can be a loop invariant incoming value or the last scalar value to be - // extracted from the vectorized loop. - // FIXME: Should not rely on getVPValue at this point. 
- Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); - Value *lastIncomingValue = - OrigLoop->isLoopInvariant(IncomingValue) - ? IncomingValue - : State.get(State.Plan->getVPValue(IncomingValue, true), - VPIteration(UF - 1, Lane)); - LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock); - } -} - void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { // The basic block and loop containing the predicated instruction. auto *PredBB = PredInst->getParent(); @@ -8716,6 +8697,25 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, EB->appendRecipe(BranchOnCount); } +// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the +// original exit block. +static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, + VPBasicBlock *MiddleVPBB, Loop *OrigLoop, + VPlan &Plan) { + BasicBlock *ExitBB = OrigLoop->getUniqueExitBlock(); + // Only handle single-exit loops with unique exit blocks for now. + if (!ExitBB || !ExitBB->getSinglePredecessor()) + return; + + // Introduce VPUsers modeling the exit values. + for (PHINode &ExitPhi : ExitBB->phis()) { + Value *IncomingValue = + ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch()); + VPValue *V = Plan.getOrAddVPValue(IncomingValue, true); + Plan.addLiveOut(&ExitPhi, V); + } +} + VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &DeadInstructions, const MapVector &SinkAfter) { @@ -8895,6 +8895,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // After here, VPBB should not be used. 
VPBB = nullptr; + addUsersInExitBlock(HeaderVPBB, MiddleVPBB, OrigLoop, *Plan); + assert(isa(Plan->getVectorLoopRegion()) && !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index e821615c886c07..dff6fac4291de5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -647,6 +647,15 @@ bool VPRecipeBase::mayHaveSideEffects() const { } } +void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { + auto Lane = VPLane::getLastLaneForVF(State.VF); + VPValue *ExitValue = getOperand(0); + if (Plan.isUniformAfterVectorization(ExitValue)) + Lane = VPLane::getFirstLane(); + Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), + State.Builder.GetInsertBlock()); +} + void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && @@ -1065,6 +1074,17 @@ void VPlan::print(raw_ostream &O) const { O << '\n'; Block->print(O, "", SlotTracker); } + + if (!LiveOuts.empty()) + O << "\n"; + for (auto &KV : LiveOuts) { + O << "Live-out "; + KV.second->getPhi()->printAsOperand(O); + O << " = "; + KV.second->getOperand(0)->printAsOperand(O, SlotTracker); + O << "\n"; + } + O << "}\n"; } @@ -1078,6 +1098,11 @@ LLVM_DUMP_METHOD void VPlan::dump() const { print(dbgs()); } #endif +void VPlan::addLiveOut(PHINode *PN, VPValue *V) { + assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists"); + LiveOuts.insert({PN, new VPLiveOut(PN, V)}); +} + void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB, BasicBlock *LoopLatchBB, BasicBlock *LoopExitBB) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0c2490f68a932f..5be38dce53dbd3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ 
b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -664,6 +664,32 @@ class VPBlockBase { #endif }; +/// A value that is used outside the VPlan. The operand of the user needs to be +/// added to the associated LCSSA phi node. +class VPLiveOut : public VPUser { + PHINode *Phi; + +public: + VPLiveOut(PHINode *Phi, VPValue *Op) + : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {} + + /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply + /// means we need to add the appropriate incoming value from the middle + /// block as exiting edges from the scalar epilogue loop (if present) are + /// already in place, and we exit the vector loop exclusively to the middle + /// block. + void fixPhi(VPlan &Plan, VPTransformState &State); + + /// Returns true if the VPLiveOut uses scalars of operand \p Op. + bool usesScalars(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + + PHINode *getPhi() const { return Phi; } +}; + /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the the VPValues it defines through VPDef /// and is responsible for deleting its defined values. Single-value @@ -2483,6 +2509,9 @@ class VPlan { /// mapping cannot be used any longer, because it is stale. bool Value2VPValueEnabled = true; + /// Values used outside the plan. 
+ DenseMap LiveOuts; + public: VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) { if (Entry) @@ -2490,6 +2519,8 @@ class VPlan { } ~VPlan() { + clearLiveOuts(); + if (Entry) { VPValue DummyValue; for (VPBlockBase *Block : depth_first(Entry)) @@ -2658,6 +2689,23 @@ class VPlan { return cast(&*EntryVPBB->begin()); } + void addLiveOut(PHINode *PN, VPValue *V); + + void clearLiveOuts() { + for (auto &KV : LiveOuts) + delete KV.second; + LiveOuts.clear(); + } + + void removeLiveOut(PHINode *PN) { + delete LiveOuts[PN]; + LiveOuts.erase(PN); + } + + const DenseMap &getLiveOuts() const { + return LiveOuts; + } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4c77d56e28ac2c..85befe26e98a0b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -363,50 +363,15 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { } } -// Check for live-out users currently not modeled in VPlan. -// Note that exit values of inductions are generated independent of -// the recipe. This means VPWidenIntOrFpInductionRecipe & -// VPScalarIVStepsRecipe can be removed, independent of uses outside -// the loop. -// TODO: Remove once live-outs are modeled in VPlan. -static bool hasOutsideUser(Instruction &I, Loop &OrigLoop) { - return any_of(I.users(), [&OrigLoop](User *U) { - if (!OrigLoop.contains(cast(U))) - return true; - - // Look through single-value phis in the loop, as they won't be modeled in - // VPlan and may be used outside the loop. 
- if (auto *PN = dyn_cast(U)) - if (PN->getNumIncomingValues() == 1) - return hasOutsideUser(*PN, OrigLoop); - - return false; - }); -} - void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) { VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); - // Check if \p R is used outside the loop, if required. - // TODO: Remove once live-outs are modeled in VPlan. - auto HasUsersOutsideLoop = [&OrigLoop](VPRecipeBase &R) { - // Exit values for induction recipes are generated independent of the - // recipes, expect for truncated inductions. Hence there is no need to check - // for users outside the loop for them. - if (isa(&R) || - (isa(&R) && - !isa(R.getUnderlyingInstr()))) - return false; - return R.getUnderlyingInstr() && - hasOutsideUser(*R.getUnderlyingInstr(), OrigLoop); - }; // Remove dead recipes in header block. The recipes in the block are processed // in reverse order, to catch chains of dead recipes. // TODO: Remove dead recipes across whole plan. for (VPRecipeBase &R : make_early_inc_range(reverse(*Header))) { - if (R.mayHaveSideEffects() || - any_of(R.definedValues(), - [](VPValue *V) { return V->getNumUsers() > 0; }) || - HasUsersOutsideLoop(R)) + if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { + return V->getNumUsers() > 0; + })) continue; R.eraseFromParent(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index e61af31f184222..44c2212d72ffb4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -208,6 +208,7 @@ class VPUser { /// Subclass identifier (for isa/dyn_cast). enum class VPUserID { Recipe, + LiveOut, // TODO: Currently VPUsers are used in VPBlockBase, but in the future the // only VPUsers should either be recipes or live-outs. 
Block diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 253e343ea643fe..fe9eb77901cf42 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -202,5 +202,12 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { return false; } } + + for (auto &KV : Plan.getLiveOuts()) + if (KV.second->getNumOperands() != 1) { + errs() << "live outs must have a single operand\n"; + return false; + } + return true; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 6fa67d888fb26a..6197bcd2ba8b22 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -52,11 +52,11 @@ define void @test_no_scalarization(i64* %a, i32 %idx, i32 %n) #0 { ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1 ; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP12]], i32 [[TMP22]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[L_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IDX]], [[L_ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 692119d1e947e2..e46cf5a20d56f5 100644 --- 
a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -41,8 +41,8 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP3]], <16 x i32> undef), !alias.scope !3 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i64 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX6]], 8 @@ -68,8 +68,8 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT16]], zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER19:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT16]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef) ; CHECK-NEXT: [[PREDPHI20:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER19]], <8 x i32> -; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC11]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI20]], i64 7 +; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC11]] ; CHECK-NEXT: br i1 [[CMP_N12]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll new file mode 100644 index 00000000000000..34546a764a77d0 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -0,0 +1,71 @@ +; RUN: opt -passes=loop-vectorize -mtriple=x86_64-unknown-linux -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s + +define i32* @test(i32* noalias %src, i32* noalias %dst) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %vector.body ], [ [[TMP8]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.if1: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[TMP2]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.continue2: +; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> zeroinitializer, <2 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], <2 x i32>* [[TMP16]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label %vector.body +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label %exit, label %scalar.ph +; CHECK: exit: +; CHECK-NEXT: [[GEP_LCSSA:%.*]] = phi i32* [ %gep.src, %loop.latch ], [ [[TMP2]], %middle.block ] +; CHECK-NEXT: ret i32* [[GEP_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv + %cmp.1 = icmp eq i64 %iv, 0 + br i1 %cmp.1, label %loop.latch, label %then + +then: + %l = load i32, i32* %gep.src, align 4 + br label %loop.latch + +loop.latch: + %m = phi i32 [ %l, %then ], [ 0, %loop.header ] + %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv + store i32 %m, i32* %gep.dst, align 4 + %iv.next = add nsw i64 %iv, 1 + %cmp.2 = icmp slt i64 %iv.next, 1000 + br i1 %cmp.2, label %loop.header, label %exit + +exit: + %gep.lcssa = phi i32* [ %gep.src, %loop.latch ] + ret i32* %gep.lcssa +} diff --git 
a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll index 28d9c497a4bd30..5d683f7cce7ca4 100644 --- a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll +++ b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll @@ -22,8 +22,8 @@ define void @inv_store_last_lane(i32* noalias nocapture %a, i32* noalias nocaptu ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -85,8 +85,8 @@ define float @ret_last_lane(float* noalias nocapture %a, float* noalias nocaptur ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i64 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 9a9d46f1c7422e..fb8f716477f94a 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -225,6 +225,8 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i32 %res = ir<%and.red.next> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll index 7896b94eb1b20b..11d06a139dfa9a 100644 --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -20,8 +20,8 @@ define i32 @one_direct_branch(i32* %src) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -80,8 +80,8 @@ define i32 @two_direct_branch(i32* %src) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -154,8 +154,8 @@ define i32 @cond_branch(i32 %a, i32* %src) { ; 
CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -218,8 +218,8 @@ define i32 @optimizable_trunc_used_outside() { ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index b5f96cd593d988..9a019505ac187b 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -206,7 +206,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(i64* %ptr) { ; CHECK-LABEL: vector.body: ; CHECK-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.+]], %vector.body ] ; VEC-NEXT: [[VEC_IND:%.+]] = phi <2 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ] -; CHECK-NEXT: [[IV_0:%.+]] = add i64 [[INDEX]], 0 +; CHECK: [[IV_0:%.+]] = add i64 [[INDEX]], 0 ; VEC-NOT: add i64 [[INDEX]], 1 ; CHECK-NOT: [[IV_2_0:%.+]] = add i32 %offset.idx, 0 ; CHECK-LABEL: scalar.ph: diff --git 
a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll index f1a7817d016710..708cf790f8c86e 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll @@ -42,8 +42,8 @@ define signext i32 @f1(i32* noalias %A, i32* noalias %B, i32 signext %n) { ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VF-TWO-CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF-TWO-CHECK: middle.block: -; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1 +; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; VF-TWO-CHECK: vec.epilog.iter.check: ; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] @@ -70,8 +70,8 @@ define signext i32 @f1(i32* noalias %A, i32* noalias %B, i32 signext %n) { ; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: -; VF-TWO-CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 +; VF-TWO-CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], 
[[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index f1bbe90c613b83..351e2ead2b3ba6 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -15,30 +15,28 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 6, [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> , <2 x i32> [[BROADCAST_SPLAT6]] -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> , <2 x i32> [[PREDPHI]] -; CHECK-NEXT: [[PREDPHI8:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP1]], <2 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[PREDPHI9]] = select <2 x i1> 
[[TMP8]], <2 x i32> [[TMP4]], <2 x i32> [[PREDPHI8]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[PREDPHI7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> , <2 x i32> [[VEC_IND]] +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> , <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP3]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI9]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[PREDPHI5]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI7]]) ; 
CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 176, 176 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[PREDPHI7]], i32 1 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 182, [[MIDDLE_BLOCK]] ], [ 6, [[BB:%.*]] ] @@ -63,7 +61,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], 181 ; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[E_1:%.*]] = phi i32 [ [[P_1]], [[LOOP_LATCH]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[E_1:%.*]] = phi i32 [ [[P_1]], [[LOOP_LATCH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[E_2:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_1]], [[E_2]] ; CHECK-NEXT: ret i32 [[RES]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 2ec52e20c897af..3f1a824c71d9b9 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -127,6 +127,8 @@ define float @print_reduction(i64 %n, float* noalias %y) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out float %red.next.lcssa = ir<%red.next> ; CHECK-NEXT: } ; entry: @@ -363,6 +365,8 @@ define float @print_fmuladd_strict(float* %a, float* %b, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out float %muladd.lcssa = ir<%muladd> ; CHECK-NEXT:} entry: @@ -552,6 +556,8 @@ define i32 @print_exit_value(i8* %ptr, i32 %off) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i32 %lcssa = ir<%add> ; CHECK-NEXT: } ; entry: From f3428dafdc553d4354062863983a3bbe712e1266 Mon Sep 17 00:00:00 
2001 From: Lang Hames Date: Fri, 20 May 2022 17:36:47 -0700 Subject: [PATCH 164/908] [ORC] Add a ~ExecutionSession destructor to verify that endSession was called. Clients are required to call ExecutionSession::endSession before destroying the ExecutionSession. Failure to do so can lead to memory leaks and other difficult to debug issues. Enforcing this requirement by assertion makes it easy to spot or debug situations where the contract was not followed. --- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 6 +++++- llvm/lib/ExecutionEngine/Orc/Core.cpp | 6 ++++++ llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp | 6 ++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index b3c85b53eda3a5..26f15c8f17490b 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -1389,8 +1389,12 @@ class ExecutionSession { /// object. ExecutionSession(std::unique_ptr EPC); + /// Destroy an ExecutionSession. Verifies that endSession was called prior to + /// destruction. + ~ExecutionSession(); + /// End the session. Closes all JITDylibs and disconnects from the - /// executor. + /// executor. Clients must call this method before destroying the session. Error endSession(); /// Get the ExecutorProcessControl object associated with this diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index e13028196c84c3..90807fce576ec8 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -1860,6 +1860,12 @@ ExecutionSession::ExecutionSession(std::unique_ptr EPC) this->EPC->ES = this; } +ExecutionSession::~ExecutionSession() { + // You must call endSession prior to destroying the session. + assert(!SessionOpen && + "Session still open. 
Did you forget to call endSession?"); +} + Error ExecutionSession::endSession() { LLVM_DEBUG(dbgs() << "Ending ExecutionSession " << this << "\n"); diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index 2344878417f26f..824988532c42d5 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "OrcTestCommon.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Config/llvm-config.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" @@ -1410,6 +1411,7 @@ TEST(JITDylibTest, GetDFSLinkOrderTree) { // form a tree. ExecutionSession ES{std::make_unique()}; + auto _ = make_scope_exit([&]() { cantFail(ES.endSession()); }); auto &LibA = ES.createBareJITDylib("A"); auto &LibB = ES.createBareJITDylib("B"); @@ -1451,6 +1453,8 @@ TEST(JITDylibTest, GetDFSLinkOrderDiamond) { // contain a diamond. ExecutionSession ES{std::make_unique()}; + auto _ = make_scope_exit([&]() { cantFail(ES.endSession()); }); + auto &LibA = ES.createBareJITDylib("A"); auto &LibB = ES.createBareJITDylib("B"); auto &LibC = ES.createBareJITDylib("C"); @@ -1473,6 +1477,8 @@ TEST(JITDylibTest, GetDFSLinkOrderCycle) { // contain a cycle. ExecutionSession ES{std::make_unique()}; + auto _ = make_scope_exit([&]() { cantFail(ES.endSession()); }); + auto &LibA = ES.createBareJITDylib("A"); auto &LibB = ES.createBareJITDylib("B"); auto &LibC = ES.createBareJITDylib("C"); From 8bfccb963b3519393c0266b452a115a4bb46d207 Mon Sep 17 00:00:00 2001 From: owenca Date: Sat, 21 May 2022 10:28:54 -0700 Subject: [PATCH 165/908] [clang-format] Fix an infinite loop in parseJavaEnumBody() Fixes #55623. 
--- clang/lib/Format/UnwrappedLineParser.cpp | 2 +- clang/unittests/Format/FormatTestJava.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index be081a9189600c..18f476aca01ae0 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -3509,7 +3509,7 @@ void UnwrappedLineParser::parseJavaEnumBody() { ++Line->Level; // Parse the enum constants. - while (FormatTok) { + while (FormatTok->isNot(tok::eof)) { if (FormatTok->is(tok::l_brace)) { // Parse the constant's class body. parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index e778836e0fc9a2..03e16ae0a00d3a 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -197,6 +197,8 @@ TEST_F(FormatTestJava, EnumDeclarations) { " CDE;\n" " void f() {}\n" "}"); + verifyFormat("enum SomeThing {\n" + " void f() {}"); verifyFormat("enum SomeThing {\n" " ABC(1, \"ABC\"),\n" " CDE(2, \"CDE\");\n" From df46fb40557a14807dd508af32251ceb1cab8b86 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sat, 21 May 2022 14:59:16 -0400 Subject: [PATCH 166/908] Test C DR conformance (part one of many) This starts to fill out the C DR status page with information determined from tests. It also starts to add some test coverage for the DRs we can add tests for (some are difficult as not all C DRs involve questions about code and some DRs are about the behavior of linking multiple TUs together). Note: there is currently no automation for filling out the HTML page from test coverage like there is for the C++ DRs, but this commit attempts to use a similar comment style in case we want to add such a script in the future. 
--- clang/test/C/drs/dr011.c | 18 +++++ clang/test/C/drs/dr0xx.c | 161 +++++++++++++++++++++++++++++++++++++ clang/www/c_dr_status.html | 42 +++++----- 3 files changed, 200 insertions(+), 21 deletions(-) create mode 100644 clang/test/C/drs/dr011.c create mode 100644 clang/test/C/drs/dr0xx.c diff --git a/clang/test/C/drs/dr011.c b/clang/test/C/drs/dr011.c new file mode 100644 index 00000000000000..6bb7d4785cfa3e --- /dev/null +++ b/clang/test/C/drs/dr011.c @@ -0,0 +1,18 @@ +/* RUN: %clang_cc1 -std=c89 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c99 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c11 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c17 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c2x -emit-llvm -o - %s | FileCheck %s + */ + +/* WG14 DR011: yes + * Merging of declarations for linked identifier + * + * Note, more of this DR is tested in dr0xx.c + */ + +int i[10]; +int j[]; + +// CHECK: @i = {{.*}} global [10 x i32] zeroinitializer +// CHECK-NEXT: @j = {{.*}} global [1 x i32] zeroinitializer diff --git a/clang/test/C/drs/dr0xx.c b/clang/test/C/drs/dr0xx.c new file mode 100644 index 00000000000000..cac8b62483ac26 --- /dev/null +++ b/clang/test/C/drs/dr0xx.c @@ -0,0 +1,161 @@ +/* RUN: %clang_cc1 -std=c89 -verify=expected,c89 -pedantic -Wno-declaration-after-statement -Wno-c11-extensions %s + RUN: %clang_cc1 -std=c99 -verify -pedantic -Wno-c11-extensions %s + RUN: %clang_cc1 -std=c11 -verify -pedantic %s + RUN: %clang_cc1 -std=c17 -verify -pedantic %s + RUN: %clang_cc1 -std=c2x -verify -pedantic %s + */ + +/* The following are DRs which do not require tests to demonstrate + * conformance or nonconformance. + * + * WG14 DR005: yes + * May a conforming implementation define and recognize a pragma which would + * change the semantics of the language? + * + * WG14 DR008: yes + * Can a conforming C compiler to perform dead-store elimination? 
+ * + * WG14 DR020: yes + * Is a compiler which allows the Relaxed Ref/Def linkage model to be + * considered a conforming compiler? + * + * WG14 DR025: yes + * What is meant by 'representable floating-point value?' + * + * WG14 DR026: yes + * Can a strictly conforming program contain a string literal with '$' or '@'? + * + * WG14 DR033: yes + * Conformance questions around 'shall' violations outside of constraints + * sections + */ + + +/* WG14 DR004: yes + * Are multiple definitions of unused identifiers with external linkage + * permitted? + */ +int dr004(void) {return 0;} /* expected-note {{previous definition is here}} */ +int dr004(void) {return 1;} /* expected-error {{redefinition of 'dr004'}} */ + +/* WG14 DR007: yes + * Are declarations of the form struct-or-union identifier ; permitted after + * the identifier tag has already been declared? + */ +struct dr007_a; +struct dr007_a; +struct dr007_a {int a;}; +struct dr007_a; +struct dr007_b {int a;}; +struct dr007_b; + +/* WG14 DR009: no + * Use of typedef names in parameter declarations + */ +typedef int dr009_t; +void dr009_f(int dr009_t); + +/* WG14 DR010: yes + * Is a typedef to an incomplete type legal? 
+ */ +typedef int dr010_t[]; +dr010_t dr010_a = {1}; +dr010_t dr010_b = {1, 2}; +int dr010_c = sizeof(dr010_t); /* expected-error {{invalid application of 'sizeof' to an incomplete type 'dr010_t' (aka 'int[]')}} */ + +/* WG14 DR011: yes + * Merging of declarations for linked identifier + * + * Note: more of this DR is tested in dr011.c + */ +static int dr011_a[]; /* expected-warning {{tentative array definition assumed to have one element}} */ +void dr011(void) { + extern int i[]; + { + /* a different declaration of the same object */ + extern int i[10]; + (void)sizeof(i); + _Static_assert(sizeof(i) == 10 * sizeof(int), "fail"); + } + (void)sizeof(i); /* expected-error {{invalid application of 'sizeof' to an incomplete type 'int[]'}} */ + + extern int dr011_a[10]; + (void)sizeof(dr011_a); + _Static_assert(sizeof(dr011_a) == 10 * sizeof(int), "fail"); + + extern int j[10]; + { + extern int j[]; + (void)sizeof(j); + _Static_assert(sizeof(j) == 10 * sizeof(int), "fail"); + } +} + +/* WG14 DR012: yes + * Is it valid to take the address of a dereferenced void pointer? + */ +void dr012(void *p) { + /* The behavior changed between C89 and C99. */ + (void)&*p; /* c89-warning {{ISO C forbids taking the address of an expression of type 'void'}} */ +} + +/* WG14 DR013: yes + * Compatible and composite function types + */ +int dr013(int a[4]); +int dr013(int a[5]); +int dr013(int *a); + +struct dr013_t { +struct dr013_t *p; +} dr013_v[sizeof(struct dr013_t)]; + +/* WG14 DR015: yes + * What is the promoted type of a plain int bit-field? 
+ */ +void dr015(void) { + struct S { + int small_int_bitfield : 16; + unsigned int small_uint_bitfield : 16; + int int_bitfield : 32; + unsigned int uint_bitfield : 32; + } s; + _Static_assert(__builtin_types_compatible_p(__typeof__(+s.small_int_bitfield), int), "fail"); + _Static_assert(__builtin_types_compatible_p(__typeof__(+s.small_uint_bitfield), int), "fail"); + _Static_assert(__builtin_types_compatible_p(__typeof__(+s.int_bitfield), int), "fail"); + _Static_assert(__builtin_types_compatible_p(__typeof__(+s.uint_bitfield), unsigned int), "fail"); +} + +/* WG14 DR027: yes + * Can there be characters in the character set that are not in the required + * source character set? + */ +#define THIS$AND$THAT(a, b) ((a) + (b)) /* expected-warning 2 {{'$' in identifier}} */ +_Static_assert(THIS$AND$THAT(1, 1) == 2, "fail"); /* expected-warning 2 {{'$' in identifier}} */ + + +/* WG14 DR029: no + * Do two types have to have the same tag to be compatible? + * Note: the rule changed in C99 to be different than the resolution to DR029, + * so it's not clear there's value in implementing this DR. + */ +_Static_assert(__builtin_types_compatible_p(struct S { int a; }, union U { int a; }), "fail"); /* expected-error {{static_assert failed due to requirement '__builtin_types_compatible_p(struct S, union U)' "fail"}} */ + +/* WG14 DR031: yes + * Can constant expressions overflow? + */ +void dr031(int i) { + switch (i) { + case __INT_MAX__ + 1: break; /* expected-warning {{overflow in expression; result is -2147483648 with type 'int'}} */ + case __INT_MAX__ + 2ul: break; + case (__INT_MAX__ * 4) / 4: break; /* expected-warning {{overflow in expression; result is -4 with type 'int'}} */ + } +} + +/* WG14 DR032: no + * Must implementations diagnose extensions to the constant evaluation rules? + * + * This should issue a diagnostic because a constant-expression is a + * conditional-expression, which excludes the comma operator. 
+ */ +int dr032 = (1, 2); diff --git a/clang/www/c_dr_status.html b/clang/www/c_dr_status.html index b92cf60604955f..05d85790cb9205 100644 --- a/clang/www/c_dr_status.html +++ b/clang/www/c_dr_status.html @@ -70,16 +70,16 @@

C defect report implementation status

4 NAD Are multiple definitions of unused identifiers with external linkage permitted? - Unknown + Yes 5 - C89 + NAD May a conforming implementation define and recognize a pragma which would change the semantics of the language? - Unknown + Yes - 7 + 6 C89 It is unclear how the strtoul function behaves when presented with a subject sequence that begins with a minus sign N/A @@ -88,43 +88,43 @@

C defect report implementation status

7 NAD Are declarations of the form struct-or-union identifier ; permitted after the identifier tag has already been declared? - Unknown + Yes 8 NAD Can a conforming C compiler to perform dead-store elimination? - Unknown + Yes 9 C89 Use of typedef names in parameter declarations - Unknown + No 10 NAD Is a typedef to an incomplete type legal? - Unknown + Yes 11 C89 Merging of declarations for linked identifier - Unknown + Yes 12 NAD - Questions on the validity of various expressions - Unknown + Is it valid to take the address of a dereferenced void pointer? + Yes 13 C89 Compatible and composite function types - Unknown + Yes 14 @@ -136,7 +136,7 @@

C defect report implementation status

15 NAD What is the promoted type of a plain int bit-field? - Unknown + Yes 16 @@ -166,7 +166,7 @@

C defect report implementation status

20 NAD Is a compiler which allows the Relaxed Ref/Def linkage model to be considered a conforming compiler? - Unknown + Yes 21 @@ -196,19 +196,19 @@

C defect report implementation status

25 NAD What is meant by 'representable floating-point value?' - Unknown + Yes 26 NAD Can a strictly conforming program contain a string literal with '$' or '@'? - Unknown + Yes 27 C89 Can there be characters in the character set that are not in the required source character set? - Unknown + Yes 28 @@ -220,7 +220,7 @@

C defect report implementation status

29 NAD Do two types have to have the same tag to be compatible? - Unknown + No 30 @@ -232,19 +232,19 @@

C defect report implementation status

31 NAD Can constant expressions overflow? - Unknown + Yes 32 NAD Must implementations diagnose extensions to the constant evaluation rules? - Unknown + No 33 NAD Conformance questions around 'shall' violations outside of constraints sections - Unknown + Yes 34 From 8eebb47f9706b2016f2f6b772955066bf07249d7 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sun, 22 May 2022 01:20:50 +0530 Subject: [PATCH 167/908] [MLIR][Presburger] Update equality and subset checks asserts in IntegerRelation This patch updates asserts in IntegerRelation::isEqual and IntegerRelation::isCompatible to allow these functions when number of local identifiers are different. This change is done to reflect the algorithmic changes done before this patch. --- mlir/lib/Analysis/Presburger/IntegerRelation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index 14f05e0513f824..73ae0ebd9873e3 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -61,12 +61,12 @@ IntegerRelation IntegerRelation::intersect(IntegerRelation other) const { } bool IntegerRelation::isEqual(const IntegerRelation &other) const { - assert(space.isEqual(other.getSpace()) && "Spaces must be equal."); + assert(space.isCompatible(other.getSpace()) && "Spaces must be compatible."); return PresburgerRelation(*this).isEqual(PresburgerRelation(other)); } bool IntegerRelation::isSubsetOf(const IntegerRelation &other) const { - assert(space.isEqual(other.getSpace()) && "Spaces must be equal."); + assert(space.isCompatible(other.getSpace()) && "Spaces must be compatible."); return PresburgerRelation(*this).isSubsetOf(PresburgerRelation(other)); } From aeb19817d66f1a15754163c7f48e01e9ebdd6d45 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 21 May 2022 20:57:49 +0100 Subject: [PATCH 168/908] Revert "[SLP]Do not emit extract elements for 
insertelements users, replace with shuffles directly." This reverts commit fc9c59c355cb255446e571b4515b5e41a76503c4. The patch triggers an assertion when building SPEC on X86. Reduced reproducer shared at D107966. Also reverts follow-up commit 11a09af76d11ad5a9f1f95b561112af17ff81f80. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 308 ------------------ .../SLPVectorizer/AArch64/loadorder.ll | 8 +- .../AArch64/transpose-inseltpoison.ll | 2 +- .../SLPVectorizer/AArch64/transpose.ll | 2 +- .../SLPVectorizer/AArch64/tsc-s116.ll | 18 +- .../X86/buildvector-same-lane-insert.ll | 19 +- .../SLPVectorizer/X86/buildvector-shuffle.ll | 17 +- .../SLPVectorizer/X86/cmp-as-alternate-ops.ll | 16 +- .../SLPVectorizer/X86/crash_7zip.ll | 14 +- .../SLPVectorizer/X86/crash_bullet3.ll | 10 +- .../SLPVectorizer/X86/crash_cmpop.ll | 2 +- .../X86/crash_exceed_scheduling.ll | 2 +- .../SLPVectorizer/X86/crash_lencod.ll | 9 +- .../X86/crash_scheduling-inseltpoison.ll | 13 +- .../SLPVectorizer/X86/crash_scheduling.ll | 13 +- .../SLPVectorizer/X86/crash_smallpt.ll | 15 +- .../SLPVectorizer/X86/extractelement.ll | 24 +- .../SLPVectorizer/X86/horizontal-list.ll | 11 +- .../SLPVectorizer/X86/horizontal-minmax.ll | 17 +- ...nsert-element-build-vector-inseltpoison.ll | 27 +- .../X86/insert-element-build-vector.ll | 27 +- .../SLPVectorizer/X86/insert-shuffle.ll | 14 +- .../X86/jumbled-load-multiuse.ll | 7 +- .../SLPVectorizer/X86/jumbled-load.ll | 17 +- .../SLPVectorizer/X86/jumbled_store_crash.ll | 15 +- .../X86/load-merge-inseltpoison.ll | 5 +- .../SLPVectorizer/X86/load-merge.ll | 5 +- .../Transforms/SLPVectorizer/X86/lookahead.ll | 32 +- .../SLPVectorizer/X86/ordering-bug.ll | 7 +- llvm/test/Transforms/SLPVectorizer/X86/phi.ll | 60 ++-- .../SLPVectorizer/X86/pr42022-inseltpoison.ll | 14 +- .../Transforms/SLPVectorizer/X86/pr42022.ll | 14 +- .../SLPVectorizer/X86/reduction-same-vals.ll | 16 +- .../X86/remark_extract_broadcast.ll | 10 +- .../X86/reorder_diamond_match.ll | 25 +- 
.../X86/reordered-top-scalars.ll | 15 +- .../X86/vec_list_bias-inseltpoison.ll | 12 +- .../SLPVectorizer/X86/vec_list_bias.ll | 12 +- .../X86/vectorize-widest-phis.ll | 16 +- 39 files changed, 350 insertions(+), 520 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ab556d699f1633..e366044353b1f9 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6668,23 +6668,6 @@ static bool isFirstInsertElement(const InsertElementInst *IE1, llvm_unreachable("Two different buildvectors not expected."); } -namespace { -/// Returns incoming Value *, if the requested type is Value * too, or a default -/// value, otherwise. -struct ValueSelect { - template - static typename std::enable_if::value, Value *>::type - get(Value *V) { - return V; - } - template - static typename std::enable_if::value, U>::type - get(Value *) { - return U(); - } -}; -} // namespace - /// Does the analysis of the provided shuffle masks and performs the requested /// actions on the vectors with the given shuffle masks. It tries to do it in /// several steps. @@ -6717,10 +6700,6 @@ static T *performExtractsShuffleAction( else Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; } - auto *V = ValueSelect::get(Base); - (void)V; - assert((!V || GetVF(V) == Mask.size()) && - "Expected base vector of VF number of elements."); Prev = Action(Mask, {nullptr, Res.first}); } else if (ShuffleMask.size() == 1) { // Base is undef and only 1 vector is shuffled - perform the action only for @@ -8126,17 +8105,6 @@ Value *BoUpSLP::vectorizeTree() { return vectorizeTree(ExternallyUsedValues); } -namespace { -/// Data type for handling buildvector sequences with the reused scalars from -/// other tree entries. -struct ShuffledInsertData { - /// List of insertelements to be replaced by shuffles. 
- SmallVector InsertElements; - /// The parent vectors and shuffle mask for the given list of inserts. - MapVector> ValueMasks; -}; -} // namespace - Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. @@ -8170,9 +8138,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); - SmallVector ShuffledInserts; - // Maps vector instruction to original insertelement instruction - DenseMap VectorToInsertElement; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -8212,8 +8177,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { assert(isa(Scalar->getType()) && isa(Scalar) && "In-tree scalar of vector type is not insertelement?"); - auto *IE = cast(Scalar); - VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; // If User == nullptr, the Scalar is used as extra arg. Generate @@ -8242,64 +8205,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { continue; } - if (auto *VU = dyn_cast(User)) { - if (!Scalar->getType()->isVectorTy()) { - if (auto *FTy = dyn_cast(User->getType())) { - Optional InsertIdx = getInsertIndex(VU); - if (InsertIdx) { - auto *It = - find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) { - // Checks if 2 insertelements are from the same buildvector. 
- InsertElementInst *VecInsert = Data.InsertElements.front(); - return areTwoInsertFromSameBuildVector(VU, VecInsert); - }); - unsigned Idx = *InsertIdx; - if (It == ShuffledInserts.end()) { - (void)ShuffledInserts.emplace_back(); - It = std::next(ShuffledInserts.begin(), - ShuffledInserts.size() - 1); - SmallVectorImpl &Mask = It->ValueMasks[Vec]; - if (Mask.empty()) - Mask.assign(FTy->getNumElements(), UndefMaskElem); - // Find the insertvector, vectorized in tree, if any. - Value *Base = VU; - while (auto *IEBase = dyn_cast(Base)) { - if (IEBase != User && - (!IEBase->hasOneUse() || - getInsertIndex(IEBase).getValueOr(Idx) == Idx)) - break; - // Build the mask for the vectorized insertelement instructions. - if (const TreeEntry *E = getTreeEntry(IEBase)) { - do { - IEBase = cast(Base); - int IEIdx = *getInsertIndex(IEBase); - assert(Mask[Idx] == UndefMaskElem && - "InsertElementInstruction used already."); - Mask[IEIdx] = IEIdx; - Base = IEBase->getOperand(0); - } while (E == getTreeEntry(Base)); - break; - } - Base = cast(Base)->getOperand(0); - // After the vectorization the def-use chain has changed, need - // to look through original insertelement instructions, if they - // get replaced by vector instructions. - auto It = VectorToInsertElement.find(Base); - if (It != VectorToInsertElement.end()) - Base = It->second; - } - } - SmallVectorImpl &Mask = It->ValueMasks[Vec]; - if (Mask.empty()) - Mask.assign(FTy->getNumElements(), UndefMaskElem); - Mask[Idx] = ExternalUse.Lane; - It->InsertElements.push_back(cast(User)); - continue; - } - } - } - } - // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. if (auto *VecI = dyn_cast(Vec)) { @@ -8335,219 +8240,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } - // Checks if the mask is an identity mask. 
- auto &&IsIdentityMask = [](ArrayRef Mask, FixedVectorType *VecTy) { - int Limit = Mask.size(); - return VecTy->getNumElements() == Mask.size() && - all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && - ShuffleVectorInst::isIdentityMask(Mask); - }; - // Tries to combine 2 different masks into single one. - auto &&CombineMasks = [](SmallVectorImpl &Mask, ArrayRef ExtMask) { - SmallVector NewMask(ExtMask.size(), UndefMaskElem); - for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) { - if (ExtMask[I] == UndefMaskElem) - continue; - NewMask[I] = Mask[ExtMask[I]]; - } - Mask.swap(NewMask); - }; - // Peek through shuffles, trying to simplify the final shuffle code. - auto &&PeekThroughShuffles = - [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl &Mask, - bool CheckForLengthChange = false) { - while (auto *SV = dyn_cast(V)) { - // Exit if not a fixed vector type or changing size shuffle. - if (!isa(SV->getType()) || - (CheckForLengthChange && SV->changesLength())) - break; - // Exit if the identity or broadcast mask is found. - if (IsIdentityMask(Mask, cast(SV->getType())) || - SV->isZeroEltSplat()) - break; - bool IsOp1Undef = isUndefVector(SV->getOperand(0)); - bool IsOp2Undef = isUndefVector(SV->getOperand(1)); - if (!IsOp1Undef && !IsOp2Undef) - break; - SmallVector ShuffleMask(SV->getShuffleMask().begin(), - SV->getShuffleMask().end()); - CombineMasks(ShuffleMask, Mask); - Mask.swap(ShuffleMask); - if (IsOp2Undef) - V = SV->getOperand(0); - else - V = SV->getOperand(1); - } - }; - // Smart shuffle instruction emission, walks through shuffles trees and - // tries to find the best matching vector for the actual shuffle - // instruction. - auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles, - &CombineMasks](Value *V1, Value *V2, - ArrayRef Mask) -> Value * { - assert(V1 && "Expected at least one vector value."); - if (V2 && !isUndefVector(V2)) { - // Peek through shuffles. 
- Value *Op1 = V1; - Value *Op2 = V2; - int VF = - cast(V1->getType())->getElementCount().getKnownMinValue(); - SmallVector CombinedMask1(Mask.size(), UndefMaskElem); - SmallVector CombinedMask2(Mask.size(), UndefMaskElem); - for (int I = 0, E = Mask.size(); I < E; ++I) { - if (Mask[I] < VF) - CombinedMask1[I] = Mask[I]; - else - CombinedMask2[I] = Mask[I] - VF; - } - Value *PrevOp1; - Value *PrevOp2; - do { - PrevOp1 = Op1; - PrevOp2 = Op2; - PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true); - PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true); - // Check if we have 2 resizing shuffles - need to peek through operands - // again. - if (auto *SV1 = dyn_cast(Op1)) - if (auto *SV2 = dyn_cast(Op2)) - if (SV1->getOperand(0)->getType() == - SV2->getOperand(0)->getType() && - SV1->getOperand(0)->getType() != SV1->getType() && - isUndefVector(SV1->getOperand(1)) && - isUndefVector(SV2->getOperand(1))) { - Op1 = SV1->getOperand(0); - Op2 = SV2->getOperand(0); - SmallVector ShuffleMask1(SV1->getShuffleMask().begin(), - SV1->getShuffleMask().end()); - CombineMasks(ShuffleMask1, CombinedMask1); - CombinedMask1.swap(ShuffleMask1); - SmallVector ShuffleMask2(SV2->getShuffleMask().begin(), - SV2->getShuffleMask().end()); - CombineMasks(ShuffleMask2, CombinedMask2); - CombinedMask2.swap(ShuffleMask2); - } - } while (PrevOp1 != Op1 || PrevOp2 != Op2); - VF = cast(Op1->getType()) - ->getElementCount() - .getKnownMinValue(); - for (int I = 0, E = Mask.size(); I < E; ++I) { - if (CombinedMask2[I] != UndefMaskElem) { - assert(CombinedMask1[I] == UndefMaskElem && - "Expected undefined mask element"); - CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); - } - } - Value *Vec = Builder.CreateShuffleVector( - Op1, Op1 == Op2 ? 
PoisonValue::get(Op1->getType()) : Op2, - CombinedMask1); - if (auto *I = dyn_cast(Vec)) { - GatherShuffleSeq.insert(I); - CSEBlocks.insert(I->getParent()); - } - return Vec; - } - if (isa(V1)) - return PoisonValue::get(FixedVectorType::get( - cast(V1->getType())->getElementType(), Mask.size())); - Value *Op = V1; - SmallVector CombinedMask(Mask.begin(), Mask.end()); - PeekThroughShuffles(Op, CombinedMask); - if (!isa(Op->getType()) || - !IsIdentityMask(CombinedMask, cast(Op->getType()))) { - Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); - if (auto *I = dyn_cast(Vec)) { - GatherShuffleSeq.insert(I); - CSEBlocks.insert(I->getParent()); - } - return Vec; - } - return Op; - }; - - auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef Mask) { - unsigned VF = Mask.size(); - unsigned VecVF = cast(Vec->getType())->getNumElements(); - if (VF != VecVF) { - if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); })) { - Vec = CreateShuffle(Vec, nullptr, Mask); - return std::make_pair(Vec, true); - } - SmallVector ResizeMask(VF, UndefMaskElem); - for (unsigned I = 0; I < VF; ++I) { - if (Mask[I] != UndefMaskElem) - ResizeMask[Mask[I]] = Mask[I]; - } - Vec = CreateShuffle(Vec, nullptr, ResizeMask); - } - - return std::make_pair(Vec, false); - }; - // Perform shuffling of the vectorize tree entries for better handling of - // external extracts. - for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) { - // Find the first and the last instruction in the list of insertelements. 
- sort(ShuffledInserts[I].InsertElements, isFirstInsertElement); - InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front(); - InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back(); - Builder.SetInsertPoint(LastInsert); - auto Vector = ShuffledInserts[I].ValueMasks.takeVector(); - Value *NewInst = performExtractsShuffleAction( - makeMutableArrayRef(Vector.data(), Vector.size()), - FirstInsert->getOperand(0), - [](Value *Vec) { - return cast(Vec->getType()) - ->getElementCount() - .getKnownMinValue(); - }, - ResizeToVF, - [FirstInsert, &CreateShuffle](ArrayRef Mask, - ArrayRef Vals) { - assert((Vals.size() == 1 || Vals.size() == 2) && - "Expected exactly 1 or 2 input values."); - if (Vals.size() == 1) { - // Do not create shuffle if the mask is a simple identity - // non-resizing mask. - if (Mask.size() != cast(Vals.front()->getType()) - ->getNumElements() || - !ShuffleVectorInst::isIdentityMask(Mask)) - return CreateShuffle(Vals.front(), nullptr, Mask); - return Vals.front(); - } - return CreateShuffle(Vals.front() ? Vals.front() - : FirstInsert->getOperand(0), - Vals.back(), Mask); - }); - auto It = ShuffledInserts[I].InsertElements.rbegin(); - // Rebuild buildvector chain. 
- InsertElementInst *II = nullptr; - if (It != ShuffledInserts[I].InsertElements.rend()) - II = *It; - SmallVector Inserts; - while (It != ShuffledInserts[I].InsertElements.rend()) { - assert(II && "Must be an insertelement instruction."); - if (*It == II) - ++It; - else - Inserts.push_back(cast(II)); - II = dyn_cast(II->getOperand(0)); - } - for (Instruction *II : reverse(Inserts)) { - II->replaceUsesOfWith(II->getOperand(0), NewInst); - if (auto *NewI = dyn_cast(NewInst)) - if (II->getParent() == NewI->getParent() && II->comesBefore(NewI)) - II->moveAfter(NewI); - NewInst = II; - } - LastInsert->replaceAllUsesWith(NewInst); - for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) { - IE->replaceUsesOfWith(IE->getOperand(1), - PoisonValue::get(IE->getOperand(1)->getType())); - eraseInstruction(IE); - } - CSEBlocks.insert(LastInsert->getParent()); - } - // For each vectorized value: for (auto &TEPtr : VectorizableTree) { TreeEntry *Entry = TEPtr.get(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 68c8555bd6d435..5dc03ab1a3cd1f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1312,19 +1312,19 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP56]] ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> 
[[TMP62]], <16 x i32> [[TMP63]], <16 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> ; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = sub nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP72]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> ; CHECK-NEXT: [[TMP74:%.*]] = add nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = sub nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 148169fb64de42..12a2cd2a6834fe 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; 
CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index f5079fab51a6ab..03a29f48a739e7 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll index e25fd9b206334e..aab848ba73ddbe 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -26,15 +26,19 @@ define void @s116_modified(float* %a) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x 
float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP17]], align 4 ; CHECK-NEXT: ret void ; %gep0 = getelementptr inbounds float, float* %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll index 
281169f7a69fea..77174151e96350 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -10,9 +10,12 @@ define void @test() { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: ret void ; @@ -43,10 +46,14 @@ define void @test1() { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP9]], ptr null, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = 
extractelement <2 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1 +; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1 +; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds float, ptr undef, i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll index 610049129d3983..3b0ec888de661c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -4,12 +4,13 @@ define void @b() { ; CHECK-LABEL: @b( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> , <4 x float> , <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP3]], zeroinitializer -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr undef, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) +; 
CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer +; CHECK-NEXT: store <4 x float> [[TMP5]], ptr undef, align 4 ; CHECK-NEXT: ret void ; entry: @@ -49,7 +50,8 @@ define void @test(float %a) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1 ; CHECK-NEXT: br label [[LOOP]] ; entry: @@ -69,6 +71,7 @@ define internal void @test1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: +; CHECK-NEXT: [[DOTSROA_025_4_VEC_INSERT_US_I:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll index 36347b4843fe76..4463f94b1c557c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll @@ -52,12 +52,16 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) { ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: 
[[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 +; CHECK-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT4:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 +; CHECK-NEXT: [[RETVAL_SROA_0_4_VEC_INSERT7:%.*]] = insertelement <2 x float> [[RETVAL_SROA_0_0_VEC_INSERT4]], float [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 +; CHECK-NEXT: [[RETVAL_SROA_7_8_VEC_INSERT11:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP6]], i32 3 +; CHECK-NEXT: [[RETVAL_SROA_7_12_VEC_INSERT13:%.*]] = insertelement <2 x float> [[RETVAL_SROA_7_8_VEC_INSERT11]], float [[TMP10]], i64 1 +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[RETVAL_SROA_0_4_VEC_INSERT7]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[RETVAL_SROA_7_12_VEC_INSERT13]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll index 402211b4ef73eb..c9590ae76f405d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll @@ -13,19 +13,21 @@ define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145. 
; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4 ; CHECK-NEXT: br label [[DO_BODY66_I:%.*]] ; CHECK: do.body66.i: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> , i32 [[TMP2]], i32 1 ; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]] ; CHECK: if.else.i: -; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef +; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], undef ; CHECK-NEXT: br label [[DO_COND_I]] ; CHECK: do.cond.i: -; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ] +; CHECK-NEXT: [[TMP5]] = phi <2 x i32> [ [[TMP4]], [[IF_ELSE_I]] ], [ [[TMP3]], [[DO_BODY66_I]] ] ; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]] ; CHECK: do.end1006.i: -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll index 9b183969001c80..30abc6f9fbe87a 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -36,15 +36,17 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class. ; CHECK: if.then325: ; CHECK-NEXT: br label [[IF_END327]] ; CHECK: if.end327: +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> , float [[TMP4]], i32 0 ; CHECK-NEXT: br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]] ; CHECK: if.then329: ; CHECK-NEXT: br label [[IF_END332]] ; CHECK: if.end332: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ , [[IF_THEN291]] ] -; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[IF_THEN329]] ], [ [[TMP5]], [[IF_END327]] ], [ , [[IF_THEN291]] ] +; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP3]], [[TMP6]] ; CHECK-NEXT: [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP7]], <2 x float>* [[TMP8]], align 4 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 0344c5a82fbed5..ef1ed33ffcb995 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ 
-32,7 +32,7 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 ; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 0ab2cf94db8272..94739340c8b5ab 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -35,7 +35,7 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll index e87a5190d47216..fa8c8a14e891ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ 
-129,10 +129,11 @@ define fastcc void @dct36(double* %inbuf) { ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index 1c7cd3fd265d58..bbd71825f96c6a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -20,15 +20,18 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; 
CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0 +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index 6b9fdb23714fb1..cd05b940be221f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -20,15 +20,18 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], 
[[FOR_BODY]] ] -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0 +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0 +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index 9bfde3976d2cc3..723c12d9b05b67 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -115,15 +115,16 @@ define void @_Z8radianceRK3RayiPt() #0 { ; CHECK-NEXT: br i1 
undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = fmul <2 x double> undef, undef -; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> undef, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> undef, [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> undef, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.then78: ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll index ea4de7916097df..4c15a89f8f0057 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,19 +82,23 @@ define float 
@f_used_twice_in_tree(<2 x float> %x) { ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] -; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] +; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 +; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 +; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] +; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] -; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] +; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 +; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 +; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] +; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] 
; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index bafc67c9ea31f1..f446f3682e580e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -896,11 +896,12 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 1 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[MUL]], i32 0 ; THRESHOLD-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x float> -; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> , <2 x float> [[TMP5]], <2 x i32> -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP6]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 -; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]] +; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 +; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <2 x float> , float [[TMP6]], i32 0 +; THRESHOLD-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP7]] +; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 +; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP9]], [[TMP10]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[TMP2]], [[OP_RDX2]] ; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 801b4863e7c19e..7c4aed1453c97f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1100,14 +1100,15 @@ define i32 @maxi8_wrong_parent(i32) { ; THRESH-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] ; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i1> poison, i1 [[OP_RDX]], i32 0 ; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i1> [[TMP11]], i1 [[TMP5]], i32 1 -; THRESH-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <2 x i32> -; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> -; THRESH-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP2]], <2 x i32> -; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP15]] -; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 -; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1 -; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP17]], i32 [[TMP18]] +; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 +; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP3]], i32 1 +; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 +; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP4]], i32 1 +; THRESH-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP14]], <2 x i32> [[TMP16]] +; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 +; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 +; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP18]], i32 [[TMP19]] ; THRESH-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP8]], [[OP_RDX3]] ; THRESH-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP8]], i32 [[OP_RDX3]] ; THRESH-NEXT: ret i32 [[OP_RDX5]] diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index 01979d12fb3ac4..470fa0caa1cd17 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -439,9 +439,12 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> -; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -474,9 +477,12 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> -; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: 
[[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -533,9 +539,12 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> -; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 06cdff548a91fc..d7adf196f7c2a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -474,9 +474,12 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> -; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] +; MINTREESIZE-NEXT: 
[[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -509,9 +512,12 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> -; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -568,9 +574,12 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> -; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> 
[[A:%.*]], i32 7 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll index b64c0e4e35a235..5a9273d1e5b3e9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -19,10 +19,16 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) { ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0 -; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <2 x float> [[VEC1]], float [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <2 x float> 
[[VEC3]], float [[TMP13]], i32 1 +; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VEC2]], 0 +; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[VEC4]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index 011ce7352079ca..d785f8e76798b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,9 +9,10 @@ define i32 @fn1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll index 85aa8aed5f5a59..31c5d5b07f0e0f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -58,11 +58,18 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca ; CHECK-NEXT: 
[[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll index 78572209687b87..6bbde0957743ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -39,13 +39,14 @@ define dso_local void @j() local_unnamed_addr { ; 
CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 1 ; CHECK-NEXT: store float [[TMP15]], float* @f, align 4 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> , float [[CONV19]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> [[SHUFFLE]], <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP11]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP11]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP19]], <4 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = fptosi <4 x float> [[TMP20]] to <4 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP11]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x float> [[TMP11]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll index 6beff0162ffd26..3d7d632cca3d63 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -59,8 +59,9 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> 
[[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[I3]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll index dd08e7983d70cb..509c37f5da28c0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -59,8 +59,9 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[I3]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index faf86fbcafb9d5..36b867d4a148fb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -722,11 +722,15 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] +; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 +; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 +; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0 +; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 +; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] +; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 +; SSE-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 +; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]] ; SSE-NEXT: ret double [[ADD3]] ; ; AVX-LABEL: @splat_loads( @@ -787,13 +791,17 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2, ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], 
<2 x double> poison, <2 x i32> zeroinitializer -; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] -; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 -; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] +; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 +; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 +; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0 +; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 +; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] +; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 +; SSE-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP10]], [[TMP11]] +; SSE-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 0 +; SSE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP12]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP13]], [[TMP14]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll index fc18281c3f35ed..464288ef340c80 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll @@ -29,9 +29,10 @@ define void @f(i1 %x) #0 { ; CHECK: if.then: ; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]] -; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = 
extractelement <2 x i64> [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]] +; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index c6725da8a98693..df767259b4f477 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -151,7 +151,7 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00 @@ -163,22 +163,25 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], -; CHECK-NEXT: [[TMP16]] = fadd <4 x float> [[TMP6]], 
[[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP17]], +; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP21]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 0 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd 
float [[ADD30]], [[TMP24]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: @@ -241,20 +244,27 @@ define float @sort_phi_type(float* nocapture readonly %A) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 +; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 -; 
CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll index 595b5c9d1079b0..8ab5fb7c7ceec6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll @@ -10,10 +10,16 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], 
i32 2 +; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 +; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll index e97a3a4521482b..3177f8557315dc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll @@ -10,10 +10,16 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 +; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x 
float> undef, float [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 +; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll index aa93abaf4d9cbd..1fd2f30bf93cc9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll @@ -10,10 +10,18 @@ define i64 @test() { ; CHECK: bb3: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], 
i32 [[TMP1]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP65]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index f71e214fe89382..7ab7f3b67634b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,10 +9,12 @@ define void @fextr(i16* %ptr) { ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll index 
fc1a673d818f92..3315e71ae2aa60 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll @@ -12,15 +12,22 @@ define void @test() { ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]] ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x 
i32> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 16 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i8, i8* undef, i64 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll index bb0a39b7b6fb8e..9bbe2d5d673c76 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll @@ -7,18 +7,21 @@ define i32 @test(i32* %isec) { ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1 ; CHECK-NEXT: br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]] ; CHECK: block1: ; CHECK-NEXT: br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]] ; CHECK: block2: ; CHECK-NEXT: br label [[BLOCK3]] ; CHECK: block3: -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP2]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: 
[[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP7]] +; CHECK-NEXT: ret i32 [[TMP9]] ; entry: %arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 0a07e03bac72c5..297aa493ed0715 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -49,9 +49,15 @@ define void @test(i32* nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 +; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 +; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 +; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], 
i32 7 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index 3cf139ea0469b8..b0a406e6a8f514 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -49,9 +49,15 @@ define void @test(i32* nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 +; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 +; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 +; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index f2fbf1dfc756a1..109c27e4f4f4e5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,7 +12,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP18:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: @@ -23,13 +23,17 @@ define void @foo() { ; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]] +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float> +; CHECK-NEXT: [[TMP17:%.*]] = 
select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: From 36fde81f93606cfa6357e6463571c0222e7e6e28 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sat, 21 May 2022 16:08:32 -0400 Subject: [PATCH 169/908] Fix failing test bots from df46fb40557a14807dd508af32251ceb1cab8b86 Should fix bots like: https://lab.llvm.org/buildbot/#/builders/188/builds/14403 https://lab.llvm.org/buildbot/#/builders/171/builds/14928 https://lab.llvm.org/buildbot/#/builders/123/builds/10852 (and others) --- clang/test/C/drs/dr011.c | 4 ++-- clang/test/C/drs/dr0xx.c | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/clang/test/C/drs/dr011.c b/clang/test/C/drs/dr011.c index 6bb7d4785cfa3e..da569565166257 100644 --- a/clang/test/C/drs/dr011.c +++ b/clang/test/C/drs/dr011.c @@ -14,5 +14,5 @@ int i[10]; int j[]; -// CHECK: @i = {{.*}} global [10 x i32] zeroinitializer -// CHECK-NEXT: @j = {{.*}} global [1 x i32] zeroinitializer +// CHECK: @i = {{.*}}global [10 x i32] zeroinitializer +// CHECK-NEXT: @j = {{.*}}global [1 x i32] zeroinitializer diff --git a/clang/test/C/drs/dr0xx.c b/clang/test/C/drs/dr0xx.c index cac8b62483ac26..4046765fa97ed6 100644 --- a/clang/test/C/drs/dr0xx.c +++ b/clang/test/C/drs/dr0xx.c @@ -147,7 +147,13 @@ _Static_assert(__builtin_types_compatible_p(struct S { int a; }, union U { int a void dr031(int i) { switch (i) { case __INT_MAX__ + 1: break; /* expected-warning {{overflow in expression; result is -2147483648 with type 'int'}} */ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wswitch" + /* Silence the targets which issue: + * warning: overflow converting case value to switch condition type (2147483649 to 18446744071562067969) + */ case 
__INT_MAX__ + 2ul: break; + #pragma clang diagnostic pop case (__INT_MAX__ * 4) / 4: break; /* expected-warning {{overflow in expression; result is -4 with type 'int'}} */ } } @@ -158,4 +164,4 @@ void dr031(int i) { * This should issue a diagnostic because a constant-expression is a * conditional-expression, which excludes the comma operator. */ -int dr032 = (1, 2); +int dr032 = (1, 2); /* expected-warning {{left operand of comma operator has no effect}} */ From ecf5924eb4c4111bbab62425aca85866bb94b679 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Sat, 21 May 2022 22:16:55 +0200 Subject: [PATCH 170/908] [clang-tidy] Add a useful note about -std=c++11-or-later I and @whisperity spent some time debugging a LIT test case using the `-std=c++11-or-later` `check_clang_tidy.py` flag when the test had fixits. It turns out if the test wants to report a diagnostic into a header file AND into the test file as well, one needs to first copy the header somewhere under the build directory. It needs to be copied since `clang-tidy` sorts the reports into alphabetical order, thus to have a deterministic order relative to the diagnostic in the header AND the diagnostic in the test cpp file. There is more to this story. The `-std=c++11-or-later` turns out executes the test with multiple `-std=XX` version substitution, and each execution will also have the `-fix` tidy parameter. This means that the freshly copied header file I stated in the previous paragraph gets fixed up and the very next tidy execution will fail miserably. Following @whisperity's advice, I'm leaving a reminder about such //shared// state in the related doc comment. 
Reviewed By: LegalizeAdulthood Differential Revision: https://reviews.llvm.org/D125771 --- clang-tools-extra/test/clang-tidy/check_clang_tidy.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py index d764f20ef2ecce..e6a4a8e505492f 100755 --- a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py +++ b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py @@ -19,11 +19,17 @@ [-assume-filename=] \ [-check-suffix=] \ [-check-suffixes=] \ + [-std=c++(98|11|14|17|20)[-or-later]] \ \ -- [optional clang-tidy arguments] Example: // RUN: %check_clang_tidy %s llvm-include-order %t -- -- -isystem %S/Inputs + +Notes: + -std=c++(98|11|14|17|20)-or-later: + This flag will cause multiple runs within the same check_clang_tidy + execution. Make sure you don't have shared state across these runs. """ import argparse From 55e8f721d4d0996cbe5dc802a2c5fbe48dac0d44 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 20 May 2022 18:38:13 -0700 Subject: [PATCH 171/908] [ORC] Allow FailedToMaterialize errors to outlive ExecutionSessions. Idiomatic llvm::Error usage can result in a FailedToMaterialize error tearing down an ExecutionSession instance. Since the FailedToMaterialize error holds SymbolStringPtrs and JITDylib references this leads to crashes when accessing or logging the error. This patch modifies FailedToMaterialize to retain the SymbolStringPool and JITDylibs involved in the failure so that we can safely report an error message to the client, even if the error tears down the session. The contract for JITDylibs allows the getName method to be used even after the session has been torn down, but no other JITDylib fields should be accessed via the FailedToMaterialize error if the session has been torn down. Logging the error is guaranteed to be safe in all cases. 
--- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 5 ++- llvm/lib/ExecutionEngine/Orc/Core.cpp | 27 +++++++++++++--- .../ExecutionEngine/Orc/CoreAPIsTest.cpp | 31 +++++++++++++++++++ 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 26f15c8f17490b..a51776880cde26 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -420,12 +420,15 @@ class FailedToMaterialize : public ErrorInfo { public: static char ID; - FailedToMaterialize(std::shared_ptr Symbols); + FailedToMaterialize(std::shared_ptr SSP, + std::shared_ptr Symbols); + ~FailedToMaterialize(); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; const SymbolDependenceMap &getSymbols() const { return *Symbols; } private: + std::shared_ptr SSP; std::shared_ptr Symbols; }; diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 90807fce576ec8..2fc3802e185115 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -76,9 +76,21 @@ void ResourceTrackerDefunct::log(raw_ostream &OS) const { } FailedToMaterialize::FailedToMaterialize( + std::shared_ptr SSP, std::shared_ptr Symbols) - : Symbols(std::move(Symbols)) { + : SSP(std::move(SSP)), Symbols(std::move(Symbols)) { + assert(this->SSP && "String pool cannot be null"); assert(!this->Symbols->empty() && "Can not fail to resolve an empty set"); + + // FIXME: Use a new dep-map type for FailedToMaterialize errors so that we + // don't have to manually retain/release. 
+ for (auto &KV : *this->Symbols) + KV.first->Retain(); +} + +FailedToMaterialize::~FailedToMaterialize() { + for (auto &KV : *Symbols) + KV.first->Release(); } std::error_code FailedToMaterialize::convertToErrorCode() const { @@ -962,6 +974,7 @@ Error JITDylib::resolve(MaterializationResponsibility &MR, auto FailedSymbolsDepMap = std::make_shared(); (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); return make_error( + getExecutionSession().getSymbolStringPool(), std::move(FailedSymbolsDepMap)); } @@ -1039,6 +1052,7 @@ Error JITDylib::emit(MaterializationResponsibility &MR, auto FailedSymbolsDepMap = std::make_shared(); (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); return make_error( + getExecutionSession().getSymbolStringPool(), std::move(FailedSymbolsDepMap)); } @@ -2265,7 +2279,8 @@ Error ExecutionSession::removeResourceTracker(ResourceTracker &RT) { joinErrors(std::move(Err), L->handleRemoveResources(RT.getKeyUnsafe())); for (auto &Q : QueriesToFail) - Q->handleFailed(make_error(FailedSymbols)); + Q->handleFailed( + make_error(getSymbolStringPool(), FailedSymbols)); return Err; } @@ -2345,7 +2360,8 @@ Error ExecutionSession::IL_updateCandidatesFor( if (SymI->second.getFlags().hasError()) { auto FailedSymbolsMap = std::make_shared(); (*FailedSymbolsMap)[&JD] = {Name}; - return make_error(std::move(FailedSymbolsMap)); + return make_error(getSymbolStringPool(), + std::move(FailedSymbolsMap)); } // Otherwise this is a match. Remove it from the candidate set. @@ -2619,7 +2635,7 @@ void ExecutionSession::OL_completeLookup( auto FailedSymbolsMap = std::make_shared(); (*FailedSymbolsMap)[&JD] = {Name}; return make_error( - std::move(FailedSymbolsMap)); + getSymbolStringPool(), std::move(FailedSymbolsMap)); } // Otherwise this is a match. 
@@ -2955,7 +2971,8 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) { }); for (auto &Q : FailedQueries) - Q->handleFailed(make_error(FailedSymbols)); + Q->handleFailed( + make_error(getSymbolStringPool(), FailedSymbols)); } Error ExecutionSession::OL_replace(MaterializationResponsibility &MR, diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index 824988532c42d5..7ef20cdc49d5b9 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -1557,4 +1557,35 @@ TEST_F(CoreAPIsStandardTest, RemoveJITDylibs) { BazMR->failMaterialization(); } +TEST(CoreAPIsExtraTest, SessionTeardownByFailedToMaterialize) { + + auto RunTestCase = []() -> Error { + ExecutionSession ES{std::make_unique( + std::make_shared())}; + auto Foo = ES.intern("foo"); + auto FooFlags = JITSymbolFlags::Exported; + + auto &JD = ES.createBareJITDylib("Foo"); + cantFail(JD.define(std::make_unique( + SymbolFlagsMap({{Foo, FooFlags}}), + [&](std::unique_ptr R) { + R->failMaterialization(); + }))); + + auto Sym = ES.lookup({&JD}, Foo); + assert(!Sym && "Query should have failed"); + cantFail(ES.endSession()); + return Sym.takeError(); + }; + + auto Err = RunTestCase(); + EXPECT_TRUE(!!Err); // Expect that error occurred. + EXPECT_TRUE( + Err.isA()); // Expect FailedToMaterialize error. + + // Make sure that we can log errors, even though the session has been + // destroyed. + logAllUnhandledErrors(std::move(Err), nulls(), ""); +} + } // namespace From 4638766794b0d9e9a9262e59e3a5cccde657b2ef Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 21 May 2022 14:02:22 -0700 Subject: [PATCH 172/908] [TypePromotion] Refine fix sext/zext for promoted constant from D125294. Reviewing the code again, I believe the sext is needed on the LHS or RHS for ICmp and only on the RHS for Add. Add an opcode check before checking the operand number. Fixes PR55627. 
Differential Revision: https://reviews.llvm.org/D125654 --- llvm/lib/CodeGen/TypePromotion.cpp | 5 ++++- .../test/Transforms/TypePromotion/ARM/icmps.ll | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index ea1369587609eb..8752a081623aab 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -486,7 +486,10 @@ void IRPromoter::PromoteTree() { if (auto *Const = dyn_cast(Op)) { // For subtract, we don't need to sext the constant. We only put it in // SafeWrap because SafeWrap.size() is used elsewhere. - Constant *NewConst = (SafeWrap.contains(I) && i == 1 && + // For cmp, we need to sign extend a constant appearing in either + // operand. For add, we should only sign extend the RHS. + Constant *NewConst = (SafeWrap.contains(I) && + (I->getOpcode() == Instruction::ICmp || i == 1) && I->getOpcode() != Instruction::Sub) ? ConstantExpr::getSExt(Const, ExtTy) : ConstantExpr::getZExt(Const, ExtTy); diff --git a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll index 5caa3a65266fdb..cc744adb64118f 100644 --- a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll @@ -319,6 +319,24 @@ entry: ret i32 %conv1 } +define i32 @icmp_minus_imm_noncanonicalcmp(i8* %a) { +; CHECK-LABEL: @icmp_minus_imm_noncanonicalcmp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], -7 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 -5, [[ADD_I]] +; CHECK-NEXT: [[CONV1:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV1]] +; +entry: + %0 = load i8, i8* %a, align 1 + %add.i = add i8 %0, -7 + %cmp = icmp ult i8 -5, %add.i + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +} + define void @mul_with_neg_imm(i32, i32* %b) { ; 
CHECK-LABEL: @mul_with_neg_imm( ; CHECK-NEXT: entry: From 7be783ab9deb3ed035d6c3456afb1e630760433d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sat, 21 May 2022 13:52:55 -0700 Subject: [PATCH 173/908] [ORC] Check for errors when materializing absolute symbols. This code previously used cantFail, but both steps (resolution and emission) can fail if the resource tracker associated with the AbsoluteSymbolsMaterializationUnit is removed. Checking these errors is necessary for correct error propagation. --- llvm/lib/ExecutionEngine/Orc/Core.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 2fc3802e185115..9a90fe973da28a 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -263,9 +263,21 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const { void AbsoluteSymbolsMaterializationUnit::materialize( std::unique_ptr R) { - // No dependencies, so these calls can't fail. - cantFail(R->notifyResolved(Symbols)); - cantFail(R->notifyEmitted()); + // Even though these are just absolute symbols we need to check for failure + // to resolve/emit: the tracker for these symbols may have been removed while + // the materialization was in flight (e.g. due to a failure in some action + // triggered by the queries attached to the resolution/emission of these + // symbols). 
+ if (auto Err = R->notifyResolved(Symbols)) { + R->getExecutionSession().reportError(std::move(Err)); + R->failMaterialization(); + return; + } + if (auto Err = R->notifyEmitted()) { + R->getExecutionSession().reportError(std::move(Err)); + R->failMaterialization(); + return; + } } void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD, From 1443dbaba6f0e57be066995db9164f89fb57b413 Mon Sep 17 00:00:00 2001 From: owenca Date: Mon, 16 May 2022 13:52:32 -0700 Subject: [PATCH 174/908] [clang-format] Handle "complex" conditionals in RemoveBracesLLVM Do not remove braces if the conditional of if/for/while might not fit on a single line even after the opening brace is removed. Examples: // ColumnLimit: 20 // 45678901234567890 if (a) { /* Remove. */ foo(); } if (-b >= c) { // Keep. bar(); } Differential Revision: https://reviews.llvm.org/D126052 --- clang/lib/Format/UnwrappedLineParser.cpp | 37 +++-- clang/lib/Format/UnwrappedLineParser.h | 3 +- clang/unittests/Format/FormatTest.cpp | 166 ++++++++++++++++++++++- 3 files changed, 195 insertions(+), 11 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 18f476aca01ae0..cb7a17f157ee0f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -754,16 +754,19 @@ size_t UnwrappedLineParser::computePPHash() const { return h; } -// Checks whether \p ParsedLine might fit on a single line. We must clone the -// tokens of \p ParsedLine before running the token annotator on it so that we -// can restore them afterward. -bool UnwrappedLineParser::mightFitOnOneLine(UnwrappedLine &ParsedLine) const { +// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace +// is not null, subtracts its length (plus the preceding space) when computing +// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before +// running the token annotator on it so that we can restore them afterward. 
+bool UnwrappedLineParser::mightFitOnOneLine( + UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { const auto ColumnLimit = Style.ColumnLimit; if (ColumnLimit == 0) return true; auto &Tokens = ParsedLine.Tokens; assert(!Tokens.empty()); + const auto *LastToken = Tokens.back().Tok; assert(LastToken); @@ -785,7 +788,11 @@ bool UnwrappedLineParser::mightFitOnOneLine(UnwrappedLine &ParsedLine) const { Annotator.annotate(Line); Annotator.calculateFormattingInformation(Line); - const int Length = LastToken->TotalLength; + auto Length = LastToken->TotalLength; + if (OpeningBrace) { + assert(OpeningBrace != Tokens.front().Tok); + Length -= OpeningBrace->TokenText.size() + 1; + } Index = 0; for (auto &Token : Tokens) { @@ -805,6 +812,8 @@ UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock( assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && "'{' or macro block token expected"); FormatToken *Tok = FormatTok; + const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); + auto Index = CurrentLines->size(); const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); FormatTok->setBlockKind(BK_Block); @@ -854,14 +863,26 @@ UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock( return IfKind; } - if (SimpleBlock && !KeepBraces && - Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { + if (SimpleBlock && !KeepBraces) { + assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); assert(FormatTok->is(tok::r_brace)); const FormatToken *Previous = Tokens->getPreviousToken(); assert(Previous); if (Previous->isNot(tok::r_brace) || Previous->Optional) { assert(!CurrentLines->empty()); - if (mightFitOnOneLine(CurrentLines->back())) { + const FormatToken *OpeningBrace = Tok; + if (!Tok->Previous) { // Wrapped l_brace. + if (FollowedByComment) { + KeepBraces = true; + } else { + assert(Index > 0); + --Index; // The line above the wrapped l_brace. 
+ OpeningBrace = nullptr; + } + } + if (!KeepBraces && mightFitOnOneLine(CurrentLines->back()) && + (Tok->is(TT_ElseLBrace) || + mightFitOnOneLine((*CurrentLines)[Index], OpeningBrace))) { Tok->MatchingParen = FormatTok; FormatTok->MatchingParen = Tok; } diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 2d32d8c6327baa..ffba36cd017061 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -95,7 +95,8 @@ class UnwrappedLineParser { bool parseLevel(const FormatToken *OpeningBrace, bool CanContainBracedList, IfStmtKind *IfKind = nullptr, TokenType NextLBracesType = TT_Unknown); - bool mightFitOnOneLine(UnwrappedLine &Line) const; + bool mightFitOnOneLine(UnwrappedLine &Line, + const FormatToken *OpeningBrace = nullptr) const; IfStmtKind parseBlock(bool MustBeDeclaration = false, unsigned AddLevels = 1u, bool MunchSemi = true, bool KeepBraces = true, bool UnindentWhitesmithsBraces = false, diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 762ccf7dbd8d3a..646a6ed175075f 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -25453,7 +25453,6 @@ TEST_F(FormatTest, RemoveBraces) { Style); Style.ColumnLimit = 65; - verifyFormat("if (condition) {\n" " ff(Indices,\n" " [&](unsigned LHSI, unsigned RHSI) { return true; });\n" @@ -25496,8 +25495,48 @@ TEST_F(FormatTest, RemoveBraces) { "}", Style); - Style.ColumnLimit = 0; + verifyFormat("if (-b >=\n" + " c) { // Keep.\n" + " foo();\n" + "} else {\n" + " bar();\n" + "}", + "if (-b >= c) { // Keep.\n" + " foo();\n" + "} else {\n" + " bar();\n" + "}", + Style); + verifyFormat("if (a) /* Remove. */\n" + " f();\n" + "else\n" + " g();", + "if (a) <% /* Remove. 
*/\n" + " f();\n" + "%> else <%\n" + " g();\n" + "%>", + Style); + + verifyFormat("while (\n" + " !i--) <% // Keep.\n" + " foo();\n" + "%>", + "while (!i--) <% // Keep.\n" + " foo();\n" + "%>", + Style); + + verifyFormat("for (int &i : chars)\n" + " ++i;", + "for (int &i :\n" + " chars) {\n" + " ++i;\n" + "}", + Style); + + Style.ColumnLimit = 0; verifyFormat("if (a)\n" " b234567890223456789032345678904234567890 = " "c234567890223456789032345678904234567890;", @@ -25506,6 +25545,129 @@ TEST_F(FormatTest, RemoveBraces) { "c234567890223456789032345678904234567890;\n" "}", Style); + + Style.BreakBeforeBraces = FormatStyle::BS_Custom; + Style.BraceWrapping.AfterControlStatement = FormatStyle::BWACS_Always; + Style.BraceWrapping.BeforeElse = true; + + Style.ColumnLimit = 65; + + verifyFormat("if (condition)\n" + "{\n" + " ff(Indices,\n" + " [&](unsigned LHSI, unsigned RHSI) { return true; });\n" + "}\n" + "else\n" + "{\n" + " ff(Indices,\n" + " [&](unsigned LHSI, unsigned RHSI) { return true; });\n" + "}", + "if (condition) {\n" + " ff(Indices,\n" + " [&](unsigned LHSI, unsigned RHSI) { return true; });\n" + "} else {\n" + " ff(Indices,\n" + " [&](unsigned LHSI, unsigned RHSI) { return true; });\n" + "}", + Style); + + verifyFormat("if (a)\n" + "{ //\n" + " foo();\n" + "}", + "if (a) { //\n" + " foo();\n" + "}", + Style); + + Style.ColumnLimit = 20; + + verifyFormat("int ab = [](int i) {\n" + " if (i > 0)\n" + " {\n" + " i = 12345678 -\n" + " i;\n" + " }\n" + " return i;\n" + "};", + "int ab = [](int i) {\n" + " if (i > 0) {\n" + " i = 12345678 -\n" + " i;\n" + " }\n" + " return i;\n" + "};", + Style); + + verifyFormat("if (a)\n" + "{\n" + " b = c + // 1 -\n" + " d;\n" + "}", + "if (a) {\n" + " b = c + // 1 -\n" + " d;\n" + "}", + Style); + + verifyFormat("if (a)\n" + "{\n" + " b = c >= 0 ? d\n" + " : e;\n" + "}", + "if (a) {\n" + " b = c >= 0 ? d : e;\n" + "}", + Style); + + verifyFormat("if (a)\n" + " b = c > 0 ? d : e;", + "if (a)\n" + "{\n" + " b = c > 0 ? 
d : e;\n" + "}", + Style); + + verifyFormat("if (foo + bar <=\n" + " baz)\n" + "{\n" + " func(arg1, arg2);\n" + "}", + "if (foo + bar <= baz) {\n" + " func(arg1, arg2);\n" + "}", + Style); + + verifyFormat("if (foo + bar < baz)\n" + " func(arg1, arg2);\n" + "else\n" + " func();", + "if (foo + bar < baz)\n" + "<%\n" + " func(arg1, arg2);\n" + "%>\n" + "else\n" + "<%\n" + " func();\n" + "%>", + Style); + + verifyFormat("while (i--)\n" + "<% // Keep.\n" + " foo();\n" + "%>", + "while (i--) <% // Keep.\n" + " foo();\n" + "%>", + Style); + + verifyFormat("for (int &i : chars)\n" + " ++i;", + "for (int &i : chars)\n" + "{\n" + " ++i;\n" + "}", + Style); } TEST_F(FormatTest, AlignAfterOpenBracketBlockIndent) { From f4d52cad671380d9fa6c1637e1c82b7296fd7da0 Mon Sep 17 00:00:00 2001 From: owenca Date: Sat, 21 May 2022 15:10:21 -0700 Subject: [PATCH 175/908] [clang-format] Fix a bug in "AfterControlStatement: MultiLine" Fixes #55582. Differential Revision: https://reviews.llvm.org/D125959 --- clang/lib/Format/UnwrappedLineFormatter.cpp | 5 +++-- clang/unittests/Format/FormatTest.cpp | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 30755ef1086b2f..f92c36b449ae5f 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -404,8 +404,9 @@ class LineJoiner { // If possible, merge the next line's wrapped left brace with the // current line. Otherwise, leave it on the next line, as this is a // multi-line control statement. - return (Style.ColumnLimit == 0 || - TheLine->Last->TotalLength <= Style.ColumnLimit) + return (Style.ColumnLimit == 0 || TheLine->Level * Style.IndentWidth + + TheLine->Last->TotalLength <= + Style.ColumnLimit) ? 
1 : 0; } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 646a6ed175075f..aebb44ee9acdb3 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -3096,6 +3096,14 @@ TEST_F(FormatTest, MultiLineControlStatements) { " baz);", format("do{foo();}while(bar&&baz);", Style)); // Long lines should put opening brace on new line. + verifyFormat("void f() {\n" + " if (a1 && a2 &&\n" + " a3)\n" + " {\n" + " quux();\n" + " }\n" + "}", + "void f(){if(a1&&a2&&a3){quux();}}", Style); EXPECT_EQ("if (foo && bar &&\n" " baz)\n" "{\n" From 130a9cc0a5e25e3be8ff3738518e86ae3ae0b5ba Mon Sep 17 00:00:00 2001 From: owenca Date: Sat, 21 May 2022 15:50:50 -0700 Subject: [PATCH 176/908] [clang-format] Fix a crash on lambda trailing return type Fixes #55625. --- clang/lib/Format/TokenAnnotator.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 9afe4f8fb0868e..7f1c4b8e61b6bb 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1185,7 +1185,8 @@ class AnnotatingParser { } break; case tok::arrow: - if (Tok->Previous && Tok->Previous->is(tok::kw_noexcept)) + if (Tok->isNot(TT_LambdaArrow) && Tok->Previous && + Tok->Previous->is(tok::kw_noexcept)) Tok->setType(TT_TrailingReturnArrow); break; default: diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index aebb44ee9acdb3..83b8dbed75e3fd 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -21338,6 +21338,7 @@ TEST_F(FormatTest, FormatsLambdas) { verifyFormat("int c = []() -> int * { return 2; }();\n"); verifyFormat("int c = []() -> vector { return {2}; }();\n"); verifyFormat("Foo([]() -> std::vector { return {2}; }());"); + verifyFormat("foo([]() noexcept -> int {});"); 
verifyGoogleFormat("auto a = [&b, c](D* d) -> D* {};"); verifyGoogleFormat("auto a = [&b, c](D* d) -> pair {};"); verifyGoogleFormat("auto a = [&b, c](D* d) -> D& {};"); From 554efc225217272df755a962d841d57056af83f4 Mon Sep 17 00:00:00 2001 From: owenca Date: Sat, 21 May 2022 16:13:35 -0700 Subject: [PATCH 177/908] [clang-format] Format unit tests with InsertBraces/RemoveBracesLLVM --- clang/unittests/Format/DefinitionBlockSeparatorTest.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp index cb24cc921bc05a..45dd2fdc46b68f 100644 --- a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp +++ b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp @@ -76,8 +76,9 @@ class DefinitionBlockSeparatorTest : public ::testing::Test { if (Result.size()) { auto LastChar = Result.back(); if ((Char == '\n' && LastChar == '\n') || - (Char == '\r' && (LastChar == '\r' || LastChar == '\n'))) + (Char == '\r' && (LastChar == '\r' || LastChar == '\n'))) { continue; + } } Result.push_back(Char); } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 83b8dbed75e3fd..3621ba66781872 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -19609,7 +19609,8 @@ TEST_F(FormatTest, UnderstandPragmaRegion) { auto Style = getLLVMStyleWithColumns(0); verifyFormat("#pragma region TEST(FOO : BAR)", Style); - EXPECT_EQ("#pragma region TEST(FOO : BAR)", format("#pragma region TEST(FOO : BAR)", Style)); + EXPECT_EQ("#pragma region TEST(FOO : BAR)", + format("#pragma region TEST(FOO : BAR)", Style)); } TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { From 52d509f38b5069fbc897c01edb065acf8e0e8f42 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Sat, 21 May 2022 17:23:51 -0700 Subject: [PATCH 178/908] [M68k][Disassembler] Cleanup 
unused variables. NFC - Remove `MaxInstrWord` in M68kDisassembler.cpp. - Remove `MCII` field in `M68kDisassembler` class. NFC. --- .../Target/M68k/Disassembler/M68kDisassembler.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp index 6fb98aa838c0ff..31b59c17c0caff 100644 --- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp +++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp @@ -31,7 +31,6 @@ using namespace llvm; #define DEBUG_TYPE "m68k-disassembler" typedef MCDisassembler::DecodeStatus DecodeStatus; -const unsigned MaxInstrWord = 11; static const unsigned RegisterDecode[] = { M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5, @@ -98,13 +97,9 @@ static DecodeStatus DecodeCCRCRegisterClass(MCInst &Inst, APInt &Insn, #include "M68kGenDisassemblerTable.inc" /// A disassembler class for M68k. -class M68kDisassembler : public MCDisassembler { - MCInstrInfo *MCII; - -public: - M68kDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, - MCInstrInfo *MCII) - : MCDisassembler(STI, Ctx), MCII(MCII) {} +struct M68kDisassembler : public MCDisassembler { + M68kDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} virtual ~M68kDisassembler() {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, @@ -142,7 +137,7 @@ DecodeStatus M68kDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, static MCDisassembler *createM68kDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new M68kDisassembler(STI, Ctx, T.createMCInstrInfo()); + return new M68kDisassembler(STI, Ctx); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kDisassembler() { From 0b903ef6aa0976a60d3f448837f3c43adaf09cc1 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 22 May 2022 01:15:34 +0000 Subject: [PATCH 179/908] Re-add release notes for GCC ABI 
compatibility for non-POD in packed structs --- clang/docs/ReleaseNotes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5d1b12504f787b..a889c74a55239f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -424,6 +424,12 @@ OpenCL C Language Changes in Clang ABI Changes in Clang -------------------- +- GCC doesn't pack non-POD members in packed structs unless the packed + attribute is also specified on the member. Clang historically did perform + such packing. Clang now matches the gcc behavior (except on Darwin and PS4). + You can switch back to the old ABI behavior with the flag: + ``-fclang-abi-compat=13.0``. + OpenMP Support in Clang ----------------------- From 0e8ac3a79741e1db0296016f04b4024041aab5ab Mon Sep 17 00:00:00 2001 From: Ping Deng Date: Sun, 22 May 2022 02:13:35 +0000 Subject: [PATCH 180/908] [LegalizeTypes][VP] Add integer promotion support for vp.sitofp/vp.uitofp Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D125960 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 12 ++++++++ .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 28 +++++++++++++++++++ .../RISCV/rvv/fixed-vectors-sitofp-vp.ll | 15 ++++++++++ .../RISCV/rvv/fixed-vectors-uitofp-vp.ll | 15 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll | 24 ++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 15 ++++++++++ llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 15 ++++++++++ 7 files changed, 124 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 3861a6164bbb6b..8a04b7f61e58a0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1631,6 +1631,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::VP_SETCC: case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); 
break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::VP_SITOFP: case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast(N), @@ -1646,6 +1647,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: + case ISD::VP_UITOFP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -1977,6 +1979,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + if (N->getOpcode() == ISD::VP_SITOFP) + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0)), + N->getOperand(1), N->getOperand(2)), + 0); return SDValue(DAG.UpdateNodeOperands(N, SExtPromotedInteger(N->getOperand(0))), 0); } @@ -2101,6 +2108,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + if (N->getOpcode() == ISD::VP_UITOFP) + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0)), + N->getOperand(1), N->getOperand(2)), + 0); return SDValue(DAG.UpdateNodeOperands(N, ZExtPromotedInteger(N->getOperand(0))), 0); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index f932bdfd2817e6..265be226f7269d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -44,6 +44,34 @@ define <2 x float> @si2fp_v2i1_v2f32(<2 x i1> %x) { ret <2 x float> %z } +define <2 x float> @si2fp_v2i7_v2f32(<2 x i7> %x) { +; CHECK-LABEL: si2fp_v2i7_v2f32: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsext.vf4 v9, v8 +; CHECK-NEXT: vfcvt.f.x.v v8, v9 +; CHECK-NEXT: ret + %z = sitofp <2 x i7> %x to <2 x float> + ret <2 x float> %z +} + +define <2 x float> @ui2fp_v2i7_v2f32(<2 x i7> %x) { +; CHECK-LABEL: ui2fp_v2i7_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vzext.vf4 v9, v8 +; CHECK-NEXT: vfcvt.f.xu.v v8, v9 +; CHECK-NEXT: ret + %z = uitofp <2 x i7> %x to <2 x float> + ret <2 x float> %z +} + define <2 x float> @ui2fp_v2i1_v2f32(<2 x i1> %x) { ; CHECK-LABEL: ui2fp_v2i1_v2f32: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll index b95cc757d28a8e..956c0bbf0cdc36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -4,6 +4,21 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh \ ; RUN: -riscv-v-vector-bits-min=128 < %s | FileCheck %s +declare <4 x half> @llvm.vp.sitofp.v4f16.v4i7(<4 x i7>, <4 x i1>, i32) + +define <4 x half> @vsitofp_v4f16_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsitofp_v4f16_v4i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v9, v8, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl) + ret <4 x half> %v +} + declare <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8>, <4 x i1>, i32) define <4 x half> @vsitofp_v4f16_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll index 690f484d317b6c..c7832da90d2e7e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -4,6 +4,21 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh \ ; RUN: -riscv-v-vector-bits-min=128 < %s | FileCheck %s +declare <4 x half> @llvm.vp.uitofp.v4f16.v4i7(<4 x i7>, <4 x i1>, i32) + +define <4 x half> @vuitofp_v4f16_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vuitofp_v4f16_v4i7: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vand.vx v9, v8, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl) + ret <4 x half> %v +} + declare <4 x half> @llvm.vp.uitofp.v4f16.v4i8(<4 x i8>, <4 x i1>, i32) define <4 x half> @vuitofp_v4f16_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll index e4c5d030f15fd8..83d197c4504806 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll @@ -375,6 +375,30 @@ define @vsitofp_nxv1i8_nxv1f16( %va) { ret %evec } +define @vsitofp_nxv1i7_nxv1f16( %va) { +; CHECK-LABEL: vsitofp_nxv1i7_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v9, v8, 1 +; CHECK-NEXT: vfwcvt.f.x.v v8, v9 +; CHECK-NEXT: ret + %evec = sitofp %va to + ret %evec +} + +define @vuitofp_nxv1i7_nxv1f16( %va) { +; CHECK-LABEL: vuitofp_nxv1i7_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 
+; CHECK-NEXT: ret + %evec = uitofp %va to + ret %evec +} + define @vuitofp_nxv1i8_nxv1f16( %va) { ; CHECK-LABEL: vuitofp_nxv1i8_nxv1f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll index e818ce85af595e..6540888501373e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll @@ -2,6 +2,21 @@ ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s +declare @llvm.vp.sitofp.nxv2f16.nxv2i7(, , i32) + +define @vsitofp_nxv2f16_nxv2i7( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsitofp_nxv2f16_nxv2i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v9, v8, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sitofp.nxv2f16.nxv2i7( %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.sitofp.nxv2f16.nxv2i8(, , i32) define @vsitofp_nxv2f16_nxv2i8( %va, %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll index 2df40d4694423b..9dc0c959209534 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll @@ -2,6 +2,21 @@ ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s +declare @llvm.vp.uitofp.nxv2f16.nxv2i7(, , i32) + +define @vuitofp_nxv2f16_nxv2i7( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vuitofp_nxv2f16_nxv2i7: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu +; CHECK-NEXT: vand.vx v9, v8, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: ret + %v = call 
@llvm.vp.uitofp.nxv2f16.nxv2i7( %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.uitofp.nxv2f16.nxv2i8(, , i32) define @vuitofp_nxv2f16_nxv2i8( %va, %m, i32 zeroext %evl) { From d6994f7ccc25b81f59f5daf5374676ac2b00f08a Mon Sep 17 00:00:00 2001 From: Sheng Date: Sun, 22 May 2022 10:30:08 +0800 Subject: [PATCH 181/908] [M68k][Disassembler] Fix decoding conflict This diff fixes decoding conflict between move instructions and their tail-call counterpart Reviewed By: myhsu Differential Revision: https://reviews.llvm.org/D125948 --- llvm/lib/Target/M68k/M68kInstrData.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td index 1de461f6e9ac71..863432b9400526 100644 --- a/llvm/lib/Target/M68k/M68kInstrData.td +++ b/llvm/lib/Target/M68k/M68kInstrData.td @@ -205,7 +205,9 @@ let Pattern = [(null_frag)] in { foreach TYPE = [MxType16, MxType32] in def MOV # TYPE.Size # REG # AM # _TC : MxMove_RM("MxOp"#TYPE.Size#"AddrMode_"#AM), - !cast("MxMoveSrcOpEnc_"#AM)>; + !cast("MxMoveSrcOpEnc_"#AM)> { + let isCodeGenOnly = true; + } } // foreach AM } // let Pattern From 09865ae95dbf0322bbf3e7b3847b2b11373a0297 Mon Sep 17 00:00:00 2001 From: Sheng Date: Sun, 22 May 2022 10:34:25 +0800 Subject: [PATCH 182/908] [NFC][M68k][test] Add disassembler tests for move instructions --- llvm/test/MC/Disassembler/M68k/data.txt | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/llvm/test/MC/Disassembler/M68k/data.txt b/llvm/test/MC/Disassembler/M68k/data.txt index c29ca08c3a41c7..1a428fdd389930 100644 --- a/llvm/test/MC/Disassembler/M68k/data.txt +++ b/llvm/test/MC/Disassembler/M68k/data.txt @@ -4,3 +4,51 @@ 0x20 0x49 # CHECK: lea (50,%a0), %a1 0x43 0xe8 0x00 0x32 + +# CHECK: move.w (%a2), %a0 +0x30 0x52 + +# CHECK: move.w (%a1), %d2 +0x34 0x11 + +# CHECK: move.w (%a2)+, %a4 +0x38 0x5a + +# CHECK: move.w (%a0)+, %d2 +0x34 0x18 + +# CHECK: move.w -(%sp), %a1 +0x32 0x67 + +# CHECK: 
move.w -(%a6), %d7 +0x3e 0x26 + +# CHECK: move.w (12576,%a0), %a5 +0x3a 0x68 0x31 0x20 + +# CHECK: move.w (256,%sp), %d1 +0x32 0x2f 0x01 0x00 + +# CHECK: move.l (32,%a0,%d1), %sp +0x2e 0x70 0x18 0x20 + +# CHECK: move.l (64,%sp,%a0), %d0 +0x20 0x37 0x88 0x40 + +# CHECK: move.l $f0000000, %a5 +0x2a 0x79 0x00 0x00 0xf0 0x00 + +# CHECK: move.l $1, %d0 +0x20 0x39 0x00 0x01 0x00 0x00 + +# CHECK: move.l (32768,%pc), %a2 +0x24 0x7a 0x80 0x00 + +# CHECK: move.l (16384,%pc), %d1 +0x22 0x3a 0x40 0x00 + +# CHECK: move.l (128,%pc,%a0), %a5 +0x2a 0x7b 0x88 0x80 + +# CHECK: move.l (129,%pc,%d2), %d3 +0x26 0x3b 0x28 0x81 From af77b1d99016fde66edd9e72d907c03fe4511215 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sun, 22 May 2022 09:28:39 +0100 Subject: [PATCH 183/908] [clang-tidy] add support for Demorgan conversions to readability-simplify-bool-expr Adds support for recognising and converting boolean expressions that can be simplified using De Morgans Law. This is a different implementation to D124650. 
Fixes https://github.com/llvm/llvm-project/issues/55092 Reviewed By: LegalizeAdulthood Differential Revision: https://reviews.llvm.org/D124806 --- .../readability/SimplifyBooleanExprCheck.cpp | 262 +++++++++++++++++- .../readability/SimplifyBooleanExprCheck.h | 5 + clang-tools-extra/docs/ReleaseNotes.rst | 4 + .../readability-simplify-boolean-expr.rst | 14 +- ...eadability-simplify-bool-expr-demorgan.cpp | 87 ++++++ 5 files changed, 370 insertions(+), 2 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp index 323cf1f2d5ecbd..3502375ea1ba60 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -9,6 +9,7 @@ #include "SimplifyBooleanExprCheck.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Lex/Lexer.h" +#include "llvm/Support/SaveAndRestore.h" #include #include @@ -257,6 +258,8 @@ static bool containsDiscardedTokens(const ASTContext &Context, } class SimplifyBooleanExprCheck::Visitor : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + public: Visitor(SimplifyBooleanExprCheck *Check, ASTContext &Context) : Check(Check), Context(Context) {} @@ -503,7 +506,73 @@ class SimplifyBooleanExprCheck::Visitor : public RecursiveASTVisitor { return true; } + static bool isUnaryLNot(const Expr *E) { + return isa(E) && + cast(E)->getOpcode() == UO_LNot; + } + + template + static bool checkEitherSide(const BinaryOperator *BO, Functor Func) { + return Func(BO->getLHS()) || Func(BO->getRHS()); + } + + static bool nestedDemorgan(const Expr *E, unsigned NestingLevel) { + const auto *BO = dyn_cast(E->IgnoreUnlessSpelledInSource()); + if (!BO) + return false; + if (!BO->getType()->isBooleanType()) + return false; + switch 
(BO->getOpcode()) { + case BO_LT: + case BO_GT: + case BO_LE: + case BO_GE: + case BO_EQ: + case BO_NE: + return true; + case BO_LAnd: + case BO_LOr: + if (checkEitherSide(BO, isUnaryLNot)) + return true; + if (NestingLevel) { + if (checkEitherSide(BO, [NestingLevel](const Expr *E) { + return nestedDemorgan(E, NestingLevel - 1); + })) + return true; + } + return false; + default: + return false; + } + } + + bool TraverseUnaryOperator(UnaryOperator *Op) { + if (!Check->SimplifyDeMorgan || Op->getOpcode() != UO_LNot) + return Base::TraverseUnaryOperator(Op); + Expr *SubImp = Op->getSubExpr()->IgnoreImplicit(); + auto *Parens = dyn_cast(SubImp); + auto *BinaryOp = + Parens + ? dyn_cast(Parens->getSubExpr()->IgnoreImplicit()) + : dyn_cast(SubImp); + if (!BinaryOp || !BinaryOp->isLogicalOp() || + !BinaryOp->getType()->isBooleanType()) + return Base::TraverseUnaryOperator(Op); + if (checkEitherSide(BinaryOp, isUnaryLNot) || + checkEitherSide(BinaryOp, + [](const Expr *E) { return nestedDemorgan(E, 1); })) { + if (Check->reportDeMorgan(Context, Op, BinaryOp, !IsProcessing, parent(), + Parens) && + !Check->areDiagsSelfContained()) { + llvm::SaveAndRestore RAII(IsProcessing, true); + return Base::TraverseUnaryOperator(Op); + } + } + return Base::TraverseUnaryOperator(Op); + } + private: + bool IsProcessing = false; SimplifyBooleanExprCheck *Check; SmallVector StmtStack; ASTContext &Context; @@ -514,7 +583,8 @@ SimplifyBooleanExprCheck::SimplifyBooleanExprCheck(StringRef Name, : ClangTidyCheck(Name, Context), ChainedConditionalReturn(Options.get("ChainedConditionalReturn", false)), ChainedConditionalAssignment( - Options.get("ChainedConditionalAssignment", false)) {} + Options.get("ChainedConditionalAssignment", false)), + SimplifyDeMorgan(Options.get("SimplifyDeMorgan", true)) {} static bool containsBoolLiteral(const Expr *E) { if (!E) @@ -685,6 +755,196 @@ void SimplifyBooleanExprCheck::replaceWithAssignment(const ASTContext &Context, Range, Replacement); } +/// Swaps a \c 
BinaryOperator opcode from `&&` to `||` or vice-versa. +static bool flipDemorganOperator(llvm::SmallVectorImpl &Output, + const BinaryOperator *BO) { + assert(BO->isLogicalOp()); + if (BO->getOperatorLoc().isMacroID()) + return true; + Output.push_back(FixItHint::CreateReplacement( + BO->getOperatorLoc(), BO->getOpcode() == BO_LAnd ? "||" : "&&")); + return false; +} + +static BinaryOperatorKind getDemorganFlippedOperator(BinaryOperatorKind BO) { + assert(BinaryOperator::isLogicalOp(BO)); + return BO == BO_LAnd ? BO_LOr : BO_LAnd; +} + +static bool flipDemorganSide(SmallVectorImpl &Fixes, + const ASTContext &Ctx, const Expr *E, + Optional OuterBO); + +/// Inverts \p BinOp, Removing \p Parens if they exist and are safe to remove. +/// returns \c true if there is any issue building the Fixes, \c false +/// otherwise. +static bool flipDemorganBinaryOperator(SmallVectorImpl &Fixes, + const ASTContext &Ctx, + const BinaryOperator *BinOp, + Optional OuterBO, + const ParenExpr *Parens = nullptr) { + switch (BinOp->getOpcode()) { + case BO_LAnd: + case BO_LOr: { + // if we have 'a && b' or 'a || b', use demorgan to flip it to '!a || !b' + // or '!a && !b'. + if (flipDemorganOperator(Fixes, BinOp)) + return true; + auto NewOp = getDemorganFlippedOperator(BinOp->getOpcode()); + if (OuterBO) { + // The inner parens are technically needed in a fix for + // `!(!A1 && !(A2 || A3)) -> (A1 || (A2 && A3))`, + // however this would trip the LogicalOpParentheses warning. + // FIXME: Make this user configurable or detect if that warning is + // enabled. 
+ constexpr bool LogicalOpParentheses = true; + if (((*OuterBO == NewOp) || (!LogicalOpParentheses && + (*OuterBO == BO_LOr && NewOp == BO_LAnd))) && + Parens) { + if (!Parens->getLParen().isMacroID() && + !Parens->getRParen().isMacroID()) { + Fixes.push_back(FixItHint::CreateRemoval(Parens->getLParen())); + Fixes.push_back(FixItHint::CreateRemoval(Parens->getRParen())); + } + } + if (*OuterBO == BO_LAnd && NewOp == BO_LOr && !Parens) { + Fixes.push_back(FixItHint::CreateInsertion(BinOp->getBeginLoc(), "(")); + Fixes.push_back(FixItHint::CreateInsertion( + Lexer::getLocForEndOfToken(BinOp->getEndLoc(), 0, + Ctx.getSourceManager(), + Ctx.getLangOpts()), + ")")); + } + } + if (flipDemorganSide(Fixes, Ctx, BinOp->getLHS(), NewOp) || + flipDemorganSide(Fixes, Ctx, BinOp->getRHS(), NewOp)) + return true; + return false; + }; + case BO_LT: + case BO_GT: + case BO_LE: + case BO_GE: + case BO_EQ: + case BO_NE: + // For comparison operators, just negate the comparison. + if (BinOp->getOperatorLoc().isMacroID()) + return true; + Fixes.push_back(FixItHint::CreateReplacement( + BinOp->getOperatorLoc(), + BinaryOperator::getOpcodeStr( + BinaryOperator::negateComparisonOp(BinOp->getOpcode())))); + return false; + default: + // for any other binary operator, just use logical not and wrap in + // parens. 
+ if (Parens) { + if (Parens->getBeginLoc().isMacroID()) + return true; + Fixes.push_back(FixItHint::CreateInsertion(Parens->getBeginLoc(), "!")); + } else { + if (BinOp->getBeginLoc().isMacroID() || BinOp->getEndLoc().isMacroID()) + return true; + Fixes.append({FixItHint::CreateInsertion(BinOp->getBeginLoc(), "!("), + FixItHint::CreateInsertion( + Lexer::getLocForEndOfToken(BinOp->getEndLoc(), 0, + Ctx.getSourceManager(), + Ctx.getLangOpts()), + ")")}); + } + break; + } + return false; +} + +static bool flipDemorganSide(SmallVectorImpl &Fixes, + const ASTContext &Ctx, const Expr *E, + Optional OuterBO) { + if (isa(E) && cast(E)->getOpcode() == UO_LNot) { + // if we have a not operator, '!a', just remove the '!'. + if (cast(E)->getOperatorLoc().isMacroID()) + return true; + Fixes.push_back( + FixItHint::CreateRemoval(cast(E)->getOperatorLoc())); + return false; + } + if (const auto *BinOp = dyn_cast(E)) { + return flipDemorganBinaryOperator(Fixes, Ctx, BinOp, OuterBO); + } + if (const auto *Paren = dyn_cast(E)) { + if (const auto *BinOp = dyn_cast(Paren->getSubExpr())) { + return flipDemorganBinaryOperator(Fixes, Ctx, BinOp, OuterBO, Paren); + } + } + // Fallback case just insert a logical not operator. 
+ if (E->getBeginLoc().isMacroID()) + return true; + Fixes.push_back(FixItHint::CreateInsertion(E->getBeginLoc(), "!")); + return false; +} + +static bool shouldRemoveParens(const Stmt *Parent, + BinaryOperatorKind NewOuterBinary, + const ParenExpr *Parens) { + if (!Parens) + return false; + if (!Parent) + return true; + switch (Parent->getStmtClass()) { + case Stmt::BinaryOperatorClass: { + const auto *BO = cast(Parent); + if (BO->isAssignmentOp()) + return true; + if (BO->isCommaOp()) + return true; + if (BO->getOpcode() == NewOuterBinary) + return true; + return false; + } + case Stmt::UnaryOperatorClass: + case Stmt::CXXRewrittenBinaryOperatorClass: + return false; + default: + return true; + } +} + +bool SimplifyBooleanExprCheck::reportDeMorgan(const ASTContext &Context, + const UnaryOperator *Outer, + const BinaryOperator *Inner, + bool TryOfferFix, + const Stmt *Parent, + const ParenExpr *Parens) { + assert(Outer); + assert(Inner); + assert(Inner->isLogicalOp()); + + auto Diag = + diag(Outer->getBeginLoc(), + "boolean expression can be simplified by DeMorgan's theorem"); + Diag << Outer->getSourceRange(); + // If we have already fixed this with a previous fix, don't attempt any fixes + if (!TryOfferFix) + return false; + if (Outer->getOperatorLoc().isMacroID()) + return false; + SmallVector Fixes; + auto NewOpcode = getDemorganFlippedOperator(Inner->getOpcode()); + if (shouldRemoveParens(Parent, NewOpcode, Parens)) { + Fixes.push_back(FixItHint::CreateRemoval( + SourceRange(Outer->getOperatorLoc(), Parens->getLParen()))); + Fixes.push_back(FixItHint::CreateRemoval(Parens->getRParen())); + } else { + Fixes.push_back(FixItHint::CreateRemoval(Outer->getOperatorLoc())); + } + if (flipDemorganOperator(Fixes, Inner)) + return false; + if (flipDemorganSide(Fixes, Context, Inner->getLHS(), NewOpcode) || + flipDemorganSide(Fixes, Context, Inner->getRHS(), NewOpcode)) + return false; + Diag << Fixes; + return true; +} } // namespace readability } // namespace tidy } 
// namespace clang diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h index 2724c9a4eddd71..72d8f3354b890c 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h @@ -58,12 +58,17 @@ class SimplifyBooleanExprCheck : public ClangTidyCheck { const IfStmt *If, const Expr *ThenReturn); + bool reportDeMorgan(const ASTContext &Context, const UnaryOperator *Outer, + const BinaryOperator *Inner, bool TryOfferFix, + const Stmt *Parent, const ParenExpr *Parens); + void issueDiag(const ASTContext &Result, SourceLocation Loc, StringRef Description, SourceRange ReplacementRange, StringRef Replacement); const bool ChainedConditionalReturn; const bool ChainedConditionalAssignment; + const bool SimplifyDeMorgan; }; } // namespace readability diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 41a9723ffeac8b..74e77f4a7de11a 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -194,6 +194,10 @@ Changes in existing checks ` when the parameter is referenced by an lvalue. +- Expanded :doc:`readability-simplify-boolean-expr + ` to simplify expressions + using DeMorgan's Theorem. 
+ Removed checks ^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst index ef28acd4ddb322..240e7be64ad45c 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst @@ -4,7 +4,8 @@ readability-simplify-boolean-expr ================================= Looks for boolean expressions involving boolean constants and simplifies -them to use the appropriate boolean expression directly. +them to use the appropriate boolean expression directly. Simplifies +boolean expressions by application of DeMorgan's Theorem. Examples: @@ -27,6 +28,12 @@ Initial expression Result ``if (e) b = false; else b = true;`` ``b = !e;`` ``if (e) return true; return false;`` ``return e;`` ``if (e) return false; return true;`` ``return !e;`` +``!(!a || b)`` ``a && !b`` +``!(a || !b)`` ``!a && b`` +``!(!a || !b)`` ``a && b`` +``!(!a && b)`` ``a || !b`` +``!(a && !b)`` ``!a || b`` +``!(!a && !b)`` ``a || b`` =========================================== ================ The resulting expression ``e`` is modified as follows: @@ -84,3 +91,8 @@ Options If `true`, conditional boolean assignments at the end of an ``if/else if`` chain will be transformed. Default is `false`. + +.. option:: SimplifyDeMorgan + + If `true`, DeMorgan's Theorem will be applied to simplify negated + conjunctions and disjunctions. Default is `true`. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp new file mode 100644 index 00000000000000..12a9eb65352d86 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp @@ -0,0 +1,87 @@ +// RUN: %check_clang_tidy %s readability-simplify-boolean-expr %t + +void eat(bool); + +void foo(bool A1, bool A2, bool A3, bool A4) { + bool X; + X = !(!A1 || A2); + X = !(A1 || !A2); + X = !(!A1 || !A2); + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = A1 && !A2; + // CHECK-FIXES-NEXT: X = !A1 && A2; + // CHECK-FIXES-NEXT: X = A1 && A2; + + X = !(!A1 && A2); + X = !(A1 && !A2); + X = !(!A1 && !A2); + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = A1 || !A2; + // CHECK-FIXES-NEXT: X = !A1 || A2; + // CHECK-FIXES-NEXT: X = A1 || A2; + + X = !(!A1 && !A2 && !A3); + X = !(!A1 && (!A2 && !A3)); + X = !(!A1 && (A2 && A3)); + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = A1 || A2 || A3; + // CHECK-FIXES-NEXT: X = A1 || A2 || A3; + // CHECK-FIXES-NEXT: X = 
A1 || !A2 || !A3; + + X = !(A1 && A2 == A3); + X = !(!A1 && A2 > A3); + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = !A1 || A2 != A3; + // CHECK-FIXES-NEXT: X = A1 || A2 <= A3; + + // Ensure the check doesn't try to combine fixes for the inner and outer demorgan simplification. + X = !(!A1 && !(!A2 && !A3)); + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-2]]:16: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = A1 || (!A2 && !A3); + + // Testing to see how it handles parens + X = !(A1 && !A2 && !A3); + X = !(A1 && !A2 || !A3); + X = !(!A1 || A2 && !A3); + X = !((A1 || !A2) && !A3); + X = !((A1 || !A2) || !A3); + // CHECK-MESSAGES: :[[@LINE-5]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-5]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-5]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-5]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-5]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = !A1 || A2 || A3; + // CHECK-FIXES-NEXT: X = (!A1 || A2) && A3; + // CHECK-FIXES-NEXT: X = A1 && (!A2 || A3); + // CHECK-FIXES-NEXT: X = (!A1 && A2) || A3; + // CHECK-FIXES-NEXT: X = !A1 && A2 && A3; + X = !((A1 || A2) && (!A3 || A4)); + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = (!A1 && !A2) || (A3 && !A4); + + eat(!(!A1 && !A2)); + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + 
// CHECK-FIXES: eat(A1 || A2); + + bool Init = !(!A1 || !A2); + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: bool Init = A1 && A2; + + X = A1 && !(!A2 || !A3); + X = A1 || !(!A2 || !A3); + X = A1 && !(!A2 && !A3); + // CHECK-MESSAGES: :[[@LINE-3]]:13: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:13: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES: :[[@LINE-3]]:13: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES: X = A1 && A2 && A3; + // CHECK-FIXES-NEXT: X = A1 || (A2 && A3); + // CHECK-FIXES-NEXT: X = A1 && (A2 || A3); +} From 9d0d24eec44787b4a762d01866d0b9b733bee778 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sun, 22 May 2022 10:28:52 +0100 Subject: [PATCH 184/908] [clang-tidy] Fix not updating storeOptions after af77b1d9901 --- .../clang-tidy/readability/SimplifyBooleanExprCheck.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp index 3502375ea1ba60..f35f7839dc257a 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -666,6 +666,7 @@ void SimplifyBooleanExprCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "ChainedConditionalReturn", ChainedConditionalReturn); Options.store(Opts, "ChainedConditionalAssignment", ChainedConditionalAssignment); + Options.store(Opts, "SimplifyDeMorgan", SimplifyDeMorgan); } void SimplifyBooleanExprCheck::registerMatchers(MatchFinder *Finder) { From 258dac43d669bea0116124030b2dadf8ff9b922f Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 28 Apr 2022 21:06:05 +0100 Subject: [PATCH 185/908] [SVE] Enable use of 32bit gather/scatter indices for 
fixed length vectors Differential Revision: https://reviews.llvm.org/D125193 --- llvm/include/llvm/CodeGen/TargetLowering.h | 4 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++--- .../Target/AArch64/AArch64ISelLowering.cpp | 18 +++++--- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +- .../AArch64/sve-fixed-length-masked-gather.ll | 45 ++++++------------- .../sve-fixed-length-masked-scatter.ll | 45 ++++++------------- 8 files changed, 52 insertions(+), 77 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 5fc8757c55febe..a582838ff663e0 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1390,7 +1390,9 @@ class TargetLoweringBase { // Returns true if VT is a legal index type for masked gathers/scatters // on this target - virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; } + virtual bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const { + return false; + } /// Return how the condition code should be treated: either it is legal, needs /// to be expanded to some other code sequence, or the target has a custom diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eae9109fd1ab51..958f2dbc614596 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10441,14 +10441,14 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, } // Fold sext/zext of index into index type. -bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, +bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // It's always safe to look through zero extends. 
if (Index.getOpcode() == ISD::ZERO_EXTEND) { SDValue Op = Index.getOperand(0); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { IndexType = ISD::UNSIGNED_SCALED; Index = Op; return true; @@ -10463,7 +10463,7 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, if (Index.getOpcode() == ISD::SIGN_EXTEND && ISD::isIndexTypeSigned(IndexType)) { SDValue Op = Index.getOperand(0); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { Index = Op; return true; } @@ -10494,7 +10494,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MSC->isTruncatingStore()); } - if (refineIndexType(Index, IndexType, DAG)) { + if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, MSC->getMemOperand(), IndexType, @@ -10590,7 +10590,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); } - if (refineIndexType(Index, IndexType, DAG)) { + if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather( DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f8b71ee6428777..f3f10c60447e2d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4520,13 +4520,19 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; } -bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { - if (VT.getVectorElementType() == MVT::i32 && - 
VT.getVectorElementCount().getKnownMinValue() >= 4 && - !VT.isFixedLengthVector()) - return true; +bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, + EVT DataVT) const { + // SVE only supports implicit extension of 32-bit indices. + if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32) + return false; - return false; + // Indices cannot be smaller than the main data type. + if (IndexVT.getScalarSizeInBits() < DataVT.getScalarSizeInBits()) + return false; + + // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit + // element container type, which would violate the previous clause. + return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2; } bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 6d6b7d44b57e25..52ead327d0bedc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1090,7 +1090,7 @@ class AArch64TargetLowering : public TargetLowering { } bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override; - bool shouldRemoveExtendFromGSIndex(EVT VT) const override; + bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override; bool isVectorLoadExtDesirable(SDValue ExtVal) const override; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 458b3f3a4073e9..527753a6e22cbe 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11656,7 +11656,8 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( return Result; } -bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { +bool 
RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, + EVT DataVT) const { return false; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 11a30c834fc233..1c461fb38ceb0e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -560,7 +560,7 @@ class RISCVTargetLowering : public TargetLowering { const RISCVRegisterInfo *TRI); MVT getContainerForFixedLengthVector(MVT VT) const; - bool shouldRemoveExtendFromGSIndex(EVT VT) const override; + bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override; bool isLegalElementTypeForRVV(Type *ScalarTy) const; diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll index 0922b4bd0eabbd..69e063e122491b 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -959,19 +959,16 @@ define void @masked_gather_v32f64(<32 x double>* %a, <32 x double*>* %b) #0 { ; The above tests test the types, the below tests check that the addressing ; modes still function -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_gather_32b_scaled_sext_f16(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_sext_f16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d, lsl #1] -; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s +; VBITS_GE_2048-NEXT: 
ld1h { z0.s }, p1/z, [x2, z1.s, sxtw #1] ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -985,18 +982,14 @@ define void @masked_gather_32b_scaled_sext_f16(<32 x half>* %a, <32 x i32>* %b, ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_gather_32b_scaled_sext_f32(<32 x float>* %a, <32 x i32>* %b, float* %base) #0 { ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_sext_f32: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d, lsl #2] -; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s +; VBITS_GE_2048-NEXT: ld1w { z0.s }, p1/z, [x2, z1.s, sxtw #2] ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret %cvals = load <32 x float>, <32 x float>* %a @@ -1009,7 +1002,6 @@ define void @masked_gather_32b_scaled_sext_f32(<32 x float>* %a, <32 x i32>* %b, ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_gather_32b_scaled_sext_f64(<32 x double>* %a, <32 x i32>* %b, double* %base) #0 { ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_sext_f64: ; VBITS_GE_2048: // %bb.0: @@ -1030,19 +1022,16 @@ define void @masked_gather_32b_scaled_sext_f64(<32 x double>* %a, <32 x i32>* %b ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_gather_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; 
VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d, lsl #1] -; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s +; VBITS_GE_2048-NEXT: ld1h { z0.s }, p1/z, [x2, z1.s, uxtw #1] ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1056,19 +1045,16 @@ define void @masked_gather_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_gather_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { ; VBITS_GE_2048-LABEL: masked_gather_32b_unscaled_sext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d] -; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s +; VBITS_GE_2048-NEXT: ld1h { z0.s }, p1/z, [x2, z1.s, sxtw] ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1083,19 +1069,16 @@ define void @masked_gather_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8 ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_gather_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { ; VBITS_GE_2048-LABEL: 
masked_gather_32b_unscaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d] -; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s +; VBITS_GE_2048-NEXT: ld1h { z0.s }, p1/z, [x2, z1.s, uxtw] ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_2048-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll index 79d27b4ca7584e..edf937ab562e53 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -871,20 +871,17 @@ define void @masked_scatter_v32f64(<32 x double>* %a, <32 x double*>* %b) #0 { ; The above tests test the types, the below tests check that the addressing ; modes still function -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_scatter_32b_scaled_sext_f16(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { ; VBITS_GE_2048-LABEL: masked_scatter_32b_scaled_sext_f16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: punpklo p0.h, 
p0.b -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d, lsl #1] +; VBITS_GE_2048-NEXT: st1h { z0.s }, p0, [x2, z1.s, sxtw #1] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -895,18 +892,14 @@ define void @masked_scatter_32b_scaled_sext_f16(<32 x half>* %a, <32 x i32>* %b, ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_scatter_32b_scaled_sext_f32(<32 x float>* %a, <32 x i32>* %b, float* %base) #0 { ; VBITS_GE_2048-LABEL: masked_scatter_32b_scaled_sext_f32: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [x2, z1.d, lsl #2] +; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x2, z1.s, sxtw #2] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x float>, <32 x float>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -917,7 +910,6 @@ define void @masked_scatter_32b_scaled_sext_f32(<32 x float>* %a, <32 x i32>* %b ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_scatter_32b_scaled_sext_f64(<32 x double>* %a, <32 x i32>* %b, double* %base) #0 { ; VBITS_GE_2048-LABEL: masked_scatter_32b_scaled_sext_f64: ; VBITS_GE_2048: // %bb.0: @@ -936,20 +928,17 @@ define void @masked_scatter_32b_scaled_sext_f64(<32 x double>* %a, <32 x i32>* % ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_scatter_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { ; VBITS_GE_2048-LABEL: masked_scatter_32b_scaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: 
ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d, lsl #1] +; VBITS_GE_2048-NEXT: st1h { z0.s }, p0, [x2, z1.s, uxtw #1] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -960,20 +949,17 @@ define void @masked_scatter_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, hal ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void @masked_scatter_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { ; VBITS_GE_2048-LABEL: masked_scatter_32b_unscaled_sext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d] +; VBITS_GE_2048-NEXT: st1h { z0.s }, p0, [x2, z1.s, sxtw] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -985,20 +971,17 @@ define void @masked_scatter_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i ret void } -; NOTE: This produces an non-optimal addressing mode due to a temporary workaround define void 
@masked_scatter_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { ; VBITS_GE_2048-LABEL: masked_scatter_32b_unscaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p1.s, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d] +; VBITS_GE_2048-NEXT: st1h { z0.s }, p0, [x2, z1.s, uxtw] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b From 69c0af3de224131cca844debd12872d4d1686800 Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Sun, 22 May 2022 10:12:50 -0400 Subject: [PATCH 186/908] CWG 1394: Incomplete types as parameters of deleted functions Follow-up to previous commit: Add a DR test-case so the make_cxx_dr_status automation works. Bug: #52802 Fixes: 50b1faf5c188956fb59ea7d9f9d470591771aedb --- clang/test/CXX/drs/dr13xx.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/test/CXX/drs/dr13xx.cpp b/clang/test/CXX/drs/dr13xx.cpp index 57d0f3cc97cc11..9efbd52ec58f14 100644 --- a/clang/test/CXX/drs/dr13xx.cpp +++ b/clang/test/CXX/drs/dr13xx.cpp @@ -450,6 +450,13 @@ namespace dr1391 { // dr1391: partial } } +namespace dr1394 { // dr1394: 15 +#if __cplusplus >= 201103L +struct Incomplete; +Incomplete f(Incomplete) = delete; // well-formed +#endif +} + namespace dr1399 { // dr1399: dup 1388 template void f(T..., int, T...) 
{} // expected-note {{candidate}} expected-error 0-1{{C++11}} void g() { From 97590baead08932327b8a4ac2edf4b86628270cc Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 22 May 2022 16:24:13 +0100 Subject: [PATCH 187/908] [LV] Widen ptr-inductions with scalar uses for scalable VFs. Current codegen only supports scalarization of pointer inductions for scalable VFs if they are uniform. After 3bebec659 we now may enter the scalarization code path in VPWidenPointerInductionRecipe::execute for scalable vectors. Fall back to widening for scalable vectors if necessary. This should fix a build failure when bootstrapping LLVM with SVE, e.g. https://lab.llvm.org/buildbot/#/builders/176/builds/1723 --- .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- llvm/lib/Transforms/Vectorize/VPlan.cpp | 11 +- llvm/lib/Transforms/Vectorize/VPlan.h | 3 + .../AArch64/sve-live-out-pointer-induction.ll | 106 ++++++++++++++++++ 4 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 725899535c7dc6..bca760a9caccb2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9610,8 +9610,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { auto *IVR = getParent()->getPlan()->getCanonicalIV(); PHINode *CanonicalIV = cast(State.get(IVR, 0)); - if (all_of(users(), - [this](const VPUser *U) { return U->usesScalars(this); })) { + if (onlyScalarsGenerated(State.VF)) { // This is the normalized GEP that starts counting at zero. 
Value *PtrInd = State.Builder.CreateSExtOrTrunc( CanonicalIV, IndDesc.getStep()->getType()); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index dff6fac4291de5..17df347e0866f9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1006,9 +1006,7 @@ void VPlan::execute(VPTransformState *State) { auto *WidenPhi = cast(&R); // TODO: Split off the case that all users of a pointer phi are scalar // from the VPWidenPointerInductionRecipe. - if (all_of(WidenPhi->users(), [WidenPhi](const VPUser *U) { - return U->usesScalars(WidenPhi); - })) + if (WidenPhi->onlyScalarsGenerated(State->VF)) continue; auto *GEP = cast(State->get(WidenPhi, 0)); @@ -1517,6 +1515,13 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) { + bool IsUniform = vputils::onlyFirstLaneUsed(this); + return all_of(users(), + [&](const VPUser *U) { return U->usesScalars(this); }) && + (IsUniform || !VF.isScalable()); +} + void VPExpandSCEVRecipe::execute(VPTransformState &State) { assert(!State.Instance && "cannot be used in per-lane"); const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5be38dce53dbd3..7f4a5a98d824da 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1245,6 +1245,9 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { /// Generate vector values for the pointer induction. void execute(VPTransformState &State) override; + /// Returns true if only scalar values will be generated. + bool onlyScalarsGenerated(ElementCount VF); + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll new file mode 100644 index 00000000000000..718b2e868fea57 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-vectorize -mtriple=aarch64-unknown -mattr=+sve -S %s | FileCheck %s + +define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START_22:%.*]] = ptrtoint ptr [[START_2:%.*]] to i64 +; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START_22]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 8 +; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP9]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 8, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP11]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP16:%.*]] = add [[DOTSPLAT]], [[TMP15]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP16]], shufflevector ( insertelement ( poison, i64 8, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP11]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP18]], i32 0 +; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector [[DOTSPLATINSERT5]], poison, zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP20:%.*]] = add [[DOTSPLAT6]], [[TMP19]] +; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul [[TMP20]], shufflevector ( insertelement ( poison, i64 8, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP7]] +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 
[[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP29]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 2 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]] +; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 4 +; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: store i64 0, ptr [[IV_2]], align 4 +; CHECK-NEXT: [[IV_2_NEXT]] = getelementptr inbounds ptr, ptr [[IV_2]], i64 1 +; CHECK-NEXT: [[IV_1_NEXT]] = getelementptr inbounds ptr, ptr [[IV_1]], i64 1 +; CHECK-NEXT: [[CMP_I_I_NOT_I:%.*]] = icmp eq ptr [[IV_2_NEXT]], [[END]] +; 
CHECK-NEXT: br i1 [[CMP_I_I_NOT_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi ptr [ [[IV_1]], [[LOOP]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret ptr [[RES_LCSSA]] +; +entry: + br label %loop + +loop: + %iv.1 = phi ptr [ %start.1, %entry ], [ %iv.1.next, %loop ] + %iv.2 = phi ptr [ %start.2, %entry ], [ %iv.2.next, %loop ] + store i64 0, ptr %iv.2 + %iv.2.next = getelementptr inbounds ptr, ptr %iv.2, i64 1 + %iv.1.next = getelementptr inbounds ptr, ptr %iv.1, i64 1 + %cmp.i.i.not.i = icmp eq ptr %iv.2.next, %end + br i1 %cmp.i.i.not.i, label %exit, label %loop + +exit: + %res.lcssa = phi ptr [ %iv.1, %loop ] + ret ptr %res.lcssa +} From e547b04d5b2c20bb5d14e49a86837c77573b267a Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 19 May 2022 17:16:49 +0200 Subject: [PATCH 188/908] [SystemZ] Bugfix for symbolic displacements. Properly handle the case where only the second operand of e.g. an MVC instruction uses a fixup for the displacement. 
Reviewed By: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D125982 --- .../Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 9 +++++++-- llvm/test/MC/SystemZ/fixups.s | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index 8f9f9c66feed22..242f566da2c98a 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -37,6 +37,8 @@ class SystemZMCCodeEmitter : public MCCodeEmitter { const MCInstrInfo &MCII; MCContext &Ctx; + mutable unsigned MemOpsEmitted; + public: SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : MCII(mcii), Ctx(ctx) { @@ -165,6 +167,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, verifyInstructionPredicates(MI, computeAvailableFeatures(STI.getFeatureBits())); + MemOpsEmitted = 0; uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); unsigned Size = MCII.get(MI.getOpcode()).getSize(); // Big-endian insertion of Size bytes. @@ -191,12 +194,14 @@ getDispOpValue(const MCInst &MI, unsigned OpNum, SmallVectorImpl &Fixups, SystemZ::FixupKind Kind) const { const MCOperand &MO = MI.getOperand(OpNum); - if (MO.isImm()) + if (MO.isImm()) { + ++MemOpsEmitted; return static_cast(MO.getImm()); + } if (MO.isExpr()) { // All instructions follow the pattern where the first displacement has a // 2 bytes offset, and the second one 4 bytes. - unsigned ByteOffs = Fixups.size() == 0 ? 2 : 4; + unsigned ByteOffs = MemOpsEmitted++ == 0 ? 
2 : 4; Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind, MI.getLoc())); assert(Fixups.size() <= 2 && "More than two memory operands in MI?"); diff --git a/llvm/test/MC/SystemZ/fixups.s b/llvm/test/MC/SystemZ/fixups.s index 25202bb82c1c5b..77c71e3c987b18 100644 --- a/llvm/test/MC/SystemZ/fixups.s +++ b/llvm/test/MC/SystemZ/fixups.s @@ -287,6 +287,11 @@ .align 16 vgeg %v0, src(%v0,%r1), 0 +## Fixup for second operand only +# CHECK: mvc 32(8,%r0), src # encoding: [0xd2,0x07,0x00,0x20,0b0000AAAA,A] +# CHECK-NEXT: # fixup A - offset: 4, value: src, kind: FK_390_12 + .align 16 + mvc 32(8,%r0),src # Data relocs # llvm-mc does not show any "encoding" string for data, so we just check the relocs From aa9acb51f69a862547699c5f7fa5c6f6c1353253 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 20 May 2022 17:31:01 -0400 Subject: [PATCH 189/908] [InstCombine] add tests for icmp + sub patterns; NFC --- .../test/Transforms/InstCombine/icmp-range.ll | 203 ++++++++++++++---- 1 file changed, 166 insertions(+), 37 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 66d32533080475..b67356675eba66 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -1,18 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instcombine -S | FileCheck %s ; These should be InstSimplify checks, but most of the code ; is currently only in InstCombine. 
TODO: move supporting code +declare void @use(i8) +declare void @use_vec(<2 x i8>) + ; Definitely out of range define i1 @test_nonzero(i32* nocapture readonly %arg) { -; CHECK-LABEL:test_nonzero -; CHECK: ret i1 true +; CHECK-LABEL: @test_nonzero( +; CHECK-NEXT: ret i1 true +; %val = load i32, i32* %arg, !range !0 %rval = icmp ne i32 %val, 0 ret i1 %rval } define i1 @test_nonzero2(i32* nocapture readonly %arg) { -; CHECK-LABEL:test_nonzero2 -; CHECK: ret i1 false +; CHECK-LABEL: @test_nonzero2( +; CHECK-NEXT: ret i1 false +; %val = load i32, i32* %arg, !range !0 %rval = icmp eq i32 %val, 0 ret i1 %rval @@ -20,9 +26,12 @@ define i1 @test_nonzero2(i32* nocapture readonly %arg) { ; Potentially in range define i1 @test_nonzero3(i32* nocapture readonly %arg) { -; CHECK-LABEL: test_nonzero3 +; CHECK-LABEL: @test_nonzero3( +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ARG:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[RVAL:%.*]] = icmp ne i32 [[VAL]], 0 +; CHECK-NEXT: ret i1 [[RVAL]] +; ; Check that this does not trigger - it wouldn't be legal -; CHECK: icmp %val = load i32, i32* %arg, !range !1 %rval = icmp ne i32 %val, 0 ret i1 %rval @@ -30,16 +39,18 @@ define i1 @test_nonzero3(i32* nocapture readonly %arg) { ; Definitely in range define i1 @test_nonzero4(i8* nocapture readonly %arg) { -; CHECK-LABEL: test_nonzero4 -; CHECK: ret i1 false +; CHECK-LABEL: @test_nonzero4( +; CHECK-NEXT: ret i1 false +; %val = load i8, i8* %arg, !range !2 %rval = icmp ne i8 %val, 0 ret i1 %rval } define i1 @test_nonzero5(i8* nocapture readonly %arg) { -; CHECK-LABEL: test_nonzero5 -; CHECK: ret i1 false +; CHECK-LABEL: @test_nonzero5( +; CHECK-NEXT: ret i1 false +; %val = load i8, i8* %arg, !range !2 %rval = icmp ugt i8 %val, 0 ret i1 %rval @@ -47,8 +58,11 @@ define i1 @test_nonzero5(i8* nocapture readonly %arg) { ; Cheaper checks (most values in range meet requirements) define i1 @test_nonzero6(i8* %argw) { -; CHECK-LABEL: test_nonzero6 -; CHECK: icmp ne i8 %val, 0 +; 
CHECK-LABEL: @test_nonzero6( +; CHECK-NEXT: [[VAL:%.*]] = load i8, i8* [[ARGW:%.*]], align 1, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[RVAL:%.*]] = icmp ne i8 [[VAL]], 0 +; CHECK-NEXT: ret i1 [[RVAL]] +; %val = load i8, i8* %argw, !range !3 %rval = icmp sgt i8 %val, 0 ret i1 %rval @@ -56,8 +70,9 @@ define i1 @test_nonzero6(i8* %argw) { ; Constant not in range, should return true. define i1 @test_not_in_range(i32* nocapture readonly %arg) { -; CHECK-LABEL: test_not_in_range -; CHECK: ret i1 true +; CHECK-LABEL: @test_not_in_range( +; CHECK-NEXT: ret i1 true +; %val = load i32, i32* %arg, !range !0 %rval = icmp ne i32 %val, 6 ret i1 %rval @@ -65,8 +80,11 @@ define i1 @test_not_in_range(i32* nocapture readonly %arg) { ; Constant in range, can not fold. define i1 @test_in_range(i32* nocapture readonly %arg) { -; CHECK-LABEL: test_in_range -; CHECK: icmp ne i32 %val, 3 +; CHECK-LABEL: @test_in_range( +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ARG:%.*]], align 4, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[RVAL:%.*]] = icmp ne i32 [[VAL]], 3 +; CHECK-NEXT: ret i1 [[RVAL]] +; %val = load i32, i32* %arg, !range !0 %rval = icmp ne i32 %val, 3 ret i1 %rval @@ -74,8 +92,9 @@ define i1 @test_in_range(i32* nocapture readonly %arg) { ; Values in range greater than constant. define i1 @test_range_sgt_constant(i32* nocapture readonly %arg) { -; CHECK-LABEL: test_range_sgt_constant -; CHECK: ret i1 true +; CHECK-LABEL: @test_range_sgt_constant( +; CHECK-NEXT: ret i1 true +; %val = load i32, i32* %arg, !range !0 %rval = icmp sgt i32 %val, 0 ret i1 %rval @@ -83,8 +102,9 @@ define i1 @test_range_sgt_constant(i32* nocapture readonly %arg) { ; Values in range less than constant. 
define i1 @test_range_slt_constant(i32* nocapture readonly %arg) { -; CHECK-LABEL: test_range_slt_constant -; CHECK: ret i1 false +; CHECK-LABEL: @test_range_slt_constant( +; CHECK-NEXT: ret i1 false +; %val = load i32, i32* %arg, !range !0 %rval = icmp sgt i32 %val, 6 ret i1 %rval @@ -92,8 +112,9 @@ define i1 @test_range_slt_constant(i32* nocapture readonly %arg) { ; Values in union of multiple sub ranges not equal to constant. define i1 @test_multi_range1(i32* nocapture readonly %arg) { -; CHECK-LABEL: test_multi_range1 -; CHECK: ret i1 true +; CHECK-LABEL: @test_multi_range1( +; CHECK-NEXT: ret i1 true +; %val = load i32, i32* %arg, !range !4 %rval = icmp ne i32 %val, 0 ret i1 %rval @@ -101,11 +122,14 @@ define i1 @test_multi_range1(i32* nocapture readonly %arg) { ; Values in multiple sub ranges not equal to constant, but in ; union of sub ranges could possibly equal to constant. This -; in theory could also be folded and might be implemented in +; in theory could also be folded and might be implemented in ; the future if shown profitable in practice. define i1 @test_multi_range2(i32* nocapture readonly %arg) { -; CHECK-LABEL: test_multi_range2 -; CHECK: icmp ne i32 %val, 7 +; CHECK-LABEL: @test_multi_range2( +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ARG:%.*]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[RVAL:%.*]] = icmp ne i32 [[VAL]], 7 +; CHECK-NEXT: ret i1 [[RVAL]] +; %val = load i32, i32* %arg, !range !4 %rval = icmp ne i32 %val, 7 ret i1 %rval @@ -113,8 +137,12 @@ define i1 @test_multi_range2(i32* nocapture readonly %arg) { ; Values' ranges overlap each other, so it can not be simplified. 
define i1 @test_two_ranges(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) { -; CHECK-LABEL: test_two_ranges -; CHECK: icmp ult i32 %val2, %val1 +; CHECK-LABEL: @test_two_ranges( +; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32* [[ARG1:%.*]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[VAL2:%.*]] = load i32, i32* [[ARG2:%.*]], align 4, !range [[RNG5:![0-9]+]] +; CHECK-NEXT: [[RVAL:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]] +; CHECK-NEXT: ret i1 [[RVAL]] +; %val1 = load i32, i32* %arg1, !range !5 %val2 = load i32, i32* %arg2, !range !6 %rval = icmp ult i32 %val2, %val1 @@ -123,8 +151,9 @@ define i1 @test_two_ranges(i32* nocapture readonly %arg1, i32* nocapture readonl ; Values' ranges do not overlap each other, so it can simplified to false. define i1 @test_two_ranges2(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) { -; CHECK-LABEL: test_two_ranges2 -; CHECK: ret i1 false +; CHECK-LABEL: @test_two_ranges2( +; CHECK-NEXT: ret i1 false +; %val1 = load i32, i32* %arg1, !range !0 %val2 = load i32, i32* %arg2, !range !6 %rval = icmp ult i32 %val2, %val1 @@ -133,18 +162,118 @@ define i1 @test_two_ranges2(i32* nocapture readonly %arg1, i32* nocapture readon ; Values' ranges do not overlap each other, so it can simplified to true. 
define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) { -; CHECK-LABEL: test_two_ranges3 -; CHECK: ret i1 true +; CHECK-LABEL: @test_two_ranges3( +; CHECK-NEXT: ret i1 true +; %val1 = load i32, i32* %arg1, !range !0 %val2 = load i32, i32* %arg2, !range !6 %rval = icmp ugt i32 %val2, %val1 ret i1 %rval } -!0 = !{i32 1, i32 6} -!1 = !{i32 0, i32 6} -!2 = !{i8 0, i8 1} -!3 = !{i8 0, i8 6} +define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) { +; CHECK-LABEL: @sub_ult_zext( +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + %s = sub i8 %x, %y + %r = icmp ult i8 %s, %z + ret i1 %r +} + +define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { +; CHECK-LABEL: @sub_ult_zext_use1( +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: call void @use(i8 [[Z]]) +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + call void @use(i8 %z) + %s = sub i8 %x, %y + %r = icmp ult i8 %s, %z + ret i1 %r +} + +define <2 x i1> @zext_ugt_sub_use2(<2 x i1> %b, <2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @zext_ugt_sub_use2( +; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> +; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use_vec(<2 x i8> [[S]]) +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[S]], [[Z]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %z = zext <2 x i1> %b to <2 x i8> + %s = sub <2 x i8> %x, %y + call void @use_vec(<2 x i8> %s) + %r = icmp ugt <2 x i8> %z, %s + ret <2 x i1> %r +} + +define i1 @sub_ult_zext_use3(i1 %b, i8 %x, i8 %y) { +; CHECK-LABEL: @sub_ult_zext_use3( +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: call void @use(i8 [[Z]]) +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call 
void @use(i8 [[S]]) +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + call void @use(i8 %z) + %s = sub i8 %x, %y + call void @use(i8 %s) + %r = icmp ult i8 %s, %z + ret i1 %r +} + +define i1 @sub_ule_zext(i1 %b, i8 %x, i8 %y) { +; CHECK-LABEL: @sub_ule_zext( +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[S]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + %s = sub i8 %x, %y + %r = icmp ule i8 %s, %z + ret i1 %r +} + +define <2 x i1> @sub_ult_and(<2 x i8> %b, <2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @sub_ult_and( +; CHECK-NEXT: [[A:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[S]], [[A]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %a = and <2 x i8> %b, + %s = sub <2 x i8> %x, %y + %r = icmp ult <2 x i8> %s, %a + ret <2 x i1> %r +} + +define i1 @and_ugt_sub(i8 %b, i8 %x, i8 %y) { +; CHECK-LABEL: @and_ugt_sub( +; CHECK-NEXT: [[A:%.*]] = and i8 [[B:%.*]], 1 +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[A]], [[S]] +; CHECK-NEXT: ret i1 [[R]] +; + %a = and i8 %b, 1 + %s = sub i8 %x, %y + %r = icmp ugt i8 %a, %s + ret i1 %r +} + +!0 = !{i32 1, i32 6} +!1 = !{i32 0, i32 6} +!2 = !{i8 0, i8 1} +!3 = !{i8 0, i8 6} !4 = !{i32 1, i32 6, i32 8, i32 10} -!5 = !{i32 5, i32 10} -!6 = !{i32 8, i32 16} +!5 = !{i32 5, i32 10} +!6 = !{i32 8, i32 16} From 4069cccf3b4ff4afb743d3d371ead9e2d5491e3a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 22 May 2022 11:02:28 -0400 Subject: [PATCH 190/908] [InstCombine] fold icmp with sub and bool This is the specific pattern seen in #53432, but it can be extended in multiple ways: 1. The 'zext' could be an 'and' 2. The 'sub' could be some other binop with a similar ==0 property (udiv). 
There might be some way to generalize using knownbits, but that would require checking that the 'bool' value is created with some instruction that can be replaced with new icmp+logic. https://alive2.llvm.org/ce/z/-KCfpa --- .../InstCombine/InstCombineCompares.cpp | 26 +++++++++++++++++++ .../test/Transforms/InstCombine/icmp-range.ll | 17 +++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 4f20a0699ec50c..e2a6b6b1495b86 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5631,6 +5631,29 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return nullptr; } +/// If one operand of an icmp is effectively a bool (value range of {0,1}), +/// then try to reduce patterns based on that limit. +static Instruction *foldICmpUsingBoolRange(ICmpInst &I, + InstCombiner::BuilderTy &Builder) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + const ICmpInst::Predicate Pred = I.getPredicate(); + + Value *X, *Y, *Z; + if (Pred != ICmpInst::ICMP_ULT || !match(Op0, m_Sub(m_Value(X), m_Value(Y)))) + return nullptr; + + unsigned ExtraUses = !Op0->hasOneUse() + !Op1->hasOneUse(); + + // Sub must be 0 and bool must be true for "ULT": + // (sub X, Y) (X == Y) && Z + if (match(Op1, m_ZExt(m_Value(Z))) && ExtraUses < 2) { + Value *EqXY = Builder.CreateICmpEQ(X, Y); + return BinaryOperator::CreateAnd(EqXY, Z); + } + + return nullptr; +} + llvm::Optional> InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C) { @@ -6058,6 +6081,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpWithDominatingICmp(I)) return Res; + if (Instruction *Res = foldICmpUsingBoolRange(I, Builder)) + return Res; + if (Instruction *Res = foldICmpUsingKnownBits(I)) return Res; diff --git 
a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index b67356675eba66..a29d4cd81b9d1c 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -173,9 +173,8 @@ define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readon define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext( -; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 -; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[R]] ; %z = zext i1 %b to i8 @@ -188,8 +187,8 @@ define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext_use1( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 ; CHECK-NEXT: call void @use(i8 [[Z]]) -; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B]] ; CHECK-NEXT: ret i1 [[R]] ; %z = zext i1 %b to i8 @@ -201,10 +200,10 @@ define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { define <2 x i1> @zext_ugt_sub_use2(<2 x i1> %b, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @zext_ugt_sub_use2( -; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> ; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: call void @use_vec(<2 x i8> [[S]]) -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[S]], [[Z]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %z = zext <2 x i1> %b to <2 x i8> @@ -214,6 +213,8 @@ define <2 x i1> @zext_ugt_sub_use2(<2 x i1> %b, <2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %r } +; negative test - too many extra uses + define i1 
@sub_ult_zext_use3(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext_use3( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 @@ -231,6 +232,8 @@ define i1 @sub_ult_zext_use3(i1 %b, i8 %x, i8 %y) { ret i1 %r } +; negative test - wrong predicate + define i1 @sub_ule_zext(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ule_zext( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 From cba0ebd576228d11817181a9ebc53a1931bb972b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 22 May 2022 12:13:20 -0400 Subject: [PATCH 191/908] Revert "[InstCombine] fold icmp with sub and bool" This reverts commit 4069cccf3b4ff4afb743d3d371ead9e2d5491e3a. This causes bot failures, and there's a possibly a better way to get this and other patterns. --- .../InstCombine/InstCombineCompares.cpp | 26 ------------------- .../test/Transforms/InstCombine/icmp-range.ll | 17 +++++------- 2 files changed, 7 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index e2a6b6b1495b86..4f20a0699ec50c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5631,29 +5631,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return nullptr; } -/// If one operand of an icmp is effectively a bool (value range of {0,1}), -/// then try to reduce patterns based on that limit. 
-static Instruction *foldICmpUsingBoolRange(ICmpInst &I, - InstCombiner::BuilderTy &Builder) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - const ICmpInst::Predicate Pred = I.getPredicate(); - - Value *X, *Y, *Z; - if (Pred != ICmpInst::ICMP_ULT || !match(Op0, m_Sub(m_Value(X), m_Value(Y)))) - return nullptr; - - unsigned ExtraUses = !Op0->hasOneUse() + !Op1->hasOneUse(); - - // Sub must be 0 and bool must be true for "ULT": - // (sub X, Y) (X == Y) && Z - if (match(Op1, m_ZExt(m_Value(Z))) && ExtraUses < 2) { - Value *EqXY = Builder.CreateICmpEQ(X, Y); - return BinaryOperator::CreateAnd(EqXY, Z); - } - - return nullptr; -} - llvm::Optional> InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C) { @@ -6081,9 +6058,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpWithDominatingICmp(I)) return Res; - if (Instruction *Res = foldICmpUsingBoolRange(I, Builder)) - return Res; - if (Instruction *Res = foldICmpUsingKnownBits(I)) return Res; diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index a29d4cd81b9d1c..b67356675eba66 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -173,8 +173,9 @@ define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readon define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] ; CHECK-NEXT: ret i1 [[R]] ; %z = zext i1 %b to i8 @@ -187,8 +188,8 @@ define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext_use1( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 ; CHECK-NEXT: call void @use(i8 
[[Z]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B]] +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] ; CHECK-NEXT: ret i1 [[R]] ; %z = zext i1 %b to i8 @@ -200,10 +201,10 @@ define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { define <2 x i1> @zext_ugt_sub_use2(<2 x i1> %b, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @zext_ugt_sub_use2( +; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> ; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: call void @use_vec(<2 x i8> [[S]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[S]], [[Z]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %z = zext <2 x i1> %b to <2 x i8> @@ -213,8 +214,6 @@ define <2 x i1> @zext_ugt_sub_use2(<2 x i1> %b, <2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %r } -; negative test - too many extra uses - define i1 @sub_ult_zext_use3(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext_use3( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 @@ -232,8 +231,6 @@ define i1 @sub_ult_zext_use3(i1 %b, i8 %x, i8 %y) { ret i1 %r } -; negative test - wrong predicate - define i1 @sub_ule_zext(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ule_zext( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 From cd5f3241c3866da0ba7ede4b96d1a110108214dd Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 23 May 2022 01:03:15 +0900 Subject: [PATCH 192/908] ADT::GenericCycleInfo: Hide validateTree() in -Asserts. validateTree() is instantiated with __FILE__. It will be pruned at link time due to -ffunction-sections but be left in object files. Its user is only GenericCycleInfo::compute() with assert(validateTree()); Therefore I think validateTree() may be hidden with NDEBUG. 
This is a fixup for https://reviews.llvm.org/D112696 --- llvm/include/llvm/ADT/GenericCycleImpl.h | 2 ++ llvm/include/llvm/ADT/GenericCycleInfo.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index 494b415cc27946..17f3a793c985b2 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -326,6 +326,7 @@ auto GenericCycleInfo::getCycle(const BlockT *Block) const return nullptr; } +#ifndef NDEBUG /// \brief Validate the internal consistency of the cycle tree. /// /// Note that this does \em not check that cycles are really cycles in the CFG, @@ -391,6 +392,7 @@ bool GenericCycleInfo::validateTree() const { return true; } +#endif /// \brief Print the cycle info. template diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index 7228559dfc335c..5c2190411c1e04 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -249,7 +249,9 @@ template class GenericCycleInfo { /// Methods for debug and self-test. //@{ +#ifndef NDEBUG bool validateTree() const; +#endif void print(raw_ostream &Out) const; void dump() const { print(dbgs()); } //@} From c230ab6db8b415987736377c91061585efea7bf9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 22 May 2022 18:20:33 +0100 Subject: [PATCH 193/908] [LV] Re-generate check lines for loop-form.ll test. 
--- .../Transforms/LoopVectorize/loop-form.ll | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index f032b5c2772592..82b32324d7050f 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -59,28 +59,28 @@ define void @bottom_tested(i16* %p, i32 %n) { ; TAILFOLD: vector.body: ; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] -; TAILFOLD-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; TAILFOLD-NEXT: [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> -; TAILFOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; TAILFOLD-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; TAILFOLD-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; TAILFOLD-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> +; TAILFOLD-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; TAILFOLD-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; TAILFOLD: pred.store.if: -; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; TAILFOLD-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] -; TAILFOLD-NEXT: store i16 0, i16* [[TMP7]], align 4 +; TAILFOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; TAILFOLD-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; TAILFOLD-NEXT: store i16 0, i16* [[TMP5]], align 4 ; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] ; TAILFOLD: pred.store.continue: -; TAILFOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; 
TAILFOLD-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] +; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; TAILFOLD-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; TAILFOLD: pred.store.if1: -; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 -; TAILFOLD-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]] -; TAILFOLD-NEXT: store i16 0, i16* [[TMP10]], align 4 +; TAILFOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; TAILFOLD-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP7]] +; TAILFOLD-NEXT: store i16 0, i16* [[TMP8]], align 4 ; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] ; TAILFOLD: pred.store.continue2: ; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; TAILFOLD-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; TAILFOLD-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TAILFOLD: middle.block: ; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] ; TAILFOLD: scalar.ph: @@ -129,14 +129,14 @@ define void @early_exit(i16* %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* 
[[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -263,14 +263,14 @@ define void @multiple_unique_exit(i16* %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 
[[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -345,14 +345,14 @@ define i32 @multiple_unique_exit2(i16* %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -429,14 +429,14 @@ define i32 @multiple_unique_exit3(i16* %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -514,14 +514,14 @@ define i32 @multiple_exit_blocks(i16* %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -603,14 +603,14 @@ define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; 
CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -697,16 +697,16 @@ define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* -; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* +; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: 
@@ -1128,22 +1128,22 @@ define i32 @me_reduction(i32* %addr) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP4]]) ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; 
CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] @@ -1152,8 +1152,8 @@ define i32 @me_reduction(i32* %addr) { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP7]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 ; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]] From 145fe571061b48eaedbd79939bb2dc28267e9a0d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 22 May 2022 18:27:41 +0100 Subject: [PATCH 194/908] [LV] Use exiting block instead of latch in addUsersInExitBlock. The latch may not be the exiting block. Use the exiting block instead when looking up the incoming value of the LCSSA phi node. This fixes a crash with early-exit loops. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 5 +- .../Transforms/LoopVectorize/loop-form.ll | 106 +++++++++++++++--- 2 files changed, 93 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index bca760a9caccb2..f24622ee620a23 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8703,14 +8703,15 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, VPBasicBlock *MiddleVPBB, Loop *OrigLoop, VPlan &Plan) { BasicBlock *ExitBB = OrigLoop->getUniqueExitBlock(); + BasicBlock *ExitingBB = OrigLoop->getExitingBlock(); // Only handle single-exit loops with unique exit blocks for now. - if (!ExitBB || !ExitBB->getSinglePredecessor()) + if (!ExitBB || !ExitBB->getSinglePredecessor() || !ExitingBB) return; // Introduce VPUsers modeling the exit values. for (PHINode &ExitPhi : ExitBB->phis()) { Value *IncomingValue = - ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch()); + ExitPhi.getIncomingValueForBlock(ExitingBB); VPValue *V = Plan.getOrAddVPValue(IncomingValue, true); Plan.addLiveOut(&ExitPhi, V); } diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 82b32324d7050f..1ca0dfcc54531d 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -190,6 +190,80 @@ if.end: ret void } +define i32 @early_exit_with_live_out(i32* %ptr) { +; CHECK-LABEL: @early_exit_with_live_out( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP0]] +; 
CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> , <2 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: store i32 10, i32* [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ] +; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; +; TAILFOLD-LABEL: @early_exit_with_live_out( +; TAILFOLD-NEXT: entry: +; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] +; TAILFOLD: loop.header: +; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[IV]] +; TAILFOLD-NEXT: [[L:%.*]] = load i32, i32* [[GEP]], align 4 +; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; TAILFOLD-NEXT: [[EC:%.*]] = icmp eq 
i64 [[IV_NEXT]], 1000 +; TAILFOLD-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]] +; TAILFOLD: loop.latch: +; TAILFOLD-NEXT: store i32 10, i32* [[GEP]], align 4 +; TAILFOLD-NEXT: br label [[LOOP_HEADER]] +; TAILFOLD: exit: +; TAILFOLD-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ] +; TAILFOLD-NEXT: ret i32 [[RES_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep = getelementptr i32, i32* %ptr, i64 %iv + %l = load i32, i32* %gep + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %exit, label %loop.latch + +loop.latch: + store i32 10, i32* %gep + br label %loop.header + +exit: + %res.lcssa = phi i32 [ %l, %loop.header ] + ret i32 %res.lcssa +} + ; Same as early_exit, but with optsize to prevent the use of ; a scalar epilogue. -- Can't vectorize this in either case. define void @optsize(i16* %p, i32 %n) optsize { @@ -270,7 +344,7 @@ define void @multiple_unique_exit(i16* %p, i32 %n) { ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -286,7 +360,7 @@ define void @multiple_unique_exit(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: ret void ; @@ 
-352,7 +426,7 @@ define i32 @multiple_unique_exit2(i16* %p, i32 %n) { ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -368,7 +442,7 @@ define i32 @multiple_unique_exit2(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] ; CHECK-NEXT: ret i32 [[I_LCSSA]] @@ -436,7 +510,7 @@ define i32 @multiple_unique_exit3(i16* %p, i32 %n) { ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -452,7 +526,7 @@ define i32 @multiple_unique_exit3(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], 
!llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[EXIT]] @@ -521,7 +595,7 @@ define i32 @multiple_exit_blocks(i16* %p, i32 %n) { ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -537,7 +611,7 @@ define i32 @multiple_exit_blocks(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: ret i32 0 ; CHECK: if.end2: @@ -610,7 +684,7 @@ define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -626,7 +700,7 @@ define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 0, i16* 
[[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] ; CHECK-NEXT: ret i32 [[I_LCSSA]] @@ -706,7 +780,7 @@ define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -722,7 +796,7 @@ define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] ; CHECK-NEXT: ret i32 [[I_LCSSA]] @@ -1049,7 +1123,7 @@ define void @scalar_predication(float* %addr) { ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: 
middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1069,7 +1143,7 @@ define void @scalar_predication(float* %addr) { ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1137,7 +1211,7 @@ define i32 @me_reduction(i32* %addr) { ; CHECK-NEXT: [[TMP4]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP4]]) ; CHECK-NEXT: br label [[SCALAR_PH]] @@ -1156,7 +1230,7 @@ define i32 @me_reduction(i32* %addr) { ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP7]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 -; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] ; CHECK-NEXT: ret i32 [[LCSSA]] From 202a4fde2ba92d70f1eb8760e1919b6ab19f0ced Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sun, 22 May 2022 13:36:10 -0400 Subject: [PATCH 195/908] Test more C DR conformance (part two of many) This continues the work started earlier at filling out the C DR status page based on test coverage.
--- clang/test/C/drs/dr060.c | 17 ++++ clang/test/C/drs/dr0xx.c | 196 +++++++++++++++++++++++++++++++++++-- clang/www/c_dr_status.html | 51 +++++----- 3 files changed, 233 insertions(+), 31 deletions(-) create mode 100644 clang/test/C/drs/dr060.c diff --git a/clang/test/C/drs/dr060.c b/clang/test/C/drs/dr060.c new file mode 100644 index 00000000000000..2650871e5dd6a4 --- /dev/null +++ b/clang/test/C/drs/dr060.c @@ -0,0 +1,17 @@ +/* RUN: %clang_cc1 -std=c89 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c99 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c11 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c17 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c2x -emit-llvm -o - %s | FileCheck %s + */ + +/* WG14 DR060: + * Array initialization from a string literal + */ + +const char str[5] = "foo"; +const __typeof__(*L"a") wstr[5] = L"foo"; + +// CHECK: @str = {{.*}}constant [5 x i8] c"foo\00\00" +// CHECK-NEXT: @wstr = {{.*}}constant [5 x i{{16|32}}] [i{{16|32}} 102, i{{16|32}} 111, i{{16|32}} 111, i{{16|32}} 0, i{{16|32}} 0] + diff --git a/clang/test/C/drs/dr0xx.c b/clang/test/C/drs/dr0xx.c index 4046765fa97ed6..b0b57fa2891497 100644 --- a/clang/test/C/drs/dr0xx.c +++ b/clang/test/C/drs/dr0xx.c @@ -1,8 +1,9 @@ -/* RUN: %clang_cc1 -std=c89 -verify=expected,c89 -pedantic -Wno-declaration-after-statement -Wno-c11-extensions %s - RUN: %clang_cc1 -std=c99 -verify -pedantic -Wno-c11-extensions %s - RUN: %clang_cc1 -std=c11 -verify -pedantic %s - RUN: %clang_cc1 -std=c17 -verify -pedantic %s - RUN: %clang_cc1 -std=c2x -verify -pedantic %s +/* RUN: %clang_cc1 -std=c89 -verify=expected,c89only -pedantic -Wno-declaration-after-statement -Wno-c11-extensions %s + RUN: %clang_cc1 -std=c89 -verify=expected,c89only -pedantic -Wno-declaration-after-statement -Wno-c11-extensions -fno-signed-char %s + RUN: %clang_cc1 -std=c99 -verify=expected,c99untilc2x -pedantic -Wno-c11-extensions %s + RUN: %clang_cc1 -std=c11 -verify=expected,c99untilc2x 
-pedantic %s + RUN: %clang_cc1 -std=c17 -verify=expected,c99untilc2x -pedantic %s + RUN: %clang_cc1 -std=c2x -verify=expected,c2xandup -pedantic %s */ /* The following are DRs which do not require tests to demonstrate @@ -28,6 +29,34 @@ * WG14 DR033: yes * Conformance questions around 'shall' violations outside of constraints * sections + * + * WG14 DR036: yes + * May floating-point constants be represented with more precision than implied + * by its type? + * + * WG14 DR037: yes + * Questions about multibyte characters and Unicode + * + * WG14 DR051: yes + * Question on pointer arithmetic + * + * WG14 DR052: yes + * Editorial corrections + * + * WG14 DR056: yes + * Floating-point representation precision requirements + * + * WG14 DR057: yes + * Is there an integral type for every pointer? + * + * WG14 DR059: yes + * Do types have to be completed? + * + * WG14 DR063: dup 056 + * Floating-point representation precision requirements + * + * WG14 DR067: yes + * Integer and integral type confusion */ @@ -49,11 +78,16 @@ struct dr007_a; struct dr007_b {int a;}; struct dr007_b; + /* WG14 DR009: no * Use of typedef names in parameter declarations + * + * FIXME: This should be diagnosed as expecting a declaration specifier instead + * of treated as declaring a parameter of type 'int (*)(dr009_t);' */ typedef int dr009_t; -void dr009_f(int dr009_t); +void dr009_f((dr009_t)); /* c99untilc2x-error {{type specifier missing, defaults to 'int'; ISO C99 and later do not support implicit int}} + c2xandup-error {{a type specifier is required for all declarations}} */ /* WG14 DR010: * Is a typedef to an incomplete type legal? 
@@ -67,6 +101,12 @@ int dr010_c = sizeof(dr010_t); /* expected-error {{invalid application of 'sizeo * Merging of declarations for linked identifier * * Note: more of this DR is tested in dr011.c + * + * WG14 DR034: yes + * External declarations in different scopes + * + * Note: DR034 has a question resolved by DR011 and another question where the + * result is UB. */ static int dr011_a[]; /* expected-warning {{tentative array definition assumed to have one element}} */ void dr011(void) { @@ -96,7 +136,7 @@ void dr011(void) { */ void dr012(void *p) { /* The behavior changed between C89 and C99. */ - (void)&*p; /* c89-warning {{ISO C forbids taking the address of an expression of type 'void'}} */ + (void)&*p; /* c89only-warning {{ISO C forbids taking the address of an expression of type 'void'}} */ } /* WG14 DR013: yes @@ -158,10 +198,150 @@ void dr031(int i) { } } -/* WG21 DR032: no +/* WG14 DR032: no * Must implementations diagnose extensions to the constant evaluation rules? * * This should issue a diagnostic because a constant-expression is a * conditional-expression, which excludes the comma operator. 
*/ int dr032 = (1, 2); /* expected-warning {{left operand of comma operator has no effect}} */ + +#if __STDC_VERSION__ < 202000L +/* WG14 DR035: partial + * Questions about definition of functions without a prototype + */ +void dr035_1(a, b) /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} */ + int a(enum b {x, y}); /* expected-warning {{declaration of 'enum b' will not be visible outside of this function}} */ + int b; { + int test = x; /* expected-error {{use of undeclared identifier 'x'}} */ +} + +void dr035_2(c) /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} */ + enum m{q, r} c; { /* expected-warning {{declaration of 'enum m' will not be visible outside of this function}} */ + /* FIXME: This should be accepted because the scope of m, q, and r ends at + * the closing brace of the function per C89 6.1.2.1. + */ + int test = q; /* expected-error {{use of undeclared identifier 'q'}} */ +} +#endif /* __STDC_VERSION__ < 202000L */ + +/* WG14 DR038: yes + * Questions about argument substitution during macro expansion + */ +#define DR038_X 0x000E +#define DR038_Y 0x0100 +#define DR038(a) a +_Static_assert(DR038(DR038_X + DR038_Y) == DR038_X + DR038_Y, "fail"); + +/* WG14 DR039: yes + * Questions about the "C" locale + */ +_Static_assert(sizeof('a') == sizeof(int), "fail"); + +/* WG14 DR043: yes + * On the definition of the NULL macro + */ +void dr043(void) { + #include <stddef.h> + /* NULL has to be an integer constant expression with the value 0, or such an + * expression cast to void *.
If it's an integer constant expression other + * than the literal 0 (such as #define NULL 4-4), this would fail to compile + * unless the macro replacement list is properly parenthesized as it would + * expand to: (void)(void *)4-4; + */ + (void)(void *)NULL; + + /* If the NULL macro is an integer constant expression with the value 0 and + * it has been cast to void *, ensure that it's also fully parenthesized. If + * it isn't (such as #define NULL (void *)0), this would fail to compile as + * would expand to (void *)0->a; which gives a diagnostic about int not being + * a pointer, instead of((void *)0)->a; which gives a diagnostic about the + * base reference being void and not a structure. + */ + NULL->a; /* expected-error {{member reference base type 'void' is not a structure or union}} */ +} + +/* WG14 DR044: yes + * On the result of the offsetof macro + */ +void dr044(void) { + #include <stddef.h> + struct S { int a, b; }; + /* Ensure that the result of offsetof is usable in a constant expression. */ + _Static_assert(offsetof(struct S, b) == sizeof(int), "fail"); +} + +/* WG14 DR046: yes + * Use of typedef names in parameter declarations + */ +typedef int dr046_t; +int dr046(int dr046_t) { return dr046_t; } + +/* WG14 DR047: yes + * Questions about declaration conformance + */ +struct dr047_t; /* expected-note 2 {{forward declaration of 'struct dr047_t'}} */ +struct dr047_t *dr047_1(struct dr047_t *p) {return p; } +struct dr047_t *dr047_2(struct dr047_t a[]) {return a; } /* expected-error {{array has incomplete element type 'struct dr047_t'}} */ +int *dr047_3(int a2[][]) {return *a2; } /* expected-error {{array has incomplete element type 'int[]'}} */ +extern struct dr047_t es1; +extern struct dr047_t es2[1]; /* expected-error {{array has incomplete element type 'struct dr047_t'}} */ + +/* WG14 DR050: yes + * Do wide string literals implicitly include <stddef.h>? + */ +void dr050(void) { + /* The NULL macro is previously defined because we include <stddef.h> for + * other tests.
Undefine the macro to demonstrate that use of a wide string + * literal doesn't magically include the header file. + */ + #undef NULL + (void)L"huttah!"; + (void)NULL; /* expected-error {{use of undeclared identifier 'NULL'}} */ +} + +#if __STDC_VERSION__ < 202000L +/* WG14 DR053: yes + * Accessing a pointer to a function with a prototype through a pointer to + * pointer to function without a prototype + */ +void dr053(void) { + int f(int); + int (*fp1)(int); + int (*fp2)(); /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C}} */ + int (**fpp)(); /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C}} */ + + fp1 = f; + fp2 = fp1; + (*fp2)(3); /* expected-warning {{passing arguments to a function without a prototype is deprecated in all versions of C and is not supported in C2x}} */ + fpp = &fp1; + (**fpp)(3); /* expected-warning {{passing arguments to a function without a prototype is deprecated in all versions of C and is not supported in C2x}} */ +} +#endif /* __STDC_VERSION__ < 202000L */ + +/* WG14 DR064: yes + * Null pointer constants + */ +char *dr064_1(int i, int *pi) { + *pi = i; + return 0; +} + +char *dr064_2(int i, int *pi) { + return (*pi = i, 0); /* expected-warning {{incompatible integer to pointer conversion returning 'int' from a function with result type 'char *'}} */ +} + +/* WG14 DR068: yes + * 'char' and signed vs unsigned integer types + */ +void dr068(void) { + #include <limits.h> + +#if CHAR_MAX == SCHAR_MAX + /* char is signed */ + _Static_assert('\xFF' == -1, "fail"); +#else + /* char is unsigned */ + _Static_assert('\xFF' == 0xFF, "fail"); +#endif +} diff --git a/clang/www/c_dr_status.html b/clang/www/c_dr_status.html index 05d85790cb9205..49783ed4b4eac3 100644 --- a/clang/www/c_dr_status.html +++ b/clang/www/c_dr_status.html @@ -250,37 +250,42 @@

C defect report implementation status

34 C89 External declarations in different scopes - Unknown + Yes 35 NAD Questions about definition of functions without a prototype - Unknown + +
Partial + Tags declared directly within an identifier list are incorrectly scoped + to the prototype rather than to the function body. +
+ 36 NAD May floating-point constants be represented with more precision than implied by its type? - Unknown + Yes 37 NAD Questions about multibyte characters and Unicode - Unknown + Yes 38 NAD Questions about argument substitution during macro expansion - Unknown + Yes 39 NAD Questions about the "C" locale - Unknown + Yes 40 @@ -304,13 +309,13 @@

C defect report implementation status

43 C89 On the definition of the NULL macro - Unknown + Yes 44 NAD On the result of the offsetof macro - Unknown + Yes 45 @@ -322,13 +327,13 @@

C defect report implementation status

46 NAD Use of typedef names in parameter declarations - Unknown + Yes 47 NAD Questions about declaration conformance - Unknown + Yes 48 @@ -346,25 +351,25 @@

C defect report implementation status

50 NAD Do wide string literals implicitly include <stddef.h>? - Unknown + Yes 51 NAD Question on pointer arithmetic - Unknown + Yes 52 C89 Editorial corrections - Unknown + Yes 53 C89 Accessing a pointer to a function with a prototype through a pointer to pointer to function without a prototype - Unknown + Yes 54 @@ -382,13 +387,13 @@

C defect report implementation status

56 NAD Floating-point representation precision requirements - Unknown + Yes 57 NAD Is there an integral type for every pointer? - Unknown + Yes 58 @@ -400,13 +405,13 @@

C defect report implementation status

59 NAD Do types have to be completed? - Unknown + Yes 60 C89 Array initialization from a string literal - Unknown + Yes 61 @@ -424,13 +429,13 @@

C defect report implementation status

63 Dup Floating-point representation precision requirements - Duplicate of 56 + Duplicate of 56 64 NAD Null pointer constants - Unknown + Yes 65 @@ -442,19 +447,19 @@

C defect report implementation status

66 NAD Another question on locales - Unknown + N/A 67 NAD Integer and integral type confusion - Unknown + Yes 68 NAD 'char' and signed vs unsigned integer types - Unknown + Yes 69 From 419e49621fb4b7cc2a46b09cd04020c083f7d64b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 22 May 2022 20:11:47 +0100 Subject: [PATCH 196/908] [LV] Add check line to test interleaving only with induction cast. Also simplify the value names a bit in the test. --- .../LoopVectorize/cast-induction.ll | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index 9e7fd7f6f93286..7b9ba15d593330 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck --check-prefix=VF4 %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S %s | FileCheck --check-prefix=IC2 %s ; rdar://problem/12848162 @@ -6,23 +7,33 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 @a = common global [2048 x i32] zeroinitializer, align 16 -;CHECK-LABEL: @example12( -; CHECK-LABEL: vector.body: -; CHECK: [[VEC_IND:%.+]] = phi <4 x i32> -; CHECK: store <4 x i32> [[VEC_IND]] -; CHECK: ret void define void @example12() { +; VF4-LABEL: @example12( +; VF4-LABEL: vector.body: +; VF4: [[VEC_IND:%.+]] = phi <4 x i32> +; VF4: store <4 x i32> [[VEC_IND]] +; VF4: middle.block: +; +; IC2-LABEL: @example12( +; IC2-LABEL: vector.body: +; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ] +; IC2-NEXT: [[TRUNC:%.+]] = trunc i64 [[INDEX]] to i32 +; IC2-NEXT: [[TRUNC0:%.+]] = add i32 [[TRUNC]], 0 +; IC2-NEXT: [[TRUNC1:%.+]] = add 
i32 [[TRUNC]], 1 +; IC2: store i32 [[TRUNC0]], +; IC2-NEXT: store i32 [[TRUNC1]], +; entry: br label %loop loop: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %gep = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv - %iv.trunc = trunc i64 %indvars.iv to i32 + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %iv + %iv.trunc = trunc i64 %iv to i32 store i32 %iv.trunc, i32* %gep, align 4 - %indvars.iv.next = add i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, 1024 + %iv.next = add i64 %iv, 1 + %iv.next.trunc = trunc i64 %iv.next to i32 + %exitcond = icmp eq i32 %iv.next.trunc, 1024 br i1 %exitcond, label %exit, label %loop exit: From 768a1ca5eccb678947f4155e38a5f5744dcefb56 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 22 May 2022 12:47:27 -0700 Subject: [PATCH 197/908] [SelectionDAG] Fold abs(undef) to 0 instead of undef. abs should only produce a positive value or the signed minimum value. This means we can't fold abs(undef) to undef as that would allow more values. Fold to 0 instead to match InstSimplify. Fixes test mentioned in comment on pr55271. 
Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D126174 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- llvm/test/CodeGen/X86/pr55271.ll | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr55271.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6ddf1ceef647e2..09c9d4179ae13a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5243,7 +5243,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && VT == Operand.getValueType() && "Invalid ABS!"); if (OpOpcode == ISD::UNDEF) - return getUNDEF(VT); + return getConstant(0, DL, VT); break; case ISD::BSWAP: assert(VT.isInteger() && VT == Operand.getValueType() && diff --git a/llvm/test/CodeGen/X86/pr55271.ll b/llvm/test/CodeGen/X86/pr55271.ll new file mode 100644 index 00000000000000..3ffa6cf4acb802 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr55271.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +; abs(undef) should fold to 0 not undef. + +declare i32 @llvm.abs.i32(i32, i1 immarg) #0 + +define i32 @abs(i32 %0) { +; CHECK-LABEL: abs: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq + %2 = call i32 @llvm.abs.i32(i32 undef, i1 false) + ret i32 %2 +} From c11051a4001c7f89e8655f1776a75110a562a45e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 22 May 2022 14:13:30 -0700 Subject: [PATCH 198/908] [SelectionDAG] Add a freeze to ISD::ABS expansion. I had initially assumed this was the problem with https://github.com/llvm/llvm-project/issues/55271#issuecomment-1133426243 But it turns out that was a simpler issue. This patch is still more correct than what we were doing before so figured I'd submit it anyway. 
No test case because I'm not sure how to get an undef around until expansion. Looking at the test deltas I wonder if it would be valid to combine (sext_inreg (freeze (aextload X))) -> (freeze (sextload X)). Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D126175 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 3 ++ llvm/test/CodeGen/WebAssembly/PR41149.ll | 32 +++++++++++-------- llvm/test/CodeGen/X86/iabs.ll | 4 +-- llvm/test/CodeGen/X86/neg-abs.ll | 10 +++--- .../Inputs/basic.ll.expected | 4 +-- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 35d289d955cc9b..5af1cd8d4f5350 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7855,6 +7855,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); + Op = DAG.getFreeze(Op); return DAG.getNode(ISD::UMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } @@ -7862,6 +7863,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { + Op = DAG.getFreeze(Op); SDValue Zero = DAG.getConstant(0, dl, VT); return DAG.getNode(ISD::SMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); @@ -7875,6 +7877,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + Op = DAG.getFreeze(Op); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); diff --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll index 7ee99e1aff8160..428f84979d89eb 100644 --- 
a/llvm/test/CodeGen/WebAssembly/PR41149.ll +++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll @@ -1,22 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=wasm32-unknown-unknown | FileCheck %s ; Regression test for PR41149. define void @mod() { ; CHECK-LABEL: mod: -; CHECK-NEXT: .functype mod () -> () -; CHECK: local.get 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.load8_s 0 -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 7 -; CHECK-NEXT: i32.shr_s -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: i32.xor -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: i32.store8 0 +; CHECK: .functype mod () -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.load8_u 0 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 24 +; CHECK-NEXT: i32.shl +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32.shr_s +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: i32.xor +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: i32.store8 0 +; CHECK-NEXT: # fallthrough-return %tmp = load <4 x i8>, <4 x i8>* undef %tmp2 = icmp slt <4 x i8> %tmp, zeroinitializer %tmp3 = sub <4 x i8> zeroinitializer, %tmp diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 2df88dbc22f74d..c3cb3b30aa0605 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -37,8 +37,8 @@ define i8 @test_i8(i8 %a) nounwind { define i16 @test_i16(i16 %a) nounwind { ; X86-NO-CMOV-LABEL: test_i16: ; X86-NO-CMOV: # %bb.0: -; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X86-NO-CMOV-NEXT: movl %eax, %ecx +; X86-NO-CMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: movswl %ax, %ecx ; X86-NO-CMOV-NEXT: sarl $15, %ecx ; X86-NO-CMOV-NEXT: xorl %ecx, %eax ; X86-NO-CMOV-NEXT: subl %ecx, %eax diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll 
index 429d0a3dc5d27d..84f82b7a694c35 100644 --- a/llvm/test/CodeGen/X86/neg-abs.ll +++ b/llvm/test/CodeGen/X86/neg-abs.ll @@ -34,8 +34,8 @@ define i8 @neg_abs_i8(i8 %x) nounwind { define i16 @neg_abs_i16(i16 %x) nounwind { ; X86-LABEL: neg_abs_i16: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movswl %cx, %eax ; X86-NEXT: sarl $15, %eax ; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax @@ -108,8 +108,8 @@ define i128 @neg_abs_i128(i128 %x) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: xorl %ecx, %edx @@ -178,8 +178,8 @@ define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind { define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind { ; X86-LABEL: sub_abs_i16: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movswl %cx, %eax ; X86-NEXT: sarl $15, %eax ; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected index cf85117ec76de6..6e7d02bd82ed18 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected @@ -31,8 +31,8 @@ define i8 @test_i8(i8 %a) nounwind { define i16 @test_i16(i16 %a) nounwind { ; X86-NO-CMOV-LABEL: test_i16: ; X86-NO-CMOV: # %bb.0: -; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X86-NO-CMOV-NEXT: movl %eax, %ecx +; X86-NO-CMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: movswl %ax, %ecx ; X86-NO-CMOV-NEXT: sarl $15, %ecx ; X86-NO-CMOV-NEXT: 
xorl %ecx, %eax ; X86-NO-CMOV-NEXT: subl %ecx, %eax From 10c9ecce9f6096e18222a331c5e7d085bd813f75 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Fri, 20 May 2022 08:48:52 +0000 Subject: [PATCH 199/908] [mlir][NFC] Replace some nested if with logical and. This patch replaces some nested if statements with logical and to reduce the nesting depth. Differential Revision: https://reviews.llvm.org/D126050 --- .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 7db71a636c9d7a..04a41acc041bca 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -1296,20 +1296,16 @@ OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_Zero())) { if (auto extOp = getLhs().getDefiningOp()) { - if (extOp.getOperand().getType().cast().getWidth() == 1) { - // extsi(%x : i1 -> iN) != 0 -> %x - if (getPredicate() == arith::CmpIPredicate::ne) { - return extOp.getOperand(); - } - } + // extsi(%x : i1 -> iN) != 0 -> %x + if (extOp.getOperand().getType().cast().getWidth() == 1 && + getPredicate() == arith::CmpIPredicate::ne) + return extOp.getOperand(); } if (auto extOp = getLhs().getDefiningOp()) { - if (extOp.getOperand().getType().cast().getWidth() == 1) { - // extui(%x : i1 -> iN) != 0 -> %x - if (getPredicate() == arith::CmpIPredicate::ne) { - return extOp.getOperand(); - } - } + // extui(%x : i1 -> iN) != 0 -> %x + if (extOp.getOperand().getType().cast().getWidth() == 1 && + getPredicate() == arith::CmpIPredicate::ne) + return extOp.getOperand(); } } @@ -1733,24 +1729,24 @@ struct SelectToExtUI : public OpRewritePattern { return failure(); // select %x, c1, %c0 => extui %arg - if (matchPattern(op.getTrueValue(), m_One())) - if (matchPattern(op.getFalseValue(), m_Zero())) { - rewriter.replaceOpWithNewOp(op, 
op.getType(), - op.getCondition()); - return success(); - } + if (matchPattern(op.getTrueValue(), m_One()) && + matchPattern(op.getFalseValue(), m_Zero())) { + rewriter.replaceOpWithNewOp(op, op.getType(), + op.getCondition()); + return success(); + } // select %x, c0, %c1 => extui (xor %arg, true) - if (matchPattern(op.getTrueValue(), m_Zero())) - if (matchPattern(op.getFalseValue(), m_One())) { - rewriter.replaceOpWithNewOp( - op, op.getType(), - rewriter.create( - op.getLoc(), op.getCondition(), - rewriter.create( - op.getLoc(), 1, op.getCondition().getType()))); - return success(); - } + if (matchPattern(op.getTrueValue(), m_Zero()) && + matchPattern(op.getFalseValue(), m_One())) { + rewriter.replaceOpWithNewOp( + op, op.getType(), + rewriter.create( + op.getLoc(), op.getCondition(), + rewriter.create( + op.getLoc(), 1, op.getCondition().getType()))); + return success(); + } return failure(); } @@ -1778,10 +1774,9 @@ OpFoldResult arith::SelectOp::fold(ArrayRef operands) { return falseVal; // select %x, true, false => %x - if (getType().isInteger(1)) - if (matchPattern(getTrueValue(), m_One())) - if (matchPattern(getFalseValue(), m_Zero())) - return condition; + if (getType().isInteger(1) && matchPattern(getTrueValue(), m_One()) && + matchPattern(getFalseValue(), m_Zero())) + return condition; if (auto cmp = dyn_cast_or_null(condition.getDefiningOp())) { auto pred = cmp.getPredicate(); From b86440ecde5c1dec5b898a3f1bc08ab9853d5ed9 Mon Sep 17 00:00:00 2001 From: "Zi Xuan Wu (Zeson)" Date: Fri, 20 May 2022 15:11:43 +0800 Subject: [PATCH 200/908] [CSKY] Fix the conflict of default fpu features and -mfpu option The arch or cpu has its default fpu features and versions such as fpuv2_sf/fpuv3_sf. And there is also -mfpu option to specify and override fpu version and features. For example, C860 has fpuv3_sf/fpuv3_df feature as default, when -mfpu=fpv2 is given, fpuv3_sf/fpuv3_df is replaced with fpuv2_sf/fpuv2_df. 
--- clang/lib/Basic/Targets/CSKY.cpp | 31 ++++++-- clang/lib/Basic/Targets/CSKY.h | 16 ++-- clang/lib/Driver/ToolChains/Arch/CSKY.cpp | 26 +++++-- clang/test/Driver/csky-mfpu.c | 93 +++++++++++++++++++++++ llvm/lib/Support/CSKYTargetParser.cpp | 3 + 5 files changed, 153 insertions(+), 16 deletions(-) create mode 100644 clang/test/Driver/csky-mfpu.c diff --git a/clang/lib/Basic/Targets/CSKY.cpp b/clang/lib/Basic/Targets/CSKY.cpp index 40004da9825945..adcffd90ae7800 100644 --- a/clang/lib/Basic/Targets/CSKY.cpp +++ b/clang/lib/Basic/Targets/CSKY.cpp @@ -95,17 +95,36 @@ void CSKYTargetInfo::getTargetDefines(const LangOptions &Opts, } } +bool CSKYTargetInfo::hasFeature(StringRef Feature) const { + return llvm::StringSwitch(Feature) + .Case("hard-float", HardFloat) + .Case("hard-float-abi", HardFloatABI) + .Case("fpuv2_sf", FPUV2_SF) + .Case("fpuv2_df", FPUV2_DF) + .Case("fpuv3_sf", FPUV3_SF) + .Case("fpuv3_df", FPUV3_DF) + .Case("vdspv2", VDSPV2) + .Case("dspv2", DSPV2) + .Case("vdspv1", VDSPV1) + .Case("3e3r1", is3E3R1) + .Default(false); +} + bool CSKYTargetInfo::handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) { - HardFloat = false; - VDSPV2 = false; - VDSPV1 = false; - DSPV2 = false; - is3E3R1 = false; - for (const auto &Feature : Features) { if (Feature == "+hard-float") HardFloat = true; + if (Feature == "+hard-float-abi") + HardFloatABI = true; + if (Feature == "+fpuv2_sf") + FPUV2_SF = true; + if (Feature == "+fpuv2_df") + FPUV2_DF = true; + if (Feature == "+fpuv3_sf") + FPUV3_SF = true; + if (Feature == "+fpuv3_df") + FPUV3_DF = true; if (Feature == "+vdspv2") VDSPV2 = true; if (Feature == "+dspv2") diff --git a/clang/lib/Basic/Targets/CSKY.h b/clang/lib/Basic/Targets/CSKY.h index 17e5f30d3cd87d..7e932e7c86b1c7 100644 --- a/clang/lib/Basic/Targets/CSKY.h +++ b/clang/lib/Basic/Targets/CSKY.h @@ -26,11 +26,16 @@ class LLVM_LIBRARY_VISIBILITY CSKYTargetInfo : public TargetInfo { llvm::CSKY::ArchKind Arch = llvm::CSKY::ArchKind::INVALID; 
std::string CPU; - bool HardFloat; - bool VDSPV2; - bool VDSPV1; - bool DSPV2; - bool is3E3R1; + bool HardFloat = false; + bool HardFloatABI = false; + bool FPUV2_SF = false; + bool FPUV2_DF = false; + bool FPUV3_SF = false; + bool FPUV3_DF = false; + bool VDSPV2 = false; + bool VDSPV1 = false; + bool DSPV2 = false; + bool is3E3R1 = false; public: CSKYTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) @@ -81,6 +86,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYTargetInfo : public TargetInfo { void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; + bool hasFeature(StringRef Feature) const override; bool handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) override; diff --git a/clang/lib/Driver/ToolChains/Arch/CSKY.cpp b/clang/lib/Driver/ToolChains/Arch/CSKY.cpp index fb809956afa80a..3a8f9278560924 100644 --- a/clang/lib/Driver/ToolChains/Arch/CSKY.cpp +++ b/clang/lib/Driver/ToolChains/Arch/CSKY.cpp @@ -91,6 +91,22 @@ getCSKYFPUFeatures(const Driver &D, const Arg *A, const ArgList &Args, .Case("fpv3_hsf", llvm::CSKY::FK_FPV3_HSF) .Case("fpv3_sdf", llvm::CSKY::FK_FPV3_SDF) .Default(llvm::CSKY::FK_INVALID); + if (FPUID == llvm::CSKY::FK_INVALID) { + D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); + return llvm::CSKY::FK_INVALID; + } + + auto RemoveTargetFPUFeature = + [&Features](ArrayRef FPUFeatures) { + for (auto FPUFeature : FPUFeatures) { + auto it = std::find(Features.begin(), Features.end(), FPUFeature); + if (it != Features.end()) + Features.erase(it); + } + }; + + RemoveTargetFPUFeature({"+fpuv2_sf", "+fpuv2_df", "+fdivdu", "+fpuv3_hi", + "+fpuv3_hf", "+fpuv3_sf", "+fpuv3_df"}); if (!llvm::CSKY::getFPUFeatures(FPUID, Features)) { D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); @@ -126,6 +142,8 @@ void csky::getCSKYTargetFeatures(const Driver &D, const llvm::Triple &Triple, return; } cpuName = A->getValue(); + if (archName.empty()) + archName = 
llvm::CSKY::getArchName(Kind); } if (archName.empty() && cpuName.empty()) { @@ -144,11 +162,9 @@ void csky::getCSKYTargetFeatures(const Driver &D, const llvm::Triple &Triple, Features.push_back("+hard-float"); } - if (const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ)) - getCSKYFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features); - else if (FloatABI != csky::FloatABI::Soft && archName.empty()) - llvm::CSKY::getFPUFeatures(llvm::CSKY::FK_AUTO, Features); - uint64_t Extension = llvm::CSKY::getDefaultExtensions(cpuName); llvm::CSKY::getExtensionFeatures(Extension, Features); + + if (const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ)) + getCSKYFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features); } diff --git a/clang/test/Driver/csky-mfpu.c b/clang/test/Driver/csky-mfpu.c new file mode 100644 index 00000000000000..092ac56de92e40 --- /dev/null +++ b/clang/test/Driver/csky-mfpu.c @@ -0,0 +1,93 @@ +// Test that different values of -mfpu pick correct CSKY FPU target-feature(s). 
+ +// RUN: %clang -target csky-unknown-linux %s -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DEFAULT %s +// CHECK-DEFAULT-NOT: "-target-feature" "+hard-float" +// CHECK-DEFAULT-NOT: "-target-feature" "+hard-float-abi" +// CHECK-DEFAULT-NOT: "-target-feature" "+fpuv2_sf" +// CHECK-DEFAULT-NOT: "-target-feature" "+fpuv2_df" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv2 -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV2 %s +// CHECK-FPV2-NOT: "-target-feature" "+hard-float" +// CHECK-FPV2-NOT: "-target-feature" "+hard-float-abi" +// CHECK-FPV2: "-target-feature" "+fpuv2_sf" +// CHECK-FPV2: "-target-feature" "+fpuv2_df" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv2 -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV2-HARD %s +// CHECK-FPV2-HARD: "-target-feature" "+hard-float-abi" +// CHECK-FPV2-HARD: "-target-feature" "+hard-float" +// CHECK-FPV2-HARD: "-target-feature" "+fpuv2_sf" +// CHECK-FPV2-HARD: "-target-feature" "+fpuv2_df" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv2_divd -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV2DIVD-HARD %s +// CHECK-FPV2DIVD-HARD: "-target-feature" "+hard-float-abi" +// CHECK-FPV2DIVD-HARD: "-target-feature" "+hard-float" +// CHECK-FPV2DIVD-HARD: "-target-feature" "+fpuv2_sf" +// CHECK-FPV2DIVD-HARD: "-target-feature" "+fpuv2_df" +// CHECK-FPV2DIVD-HARD: "-target-feature" "+fdivdu" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=auto -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-AUTO-HARD %s +// CHECK-AUTO-HARD: "-target-feature" "+hard-float-abi" +// CHECK-AUTO-HARD: "-target-feature" "+hard-float" +// CHECK-AUTO-HARD: "-target-feature" "+fpuv2_sf" +// CHECK-AUTO-HARD: "-target-feature" "+fpuv2_df" +// CHECK-AUTO-HARD: "-target-feature" "+fdivdu" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv2_sf -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck 
--check-prefix=CHECK-FPV2SF-HARD %s +// CHECK-FPV2SF-HARD: "-target-feature" "+hard-float-abi" +// CHECK-FPV2SF-HARD: "-target-feature" "+hard-float" +// CHECK-FPV2SF-HARD: "-target-feature" "+fpuv2_sf" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv3 -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV3-HARD %s +// CHECK-FPV3-HARD: "-target-feature" "+hard-float-abi" +// CHECK-FPV3-HARD: "-target-feature" "+hard-float" +// CHECK-FPV3-HARD: "-target-feature" "+fpuv3_hf" +// CHECK-FPV3-HARD: "-target-feature" "+fpuv3_hi" +// CHECK-FPV3-HARD: "-target-feature" "+fpuv3_sf" +// CHECK-FPV3-HARD: "-target-feature" "+fpuv3_df" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv3_hf -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV3HF-HARD %s +// CHECK-FPV3HF-HARD: "-target-feature" "+hard-float-abi" +// CHECK-FPV3HF-HARD: "-target-feature" "+hard-float" +// CHECK-FPV3HF-HARD: "-target-feature" "+fpuv3_hf" +// CHECK-FPV3HF-HARD: "-target-feature" "+fpuv3_hi" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv3_hsf -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV3HSF-HARD %s +// CHECK-FPV3HSF-HARD: "-target-feature" "+hard-float-abi" +// CHECK-FPV3HSF-HARD: "-target-feature" "+hard-float" +// CHECK-FPV3HSF-HARD: "-target-feature" "+fpuv3_hf" +// CHECK-FPV3HSF-HARD: "-target-feature" "+fpuv3_hi" +// CHECK-FPV3HSF-HARD: "-target-feature" "+fpuv3_sf" + +// RUN: %clang -target csky-unknown-linux %s -mfpu=fpv3_sdf -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV3DF-HARD %s +// CHECK-FPV3DF-HARD: "-target-feature" "+hard-float-abi" +// CHECK-FPV3DF-HARD: "-target-feature" "+hard-float" +// CHECK-FPV3DF-HARD: "-target-feature" "+fpuv3_sf" +// CHECK-FPV3DF-HARD: "-target-feature" "+fpuv3_df" + +// RUN: %clang -target csky-unknown-linux %s -mcpu=c810 -mfpu=fpv3 -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV3-C810 %s 
+// CHECK-FPV3-C810: "-target-feature" "+hard-float-abi" +// CHECK-FPV3-C810: "-target-feature" "+hard-float" +// CHECK-FPV3-C810: "-target-feature" "+fpuv3_hf" +// CHECK-FPV3-C810: "-target-feature" "+fpuv3_hi" +// CHECK-FPV3-C810: "-target-feature" "+fpuv3_sf" +// CHECK-FPV3-C810: "-target-feature" "+fpuv3_df" +// CHECK-FPV3-C810-NOT: "-target-feature" "+fpuv2" + +// RUN: %clang -target csky-unknown-linux %s -mcpu=c860 -mfpu=fpv2 -mhard-float -### -o %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPV2-C860 %s +// CHECK-FPV2-C860: "-target-feature" "+hard-float-abi" +// CHECK-FPV2-C860: "-target-feature" "+hard-float" +// CHECK-FPV2-C860: "-target-feature" "+fpuv2_sf" +// CHECK-FPV2-C860: "-target-feature" "+fpuv2_df" +// CHECK-FPV2-C860-NOT: "-target-feature" "+fpuv3" \ No newline at end of file diff --git a/llvm/lib/Support/CSKYTargetParser.cpp b/llvm/lib/Support/CSKYTargetParser.cpp index 2273b6dce0c753..7e9d2ca0428d74 100644 --- a/llvm/lib/Support/CSKYTargetParser.cpp +++ b/llvm/lib/Support/CSKYTargetParser.cpp @@ -43,14 +43,17 @@ bool CSKY::getFPUFeatures(CSKYFPUKind CSKYFPUKind, break; case FK_FPV3: Features.push_back("+fpuv3_hf"); + Features.push_back("+fpuv3_hi"); Features.push_back("+fpuv3_sf"); Features.push_back("+fpuv3_df"); break; case FK_FPV3_HF: Features.push_back("+fpuv3_hf"); + Features.push_back("+fpuv3_hi"); break; case FK_FPV3_HSF: Features.push_back("+fpuv3_hf"); + Features.push_back("+fpuv3_hi"); Features.push_back("+fpuv3_sf"); break; case FK_FPV3_SDF: From 9b79f50b59c671f3a7b0c7654cdd6d59ab6a6bbc Mon Sep 17 00:00:00 2001 From: Jeremy Furtek Date: Sat, 21 May 2022 21:12:11 -0700 Subject: [PATCH 201/908] [mlir][tblgen][ods][python] Use keyword-only arguments for optional builder arguments in generated Python bindings This diff modifies `mlir-tblgen` to generate Python Operation class `__init__()` functions that use Python keyword-only arguments. Previously, all `__init__()` function arguments were positional. 
Python code to create MLIR Operations was required to provide values for ALL builder arguments, including optional arguments (attributes and operands). Callers that did not provide, for example, an optional attribute would be forced to provide `None` as an argument for EACH optional attribute. Proposed changes in this diff use `tblgen` record information (as provided by ODS) to generate keyword arguments for: - optional operands - optional attributes (which includes unit attributes) - default-valued attributes These `__init__()` function keyword arguments have default `None` values (i.e. the argument form is `optionalAttr=None`), allowing callers to create Operations more easily. Note that since optional arguments become keyword-only arguments (since they are placed after the bare `*` argument), this diff will require ALL optional operands and attributes to be provided using explicit keyword syntax. This may, in the short term, break any out-of-tree Python code that provided values via positional arguments. However, in the long term, it seems that requiring keywords for optional arguments will be more robust to operation changes that add arguments. Tests were modified to reflect the updated Operation builder calling convention. This diff partially addresses the requests made in the github issue below. 
https://github.com/llvm/llvm-project/issues/54932 Reviewed By: stellaraccident, mikeurbach Differential Revision: https://reviews.llvm.org/D124717 --- mlir/python/mlir/dialects/_func_ops_ext.py | 2 +- .../mlir/dialects/_ml_program_ops_ext.py | 2 +- mlir/python/mlir/dialects/_pdl_ops_ext.py | 20 +++---- mlir/test/mlir-tblgen/op-python-bindings.td | 8 +-- mlir/test/python/dialects/python_test.py | 8 +-- mlir/test/python/dialects/vector.py | 4 +- mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp | 60 +++++++++++++++++-- 7 files changed, 78 insertions(+), 26 deletions(-) diff --git a/mlir/python/mlir/dialects/_func_ops_ext.py b/mlir/python/mlir/dialects/_func_ops_ext.py index 6fe3ff5302e261..79577463d9199b 100644 --- a/mlir/python/mlir/dialects/_func_ops_ext.py +++ b/mlir/python/mlir/dialects/_func_ops_ext.py @@ -58,7 +58,7 @@ def __init__(self, type = TypeAttr.get(type) sym_visibility = StringAttr.get( str(visibility)) if visibility is not None else None - super().__init__(sym_name, type, sym_visibility, loc=loc, ip=ip) + super().__init__(sym_name, type, sym_visibility=sym_visibility, loc=loc, ip=ip) if body_builder: entry_block = self.add_entry_block() with InsertionPoint(entry_block): diff --git a/mlir/python/mlir/dialects/_ml_program_ops_ext.py b/mlir/python/mlir/dialects/_ml_program_ops_ext.py index a3df7ff0336072..8db82cf81c6788 100644 --- a/mlir/python/mlir/dialects/_ml_program_ops_ext.py +++ b/mlir/python/mlir/dialects/_ml_program_ops_ext.py @@ -48,7 +48,7 @@ def __init__(self, type = TypeAttr.get(type) sym_visibility = StringAttr.get( str(visibility)) if visibility is not None else None - super().__init__(sym_name, type, sym_visibility, loc=loc, ip=ip) + super().__init__(sym_name, type, sym_visibility=sym_visibility, loc=loc, ip=ip) if body_builder: entry_block = self.add_entry_block() with InsertionPoint(entry_block): diff --git a/mlir/python/mlir/dialects/_pdl_ops_ext.py b/mlir/python/mlir/dialects/_pdl_ops_ext.py index fb5b519c7c0229..bb63fe64dd035e 100644 --- 
a/mlir/python/mlir/dialects/_pdl_ops_ext.py +++ b/mlir/python/mlir/dialects/_pdl_ops_ext.py @@ -93,7 +93,7 @@ def __init__(self, ip=None): type = type if type is None else _get_value(type) result = pdl.AttributeType.get() - super().__init__(result, type, value, loc=loc, ip=ip) + super().__init__(result, type=type, value=value, loc=loc, ip=ip) class EraseOp: @@ -118,7 +118,7 @@ def __init__(self, ip=None): type = type if type is None else _get_value(type) result = pdl.ValueType.get() - super().__init__(result, type, loc=loc, ip=ip) + super().__init__(result, type=type, loc=loc, ip=ip) class OperandsOp: @@ -131,7 +131,7 @@ def __init__(self, ip=None): types = types if types is None else _get_value(types) result = pdl.RangeType.get(pdl.ValueType.get()) - super().__init__(result, types, loc=loc, ip=ip) + super().__init__(result, type=types, loc=loc, ip=ip) class OperationOp: @@ -155,7 +155,7 @@ def __init__(self, attributeNames = ArrayAttr.get(attributeNames) types = _get_values(types) result = pdl.OperationType.get() - super().__init__(result, name, args, attributeValues, attributeNames, types, loc=loc, ip=ip) + super().__init__(result, args, attributeValues, attributeNames, types, name=name, loc=loc, ip=ip) class PatternOp: @@ -170,7 +170,7 @@ def __init__(self, """Creates an PDL `pattern` operation.""" name_attr = None if name is None else _get_str_attr(name) benefit_attr = _get_int_attr(16, benefit) - super().__init__(benefit_attr, name_attr, loc=loc, ip=ip) + super().__init__(benefit_attr, sym_name=name_attr, loc=loc, ip=ip) self.regions[0].blocks.append() @property @@ -192,7 +192,7 @@ def __init__(self, op = _get_value(op) with_op = with_op if with_op is None else _get_value(with_op) with_values = _get_values(with_values) - super().__init__(op, with_op, with_values, loc=loc, ip=ip) + super().__init__(op, with_values, replOperation=with_op, loc=loc, ip=ip) class ResultOp: @@ -222,7 +222,7 @@ def __init__(self, ip=None): parent = _get_value(parent) index = index if 
index is None else _get_int_attr(32, index) - super().__init__(result, parent, index, loc=loc, ip=ip) + super().__init__(result, parent, index=index, loc=loc, ip=ip) class RewriteOp: @@ -238,7 +238,7 @@ def __init__(self, root = root if root is None else _get_value(root) name = name if name is None else _get_str_attr(name) args = _get_values(args) - super().__init__(root, name, args, loc=loc, ip=ip) + super().__init__(args, root=root,name=name, loc=loc, ip=ip) def add_body(self): """Add body (block) to the rewrite.""" @@ -261,7 +261,7 @@ def __init__(self, ip=None): type = type if type is None else _get_type_attr(type) result = pdl.TypeType.get() - super().__init__(result, type, loc=loc, ip=ip) + super().__init__(result, type=type, loc=loc, ip=ip) class TypesOp: @@ -275,4 +275,4 @@ def __init__(self, types = _get_array_attr([_get_type_attr(ty) for ty in types]) types = None if not types else types result = pdl.RangeType.get(pdl.TypeType.get()) - super().__init__(result, types, loc=loc, ip=ip) + super().__init__(result, types=types, loc=loc, ip=ip) diff --git a/mlir/test/mlir-tblgen/op-python-bindings.td b/mlir/test/mlir-tblgen/op-python-bindings.td index 59b5dec83c0308..f744ce501b1064 100644 --- a/mlir/test/mlir-tblgen/op-python-bindings.td +++ b/mlir/test/mlir-tblgen/op-python-bindings.td @@ -21,7 +21,7 @@ class TestOp traits = []> : // CHECK: _ODS_OPERAND_SEGMENTS = [-1,1,0,] def AttrSizedOperandsOp : TestOp<"attr_sized_operands", [AttrSizedOperandSegments]> { - // CHECK: def __init__(self, variadic1, non_variadic, variadic2, *, loc=None, ip=None): + // CHECK: def __init__(self, variadic1, non_variadic, *, variadic2=None, loc=None, ip=None): // CHECK: operands = [] // CHECK: results = [] // CHECK: attributes = {} @@ -110,7 +110,7 @@ def AttrSizedResultsOp : TestOp<"attr_sized_results", // CHECK-NOT: _ODS_OPERAND_SEGMENTS // CHECK-NOT: _ODS_RESULT_SEGMENTS def AttributedOp : TestOp<"attributed_op"> { - // CHECK: def __init__(self, i32attr, optionalF32Attr, 
unitAttr, in_, *, loc=None, ip=None): + // CHECK: def __init__(self, i32attr, in_, *, optionalF32Attr=None, unitAttr=None, loc=None, ip=None): // CHECK: operands = [] // CHECK: results = [] // CHECK: attributes = {} @@ -152,7 +152,7 @@ def AttributedOp : TestOp<"attributed_op"> { // CHECK-NOT: _ODS_OPERAND_SEGMENTS // CHECK-NOT: _ODS_RESULT_SEGMENTS def AttributedOpWithOperands : TestOp<"attributed_op_with_operands"> { - // CHECK: def __init__(self, _gen_arg_0, in_, _gen_arg_2, is_, *, loc=None, ip=None): + // CHECK: def __init__(self, _gen_arg_0, _gen_arg_2, *, in_=None, is_=None, loc=None, ip=None): // CHECK: operands = [] // CHECK: results = [] // CHECK: attributes = {} @@ -286,7 +286,7 @@ def MissingNamesOp : TestOp<"missing_names"> { // CHECK-NOT: _ODS_RESULT_SEGMENTS def OneOptionalOperandOp : TestOp<"one_optional_operand"> { let arguments = (ins AnyType:$non_optional, Optional:$optional); - // CHECK: def __init__(self, non_optional, optional, *, loc=None, ip=None): + // CHECK: def __init__(self, non_optional, *, optional=None, loc=None, ip=None): // CHECK: operands = [] // CHECK: results = [] // CHECK: attributes = {} diff --git a/mlir/test/python/dialects/python_test.py b/mlir/test/python/dialects/python_test.py index e7b1f44a3ad8e9..c73fce23d3c49c 100644 --- a/mlir/test/python/dialects/python_test.py +++ b/mlir/test/python/dialects/python_test.py @@ -28,13 +28,13 @@ def testAttributes(): # CHECK-DAG: optional_i32 = 2 : i32 # CHECK-DAG: unit # CHECK: } - op = test.AttributedOp(one, two, unit) + op = test.AttributedOp(one, optional_i32=two, unit=unit) print(f"{op}") # CHECK: "python_test.attributed_op"() { # CHECK: mandatory_i32 = 2 : i32 # CHECK: } - op2 = test.AttributedOp(two, None, None) + op2 = test.AttributedOp(two) print(f"{op2}") # @@ -218,11 +218,11 @@ def testOptionalOperandOp(): module = Module.create() with InsertionPoint(module.body): - op1 = test.OptionalOperandOp(None) + op1 = test.OptionalOperandOp() # CHECK: op1.input is None: True 
print(f"op1.input is None: {op1.input is None}") - op2 = test.OptionalOperandOp(op1) + op2 = test.OptionalOperandOp(input=op1) # CHECK: op2.input is None: False print(f"op2.input is None: {op2.input is None}") diff --git a/mlir/test/python/dialects/vector.py b/mlir/test/python/dialects/vector.py index c31579545e6e7f..8f8d7f19191cfa 100644 --- a/mlir/test/python/dialects/vector.py +++ b/mlir/test/python/dialects/vector.py @@ -46,9 +46,9 @@ def testTransferReadOp(): with InsertionPoint(f.add_entry_block()): A, zero, padding, mask = f.arguments vector.TransferReadOp(vector_type, A, [zero, zero], identity_map_attr, - padding, mask, None) + padding, mask=mask) vector.TransferReadOp(vector_type, A, [zero, zero], identity_map_attr, - padding, None, None) + padding) func.ReturnOp([]) # CHECK: @transfer_read(%[[MEM:.*]]: memref, %[[IDX:.*]]: index, diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp index 16fccff973ca76..83d2acce3ba2c9 100644 --- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp +++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp @@ -620,6 +620,13 @@ populateBuilderArgs(const Operator &op, if (!op.getArg(i).is()) operandNames.push_back(name); } +} + +/// Populates `builderArgs` with the Python-compatible names of builder function +/// successor arguments. Additionally, `successorArgNames` is also populated. 
+static void populateBuilderArgsSuccessors( + const Operator &op, llvm::SmallVectorImpl &builderArgs, + llvm::SmallVectorImpl &successorArgNames) { for (int i = 0, e = op.getNumSuccessors(); i < e; ++i) { NamedSuccessor successor = op.getSuccessor(i); @@ -857,6 +864,8 @@ static void emitDefaultOpBuilder(const Operator &op, raw_ostream &os) { populateBuilderArgsResults(op, builderArgs); size_t numResultArgs = builderArgs.size(); populateBuilderArgs(op, builderArgs, operandArgNames, successorArgNames); + size_t numOperandAttrArgs = builderArgs.size() - numResultArgs; + populateBuilderArgsSuccessors(op, builderArgs, successorArgNames); populateBuilderLinesOperand(op, operandArgNames, builderLines); populateBuilderLinesAttr( @@ -868,10 +877,53 @@ static void emitDefaultOpBuilder(const Operator &op, raw_ostream &os) { populateBuilderLinesSuccessors(op, successorArgNames, builderLines); populateBuilderRegions(op, builderArgs, builderLines); - builderArgs.push_back("*"); - builderArgs.push_back("loc=None"); - builderArgs.push_back("ip=None"); - os << llvm::formatv(initTemplate, llvm::join(builderArgs, ", "), + // Layout of builderArgs vector elements: + // [ result_args operand_attr_args successor_args regions ] + + // Determine whether the argument corresponding to a given index into the + // builderArgs vector is a python keyword argument or not. + auto isKeywordArgFn = [&](size_t builderArgIndex) -> bool { + // All result, successor, and region arguments are positional arguments. 
+ if ((builderArgIndex < numResultArgs) || + (builderArgIndex >= (numResultArgs + numOperandAttrArgs))) + return false; + // Keyword arguments: + // - optional named attributes (including unit attributes) + // - default-valued named attributes + // - optional operands + Argument a = op.getArg(builderArgIndex - numResultArgs); + if (auto *nattr = a.dyn_cast()) + return (nattr->attr.isOptional() || nattr->attr.hasDefaultValue()); + else if (auto *ntype = a.dyn_cast()) + return ntype->isOptional(); + else + return false; + }; + + // StringRefs in functionArgs refer to strings allocated by builderArgs. + llvm::SmallVector functionArgs; + + // Add positional arguments. + for (size_t i = 0, cnt = builderArgs.size(); i < cnt; ++i) { + if (!isKeywordArgFn(i)) + functionArgs.push_back(builderArgs[i]); + } + + // Add a bare '*' to indicate that all following arguments must be keyword + // arguments. + functionArgs.push_back("*"); + + // Add a default 'None' value to each keyword arg string, and then add to the + // function args list. 
+ for (size_t i = 0, cnt = builderArgs.size(); i < cnt; ++i) { + if (isKeywordArgFn(i)) { + builderArgs[i].append("=None"); + functionArgs.push_back(builderArgs[i]); + } + } + functionArgs.push_back("loc=None"); + functionArgs.push_back("ip=None"); + os << llvm::formatv(initTemplate, llvm::join(functionArgs, ", "), llvm::join(builderLines, "\n ")); } From 37ccfc55ab4f12a1cf9f3c030c05f9bb30457149 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Mon, 23 May 2022 01:21:06 -0400 Subject: [PATCH 202/908] [Sparc] Have test use IAS --- llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll index e6712b5fc9a914..706e9e1e33d14a 100644 --- a/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll +++ b/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=sparc -no-integrated-as +; RUN: llc < %s -march=sparc ; PR 1557 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" From 8c40e16fb4e20272808cd795ebc86125e69e0cd5 Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Mon, 23 May 2022 14:17:01 +0800 Subject: [PATCH 203/908] [InstCombine] add tests for bitcast; NFC --- llvm/test/Transforms/InstCombine/bitcast.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index 9289afccff3114..da2ff8e3d94457 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -149,6 +149,21 @@ define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) { ret <4 x float> %bc3 } +define <4 x float> @bitcasts_or_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) { +; CHECK-LABEL: 
@bitcasts_or_bitcast_to_fp( +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[AND:%.*]] = or <2 x i64> [[BC1]], [[BC2]] +; CHECK-NEXT: [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float> +; CHECK-NEXT: ret <4 x float> [[BC3]] +; + %bc1 = bitcast <4 x float> %a to <2 x i64> + %bc2 = bitcast <8 x i16> %b to <2 x i64> + %and = or <2 x i64> %bc1, %bc2 + %bc3 = bitcast <2 x i64> %and to <4 x float> + ret <4 x float> %bc3 +} + ; FIXME: Transform limited from changing vector op to integer op to avoid codegen problems. define i128 @bitcast_or_bitcast(i128 %a, <2 x i64> %b) { From b876c23604c748bd267c1fcb0572845ee61549cc Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Mon, 23 May 2022 11:17:24 +0500 Subject: [PATCH 204/908] Revert "[lldb] Consider binary as module of last resort" This reverts commit a3c3482ceb529206b0ae4e7782e5496da5e0879d. It broke LLDB API test TestBadAddressBreakpoints.py Differential revision: https://reviews.llvm.org/D124731 --- .../Breakpoint/BreakpointResolverAddress.cpp | 27 +++++--------- lldb/source/Commands/Options.td | 6 ++-- .../breakpoint/set/address-nomodule/Makefile | 3 -- .../TestBreakpointAddressNoModule.py | 36 ------------------- .../set/address-nomodule/inferior.c | 6 ---- 5 files changed, 10 insertions(+), 68 deletions(-) delete mode 100644 lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile delete mode 100644 lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py delete mode 100644 lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c diff --git a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp index 9b6b6d29cbce85..c173fc0c2a7a6e 100644 --- a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp @@ -121,27 +121,16 @@ Searcher::CallbackReturn 
BreakpointResolverAddress::SearchCallback( if (filter.AddressPasses(m_addr)) { if (breakpoint.GetNumLocations() == 0) { - // If the address is just an offset ... - if (!m_addr.IsSectionOffset()) { - ModuleSP containing_module_sp = nullptr; + // If the address is just an offset, and we're given a module, see if we + // can find the appropriate module loaded in the binary, and fix up + // m_addr to use that. + if (!m_addr.IsSectionOffset() && m_module_filespec) { Target &target = breakpoint.GetTarget(); - if (m_module_filespec) { - // ... and we're given a module, see if we can find the - // appropriate module loaded in the binary, and fix up - // m_addr to use that. - ModuleSpec module_spec(m_module_filespec); - containing_module_sp = - target.GetImages().FindFirstModule(module_spec); - } else { - // ... and we're not given a module, see if the offset is - // somewhere in the executable module. If it is, then we'll - // fix up m_addr to use that. - containing_module_sp = target.GetExecutableModule(); - } - if (containing_module_sp) { + ModuleSpec module_spec(m_module_filespec); + ModuleSP module_sp = target.GetImages().FindFirstModule(module_spec); + if (module_sp) { Address tmp_address; - if (containing_module_sp->ResolveFileAddress(m_addr.GetOffset(), - tmp_address)) + if (module_sp->ResolveFileAddress(m_addr.GetOffset(), tmp_address)) m_addr = tmp_address; } } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 284437887a0b94..c326f8a3207488 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -128,15 +128,13 @@ let Command = "breakpoint set" in { Arg<"AddressOrExpression">, Required, Desc<"Set the breakpoint at the specified address. 
If the address maps " "uniquely to a particular binary, then the address will be converted to " - "a \"file\" address, so that the breakpoint will track that binary+offset " + "a \"file\"address, so that the breakpoint will track that binary+offset " "no matter where the binary eventually loads. Alternately, if you also " "specify the module - with the -s option - then the address will be " "treated as a file address in that module, and resolved accordingly. " "Again, this will allow lldb to track that offset on subsequent reloads. " "The module need not have been loaded at the time you specify this " - "breakpoint, and will get resolved when the module is loaded. If no " - "module is specified, the binary being debugged is considered as a " - "fallback.">; + "breakpoint, and will get resolved when the module is loaded.">; def breakpoint_set_name : Option<"name", "n">, Group<3>, Arg<"FunctionName">, Completion<"Symbol">, Required, Desc<"Set the breakpoint by function name. Can be repeated multiple times " diff --git a/lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile b/lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile deleted file mode 100644 index 7c82a2dde6c2e7..00000000000000 --- a/lldb/test/API/commands/breakpoint/set/address-nomodule/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := inferior.c - -include Makefile.rules diff --git a/lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py b/lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py deleted file mode 100644 index 3e3550d6947085..00000000000000 --- a/lldb/test/API/commands/breakpoint/set/address-nomodule/TestBreakpointAddressNoModule.py +++ /dev/null @@ -1,36 +0,0 @@ -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - -class TestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - def get_address_from_symbol(self, 
symbol): - target = lldbutil.run_to_breakpoint_make_target(self, "a.out", True) - bp = target.BreakpointCreateByName(symbol, None) - address = bp.GetLocationAtIndex(0).GetAddress().GetFileAddress() - return address - - def test_set_address_no_module(self): - self.build() - - main_address = self.get_address_from_symbol("main") - - target = lldbutil.run_to_breakpoint_make_target(self) - debugger = target.GetDebugger() - - debugger.HandleCommand(f"break set -a {main_address:#x}") - self.assertEquals(target.GetNumBreakpoints(), 1) - - bp = target.GetBreakpointAtIndex(0) - self.assertIsNotNone(bp) - - _, _, thread, _ = lldbutil.run_to_breakpoint_do_run(self, target, bp) - self.assertGreaterEqual(thread.GetNumFrames(), 1) - - thread_pc = thread.GetFrameAtIndex(0).GetPCAddress() - self.assertNotEqual(thread_pc, None) - - self.assertEquals(main_address, thread_pc.GetFileAddress()) diff --git a/lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c b/lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c deleted file mode 100644 index b3af1438927c14..00000000000000 --- a/lldb/test/API/commands/breakpoint/set/address-nomodule/inferior.c +++ /dev/null @@ -1,6 +0,0 @@ -int function(int a) { return a; } - -int main() { - int f = function(10); - return f; -} From 32f189b0d9a881341e19e17a459f5b9cac4b0484 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 19 May 2022 11:05:24 +0200 Subject: [PATCH 205/908] [analyzer] Implement assumeInclusiveRange in terms of assumeInclusiveRangeDual Depends on D124758. This is the very same thing we have done for assumeDual, but this time we do it for assumeInclusiveRange. This patch is basically a no-brainer copy of that previous patch. 
Differential Revision: https://reviews.llvm.org/D125892 --- .../Core/PathSensitive/ConstraintManager.h | 47 +++++++------------ .../PathSensitive/SimpleConstraintManager.h | 21 +++++---- .../StaticAnalyzer/Core/ConstraintManager.cpp | 41 ++++++++++++++++ .../Core/SimpleConstraintManager.cpp | 2 +- 4 files changed, 71 insertions(+), 40 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h index f0dbcfd7dbff4e..bd92e7ff9af93b 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h @@ -94,35 +94,18 @@ class ConstraintManager { /// not perfectly precise and this may happen very rarely.) ProgramStatePair assumeDual(ProgramStateRef State, DefinedSVal Cond); - virtual ProgramStateRef assumeInclusiveRange(ProgramStateRef State, - NonLoc Value, - const llvm::APSInt &From, - const llvm::APSInt &To, - bool InBound) = 0; - - virtual ProgramStatePair assumeInclusiveRangeDual(ProgramStateRef State, - NonLoc Value, - const llvm::APSInt &From, - const llvm::APSInt &To) { - ProgramStateRef StInRange = - assumeInclusiveRange(State, Value, From, To, true); - - // If StTrue is infeasible, asserting the falseness of Cond is unnecessary - // because the existing constraints already establish this. - if (!StInRange) - return ProgramStatePair((ProgramStateRef)nullptr, State); - - ProgramStateRef StOutOfRange = - assumeInclusiveRange(State, Value, From, To, false); - if (!StOutOfRange) { - // We are careful to return the original state, /not/ StTrue, - // because we want to avoid having callers generate a new node - // in the ExplodedGraph. 
- return ProgramStatePair(State, (ProgramStateRef)nullptr); - } - - return ProgramStatePair(StInRange, StOutOfRange); - } + ProgramStateRef assumeInclusiveRange(ProgramStateRef State, NonLoc Value, + const llvm::APSInt &From, + const llvm::APSInt &To, bool InBound); + + /// Returns a pair of states (StInRange, StOutOfRange) where the given value + /// is assumed to be in the range or out of the range, respectively. + /// (Note that these two states might be equal if the parent state turns out + /// to be infeasible. This may happen if the underlying constraint solver is + /// not perfectly precise and this may happen very rarely.) + ProgramStatePair assumeInclusiveRangeDual(ProgramStateRef State, NonLoc Value, + const llvm::APSInt &From, + const llvm::APSInt &To); /// If a symbol is perfectly constrained to a constant, attempt /// to return the concrete value. @@ -163,6 +146,12 @@ class ConstraintManager { virtual ProgramStateRef assumeInternal(ProgramStateRef state, DefinedSVal Cond, bool Assumption) = 0; + virtual ProgramStateRef assumeInclusiveRangeInternal(ProgramStateRef State, + NonLoc Value, + const llvm::APSInt &From, + const llvm::APSInt &To, + bool InBound) = 0; + /// canReasonAbout - Not all ConstraintManagers can accurately reason about /// all SVal values. This method returns true if the ConstraintManager can /// reasonably handle a given SVal value. This is typically queried by diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h index 50206f6883adb6..725140e073c617 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h @@ -34,16 +34,6 @@ class SimpleConstraintManager : public ConstraintManager { // Implementation for interface from ConstraintManager. 
//===------------------------------------------------------------------===// - /// Ensures that the DefinedSVal conditional is expressed as a NonLoc by - /// creating boolean casts to handle Loc's. - ProgramStateRef assumeInternal(ProgramStateRef State, DefinedSVal Cond, - bool Assumption) override; - - ProgramStateRef assumeInclusiveRange(ProgramStateRef State, NonLoc Value, - const llvm::APSInt &From, - const llvm::APSInt &To, - bool InRange) override; - protected: //===------------------------------------------------------------------===// // Interface that subclasses must implement. @@ -74,6 +64,17 @@ class SimpleConstraintManager : public ConstraintManager { // Internal implementation. //===------------------------------------------------------------------===// + /// Ensures that the DefinedSVal conditional is expressed as a NonLoc by + /// creating boolean casts to handle Loc's. + ProgramStateRef assumeInternal(ProgramStateRef State, DefinedSVal Cond, + bool Assumption) override; + + ProgramStateRef assumeInclusiveRangeInternal(ProgramStateRef State, + NonLoc Value, + const llvm::APSInt &From, + const llvm::APSInt &To, + bool InRange) override; + SValBuilder &getSValBuilder() const { return SVB; } BasicValueFactory &getBasicVals() const { return SVB.getBasicValueFactory(); } SymbolManager &getSymbolManager() const { return SVB.getSymbolManager(); } diff --git a/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp index c29b3ab50a3c25..8a3ee5fb65040e 100644 --- a/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp @@ -71,8 +71,49 @@ ConstraintManager::assumeDual(ProgramStateRef State, DefinedSVal Cond) { return ProgramStatePair(StTrue, StFalse); } +ConstraintManager::ProgramStatePair +ConstraintManager::assumeInclusiveRangeDual(ProgramStateRef State, NonLoc Value, + const llvm::APSInt &From, + const llvm::APSInt &To) { + ProgramStateRef StInRange = + 
assumeInclusiveRangeInternal(State, Value, From, To, true); + if (!StInRange) { + ProgramStateRef StOutOfRange = + assumeInclusiveRangeInternal(State, Value, From, To, false); + if (LLVM_UNLIKELY(!StOutOfRange)) { // both infeasible + ProgramStateRef StInfeasible = State->cloneAsPosteriorlyOverconstrained(); + assert(StInfeasible->isPosteriorlyOverconstrained()); + // Checkers might rely on the API contract that both returned states + // cannot be null. Thus, we return StInfeasible for both branches because + // it might happen that a Checker uncoditionally uses one of them if the + // other is a nullptr. This may also happen with the non-dual and + // adjacent `assume(true)` and `assume(false)` calls. By implementing + // assume in therms of assumeDual, we can keep our API contract there as + // well. + return ProgramStatePair(StInfeasible, StInfeasible); + } + } + + ProgramStateRef StOutOfRange = + assumeInclusiveRangeInternal(State, Value, From, To, false); + if (!StOutOfRange) { + return ProgramStatePair(StInRange, nullptr); + } + + return ProgramStatePair(StInRange, StOutOfRange); +} + ProgramStateRef ConstraintManager::assume(ProgramStateRef State, DefinedSVal Cond, bool Assumption) { ConstraintManager::ProgramStatePair R = assumeDual(State, Cond); return Assumption ? R.first : R.second; } + +ProgramStateRef +ConstraintManager::assumeInclusiveRange(ProgramStateRef State, NonLoc Value, + const llvm::APSInt &From, + const llvm::APSInt &To, bool InBound) { + ConstraintManager::ProgramStatePair R = + assumeInclusiveRangeDual(State, Value, From, To); + return InBound ? 
R.first : R.second; +} diff --git a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp index 0a9814a17860a5..7460b51a769751 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp @@ -91,7 +91,7 @@ ProgramStateRef SimpleConstraintManager::assumeAux(ProgramStateRef State, } // end switch } -ProgramStateRef SimpleConstraintManager::assumeInclusiveRange( +ProgramStateRef SimpleConstraintManager::assumeInclusiveRangeInternal( ProgramStateRef State, NonLoc Value, const llvm::APSInt &From, const llvm::APSInt &To, bool InRange) { From 96fba640cf58402b1015924f6582b14c6ac8cc32 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 19 May 2022 11:14:56 +0200 Subject: [PATCH 206/908] [analyzer][NFC] Factor out the copy-paste code repetition of assumeDual and assumeInclusiveRangeDual Depends on D125892. There might be efficiency and performance implications by using a lambda. Thus, I am going to conduct measurements to see if there is any noticeable impact. I've been thinking about two more alternatives: 1) Make `assumeDualImpl` a variadic template and (perfect) forward the arguments for the used `assume` function. 2) Use a macros. I have concerns though, whether these alternatives would deteriorate the readability of the code. 
Differential Revision: https://reviews.llvm.org/D125954 --- .../Core/PathSensitive/ConstraintManager.h | 4 ++ .../StaticAnalyzer/Core/ConstraintManager.cpp | 48 +++++++------------ 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h index bd92e7ff9af93b..755371f93b9c04 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h @@ -162,6 +162,10 @@ class ConstraintManager { /// Returns whether or not a symbol is known to be null ("true"), known to be /// non-null ("false"), or may be either ("underconstrained"). virtual ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym); + + template + ProgramStatePair assumeDualImpl(ProgramStateRef &State, + AssumeFunction &Assume); }; std::unique_ptr diff --git a/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp index 8a3ee5fb65040e..096266f75ce6c5 100644 --- a/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp @@ -42,12 +42,14 @@ ConditionTruthVal ConstraintManager::checkNull(ProgramStateRef State, return {}; } +template ConstraintManager::ProgramStatePair -ConstraintManager::assumeDual(ProgramStateRef State, DefinedSVal Cond) { - ProgramStateRef StTrue = assumeInternal(State, Cond, true); +ConstraintManager::assumeDualImpl(ProgramStateRef &State, + AssumeFunction &Assume) { + ProgramStateRef StTrue = Assume(true); if (!StTrue) { - ProgramStateRef StFalse = assumeInternal(State, Cond, false); + ProgramStateRef StFalse = Assume(false); if (LLVM_UNLIKELY(!StFalse)) { // both infeasible ProgramStateRef StInfeasible = State->cloneAsPosteriorlyOverconstrained(); assert(StInfeasible->isPosteriorlyOverconstrained()); @@ -63,7 +65,7 @@ 
ConstraintManager::assumeDual(ProgramStateRef State, DefinedSVal Cond) { return ProgramStatePair(nullptr, StFalse); } - ProgramStateRef StFalse = assumeInternal(State, Cond, false); + ProgramStateRef StFalse = Assume(false); if (!StFalse) { return ProgramStatePair(StTrue, nullptr); } @@ -71,36 +73,22 @@ ConstraintManager::assumeDual(ProgramStateRef State, DefinedSVal Cond) { return ProgramStatePair(StTrue, StFalse); } +ConstraintManager::ProgramStatePair +ConstraintManager::assumeDual(ProgramStateRef State, DefinedSVal Cond) { + auto AssumeFun = [&](bool Assumption) { + return assumeInternal(State, Cond, Assumption); + }; + return assumeDualImpl(State, AssumeFun); +} + ConstraintManager::ProgramStatePair ConstraintManager::assumeInclusiveRangeDual(ProgramStateRef State, NonLoc Value, const llvm::APSInt &From, const llvm::APSInt &To) { - ProgramStateRef StInRange = - assumeInclusiveRangeInternal(State, Value, From, To, true); - if (!StInRange) { - ProgramStateRef StOutOfRange = - assumeInclusiveRangeInternal(State, Value, From, To, false); - if (LLVM_UNLIKELY(!StOutOfRange)) { // both infeasible - ProgramStateRef StInfeasible = State->cloneAsPosteriorlyOverconstrained(); - assert(StInfeasible->isPosteriorlyOverconstrained()); - // Checkers might rely on the API contract that both returned states - // cannot be null. Thus, we return StInfeasible for both branches because - // it might happen that a Checker uncoditionally uses one of them if the - // other is a nullptr. This may also happen with the non-dual and - // adjacent `assume(true)` and `assume(false)` calls. By implementing - // assume in therms of assumeDual, we can keep our API contract there as - // well. 
- return ProgramStatePair(StInfeasible, StInfeasible); - } - } - - ProgramStateRef StOutOfRange = - assumeInclusiveRangeInternal(State, Value, From, To, false); - if (!StOutOfRange) { - return ProgramStatePair(StInRange, nullptr); - } - - return ProgramStatePair(StInRange, StOutOfRange); + auto AssumeFun = [&](bool Assumption) { + return assumeInclusiveRangeInternal(State, Value, From, To, Assumption); + }; + return assumeDualImpl(State, AssumeFun); } ProgramStateRef ConstraintManager::assume(ProgramStateRef State, From ff1681ddb303223973653f7f5f3f3435b48a1983 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 23 May 2022 08:03:23 +0100 Subject: [PATCH 207/908] [Support] Add missing header to Signals.h Without the change llvm build fails on this week's gcc-13 snapshot as: [ 0%] Building CXX object lib/Support/CMakeFiles/LLVMSupport.dir/Signals.cpp.o In file included from llvm/lib/Support/Signals.cpp:14: llvm/include/llvm/Support/Signals.h:119:8: error: variable or field 'CleanupOnSignal' declared void 119 | void CleanupOnSignal(uintptr_t Context); | ^~~~~~~~~~~~~~~ --- llvm/include/llvm/Support/Signals.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h index 44f5a750ff5cb0..937e0572d4a725 100644 --- a/llvm/include/llvm/Support/Signals.h +++ b/llvm/include/llvm/Support/Signals.h @@ -14,6 +14,7 @@ #ifndef LLVM_SUPPORT_SIGNALS_H #define LLVM_SUPPORT_SIGNALS_H +#include #include namespace llvm { From 5e9be93566f39ee6cecd579401e453eccfbe81e5 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 23 May 2022 08:39:48 +0100 Subject: [PATCH 208/908] [Support] Add missing header to Base64.h Without the change llvm build fails on this week's gcc-13 snapshot as: [ 91%] Building CXX object unittests/Support/CMakeFiles/SupportTests.dir/Base64Test.cpp.o In file included from llvm/unittests/Support/Base64Test.cpp:14: llvm/include/llvm/Support/Base64.h: In function 'std::string 
llvm::encodeBase64(const InputBytes&)': llvm/include/llvm/Support/Base64.h:29:5: error: 'uint32_t' was not declared in this scope 29 | uint32_t x = ((unsigned char)Bytes[i] << 16) | | ^~~~~~~~ --- llvm/include/llvm/Support/Base64.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h index 62064a35aa3448..da4ae1688574f9 100644 --- a/llvm/include/llvm/Support/Base64.h +++ b/llvm/include/llvm/Support/Base64.h @@ -13,6 +13,7 @@ #ifndef LLVM_SUPPORT_BASE64_H #define LLVM_SUPPORT_BASE64_H +#include #include namespace llvm { From 1b89a25a9b960886e486eb20b755634613c088f8 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 23 May 2022 15:23:00 +0800 Subject: [PATCH 209/908] [C++20] [Coroutines] Conform the updates for CWG issue 2585 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the updates in CWG issue 2585 https://cplusplus.github.io/CWG/issues/2585.html, we shouldn't find an allocation function with (size, p0, …, pn) in global scope. --- clang/lib/Sema/SemaCoroutine.cpp | 84 ++++++++++++++---------- clang/test/SemaCXX/coroutine-allocs2.cpp | 31 +++++++++ 2 files changed, 81 insertions(+), 34 deletions(-) create mode 100644 clang/test/SemaCXX/coroutine-allocs2.cpp diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index b43b0b3eb957cd..4281bb690ebff1 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1239,6 +1239,41 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() { return true; } +// Collect placement arguments for allocation function of coroutine FD. +// Return true if we collect placement arguments succesfully. Return false, +// otherwise. 
+static bool collectPlacementArgs(Sema &S, FunctionDecl &FD, SourceLocation Loc, + SmallVectorImpl &PlacementArgs) { + if (auto *MD = dyn_cast(&FD)) { + if (MD->isInstance() && !isLambdaCallOperator(MD)) { + ExprResult ThisExpr = S.ActOnCXXThis(Loc); + if (ThisExpr.isInvalid()) + return false; + ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get()); + if (ThisExpr.isInvalid()) + return false; + PlacementArgs.push_back(ThisExpr.get()); + } + } + + for (auto *PD : FD.parameters()) { + if (PD->getType()->isDependentType()) + continue; + + // Build a reference to the parameter. + auto PDLoc = PD->getLocation(); + ExprResult PDRefExpr = + S.BuildDeclRefExpr(PD, PD->getOriginalType().getNonReferenceType(), + ExprValueKind::VK_LValue, PDLoc); + if (PDRefExpr.isInvalid()) + return false; + + PlacementArgs.push_back(PDRefExpr.get()); + } + + return true; +} + bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // Form and check allocation and deallocation calls. assert(!IsPromiseDependentType && @@ -1255,13 +1290,7 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // allocated, followed by the coroutine function's arguments. If a matching // allocation function exists, use it. Otherwise, use an allocation function // that just takes the requested size. - - FunctionDecl *OperatorNew = nullptr; - FunctionDecl *OperatorDelete = nullptr; - FunctionDecl *UnusedResult = nullptr; - bool PassAlignment = false; - SmallVector PlacementArgs; - + // // [dcl.fct.def.coroutine]p9 // An implementation may need to allocate additional storage for a // coroutine. @@ -1288,31 +1317,12 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // and p_i denotes the i-th function parameter otherwise. For a non-static // member function, q_1 is an lvalue that denotes *this; any other q_i is an // lvalue that denotes the parameter copy corresponding to p_i. 
- if (auto *MD = dyn_cast(&FD)) { - if (MD->isInstance() && !isLambdaCallOperator(MD)) { - ExprResult ThisExpr = S.ActOnCXXThis(Loc); - if (ThisExpr.isInvalid()) - return false; - ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get()); - if (ThisExpr.isInvalid()) - return false; - PlacementArgs.push_back(ThisExpr.get()); - } - } - for (auto *PD : FD.parameters()) { - if (PD->getType()->isDependentType()) - continue; - // Build a reference to the parameter. - auto PDLoc = PD->getLocation(); - ExprResult PDRefExpr = - S.BuildDeclRefExpr(PD, PD->getOriginalType().getNonReferenceType(), - ExprValueKind::VK_LValue, PDLoc); - if (PDRefExpr.isInvalid()) - return false; - - PlacementArgs.push_back(PDRefExpr.get()); - } + FunctionDecl *OperatorNew = nullptr; + FunctionDecl *OperatorDelete = nullptr; + FunctionDecl *UnusedResult = nullptr; + bool PassAlignment = false; + SmallVector PlacementArgs; bool PromiseContainNew = [this, &PromiseType]() -> bool { DeclarationName NewName = @@ -1330,8 +1340,10 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // The allocation function's name is looked up by searching for it in the // scope of the promise type. // - If any declarations are found, ... - // - Otherwise, a search is performed in the global scope. - Sema::AllocationFunctionScope NewScope = PromiseContainNew ? Sema::AFS_Class : Sema::AFS_Global; + // - If no declarations are found in the scope of the promise type, a search + // is performed in the global scope. + Sema::AllocationFunctionScope NewScope = + PromiseContainNew ? Sema::AFS_Class : Sema::AFS_Global; S.FindAllocationFunctions(Loc, SourceRange(), NewScope, /*DeleteScope*/ Sema::AFS_Both, PromiseType, @@ -1339,13 +1351,17 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { OperatorNew, UnusedResult, /*Diagnose*/ false); }; + // We don't expect to call to global operator new with (size, p0, …, pn). 
+ if (PromiseContainNew && !collectPlacementArgs(S, FD, Loc, PlacementArgs)) + return false; + LookupAllocationFunction(); // [dcl.fct.def.coroutine]p9 // If no viable function is found ([over.match.viable]), overload resolution // is performed again on a function call created by passing just the amount of // space required as an argument of type std::size_t. - if (!OperatorNew && !PlacementArgs.empty()) { + if (!OperatorNew && !PlacementArgs.empty() && PromiseContainNew) { PlacementArgs.clear(); LookupAllocationFunction(); } diff --git a/clang/test/SemaCXX/coroutine-allocs2.cpp b/clang/test/SemaCXX/coroutine-allocs2.cpp new file mode 100644 index 00000000000000..05b652c8fd4d58 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-allocs2.cpp @@ -0,0 +1,31 @@ +// Tests that we wouldn't generate an allocation call in global scope with (std::size_t, p0, ..., pn) +// Although this test generates codes, it aims to test the semantics. So it is put here. +// RUN: %clang_cc1 %s -std=c++20 -S -triple %itanium_abi_triple -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s +#include "Inputs/std-coroutine.h" + +namespace std { +typedef decltype(sizeof(int)) size_t; +} + +struct Allocator {}; + +struct resumable { + struct promise_type { + + resumable get_return_object() { return {}; } + auto initial_suspend() { return std::suspend_always(); } + auto final_suspend() noexcept { return std::suspend_always(); } + void unhandled_exception() {} + void return_void(){}; + }; +}; + +void *operator new(std::size_t, void*); + +resumable f1(void *) { + co_return; +} + +// CHECK: coro.alloc: +// CHECK-NEXT: [[SIZE:%.+]] = call i64 @llvm.coro.size.i64() +// CHECK-NEXT: call {{.*}} ptr @_Znwm(i64 noundef [[SIZE]]) From 6ef5e242f2f7a307c4f4ab9c9310410d1acba6a7 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 23 May 2022 08:55:54 +0100 Subject: [PATCH 210/908] [AArch64] Fix assumptions on input type of tryCombineFixedPointConvert It is possible for the input type to not be v2i64 or v4i32, 
so weaken the assertion to a return, fixing the crash in the new test. Fixes #55606 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- .../arm64-fixed-point-scalar-cvt-dagcombine.ll | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f3f10c60447e2d..e21de4c6270ffc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15109,7 +15109,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, else if (Vec.getValueType() == MVT::v2i64) VecResTy = MVT::v2f64; else - llvm_unreachable("unexpected vector type!"); + return SDValue(); SDValue Convert = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); diff --git a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll index b068edc066760b..b580c4921fb66a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll @@ -40,8 +40,21 @@ entry: ret double %vcvtd_n_f64_s64 } +define float @do_stuff(<8 x i16> noundef %var_135) { +; CHECK-LABEL: do_stuff: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umaxv.8h h0, v0 +; CHECK-NEXT: ucvtf s0, s0, #1 +; CHECK-NEXT: ret +entry: + %vmaxv.i = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %var_135) #2 + %vcvts_n_f32_u32 = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %vmaxv.i, i32 1) + ret float %vcvts_n_f32_u32 +} + declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32) declare double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64, i32) declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>) declare i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double) - +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) #1 +declare float 
@llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32, i32) #1 From ade47bdc317bda57fe0f2a741e5bf271cfc44054 Mon Sep 17 00:00:00 2001 From: Peter Waller Date: Mon, 16 May 2022 20:59:17 +0000 Subject: [PATCH 211/908] [LV] Improve register pressure estimate at high VFs Previously, `getRegUsageForType` was implemented using `getTypeLegalizationCost`. `getRegUsageForType` is used by the loop vectorizer to estimate the register pressure caused by using a vector type. However, `getTypeLegalizationCost` currently only appears to understand splitting and not scalarization, so significantly underestimates the register requirements. Instead, use `getNumRegisters`, which understands when scalarization can occur (via computeRegisterProperties). This was discovered while investigating D118979 (Set maximum VF with shouldMaximizeVectorBandwidth), where under fixed-length 512-bit SVE the loop vectorizer previously ends up costing an v128i1 as 2 v64i* registers where it actually occupies 128 i32 registers. I'm sending this patch early for comment, I'm still doing some sanity checking with LNT. I note that getRegisterClassForType appears to return VectorRC even though the type in question (large vNi1 types) end up occupying scalar registers. That might be worth fixing too. 
Differential Revision: https://reviews.llvm.org/D125918 --- .../llvm/Analysis/TargetTransformInfo.h | 6 +- .../llvm/Analysis/TargetTransformInfoImpl.h | 2 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 7 +-- llvm/lib/Analysis/TargetTransformInfo.cpp | 2 +- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +- .../Target/RISCV/RISCVTargetTransformInfo.h | 2 +- .../Transforms/Vectorize/LoopVectorize.cpp | 10 +--- .../LoopVectorize/AArch64/i1-reg-usage.ll | 57 +++++++++++++++++++ .../LoopVectorize/X86/i1-reg-usage.ll | 32 +++++++++++ 9 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 376e1d13cdc109..4704697b40d2e2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -730,7 +730,7 @@ class TargetTransformInfo { bool isTypeLegal(Type *Ty) const; /// Returns the estimated number of registers required to represent \p Ty. - InstructionCost getRegUsageForType(Type *Ty) const; + unsigned getRegUsageForType(Type *Ty) const; /// Return true if switches should be turned into lookup tables for the /// target. 
@@ -1593,7 +1593,7 @@ class TargetTransformInfo::Concept { virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual InstructionCost getRegUsageForType(Type *Ty) = 0; + virtual unsigned getRegUsageForType(Type *Ty) = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool shouldBuildRelLookupTables() = 0; @@ -2032,7 +2032,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - InstructionCost getRegUsageForType(Type *Ty) override { + unsigned getRegUsageForType(Type *Ty) override { return Impl.getRegUsageForType(Ty); } bool shouldBuildLookupTables() override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index fe937adde411b5..383b91c7af13dc 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -312,7 +312,7 @@ class TargetTransformInfoImplBase { bool isTypeLegal(Type *Ty) const { return false; } - InstructionCost getRegUsageForType(Type *Ty) const { return 1; } + unsigned getRegUsageForType(Type *Ty) const { return 1; } bool shouldBuildLookupTables() const { return true; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a2b9860c46e6cd..c1b9533b6fc1eb 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -382,10 +382,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return getTLI()->isTypeLegal(VT); } - InstructionCost getRegUsageForType(Type *Ty) { - InstructionCost Val = getTLI()->getTypeLegalizationCost(DL, Ty).first; - assert(Val >= 0 && "Negative cost!"); - return Val; + unsigned getRegUsageForType(Type 
*Ty) { + EVT ETy = getTLI()->getValueType(DL, Ty); + return getTLI()->getNumRegisters(Ty->getContext(), ETy); } InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 384d95a5b70e5e..3e4cacf17a40dc 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -473,7 +473,7 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } -InstructionCost TargetTransformInfo::getRegUsageForType(Type *Ty) const { +unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const { return TTIImpl->getRegUsageForType(Ty); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 435d192d4912f1..1822640c74bae5 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -429,7 +429,7 @@ void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, BaseT::getPeelingPreferences(L, SE, PP); } -InstructionCost RISCVTTIImpl::getRegUsageForType(Type *Ty) { +unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) { TypeSize Size = Ty->getPrimitiveSizeInBits(); if (Ty->isVectorTy()) { if (Size.isScalable() && ST->hasVInstructions()) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index cdec074dc2a46b..bab157dbfef0c4 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -60,7 +60,7 @@ class RISCVTTIImpl : public BasicTTIImplBase { TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; - InstructionCost getRegUsageForType(Type *Ty); + unsigned getRegUsageForType(Type *Ty); InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, diff --git 
a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f24622ee620a23..6a4159b06d577a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5987,16 +5987,10 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); - // A lambda that gets the register usage for the given type and VF. - const auto &TTICapture = TTI; - auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned { + auto GetRegUsage = [&TTI = TTI](Type *Ty, ElementCount VF) -> unsigned { if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty)) return 0; - InstructionCost::CostType RegUsage = - *TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue(); - assert(RegUsage >= 0 && RegUsage <= std::numeric_limits::max() && - "Nonsensical values for register usage."); - return RegUsage; + return TTI.getRegUsageForType(VectorType::get(Ty, VF)); }; for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll new file mode 100644 index 00000000000000..f0dc8e502769d2 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll @@ -0,0 +1,57 @@ +; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 < %s | FileCheck %s +; REQUIRES: asserts + +target triple = "aarch64" + +; Test that shows how many registers the loop vectorizer thinks an illegal will consume. 
+ +; CHECK-LABEL: LV: Checking a loop in 'or_reduction_neon' from +; CHECK: LV(REG): VF = 32 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 72 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers + +define i1 @or_reduction_neon(i32 %arg, ptr %ptr) { +entry: + br label %loop +exit: + ret i1 %reduction_next +loop: + %induction = phi i32 [ 0, %entry ], [ %induction_next, %loop ] + %reduction = phi i1 [ 0, %entry ], [ %reduction_next, %loop ] + %gep = getelementptr inbounds i32, ptr %ptr, i32 %induction + %loaded = load i32, ptr %gep + %i1 = icmp eq i32 %loaded, %induction + %reduction_next = or i1 %i1, %reduction + %induction_next = add nuw i32 %induction, 1 + %cond = icmp eq i32 %induction_next, %arg + br i1 %cond, label %exit, label %loop, !llvm.loop !32 +} + +; CHECK-LABEL: LV: Checking a loop in 'or_reduction_sve' +; CHECK: LV(REG): VF = 64 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 136 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers + +define i1 @or_reduction_sve(i32 %arg, ptr %ptr) vscale_range(2,2) "target-features"="+sve" { +entry: + br label %loop +exit: + ret i1 %reduction_next +loop: + %induction = phi i32 [ 0, %entry ], [ %induction_next, %loop ] + %reduction = phi i1 [ true, %entry ], [ %reduction_next, %loop ] + %gep = getelementptr inbounds i32, ptr %ptr, i32 %induction + %loaded = load i32, ptr %gep + %i1 = icmp eq i32 %loaded, %induction + %reduction_next = or i1 %i1, %reduction + %induction_next = add nuw i32 %induction, 1 + %cond = icmp eq i32 %induction_next, %arg + br i1 %cond, label %exit, label %loop, !llvm.loop !64 +} + +!32 = distinct !{!32, !33} +!33 = !{!"llvm.loop.vectorize.width", i32 32} +!64 = distinct !{!64, !65} +!65 = !{!"llvm.loop.vectorize.width", i32 64} diff --git a/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll 
b/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll new file mode 100644 index 00000000000000..4cab716c75448a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll @@ -0,0 +1,32 @@ +; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 < %s | FileCheck %s +; REQUIRES: asserts + +target triple = "x86_64" + +; Test that shows how many registers the loop vectorizer thinks an illegal will consume. + +; CHECK-LABEL: LV: Checking a loop in 'or_reduction_avx' from +; CHECK: LV(REG): VF = 64 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 136 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers + +define i1 @or_reduction_avx(i32 %arg, ptr %ptr) "target-features"="+avx" { +entry: + br label %loop +exit: + ret i1 %reduction_next +loop: + %induction = phi i32 [ 0, %entry ], [ %induction_next, %loop ] + %reduction = phi i1 [ 0, %entry ], [ %reduction_next, %loop ] + %gep = getelementptr inbounds i32, ptr %ptr, i32 %induction + %loaded = load i32, ptr %gep + %i1 = icmp eq i32 %loaded, %induction + %reduction_next = or i1 %i1, %reduction + %induction_next = add nuw i32 %induction, 1 + %cond = icmp eq i32 %induction_next, %arg + br i1 %cond, label %exit, label %loop, !llvm.loop !64 +} + +!64 = distinct !{!64, !65} +!65 = !{!"llvm.loop.vectorize.width", i32 64} From 9e9cf3fa3d282644dd6802cbdf84f4ca6f932fa0 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 23 May 2022 16:21:42 +0800 Subject: [PATCH 212/908] Revert "[C++20] [Coroutines] Conform the updates for CWG issue 2585" This reverts commit 1b89a25a9b960886e486eb20b755634613c088f8. The test would fail in windows versions. 
--- clang/lib/Sema/SemaCoroutine.cpp | 84 ++++++++++-------------- clang/test/SemaCXX/coroutine-allocs2.cpp | 31 --------- 2 files changed, 34 insertions(+), 81 deletions(-) delete mode 100644 clang/test/SemaCXX/coroutine-allocs2.cpp diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 4281bb690ebff1..b43b0b3eb957cd 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1239,41 +1239,6 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() { return true; } -// Collect placement arguments for allocation function of coroutine FD. -// Return true if we collect placement arguments succesfully. Return false, -// otherwise. -static bool collectPlacementArgs(Sema &S, FunctionDecl &FD, SourceLocation Loc, - SmallVectorImpl &PlacementArgs) { - if (auto *MD = dyn_cast(&FD)) { - if (MD->isInstance() && !isLambdaCallOperator(MD)) { - ExprResult ThisExpr = S.ActOnCXXThis(Loc); - if (ThisExpr.isInvalid()) - return false; - ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get()); - if (ThisExpr.isInvalid()) - return false; - PlacementArgs.push_back(ThisExpr.get()); - } - } - - for (auto *PD : FD.parameters()) { - if (PD->getType()->isDependentType()) - continue; - - // Build a reference to the parameter. - auto PDLoc = PD->getLocation(); - ExprResult PDRefExpr = - S.BuildDeclRefExpr(PD, PD->getOriginalType().getNonReferenceType(), - ExprValueKind::VK_LValue, PDLoc); - if (PDRefExpr.isInvalid()) - return false; - - PlacementArgs.push_back(PDRefExpr.get()); - } - - return true; -} - bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // Form and check allocation and deallocation calls. assert(!IsPromiseDependentType && @@ -1290,7 +1255,13 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // allocated, followed by the coroutine function's arguments. If a matching // allocation function exists, use it. Otherwise, use an allocation function // that just takes the requested size. 
- // + + FunctionDecl *OperatorNew = nullptr; + FunctionDecl *OperatorDelete = nullptr; + FunctionDecl *UnusedResult = nullptr; + bool PassAlignment = false; + SmallVector PlacementArgs; + // [dcl.fct.def.coroutine]p9 // An implementation may need to allocate additional storage for a // coroutine. @@ -1317,12 +1288,31 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // and p_i denotes the i-th function parameter otherwise. For a non-static // member function, q_1 is an lvalue that denotes *this; any other q_i is an // lvalue that denotes the parameter copy corresponding to p_i. + if (auto *MD = dyn_cast(&FD)) { + if (MD->isInstance() && !isLambdaCallOperator(MD)) { + ExprResult ThisExpr = S.ActOnCXXThis(Loc); + if (ThisExpr.isInvalid()) + return false; + ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get()); + if (ThisExpr.isInvalid()) + return false; + PlacementArgs.push_back(ThisExpr.get()); + } + } + for (auto *PD : FD.parameters()) { + if (PD->getType()->isDependentType()) + continue; - FunctionDecl *OperatorNew = nullptr; - FunctionDecl *OperatorDelete = nullptr; - FunctionDecl *UnusedResult = nullptr; - bool PassAlignment = false; - SmallVector PlacementArgs; + // Build a reference to the parameter. + auto PDLoc = PD->getLocation(); + ExprResult PDRefExpr = + S.BuildDeclRefExpr(PD, PD->getOriginalType().getNonReferenceType(), + ExprValueKind::VK_LValue, PDLoc); + if (PDRefExpr.isInvalid()) + return false; + + PlacementArgs.push_back(PDRefExpr.get()); + } bool PromiseContainNew = [this, &PromiseType]() -> bool { DeclarationName NewName = @@ -1340,10 +1330,8 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // The allocation function's name is looked up by searching for it in the // scope of the promise type. // - If any declarations are found, ... - // - If no declarations are found in the scope of the promise type, a search - // is performed in the global scope. - Sema::AllocationFunctionScope NewScope = - PromiseContainNew ? 
Sema::AFS_Class : Sema::AFS_Global; + // - Otherwise, a search is performed in the global scope. + Sema::AllocationFunctionScope NewScope = PromiseContainNew ? Sema::AFS_Class : Sema::AFS_Global; S.FindAllocationFunctions(Loc, SourceRange(), NewScope, /*DeleteScope*/ Sema::AFS_Both, PromiseType, @@ -1351,17 +1339,13 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { OperatorNew, UnusedResult, /*Diagnose*/ false); }; - // We don't expect to call to global operator new with (size, p0, …, pn). - if (PromiseContainNew && !collectPlacementArgs(S, FD, Loc, PlacementArgs)) - return false; - LookupAllocationFunction(); // [dcl.fct.def.coroutine]p9 // If no viable function is found ([over.match.viable]), overload resolution // is performed again on a function call created by passing just the amount of // space required as an argument of type std::size_t. - if (!OperatorNew && !PlacementArgs.empty() && PromiseContainNew) { + if (!OperatorNew && !PlacementArgs.empty()) { PlacementArgs.clear(); LookupAllocationFunction(); } diff --git a/clang/test/SemaCXX/coroutine-allocs2.cpp b/clang/test/SemaCXX/coroutine-allocs2.cpp deleted file mode 100644 index 05b652c8fd4d58..00000000000000 --- a/clang/test/SemaCXX/coroutine-allocs2.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// Tests that we wouldn't generate an allocation call in global scope with (std::size_t, p0, ..., pn) -// Although this test generates codes, it aims to test the semantics. So it is put here. 
-// RUN: %clang_cc1 %s -std=c++20 -S -triple %itanium_abi_triple -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s -#include "Inputs/std-coroutine.h" - -namespace std { -typedef decltype(sizeof(int)) size_t; -} - -struct Allocator {}; - -struct resumable { - struct promise_type { - - resumable get_return_object() { return {}; } - auto initial_suspend() { return std::suspend_always(); } - auto final_suspend() noexcept { return std::suspend_always(); } - void unhandled_exception() {} - void return_void(){}; - }; -}; - -void *operator new(std::size_t, void*); - -resumable f1(void *) { - co_return; -} - -// CHECK: coro.alloc: -// CHECK-NEXT: [[SIZE:%.+]] = call i64 @llvm.coro.size.i64() -// CHECK-NEXT: call {{.*}} ptr @_Znwm(i64 noundef [[SIZE]]) From 0cc981e021eda2b9c14d41302c5f0409b0a42719 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 23 May 2022 09:39:00 +0100 Subject: [PATCH 213/908] [AArch64] implement isReassocProfitable, disable for (u|s)mlal. Currently reassociating add expressions can lead to failing to select (u|s)mlal. Implement isReassocProfitable to skip reassociating expressions that can be lowered to (u|s)mlal. The same issue exists for the *mlsl variants as well, but the DAG combiner doesn't use the isReassocProfitable hook before reassociating. To be fixed in a follow-up commit as this requires DAGCombiner changes as well. 
Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D125895 --- .../Target/AArch64/AArch64ISelLowering.cpp | 44 +++++++++++++------ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 5 +++ llvm/test/CodeGen/AArch64/arm64-vmul.ll | 40 ++++++++--------- 3 files changed, 53 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e21de4c6270ffc..3aaadc767dbdc8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5468,6 +5468,36 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( // Calling Convention Implementation //===----------------------------------------------------------------------===// +static unsigned getIntrinsicID(const SDNode *N) { + unsigned Opcode = N->getOpcode(); + switch (Opcode) { + default: + return Intrinsic::not_intrinsic; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast(N->getOperand(0))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return IID; + return Intrinsic::not_intrinsic; + } + } +} + +bool AArch64TargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0, + SDValue N1) const { + if (!N0.hasOneUse()) + return false; + + unsigned IID = getIntrinsicID(N1.getNode()); + // Avoid reassociating expressions that can be lowered to smlal/umlal. + if (IID == Intrinsic::aarch64_neon_umull || + N1.getOpcode() == AArch64ISD::UMULL || + IID == Intrinsic::aarch64_neon_smull || + N1.getOpcode() == AArch64ISD::SMULL) + return N0.getOpcode() != ISD::ADD; + + return true; +} + /// Selects the correct CCAssignFn for a given CallingConvention value. 
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const { @@ -10692,20 +10722,6 @@ static bool isAllConstantBuildVector(const SDValue &PotentialBVec, return true; } -static unsigned getIntrinsicID(const SDNode *N) { - unsigned Opcode = N->getOpcode(); - switch (Opcode) { - default: - return Intrinsic::not_intrinsic; - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast(N->getOperand(0))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return IID; - return Intrinsic::not_intrinsic; - } - } -} - // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)), // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a // BUILD_VECTORs with constant element C1, C2 is a constant, and: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 52ead327d0bedc..f3f11bb43e1f30 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -494,6 +494,11 @@ class AArch64TargetLowering : public TargetLowering { explicit AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI); + /// Control the following reassociation of operands: (op (op x, c1), y) -> (op + /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. + bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, + SDValue N1) const override; + /// Selects the correct CCAssignFn for a given CallingConvention value. 
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index f09d21a920ea7b..46fcaafc8b4436 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -388,12 +388,11 @@ define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind define void @smlal8h_chain_with_constant(<8 x i16>* %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; CHECK-LABEL: smlal8h_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: smull.8h v0, v0, v2 -; CHECK-NEXT: mvn.8b v2, v2 ; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: smlal.8h v0, v1, v2 -; CHECK-NEXT: add.8h v0, v0, v3 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: smlal.8h v3, v0, v2 +; CHECK-NEXT: mvn.8b v0, v2 +; CHECK-NEXT: smlal.8h v3, v1, v0 +; CHECK-NEXT: str q3, [x0] ; CHECK-NEXT: ret %xor = xor <8 x i8> %v3, %smull.1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v1, <8 x i8> %v3) @@ -407,13 +406,12 @@ define void @smlal8h_chain_with_constant(<8 x i16>* %dst, <8 x i8> %v1, <8 x i8> define void @smlal2d_chain_with_constant(<2 x i64>* %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; CHECK-LABEL: smlal2d_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: smull.2d v0, v0, v2 ; CHECK-NEXT: mov w8, #257 -; CHECK-NEXT: mvn.8b v2, v2 -; CHECK-NEXT: smlal.2d v0, v1, v2 -; CHECK-NEXT: dup.2d v1, x8 -; CHECK-NEXT: add.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: dup.2d v3, x8 +; CHECK-NEXT: smlal.2d v3, v0, v2 +; CHECK-NEXT: mvn.8b v0, v2 +; CHECK-NEXT: smlal.2d v3, v1, v0 +; CHECK-NEXT: str q3, [x0] ; CHECK-NEXT: ret %xor = xor <2 x i32> %v3, %smull.1 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v1, <2 x i32> %v3) @@ -671,12 +669,11 @@ define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind define void @umlal8h_chain_with_constant(<8 x i16>* %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> 
%v3) { ; CHECK-LABEL: umlal8h_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: umull.8h v0, v0, v2 -; CHECK-NEXT: mvn.8b v2, v2 ; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: umlal.8h v0, v1, v2 -; CHECK-NEXT: add.8h v0, v0, v3 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: umlal.8h v3, v0, v2 +; CHECK-NEXT: mvn.8b v0, v2 +; CHECK-NEXT: umlal.8h v3, v1, v0 +; CHECK-NEXT: str q3, [x0] ; CHECK-NEXT: ret %xor = xor <8 x i8> %v3, %umull.1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v1, <8 x i8> %v3) @@ -690,13 +687,12 @@ define void @umlal8h_chain_with_constant(<8 x i16>* %dst, <8 x i8> %v1, <8 x i8> define void @umlal2d_chain_with_constant(<2 x i64>* %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; CHECK-LABEL: umlal2d_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: umull.2d v0, v0, v2 ; CHECK-NEXT: mov w8, #257 -; CHECK-NEXT: mvn.8b v2, v2 -; CHECK-NEXT: umlal.2d v0, v1, v2 -; CHECK-NEXT: dup.2d v1, x8 -; CHECK-NEXT: add.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: dup.2d v3, x8 +; CHECK-NEXT: umlal.2d v3, v0, v2 +; CHECK-NEXT: mvn.8b v0, v2 +; CHECK-NEXT: umlal.2d v3, v1, v0 +; CHECK-NEXT: str q3, [x0] ; CHECK-NEXT: ret %xor = xor <2 x i32> %v3, %umull.1 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v1, <2 x i32> %v3) From 572fc7d2fd14b768b58e697015b18c46cafb421c Mon Sep 17 00:00:00 2001 From: Andre Vieira Date: Mon, 23 May 2022 09:43:39 +0100 Subject: [PATCH 214/908] [AArch64] Order STP Q's by ascending address This patch adds an AArch64 specific PostRA MachineScheduler to try to schedule STP Q's to the same base-address in ascending order of offsets. We have found this to improve performance on Neoverse N1 and should not hurt other AArch64 cores. 
Differential Revision: https://reviews.llvm.org/D125377 --- .../AArch64/AArch64MachineScheduler.cpp | 39 +++++++++++++++ .../Target/AArch64/AArch64MachineScheduler.h | 33 ++++++++++++ .../Target/AArch64/AArch64TargetMachine.cpp | 7 ++- llvm/lib/Target/AArch64/CMakeLists.txt | 1 + .../call-translator-variadic-musttail.ll | 12 ++--- .../argument-blocks-array-of-struct.ll | 8 +-- .../CodeGen/AArch64/arm64-memset-inline.ll | 50 +++++++++---------- .../AArch64/ragreedy-local-interval-cost.ll | 12 ++--- .../AArch64/sve-fixed-length-fp-select.ll | 6 +-- .../llvm/lib/Target/AArch64/BUILD.gn | 1 + 10 files changed, 123 insertions(+), 46 deletions(-) create mode 100644 llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp create mode 100644 llvm/lib/Target/AArch64/AArch64MachineScheduler.h diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp new file mode 100644 index 00000000000000..ff15c0b07aa425 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp @@ -0,0 +1,39 @@ +//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AArch64MachineScheduler.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" + +using namespace llvm; + +bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand) { + bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand); + + if (Cand.isValid()) { + MachineInstr *Instr0 = TryCand.SU->getInstr(); + MachineInstr *Instr1 = Cand.SU->getInstr(); + // When dealing with two STPqi's. 
+ if (Instr0 && Instr1 && Instr0->getOpcode() == Instr1->getOpcode () && + Instr0->getOpcode() == AArch64::STPQi) + { + MachineOperand &Base0 = Instr0->getOperand(2); + MachineOperand &Base1 = Instr1->getOperand(2); + int64_t Off0 = Instr0->getOperand(3).getImm(); + int64_t Off1 = Instr1->getOperand(3).getImm(); + // With the same base address and non-overlapping writes. + if (Base0.isIdenticalTo(Base1) && llabs (Off0 - Off1) >= 2) { + TryCand.Reason = NodeOrder; + // Order them by ascending offsets. + return Off0 < Off1; + } + } + } + + return OriginalResult; +} diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.h b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h new file mode 100644 index 00000000000000..23df015986d108 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h @@ -0,0 +1,33 @@ +//===- AArch64MachineScheduler.h - Custom AArch64 MI scheduler --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Custom AArch64 MI scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H +#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +/// A MachineSchedStrategy implementation for AArch64 post RA scheduling. 
+class AArch64PostRASchedStrategy : public PostGenericScheduler { +public: + AArch64PostRASchedStrategy(const MachineSchedContext *C) : + PostGenericScheduler(C) {} + +protected: + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; +}; + +} // end namespace llvm + +#endif + diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 2b73cd2aa4127c..bcfc3508055472 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -12,6 +12,7 @@ #include "AArch64TargetMachine.h" #include "AArch64.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64MachineScheduler.h" #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" #include "AArch64TargetObjectFile.h" @@ -474,15 +475,17 @@ class AArch64PassConfig : public TargetPassConfig { ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); + ScheduleDAGMI *DAG = + new ScheduleDAGMI(C, std::make_unique(C), + /* RemoveKillFlags=*/true); if (ST.hasFusion()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). 
- ScheduleDAGMI *DAG = createGenericSchedPostRA(C); DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } - return nullptr; + return DAG; } void addIRPasses() override; diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index aeedeb4eebac80..ca7d53dce2bb03 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -65,6 +65,7 @@ add_llvm_target(AArch64CodeGen AArch64LoadStoreOptimizer.cpp AArch64LowerHomogeneousPrologEpilog.cpp AArch64MachineFunctionInfo.cpp + AArch64MachineScheduler.cpp AArch64MacroFusion.cpp AArch64MIPeepholeOpt.cpp AArch64MCInstLower.cpp diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll index 8cb0d5401b7def..2339157f5736c8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll @@ -62,11 +62,11 @@ define i32 @test_musttail_variadic_spill(i32 %arg0, ...) { ; CHECK-NEXT: mov x24, x5 ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 -; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill +; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: mov w0, w19 @@ -132,11 +132,11 @@ define void @f_thunk(i8* %this, ...) 
{ ; CHECK-NEXT: mov x24, x5 ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 -; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill +; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: bl _get_f ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll index 2139d7043ab222..0b11b1555fb88e 100644 --- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -53,8 +53,8 @@ define [ 9 x double ] @array_9() { ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: str xzr, [x8, #64] -; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: stp q0, q0, [x8] +; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: ret ret [ 9 x double ] zeroinitializer } @@ -232,8 +232,8 @@ define [ 5 x %T_STRUCT_SAMEM ] @array_of_struct_in_memory() { ; CHECK-LABEL: array_of_struct_in_memory: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: stp q0, q0, [x8, #16] +; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret ret [ 5 x %T_STRUCT_SAMEM ] zeroinitializer @@ -350,8 +350,8 @@ define [ 2 x %T_NESTED_STRUCT_SAMEM ] @array_of_struct_nested_same_field_types_2 ; CHECK-LABEL: array_of_struct_nested_same_field_types_2: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: stp q0, q0, [x8, #16] +; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: str q0, [x8] ; 
CHECK-NEXT: ret ret [ 2 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer @@ -440,8 +440,8 @@ define %T_IN_MEMORY @return_in_memory() { ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: str xzr, [x8, #64] -; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: stp q0, q0, [x8] +; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: ret ret %T_IN_MEMORY zeroinitializer } diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll index 1d59193f4d9156..1fe87a3fb5a04c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -52,8 +52,8 @@ define void @bzero_64_heap(i8* nocapture %c) { ; CHECK-LABEL: bzero_64_heap: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: stp q0, q0, [x0] +; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 64, i1 false) ret void @@ -230,8 +230,8 @@ define void @bzero_64_stack() { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -253,8 +253,8 @@ define void @bzero_72_stack() { ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -275,10 +275,10 @@ define void @bzero_128_stack() { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; 
CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -300,14 +300,14 @@ define void @bzero_256_stack() { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 @@ -497,8 +497,8 @@ define void @memset_64_stack() { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -521,8 +521,8 @@ define void @memset_72_stack() { ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -543,10 +543,10 @@ define void @memset_128_stack() { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 ; 
CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -568,14 +568,14 @@ define void @memset_256_stack() { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll index b2c68148d0e56b..7570b8b03b8846 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -152,15 +152,11 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: stp q13, q12, [x8] ; CHECK-NEXT: stp q11, q10, [x8, #32] ; CHECK-NEXT: stp q9, q8, [x8, #64] -; CHECK-NEXT: stp q4, q15, [x8, #432] -; CHECK-NEXT: stp q14, q3, [x8, #464] -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: stp q31, q30, [x8, #96] ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: stp q29, q28, [x8, #144] +; 
CHECK-NEXT: stp q31, q30, [x8, #96] ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: stp q29, q28, [x8, #144] ; CHECK-NEXT: stp q27, q26, [x8, #176] -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: str q25, [x8, #208] ; CHECK-NEXT: stp q24, q23, [x8, #240] ; CHECK-NEXT: stp q22, q21, [x8, #272] @@ -168,7 +164,11 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: stp q18, q17, [x8, #336] ; CHECK-NEXT: stp q16, q7, [x8, #368] ; CHECK-NEXT: stp q6, q5, [x8, #400] +; CHECK-NEXT: stp q4, q15, [x8, #432] +; CHECK-NEXT: stp q14, q3, [x8, #464] +; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: str q2, [x8, #496] +; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore b8 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll index f863eee2b8654c..3539fcbf28b768 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll @@ -249,9 +249,9 @@ define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, i1 %mask) #0 { ; NO_SVE-NEXT: ldr q26, [x1, #32] ; NO_SVE-NEXT: ldr q27, [x1, #16] ; NO_SVE-NEXT: ldr q11, [x1] +; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: stp q1, q0, [x0, #224] ; NO_SVE-NEXT: mov v0.16b, v8.16b -; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: mov v1.16b, v8.16b ; NO_SVE-NEXT: mov v2.16b, v8.16b ; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b @@ -531,9 +531,9 @@ define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) #0 { ; NO_SVE-NEXT: ldr q26, [x1, #32] ; NO_SVE-NEXT: ldr q27, [x1, #16] ; NO_SVE-NEXT: ldr q11, [x1] +; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: stp q1, q0, [x0, #224] ; NO_SVE-NEXT: mov v0.16b, v8.16b -; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: 
mov v1.16b, v8.16b ; NO_SVE-NEXT: mov v2.16b, v8.16b ; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b @@ -813,9 +813,9 @@ define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, i1 %mask) #0 { ; NO_SVE-NEXT: ldr q26, [x1, #32] ; NO_SVE-NEXT: ldr q27, [x1, #16] ; NO_SVE-NEXT: ldr q11, [x1] +; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: stp q1, q0, [x0, #224] ; NO_SVE-NEXT: mov v0.16b, v8.16b -; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: mov v1.16b, v8.16b ; NO_SVE-NEXT: mov v2.16b, v8.16b ; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn index 57511fe983feb9..76b564822c76a6 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn @@ -131,6 +131,7 @@ static_library("LLVMAArch64CodeGen") { "AArch64MCInstLower.cpp", "AArch64MIPeepholeOpt.cpp", "AArch64MachineFunctionInfo.cpp", + "AArch64MachineScheduler.cpp", "AArch64MacroFusion.cpp", "AArch64PBQPRegAlloc.cpp", "AArch64PromoteConstant.cpp", From 5126c38012c1d00729549ff4e4cb9d2be66ab9ab Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 18 May 2022 15:34:12 +0200 Subject: [PATCH 215/908] [CGP] Freeze condition when despeculating ctlz/cttz Freeze the condition of the newly introduced conditional branch, to avoid immediate undefined behavior if the input to ctlz/cttz was originally poison. 
Differential Revision: https://reviews.llvm.org/D125887 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 8 ++++++-- llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll | 12 ++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9ed4f5bf432519..c71ca729391c80 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2038,7 +2038,8 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return false; // Bail if the value is never zero. - if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL)) + Value *Op = CountZeros->getOperand(0); + if (isKnownNonZero(Op, *DL)) return false; // The intrinsic will be sunk behind a compare against zero and branch. @@ -2059,7 +2060,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // Replace the unconditional branch that was created by the first split with // a compare against zero and a conditional branch. Value *Zero = Constant::getNullValue(Ty); - Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); + // Avoid introducing branch on poison. 
+ if (!isGuaranteedNotToBeUndefOrPoison(Op)) + Op = Builder.CreateFreeze(Op, Op->getName() + ".fr"); + Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz"); Builder.CreateCondBr(Cmp, EndBlock, CallBlock); StartBlock->getTerminator()->eraseFromParent(); diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll index 7b1246e83ea576..53156ab7324ec4 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll @@ -13,7 +13,8 @@ target datalayout = "e-n32:64" define i64 @cttz(i64 %A) { ; SLOW-LABEL: @cttz( ; SLOW-NEXT: entry: -; SLOW-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A:%.*]], 0 +; SLOW-NEXT: [[A_FR:%.*]] = freeze i64 [[A:%.*]] +; SLOW-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0 ; SLOW-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] ; SLOW: cond.false: ; SLOW-NEXT: [[Z:%.*]] = call i64 @llvm.cttz.i64(i64 [[A]], i1 true) @@ -29,7 +30,8 @@ define i64 @cttz(i64 %A) { ; ; FAST_LZ-LABEL: @cttz( ; FAST_LZ-NEXT: entry: -; FAST_LZ-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A:%.*]], 0 +; FAST_LZ-NEXT: [[A_FR:%.*]] = freeze i64 [[A:%.*]] +; FAST_LZ-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0 ; FAST_LZ-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] ; FAST_LZ: cond.false: ; FAST_LZ-NEXT: [[Z:%.*]] = call i64 @llvm.cttz.i64(i64 [[A]], i1 true) @@ -46,7 +48,8 @@ entry: define i64 @ctlz(i64 %A) { ; SLOW-LABEL: @ctlz( ; SLOW-NEXT: entry: -; SLOW-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A:%.*]], 0 +; SLOW-NEXT: [[A_FR:%.*]] = freeze i64 [[A:%.*]] +; SLOW-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0 ; SLOW-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] ; SLOW: cond.false: ; SLOW-NEXT: [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A]], i1 true) @@ -57,7 +60,8 @@ define i64 @ctlz(i64 %A) { ; ; FAST_TZ-LABEL: @ctlz( ; FAST_TZ-NEXT: entry: -; FAST_TZ-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A:%.*]], 0 +; FAST_TZ-NEXT: 
[[A_FR:%.*]] = freeze i64 [[A:%.*]] +; FAST_TZ-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0 ; FAST_TZ-NEXT: br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] ; FAST_TZ: cond.false: ; FAST_TZ-NEXT: [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A]], i1 true) From 8e9528cb544a3d70cea96f1f99318643095a1410 Mon Sep 17 00:00:00 2001 From: LiaoChunyu Date: Mon, 23 May 2022 08:59:41 +0800 Subject: [PATCH 216/908] [RISCV][NFC] Test cases for fmuladd intrinsic These test cases are copy from fma Reviewed By: reames Differential Revision: https://reviews.llvm.org/D126049 --- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 88af4a90c13cc3..6dce5690c4e708 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -2266,3 +2266,118 @@ define void @round_v2f64(<2 x double>* %x) { ret void } declare <2 x double> @llvm.round.v2f64(<2 x double>) + +define void @fmuladd_v8f16(<8 x half>* %x, <8 x half>* %y, <8 x half>* %z) { +; CHECK-LABEL: fmuladd_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = load <8 x half>, <8 x half>* %y + %c = load <8 x half>, <8 x half>* %z + %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) + store <8 x half> %d, <8 x half>* %x + ret void +} +declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>) + +define void @fmuladd_v4f32(<4 x float>* %x, <4 x float>* %y, <4 x float>* %z) { +; CHECK-LABEL: fmuladd_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; 
CHECK-NEXT: vle32.v v9, (a1) +; CHECK-NEXT: vle32.v v10, (a2) +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = load <4 x float>, <4 x float>* %y + %c = load <4 x float>, <4 x float>* %z + %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) + store <4 x float> %d, <4 x float>* %x + ret void +} +declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) + +define void @fmuladd_v2f64(<2 x double>* %x, <2 x double>* %y, <2 x double>* %z) { +; CHECK-LABEL: fmuladd_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v9, (a1) +; CHECK-NEXT: vle64.v v10, (a2) +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = load <2 x double>, <2 x double>* %y + %c = load <2 x double>, <2 x double>* %z + %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + store <2 x double> %d, <2 x double>* %x + ret void +} +declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) + +define void @fmsub_fmuladd_v8f16(<8 x half>* %x, <8 x half>* %y, <8 x half>* %z) { +; CHECK-LABEL: fmsub_fmuladd_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = load <8 x half>, <8 x half>* %y + %c = load <8 x half>, <8 x half>* %z + %neg = fneg <8 x half> %c + %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg) + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fnmsub_fmuladd_v4f32(<4 x float>* %x, <4 x float>* %y, <4 x float>* %z) { +; 
CHECK-LABEL: fnmsub_fmuladd_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v9, (a1) +; CHECK-NEXT: vle32.v v10, (a2) +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = load <4 x float>, <4 x float>* %y + %c = load <4 x float>, <4 x float>* %z + %neg = fneg <4 x float> %a + %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c) + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fnmadd_fmuladd_v2f64(<2 x double>* %x, <2 x double>* %y, <2 x double>* %z) { +; CHECK-LABEL: fnmadd_fmuladd_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v9, (a1) +; CHECK-NEXT: vle64.v v10, (a2) +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = load <2 x double>, <2 x double>* %y + %c = load <2 x double>, <2 x double>* %z + %neg = fneg <2 x double> %b + %neg2 = fneg <2 x double> %c + %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2) + store <2 x double> %d, <2 x double>* %x + ret void +} From 8717b492dfcd12d6387543a2f8322e0cf9059982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Wed, 18 May 2022 10:31:41 +0200 Subject: [PATCH 217/908] [clang][driver] Dynamically select gcc-toolset/devtoolset version And pick the highest one, instead of adding all possibilities to the prefixes. 
Differential Revision: https://reviews.llvm.org/D125862 --- clang/lib/Driver/ToolChains/Gnu.cpp | 36 +++++++--- clang/unittests/Driver/ToolChainTest.cpp | 88 ++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 11 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 38280a2e36526d..29e6295103c7a2 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2130,17 +2130,31 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // and gcc-toolsets. if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux && D.getVFS().exists("/opt/rh")) { - Prefixes.push_back("/opt/rh/gcc-toolset-11/root/usr"); - Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-11/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-10/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-9/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-8/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-7/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-6/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-4/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-3/root/usr"); - Prefixes.push_back("/opt/rh/devtoolset-2/root/usr"); + // Find the directory in /opt/rh/ starting with gcc-toolset-* or + // devtoolset-* with the highest version number and add that + // one to our prefixes. 
+ std::string ChosenToolsetDir; + unsigned ChosenToolsetVersion = 0; + std::error_code EC; + for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin("/opt/rh", EC), + LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef ToolsetDir = llvm::sys::path::filename(LI->path()); + unsigned ToolsetVersion; + if ((!ToolsetDir.startswith("gcc-toolset-") && + !ToolsetDir.startswith("devtoolset-")) || + ToolsetDir.substr(ToolsetDir.rfind('-') + 1) + .getAsInteger(10, ToolsetVersion)) + continue; + + if (ToolsetVersion > ChosenToolsetVersion) { + ChosenToolsetVersion = ToolsetVersion; + ChosenToolsetDir = "/opt/rh/" + ToolsetDir.str(); + } + } + + if (ChosenToolsetVersion > 0) + Prefixes.push_back(ChosenToolsetDir); } // Fall back to /usr which is used by most non-Solaris systems. diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index c652b093dc993b..4d2d938a50005c 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -610,4 +610,92 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DiagConsumer->clear(); } +TEST(ToolChainTest, Toolsets) { + IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + struct TestDiagnosticConsumer : public DiagnosticConsumer {}; + + // Check (newer) GCC toolset installation. + { + IntrusiveRefCntPtr InMemoryFileSystem( + new llvm::vfs::InMemoryFileSystem); + + // These should be ignored + InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-2", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--1", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + + // File needed for GCC installation detection. 
+ InMemoryFileSystem->addFile( + "/opt/rh/gcc-toolset-12/lib/gcc/x86_64-redhat-linux/11/crtbegin.o", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + + DiagnosticsEngine Diags(DiagID, &*DiagOpts, new TestDiagnosticConsumer); + Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags, + "clang LLVM compiler", InMemoryFileSystem); + std::unique_ptr C(TheDriver.BuildCompilation({"-v"})); + ASSERT_TRUE(C); + std::string S; + { + llvm::raw_string_ostream OS(S); + C->getDefaultToolChain().printVerboseInfo(OS); + } + if (is_style_windows(llvm::sys::path::Style::native)) + std::replace(S.begin(), S.end(), '\\', '/'); + EXPECT_EQ("Found candidate GCC installation: " + "/opt/rh/gcc-toolset-12/lib/gcc/x86_64-redhat-linux/11\n" + "Selected GCC installation: " + "/opt/rh/gcc-toolset-12/lib/gcc/x86_64-redhat-linux/11\n" + "Candidate multilib: .;@m64\n" + "Selected multilib: .;@m64\n", + S); + } + + // And older devtoolset. + { + IntrusiveRefCntPtr InMemoryFileSystem( + new llvm::vfs::InMemoryFileSystem); + + // These should be ignored + InMemoryFileSystem->addFile("/opt/rh/devtoolset-2", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + InMemoryFileSystem->addFile("/opt/rh/devtoolset-", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + InMemoryFileSystem->addFile("/opt/rh/devtoolset--", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + InMemoryFileSystem->addFile("/opt/rh/devtoolset--1", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + + // File needed for GCC installation detection. 
+ InMemoryFileSystem->addFile( + "/opt/rh/devtoolset-12/lib/gcc/x86_64-redhat-linux/11/crtbegin.o", 0, + llvm::MemoryBuffer::getMemBuffer("\n")); + + DiagnosticsEngine Diags(DiagID, &*DiagOpts, new TestDiagnosticConsumer); + Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags, + "clang LLVM compiler", InMemoryFileSystem); + std::unique_ptr C(TheDriver.BuildCompilation({"-v"})); + ASSERT_TRUE(C); + std::string S; + { + llvm::raw_string_ostream OS(S); + C->getDefaultToolChain().printVerboseInfo(OS); + } + if (is_style_windows(llvm::sys::path::Style::native)) + std::replace(S.begin(), S.end(), '\\', '/'); + EXPECT_EQ("Found candidate GCC installation: " + "/opt/rh/devtoolset-12/lib/gcc/x86_64-redhat-linux/11\n" + "Selected GCC installation: " + "/opt/rh/devtoolset-12/lib/gcc/x86_64-redhat-linux/11\n" + "Candidate multilib: .;@m64\n" + "Selected multilib: .;@m64\n", + S); + } +} + } // end anonymous namespace. From dd231f02a3eef7277ded7799eeac337faa664374 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 23 May 2022 11:48:24 +0100 Subject: [PATCH 218/908] [AArch64] Regenerate andandshift.ll test checks --- llvm/test/CodeGen/AArch64/andandshift.ll | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/andandshift.ll b/llvm/test/CodeGen/AArch64/andandshift.ll index e6019b36d5974d..00990ef4f5db91 100644 --- a/llvm/test/CodeGen/AArch64/andandshift.ll +++ b/llvm/test/CodeGen/AArch64/andandshift.ll @@ -1,11 +1,15 @@ -; RUN: llc -O3 < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 < %s | FileCheck %s + target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "arm64--linux-gnu" ; Function Attrs: nounwind readnone define i32 @test1(i8 %a) { -; CHECK-LABEL: @test1 -; CHECK: ubfx {{w[0-9]+}}, w0, #3, #5 +; CHECK-LABEL: test1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ubfx w0, w0, #3, #5 +; CHECK-NEXT: ret entry: %conv = zext 
i8 %a to i32 %shr1 = lshr i32 %conv, 3 @@ -14,9 +18,13 @@ entry: ; Function Attrs: nounwind readnone define i32 @test2(i8 %a) { -; CHECK-LABEL: @test2 -; CHECK: and {{w[0-9]+}}, w0, #0xff -; CHECK: ubfx {{w[0-9]+}}, w0, #3, #5 +; CHECK-LABEL: test2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: ubfx w9, w0, #3, #5 +; CHECK-NEXT: cmp w8, #47 +; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i32 %cmp = icmp ugt i8 %a, 47 From c5e5cf12583895396269f2a56c78e4464bd143da Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Mon, 23 May 2022 11:18:30 +0100 Subject: [PATCH 219/908] Test stackmap support for i128 This diff adds tests that check the currently-working stackmap cases for i128. This will help ensure no regressions are later introduced by D125680 (when ready). Note that i128 stackmap support is currently incomplete, so we cant test all i128 functionality: i128 constants >= 2^{63} crash LLVM non-constant i128s crash LLVM So this change tests only constant i128 operands of value < 2^{63}. A couple of incorrect comments are also fixed. 
--- llvm/test/CodeGen/AArch64/arm64-stackmap.ll | 25 ++++++++++++++++----- llvm/test/CodeGen/AArch64/stackmap.ll | 21 ++++++++++++++--- llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll | 25 ++++++++++++++++----- llvm/test/CodeGen/SystemZ/stackmap.ll | 21 ++++++++++++++--- llvm/test/CodeGen/X86/stackmap.ll | 21 ++++++++++++++--- 5 files changed, 94 insertions(+), 19 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-stackmap.ll b/llvm/test/CodeGen/AArch64/arm64-stackmap.ll index e12a35a93e2243..59e2b1cb3473cf 100644 --- a/llvm/test/CodeGen/AArch64/arm64-stackmap.ll +++ b/llvm/test/CodeGen/AArch64/arm64-stackmap.ll @@ -16,7 +16,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; Num Functions ; CHECK-NEXT: .long 11 ; Num LargeConstants -; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 3 ; Num Callsites ; CHECK-NEXT: .long 11 @@ -58,13 +58,14 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; Num LargeConstants ; CHECK-NEXT: .quad 4294967295 ; CHECK-NEXT: .quad 4294967296 +; CHECK-NEXT: .quad 4294967297 ; Constant arguments ; ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .long L{{.*}}-_constantargs ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short 6 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 @@ -79,25 +80,39 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 65536 -; SmallConstant +; LargeConstant at index 0 ; CHECK-NEXT: .byte 5 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 -; LargeConstant at index 0 +; LargeConstant at index 1 ; CHECK-NEXT: .byte 5 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 1 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 66 +; LargeConstant at index 2 +; CHECK-NEXT: .byte 5 +; 
CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 2 define void @constantargs() { entry: %0 = inttoptr i64 244837814094590 to i8* - tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 20, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296) + tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 20, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296, i128 66, i128 4294967297) ret void } diff --git a/llvm/test/CodeGen/AArch64/stackmap.ll b/llvm/test/CodeGen/AArch64/stackmap.ll index 8a58f9e6bc471b..601e3f7dbb6bfa 100644 --- a/llvm/test/CodeGen/AArch64/stackmap.ll +++ b/llvm/test/CodeGen/AArch64/stackmap.ll @@ -11,7 +11,7 @@ ; Num Functions ; CHECK-NEXT: .word 14 ; Num LargeConstants -; CHECK-NEXT: .word 3 +; CHECK-NEXT: .word 4 ; Num Callsites ; CHECK-NEXT: .word 18 @@ -63,6 +63,7 @@ ; CHECK-NEXT: .xword 2147483648 ; CHECK-NEXT: .xword 4294967295 ; CHECK-NEXT: .xword 4294967296 +; CHECK-NEXT: .xword 4294967297 ; Callsites ; Constant arguments @@ -70,7 +71,7 @@ ; CHECK-NEXT: .xword 1 ; CHECK-NEXT: .word .L{{.*}}-constantargs ; CHECK-NEXT: .hword 0 -; CHECK-NEXT: .hword 12 +; CHECK-NEXT: .hword 14 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 @@ -155,11 +156,25 @@ ; CHECK-NEXT: .hword 0 ; CHECK-NEXT: .hword 0 ; CHECK-NEXT: .word -1 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 8 +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word 66 +; LargeConstant at index 2 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 8 +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word 3 define void @constantargs() { entry: %0 = inttoptr i64 12345 to i8* - tail call void (i64, i32, i8*, i32, ...) 
@llvm.experimental.patchpoint.void(i64 1, i32 16, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1) + tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 16, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1, i128 66, i128 4294967297) ret void } diff --git a/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll b/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll index 8b2466bc06014c..6bb01492a95a6c 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll @@ -50,7 +50,7 @@ target triple = "powerpc64-unknown-linux-gnu" ; Num Functions ; CHECK-NEXT: .long 11 ; Num LargeConstants -; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 3 ; Num Callsites ; CHECK-NEXT: .long 11 @@ -92,13 +92,14 @@ target triple = "powerpc64-unknown-linux-gnu" ; Num LargeConstants ; CHECK-NEXT: .quad 4294967295 ; CHECK-NEXT: .quad 4294967296 +; CHECK-NEXT: .quad 4294967297 ; Constant arguments ; ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .long .L{{.*}}-.L[[constantargs_BEGIN]] ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short 6 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 @@ -113,25 +114,39 @@ target triple = "powerpc64-unknown-linux-gnu" ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 65536 -; SmallConstant +; LargeConstant at index 0 ; CHECK-NEXT: .byte 5 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 -; LargeConstant at index 0 +; LargeConstant at index 1 ; CHECK-NEXT: .byte 5 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 1 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; 
CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 66 +; LargeConstant at index 2 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 2 define void @constantargs() { entry: %0 = inttoptr i64 244837814094590 to i8* - tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 40, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296) + tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 40, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296, i128 66, i128 4294967297) ret void } diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll index bf1a2e3dc241cf..45c09d088d589b 100644 --- a/llvm/test/CodeGen/SystemZ/stackmap.ll +++ b/llvm/test/CodeGen/SystemZ/stackmap.ll @@ -11,7 +11,7 @@ ; Num Functions ; CHECK-NEXT: .long 15 ; Num LargeConstants -; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 4 ; Num Callsites ; CHECK-NEXT: .long 19 @@ -66,6 +66,7 @@ ; CHECK-NEXT: .quad 2147483648 ; CHECK-NEXT: .quad 4294967295 ; CHECK-NEXT: .quad 4294967296 +; CHECK-NEXT: .quad 4294967297 ; Callsites ; Constant arguments @@ -73,7 +74,7 @@ ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .long .L{{.*}}-constantargs ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 12 +; CHECK-NEXT: .short 14 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 @@ -158,11 +159,25 @@ ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long -1 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 66 +; LargeConstant at index 3 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 3 define void @constantargs() { entry: %0 = inttoptr i64 12345 to i8* - tail call void (i64, i32, i8*, i32, ...) 
@llvm.experimental.patchpoint.void(i64 1, i32 14, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1) + tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 14, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1, i128 66, i128 4294967297) ret void } diff --git a/llvm/test/CodeGen/X86/stackmap.ll b/llvm/test/CodeGen/X86/stackmap.ll index 601100bd570540..3db2865ff44679 100644 --- a/llvm/test/CodeGen/X86/stackmap.ll +++ b/llvm/test/CodeGen/X86/stackmap.ll @@ -11,7 +11,7 @@ ; Num Functions ; CHECK-NEXT: .long 16 ; Num LargeConstants -; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 4 ; Num Callsites ; CHECK-NEXT: .long 20 @@ -69,6 +69,7 @@ ; CHECK-NEXT: .quad 2147483648 ; CHECK-NEXT: .quad 4294967295 ; CHECK-NEXT: .quad 4294967296 +; CHECK-NEXT: .quad 4294967297 ; Callsites ; Constant arguments @@ -76,7 +77,7 @@ ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .long L{{.*}}-_constantargs ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 12 +; CHECK-NEXT: .short 14 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 @@ -161,11 +162,25 @@ ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long -1 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 66 +; LargeConstant at index 3 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 3 define void @constantargs() { entry: %0 = inttoptr i64 12345 to i8* - tail call void (i64, i32, i8*, i32, ...) 
@llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1) + tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1, i128 66, i128 4294967297) ret void } From 21843d96e0553ca882c86a34f11802da0cd2b369 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 23 May 2022 12:04:43 +0100 Subject: [PATCH 220/908] [AMDGPU] Remove unneeded regex escaping in FileCheck patterns These must have crept in since D117298 was landed. --- .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll | 10 ++-- .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll | 56 +++++++++---------- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll | 2 +- llvm/test/CodeGen/AMDGPU/rel32.ll | 2 +- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll index 4c6955a6f554a3..72a43274a435ca 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll @@ -17,7 +17,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; 
GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) #0 { @@ -36,7 +36,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 { @@ -55,7 +55,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 { @@ -74,7 +74,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x8_bf16 
a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 { @@ -93,7 +93,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll index 14e419d5ce5420..23f6f2cb54dd7e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll @@ -20,7 +20,7 @@ declare <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32>, <4 x i32>, <16 ; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 ; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v[[[TWO]]:[[ONE]]], 
v[[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -38,7 +38,7 @@ bb: ; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 ; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v[[[TWO]]:[[ONE]]], v[[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -56,7 +56,7 @@ bb: ; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000 ; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:[[TWO]]], v{{\[}}[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -74,7 +74,7 @@ bb: ; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000 ; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:[[TWO]]], 
v{{\[}}[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -87,13 +87,13 @@ bb: } ; GCN-LABEL: {{^}}test_smfmac_f32_16x16x32_f16: -; GCN: s_load_dwordx4 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} -; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]]{{\[}}[[RLO:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SLO]]:{{[0-9]+}}]{{$}} +; GCN: s_load_dwordx4 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} +; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]][[[RLO:[0-9]+]]:{{[0-9]+}}], s[[[SLO]]:{{[0-9]+}}]{{$}} ; VGPRCD-DAG: v_mov_b64_e32 v[{{[0-9]+}}:[[RHI:[0-9]+]]], s[{{[0-9]+}}:[[SHI]]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 [[CD:a]][[RLO:[0-9]+]], s[[SLO]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} -; GCN: v_smfmac_f32_16x16x32_f16 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 -; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]] +; GCN: v_smfmac_f32_16x16x32_f16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 +; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]] define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(<4 x float> addrspace(1)* %arg, <4 x half> %a, <8 x half> %b, i32 %idx) #0 { bb: %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg @@ -103,13 +103,13 @@ bb: } ; GCN-LABEL: {{^}}test_smfmac_f32_32x32x16_f16: -; GCN: s_load_dwordx16 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} -; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]]{{\[}}[[RLO:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SLO]]:{{[0-9]+}}]{{$}} +; GCN: s_load_dwordx16 
s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} +; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]][[[RLO:[0-9]+]]:{{[0-9]+}}], s[[[SLO]]:{{[0-9]+}}]{{$}} ; VGPRCD-DAG: v_mov_b64_e32 v[{{[0-9]+}}:[[RHI:[0-9]+]]], s[{{[0-9]+}}:[[SHI]]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 [[CD:a]][[RLO:[0-9]+]], s[[SLO]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} -; GCN: v_smfmac_f32_32x32x16_f16 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 -; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:{{[0-9]+}}], s[{{[0-9:]+}}]{{$}} +; GCN: v_smfmac_f32_32x32x16_f16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 +; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:{{[0-9]+}}], s[{{[0-9:]+}}]{{$}} ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 @@ -122,13 +122,13 @@ bb: } ; GCN-LABEL: {{^}}test_smfmac_f32_16x16x32_bf16: -; GCN: s_load_dwordx4 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} -; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]]{{\[}}[[RLO:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SLO]]:{{[0-9]+}}]{{$}} +; GCN: s_load_dwordx4 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} +; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]][[[RLO:[0-9]+]]:{{[0-9]+}}], s[[[SLO]]:{{[0-9]+}}]{{$}} ; VGPRCD-DAG: v_mov_b64_e32 v[{{[0-9]+}}:[[RHI:[0-9]+]]], s[{{[0-9]+}}:[[SHI]]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 [[CD:a]][[RLO:[0-9]+]], s[[SLO]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} -; GCN: v_smfmac_f32_16x16x32_bf16 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 -; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]] +; GCN: 
v_smfmac_f32_16x16x32_bf16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 +; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]] define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(<4 x float> addrspace(1)* %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) #0 { bb: %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg @@ -138,13 +138,13 @@ bb: } ; GCN-LABEL: {{^}}test_smfmac_f32_32x32x16_bf16: -; GCN: s_load_dwordx16 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} -; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]]{{\[}}[[RLO:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SLO]]:{{[0-9]+}}]{{$}} +; GCN: s_load_dwordx16 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} +; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]][[[RLO:[0-9]+]]:{{[0-9]+}}], s[[[SLO]]:{{[0-9]+}}]{{$}} ; VGPRCD-DAG: v_mov_b64_e32 v[{{[0-9]+}}:[[RHI:[0-9]+]]], s[{{[0-9]+}}:[[SHI]]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 [[CD:a]][[RLO:[0-9]+]], s[[SLO]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} -; GCN: v_smfmac_f32_32x32x16_bf16 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 -; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:{{[0-9]+}}], s[{{[0-9:]+}}]{{$}} +; GCN: v_smfmac_f32_32x32x16_bf16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 +; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:{{[0-9]+}}], s[{{[0-9:]+}}]{{$}} ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 @@ -157,13 +157,13 @@ bb: } ; GCN-LABEL: {{^}}test_smfmac_i32_16x16x64_i8: -; GCN: s_load_dwordx4 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} -; VGPRCD-DAG: v_mov_b64_e32 
[[CD:v]]{{\[}}[[RLO:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SLO]]:{{[0-9]+}}]{{$}} +; GCN: s_load_dwordx4 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} +; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]][[[RLO:[0-9]+]]:{{[0-9]+}}], s[[[SLO]]:{{[0-9]+}}]{{$}} ; VGPRCD-DAG: v_mov_b64_e32 v[{{[0-9]+}}:[[RHI:[0-9]+]]], s[{{[0-9]+}}:[[SHI]]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 [[CD:a]][[RLO:[0-9]+]], s[[SLO]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} -; GCN: v_smfmac_i32_16x16x64_i8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 -; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]] +; GCN: v_smfmac_i32_16x16x64_i8 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 +; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]] define amdgpu_kernel void @test_smfmac_i32_16x16x64_i8(<4 x i32> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: %in.1 = load <4 x i32>, <4 x i32> addrspace(1)* %arg @@ -173,13 +173,13 @@ bb: } ; GCN-LABEL: {{^}}test_smfmac_i32_32x32x32_i8: -; GCN: s_load_dwordx16 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} -; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]]{{\[}}[[RLO:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SLO]]:{{[0-9]+}}]{{$}} +; GCN: s_load_dwordx16 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}} +; VGPRCD-DAG: v_mov_b64_e32 [[CD:v]][[[RLO:[0-9]+]]:{{[0-9]+}}], s[[[SLO]]:{{[0-9]+}}]{{$}} ; VGPRCD-DAG: v_mov_b64_e32 v[{{[0-9]+}}:[[RHI:[0-9]+]]], s[{{[0-9]+}}:[[SHI]]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 [[CD:a]][[RLO:[0-9]+]], s[[SLO]]{{$}} ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} -; GCN: v_smfmac_i32_32x32x32_i8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 -; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:{{[0-9]+}}], s[{{[0-9:]+}}]{{$}} +; GCN: v_smfmac_i32_32x32x32_i8 
[[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 +; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:{{[0-9]+}}], s[{{[0-9:]+}}]{{$}} ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index 71cc85a24aa35b..5dabf3d7d9021c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -449,7 +449,7 @@ bb: ; NOLIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9:]+}}] ; LIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0 ; GFX90A: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0 -; GFX940: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], 1.0 +; GFX940: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 diff --git a/llvm/test/CodeGen/AMDGPU/rel32.ll b/llvm/test/CodeGen/AMDGPU/rel32.ll index ee73a127330220..2765e0bb09ca41 100644 --- a/llvm/test/CodeGen/AMDGPU/rel32.ll +++ b/llvm/test/CodeGen/AMDGPU/rel32.ll @@ -4,7 +4,7 @@ @g = protected local_unnamed_addr addrspace(4) externally_initialized global i32 0, align 4 ; CHECK-LABEL: rel32_neg_offset: -; CHECK: s_getpc_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]{{]}} +; CHECK: s_getpc_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]] ; CHECK-NEXT: s_add_u32 s[[LO]], s[[LO]], g@rel32@lo-4 ; CHECK-NEXT: s_addc_u32 s[[HI]], s[[HI]], g@rel32@hi+4 define i32 addrspace(4)* 
@rel32_neg_offset() { From a02000611a8fc4843f7a3077d8bb4840ef08de8b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 23 May 2022 13:47:13 +0200 Subject: [PATCH 221/908] Add support of the next Debian (Debian 13 - Trixie) --- clang/include/clang/Driver/Distro.h | 3 ++- clang/lib/Driver/Distro.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index 2723f75e894583..c31fbd4e93f1aa 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -38,6 +38,7 @@ class Distro { DebianBuster, DebianBullseye, DebianBookworm, + DebianTrixie, Exherbo, RHEL5, RHEL6, @@ -121,7 +122,7 @@ class Distro { bool IsOpenSUSE() const { return DistroVal == OpenSUSE; } bool IsDebian() const { - return DistroVal >= DebianLenny && DistroVal <= DebianBookworm; + return DistroVal >= DebianLenny && DistroVal <= DebianTrixie; } bool IsUbuntu() const { diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 5ac38c34d11288..b0e2f3ac94c754 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -153,6 +153,8 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { return Distro::DebianBullseye; case 12: return Distro::DebianBookworm; + case 13: + return Distro::DebianTrixie; default: return Distro::UnknownDistro; } From 6f4dc5dae60598e71c8211b0c1a705474b0a223f Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 23 May 2022 13:49:34 +0200 Subject: [PATCH 222/908] Add support of the next Ubuntu (Ubuntu 22.10 - Kinetic Kudu) --- clang/include/clang/Driver/Distro.h | 3 ++- clang/lib/Driver/Distro.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index c31fbd4e93f1aa..01d66b30b0386d 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -75,6 +75,7 @@ class Distro { 
UbuntuHirsute, UbuntuImpish, UbuntuJammy, + UbuntuKinetic, UnknownDistro }; @@ -126,7 +127,7 @@ class Distro { } bool IsUbuntu() const { - return DistroVal >= UbuntuHardy && DistroVal <= UbuntuJammy; + return DistroVal >= UbuntuHardy && DistroVal <= UbuntuKinetic; } bool IsAlpineLinux() const { return DistroVal == AlpineLinux; } diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index b0e2f3ac94c754..1898667279cc3b 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -91,6 +91,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) { .Case("hirsute", Distro::UbuntuHirsute) .Case("impish", Distro::UbuntuImpish) .Case("jammy", Distro::UbuntuJammy) + .Case("kinetic", Distro::UbuntuKinetic) .Default(Distro::UnknownDistro); return Version; } From 818cc9b285e8577d1255f8b046f87fc487fd3bd0 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 23 May 2022 15:48:47 +0300 Subject: [PATCH 223/908] [AMDGPU][MC][GFX940] Disable v_mac_f32_dpp Differential Revision: https://reviews.llvm.org/D126070 --- llvm/lib/Target/AMDGPU/VOPInstructions.td | 1 + llvm/test/MC/AMDGPU/gfx940_err.s | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index bb92333304c236..78f9beae49f97d 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -708,6 +708,7 @@ class VOP_DPP_Real : let isConvergent = ps.isConvergent; let SubtargetPredicate = ps.SubtargetPredicate; let AssemblerPredicate = ps.AssemblerPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let UseNamedOperandTable = ps.UseNamedOperandTable; diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s index e425f23e83f8f9..da38629c5afc2e 100644 --- a/llvm/test/MC/AMDGPU/gfx940_err.s +++ 
b/llvm/test/MC/AMDGPU/gfx940_err.s @@ -1,8 +1,19 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 --implicit-check-not=error: %s v_mac_f32 v0, v1, v2 -// FIXME: error message is incorrect -// GFX940: error: operands are not valid for this GPU or mode +// GFX940: error: instruction not supported on this GPU + +v_mac_f32_e64 v5, v1, v2 +// GFX940: error: instruction not supported on this GPU + +v_mac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX940: error: instruction not supported on this GPU + +v_mac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX940: error: instruction not supported on this GPU + +v_mac_f32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX940: error: instruction not supported on this GPU v_mad_f32 v0, v1, v2, v3 // GFX940: error: instruction not supported on this GPU From 0eccc92fa0fd1794988cd6bfeca2314107567fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 23 May 2022 15:22:27 +0200 Subject: [PATCH 224/908] Revert "[clang][driver] Dynamically select gcc-toolset/devtoolset version" This reverts commit 8717b492dfcd12d6387543a2f8322e0cf9059982. The new unittest fails on Windows buildbots, e.g. https://lab.llvm.org/buildbot/#/builders/119/builds/8647 --- clang/lib/Driver/ToolChains/Gnu.cpp | 36 +++------- clang/unittests/Driver/ToolChainTest.cpp | 88 ------------------------ 2 files changed, 11 insertions(+), 113 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 29e6295103c7a2..38280a2e36526d 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2130,31 +2130,17 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // and gcc-toolsets. 
if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux && D.getVFS().exists("/opt/rh")) { - // Find the directory in /opt/rh/ starting with gcc-toolset-* or - // devtoolset-* with the highest version number and add that - // one to our prefixes. - std::string ChosenToolsetDir; - unsigned ChosenToolsetVersion = 0; - std::error_code EC; - for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin("/opt/rh", EC), - LE; - !EC && LI != LE; LI = LI.increment(EC)) { - StringRef ToolsetDir = llvm::sys::path::filename(LI->path()); - unsigned ToolsetVersion; - if ((!ToolsetDir.startswith("gcc-toolset-") && - !ToolsetDir.startswith("devtoolset-")) || - ToolsetDir.substr(ToolsetDir.rfind('-') + 1) - .getAsInteger(10, ToolsetVersion)) - continue; - - if (ToolsetVersion > ChosenToolsetVersion) { - ChosenToolsetVersion = ToolsetVersion; - ChosenToolsetDir = "/opt/rh/" + ToolsetDir.str(); - } - } - - if (ChosenToolsetVersion > 0) - Prefixes.push_back(ChosenToolsetDir); + Prefixes.push_back("/opt/rh/gcc-toolset-11/root/usr"); + Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-11/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-10/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-9/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-8/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-7/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-6/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-4/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-3/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-2/root/usr"); } // Fall back to /usr which is used by most non-Solaris systems. 
diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index 4d2d938a50005c..c652b093dc993b 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -610,92 +610,4 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DiagConsumer->clear(); } -TEST(ToolChainTest, Toolsets) { - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - struct TestDiagnosticConsumer : public DiagnosticConsumer {}; - - // Check (newer) GCC toolset installation. - { - IntrusiveRefCntPtr InMemoryFileSystem( - new llvm::vfs::InMemoryFileSystem); - - // These should be ignored - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-2", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--1", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - - // File needed for GCC installation detection. 
- InMemoryFileSystem->addFile( - "/opt/rh/gcc-toolset-12/lib/gcc/x86_64-redhat-linux/11/crtbegin.o", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, new TestDiagnosticConsumer); - Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags, - "clang LLVM compiler", InMemoryFileSystem); - std::unique_ptr C(TheDriver.BuildCompilation({"-v"})); - ASSERT_TRUE(C); - std::string S; - { - llvm::raw_string_ostream OS(S); - C->getDefaultToolChain().printVerboseInfo(OS); - } - if (is_style_windows(llvm::sys::path::Style::native)) - std::replace(S.begin(), S.end(), '\\', '/'); - EXPECT_EQ("Found candidate GCC installation: " - "/opt/rh/gcc-toolset-12/lib/gcc/x86_64-redhat-linux/11\n" - "Selected GCC installation: " - "/opt/rh/gcc-toolset-12/lib/gcc/x86_64-redhat-linux/11\n" - "Candidate multilib: .;@m64\n" - "Selected multilib: .;@m64\n", - S); - } - - // And older devtoolset. - { - IntrusiveRefCntPtr InMemoryFileSystem( - new llvm::vfs::InMemoryFileSystem); - - // These should be ignored - InMemoryFileSystem->addFile("/opt/rh/devtoolset-2", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/devtoolset-", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/devtoolset--", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/devtoolset--1", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - - // File needed for GCC installation detection. 
- InMemoryFileSystem->addFile( - "/opt/rh/devtoolset-12/lib/gcc/x86_64-redhat-linux/11/crtbegin.o", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, new TestDiagnosticConsumer); - Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags, - "clang LLVM compiler", InMemoryFileSystem); - std::unique_ptr C(TheDriver.BuildCompilation({"-v"})); - ASSERT_TRUE(C); - std::string S; - { - llvm::raw_string_ostream OS(S); - C->getDefaultToolChain().printVerboseInfo(OS); - } - if (is_style_windows(llvm::sys::path::Style::native)) - std::replace(S.begin(), S.end(), '\\', '/'); - EXPECT_EQ("Found candidate GCC installation: " - "/opt/rh/devtoolset-12/lib/gcc/x86_64-redhat-linux/11\n" - "Selected GCC installation: " - "/opt/rh/devtoolset-12/lib/gcc/x86_64-redhat-linux/11\n" - "Candidate multilib: .;@m64\n" - "Selected multilib: .;@m64\n", - S); - } -} - } // end anonymous namespace. From 72832efc941aa9a0366b3e73031ecf5101355f5f Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Mon, 23 May 2022 14:03:54 +0100 Subject: [PATCH 225/908] [SPIR-V] Allow setting SPIR-V version via target triple. Currently added versions are from v1.0 to v1.5, other versions can be added as needed. This change also adds documentation about SPIR-V target support in LLVM. Differential Revision: https://reviews.llvm.org/D124776 --- llvm/docs/SPIRVUsage.rst | 73 ++++++++++++++++++++++++++++++ llvm/docs/UserGuides.rst | 4 ++ llvm/include/llvm/ADT/Triple.h | 10 ++++- llvm/lib/Support/Triple.cpp | 24 ++++++++-- llvm/unittests/ADT/TripleTest.cpp | 74 +++++++++++++++++++++++++++++++ 5 files changed, 180 insertions(+), 5 deletions(-) create mode 100644 llvm/docs/SPIRVUsage.rst diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst new file mode 100644 index 00000000000000..7529b5d3b586c9 --- /dev/null +++ b/llvm/docs/SPIRVUsage.rst @@ -0,0 +1,73 @@ +============================= +User Guide for SPIR-V Target +============================= + +.. 
contents:: + :local: + +.. toctree:: + :hidden: + +Introduction +============ + +The SPIR-V target provides code generation for the SPIR-V binary format described +in `the official SPIR-V specification <https://www.khronos.org/registry/SPIR-V/>`_. + +.. _spirv-target-triples: + +Target Triples +============== + +For cross-compilation into SPIR-V use option + +``-target <Architecture><Subarchitecture>-<Vendor>-<OS>-<Environment>`` + +to specify the target triple: + + .. table:: SPIR-V Architectures + + ============ ============================================================== + Architecture Description + ============ ============================================================== + ``spirv32`` SPIR-V with 32-bit pointer width. + ``spirv64`` SPIR-V with 64-bit pointer width. + ============ ============================================================== + + .. table:: SPIR-V Subarchitectures + + ============ ============================================================== + Architecture Description + ============ ============================================================== + *<empty>* SPIR-V version deduced by tools based on the compiled input. + ``v1.0`` SPIR-V version 1.0. + ``v1.1`` SPIR-V version 1.1. + ``v1.2`` SPIR-V version 1.2. + ``v1.3`` SPIR-V version 1.3. + ``v1.4`` SPIR-V version 1.4. + ``v1.5`` SPIR-V version 1.5. + ============ ============================================================== + + .. table:: SPIR-V Vendors + + ===================== ============================================================== + Vendor Description + ===================== ============================================================== + *<empty>*/``unknown`` Generic SPIR-V target without any vendor-specific settings. + ===================== ============================================================== + + .. table:: Operating Systems + + ===================== ============================================================ + OS Description + ===================== ============================================================ + *<empty>*/``unknown`` Defaults to the OpenCL runtime. 
+ ===================== ============================================================ + + .. table:: SPIR-V Environments + + ===================== ============================================================== + Environment Description + ===================== ============================================================== + *<empty>*/``unknown`` Defaults to the OpenCL environment. + ===================== ============================================================== diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index efee7d3ba74571..533d2cf2bb17cf 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -57,6 +57,7 @@ intermediate LLVM representation. ResponseGuide Remarks SourceLevelDebugging + SPIRVUsage StackSafetyAnalysis SupportLibrary TableGen/index @@ -243,3 +244,6 @@ Additional Topics This document describes a DWARF extension to allow location descriptions on the DWARF expression stack. It is part of :doc:`AMDGPUDwarfExtensionsForHeterogeneousDebugging`. + +:doc:`SPIRVUsage` + This document describes using the SPIR-V target to compile GPU kernels. diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index e140331b2b86fb..b1deb3e7b9a7a5 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -149,7 +149,15 @@ class Triple { MipsSubArch_r6, - PPCSubArch_spe + PPCSubArch_spe, + + // SPIR-V sub-arch corresponds to its version. 
+ SPIRVSubArch_v10, + SPIRVSubArch_v11, + SPIRVSubArch_v12, + SPIRVSubArch_v13, + SPIRVSubArch_v14, + SPIRVSubArch_v15, }; enum VendorType { UnknownVendor, diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index 4aacad40c05e8b..6696d158b2c1ae 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -495,8 +495,10 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("hsail64", Triple::hsail64) .Case("spir", Triple::spir) .Case("spir64", Triple::spir64) - .Case("spirv32", Triple::spirv32) - .Case("spirv64", Triple::spirv64) + .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2", + "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", Triple::spirv32) + .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2", + "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", Triple::spirv64) .StartsWith("kalimba", Triple::kalimba) .Case("lanai", Triple::lanai) .Case("renderscript32", Triple::renderscript32) @@ -654,6 +656,16 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { if (SubArchName == "arm64e") return Triple::AArch64SubArch_arm64e; + if (SubArchName.startswith("spirv")) + return StringSwitch(SubArchName) + .EndsWith("v1.0", Triple::SPIRVSubArch_v10) + .EndsWith("v1.1", Triple::SPIRVSubArch_v11) + .EndsWith("v1.2", Triple::SPIRVSubArch_v12) + .EndsWith("v1.3", Triple::SPIRVSubArch_v13) + .EndsWith("v1.4", Triple::SPIRVSubArch_v14) + .EndsWith("v1.5", Triple::SPIRVSubArch_v15) + .Default(Triple::NoSubArch); + StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName); // For now, this is the small part. Early return. 
@@ -1511,7 +1523,9 @@ Triple Triple::get32BitArchVariant() const { case Triple::riscv64: T.setArch(Triple::riscv32); break; case Triple::sparcv9: T.setArch(Triple::sparc); break; case Triple::spir64: T.setArch(Triple::spir); break; - case Triple::spirv64: T.setArch(Triple::spirv32); break; + case Triple::spirv64: + T.setArch(Triple::spirv32, getSubArch()); + break; case Triple::wasm64: T.setArch(Triple::wasm32); break; case Triple::x86_64: T.setArch(Triple::x86); break; } @@ -1586,7 +1600,9 @@ Triple Triple::get64BitArchVariant() const { case Triple::riscv32: T.setArch(Triple::riscv64); break; case Triple::sparc: T.setArch(Triple::sparcv9); break; case Triple::spir: T.setArch(Triple::spir64); break; - case Triple::spirv32: T.setArch(Triple::spirv64); break; + case Triple::spirv32: + T.setArch(Triple::spirv64, getSubArch()); + break; case Triple::thumb: T.setArch(Triple::aarch64); break; case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; case Triple::wasm32: T.setArch(Triple::wasm64); break; diff --git a/llvm/unittests/ADT/TripleTest.cpp b/llvm/unittests/ADT/TripleTest.cpp index 19e5978acedf30..3b6a582c42fd13 100644 --- a/llvm/unittests/ADT/TripleTest.cpp +++ b/llvm/unittests/ADT/TripleTest.cpp @@ -243,11 +243,85 @@ TEST(TripleTest, ParsedIDs) { T = Triple("spirv32-unknown-unknown"); EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::NoSubArch, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv32v1.0-unknown-unknown"); + EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v10, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv32v1.1-unknown-unknown"); + EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v11, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = 
Triple("spirv32v1.2-unknown-unknown"); + EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v12, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv32v1.3-unknown-unknown"); + EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v13, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv32v1.4-unknown-unknown"); + EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v14, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv32v1.5-unknown-unknown"); + EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v15, T.getSubArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); EXPECT_EQ(Triple::UnknownOS, T.getOS()); T = Triple("spirv64-unknown-unknown"); EXPECT_EQ(Triple::spirv64, T.getArch()); + EXPECT_EQ(Triple::NoSubArch, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv64v1.0-unknown-unknown"); + EXPECT_EQ(Triple::spirv64, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v10, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv64v1.1-unknown-unknown"); + EXPECT_EQ(Triple::spirv64, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v11, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv64v1.2-unknown-unknown"); + EXPECT_EQ(Triple::spirv64, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v12, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv64v1.3-unknown-unknown"); + EXPECT_EQ(Triple::spirv64, T.getArch()); + 
EXPECT_EQ(Triple::SPIRVSubArch_v13, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv64v1.4-unknown-unknown"); + EXPECT_EQ(Triple::spirv64, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v14, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv64v1.5-unknown-unknown"); + EXPECT_EQ(Triple::spirv64, T.getArch()); + EXPECT_EQ(Triple::SPIRVSubArch_v15, T.getSubArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); EXPECT_EQ(Triple::UnknownOS, T.getOS()); From 45226d04f016cf2fdfa8e4d84b31650b975c58e1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 23 May 2022 15:12:15 +0200 Subject: [PATCH 226/908] [InstCombine] Reuse icmp of and/or folds for logical and/or Similarly to a change recently done for fcmps, add a flag that indicates whether the and/or is logical to foldAndOrOfICmps, and reuse the function when folding logical and/or. We were already calling some parts of it, but this gives us a clearer indication of which parts may need poison-safe variants, and would also allow to fold combinations of bitwise and logical and/or. This change should be close to NFC, because all folds this enables were either already called previously, or can make use of implied poison reasoning. 
--- .../InstCombine/InstCombineAndOrXor.cpp | 94 ++++++++++++------- .../InstCombine/InstCombineInternal.h | 4 +- .../InstCombine/InstCombineSelect.cpp | 19 +--- 3 files changed, 64 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 6068a4e3922443..f442cf10bc243c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -872,6 +872,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1, /// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and /// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1). +/// Also used for logical and/or, must be poison safe. static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, InstCombiner::BuilderTy &Builder) { CmpInst::Predicate Pred0, Pred1; @@ -891,6 +892,7 @@ static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, } /// Reduce a pair of compares that check if a value has exactly 1 bit set. +/// Also used for logical and/or, must be poison safe. static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, InstCombiner::BuilderTy &Builder) { // Handle 'and' / 'or' commutation: make the equality check the first operand. @@ -1083,12 +1085,9 @@ Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, /// common operand with the constant. Callers are expected to call this with /// Cmp0/Cmp1 switched to handle logic op commutativity. static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, - BinaryOperator &Logic, + bool IsAnd, InstCombiner::BuilderTy &Builder, const SimplifyQuery &Q) { - bool IsAnd = Logic.getOpcode() == Instruction::And; - assert((IsAnd || Logic.getOpcode() == Instruction::Or) && "Wrong logic op"); - // Match an equality compare with a non-poison constant as Cmp0. 
// Also, give up if the compare can be constant-folded to avoid looping. ICmpInst::Predicate Pred0; @@ -1122,7 +1121,8 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; SubstituteCmp = Builder.CreateICmp(Pred1, Y, C); } - return Builder.CreateBinOp(Logic.getOpcode(), Cmp0, SubstituteCmp); + return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0, + SubstituteCmp); } /// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2) @@ -2389,14 +2389,17 @@ Value *foldAndOrOfICmpEqZeroAndICmp(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, } /// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible. +/// If IsLogical is true, then the and/or is in select form and the transform +/// must be poison-safe. Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, - BinaryOperator &BO, bool IsAnd) { - const SimplifyQuery Q = SQ.getWithInstruction(&BO); + Instruction &I, bool IsAnd, + bool IsLogical) { + const SimplifyQuery Q = SQ.getWithInstruction(&I); // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) // if K1 and K2 are a one-bit mask. - if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &BO, IsAnd)) + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &I, IsAnd, IsLogical)) return V; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); @@ -2421,48 +2424,67 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, } } - // handle (roughly): - // (icmp ne (A & B), C) | (icmp ne (A & D), E) - // (icmp eq (A & B), C) & (icmp eq (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, Builder)) - return V; + // TODO: Some (but not all) of the patterns handled by this function are + // safe with logical and/or. 
+ if (!IsLogical) { + // handle (roughly): + // (icmp ne (A & B), C) | (icmp ne (A & D), E) + // (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, Builder)) + return V; + } - if (Value *V = foldAndOrOfICmpEqZeroAndICmp(LHS, RHS, IsAnd, Builder)) - return V; - if (Value *V = foldAndOrOfICmpEqZeroAndICmp(RHS, LHS, IsAnd, Builder)) - return V; + // TODO: One of these directions is fine with logical and/or, the other could + // be supported by inserting freeze. + if (!IsLogical) { + if (Value *V = foldAndOrOfICmpEqZeroAndICmp(LHS, RHS, IsAnd, Builder)) + return V; + if (Value *V = foldAndOrOfICmpEqZeroAndICmp(RHS, LHS, IsAnd, Builder)) + return V; + } - if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, BO, Builder, Q)) - return V; - if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, BO, Builder, Q)) - return V; + // TODO: Verify whether this is safe for logical and/or. + if (!IsLogical) { + if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, Builder, Q)) + return V; + if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd, Builder, Q)) + return V; + } if (Value *V = foldIsPowerOf2OrZero(LHS, RHS, IsAnd, Builder)) return V; if (Value *V = foldIsPowerOf2OrZero(RHS, LHS, IsAnd, Builder)) return V; - // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n - // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n - if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/!IsAnd)) - return V; + // TODO: One of these directions is fine with logical and/or, the other could + // be supported by inserting freeze. + if (!IsLogical) { + // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n + // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n + if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/!IsAnd)) + return V; - // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n - // E.g. 
(icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n - if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/!IsAnd)) - return V; + // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n + // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n + if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/!IsAnd)) + return V; + } - if (IsAnd) - if (Value *V = foldSignedTruncationCheck(LHS, RHS, BO, Builder)) + // TODO: Add conjugated or fold, check whether it is safe for logical and/or. + if (IsAnd && !IsLogical) + if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder)) return V; if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder)) return V; - if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder)) - return X; - if (Value *X = foldUnsignedUnderflowCheck(RHS, LHS, IsAnd, Q, Builder)) - return X; + // TODO: Verify whether this is safe for logical and/or. + if (!IsLogical) { + if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder)) + return X; + if (Value *X = foldUnsignedUnderflowCheck(RHS, LHS, IsAnd, Q, Builder)) + return X; + } if (Value *X = foldEqOfParts(LHS, RHS, IsAnd)) return X; @@ -2470,7 +2492,7 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs. - if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && + if (!IsLogical && PredL == (IsAnd ? 
ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && PredL == PredR && match(LHS1, m_ZeroInt()) && match(RHS1, m_ZeroInt()) && LHS0->getType() == RHS0->getType()) { Value *NewOr = Builder.CreateOr(LHS0, RHS0); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 592e254f92ed62..6340c9996108a9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -344,8 +344,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final const CastInst *CI2); Value *simplifyIntToPtrRoundTripCast(Value *Val); - Value *foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &BO, - bool IsAnd); + Value *foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &I, + bool IsAnd, bool IsLogical = false); Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor); Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 4fb985c497306a..b8cc7c6e7aa405 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2781,22 +2781,11 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { /* IsAnd */ IsAnd)) return I; - if (auto *ICmp0 = dyn_cast(CondVal)) { - if (auto *ICmp1 = dyn_cast(Op1)) { - if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd, - /* IsLogical */ true)) + if (auto *ICmp0 = dyn_cast(CondVal)) + if (auto *ICmp1 = dyn_cast(Op1)) + if (auto *V = foldAndOrOfICmps(ICmp0, ICmp1, SI, IsAnd, + /* IsLogical */ true)) return replaceInstUsesWith(SI, V); - - if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd)) - return replaceInstUsesWith(SI, V); - - // This pattern would usually be converted into a bitwise and/or based - // on "implies poison" reasoning. 
However, this may fail if adds with - // nowrap flags are involved. - if (auto *V = foldAndOrOfICmpsUsingRanges(ICmp0, ICmp1, IsAnd)) - return replaceInstUsesWith(SI, V); - } - } } // select (select a, true, b), c, false -> select a, c, false From bea86a2d3f23a8c81f1dd53bc61291e1109abea8 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 23 May 2022 06:43:02 -0700 Subject: [PATCH 227/908] [SLP][NFC]Add a test for extracting scalar from undef result vector, NFC. --- .../X86/extract-scalar-from-undef.ll | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll new file mode 100644 index 00000000000000..7b57ddc0bae37c --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer -mtriple=x86_64-apple-macosx -mattr=+avx2 < %s | FileCheck %s + +define i64 @foo(i32 %tmp7) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[TMP7:%.*]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer +; 
CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[OP_RDX]] to i64 +; CHECK-NEXT: ret i64 [[TMP64]] +; +bb: + %tmp = sub i32 0, 0 + %tmp2 = sub nsw i32 0, %tmp + %tmp3 = add i32 0, %tmp2 + %tmp4 = xor i32 %tmp3, 0 + %tmp6 = sub i32 0, 0 + %tmp8 = sub i32 %tmp7, 0 + %tmp9 = sub nsw i32 0, undef + %tmp10 = add nsw i32 0, %tmp6 + %tmp11 = sub nsw i32 0, %tmp8 + %tmp12 = add i32 0, %tmp10 + %tmp13 = xor i32 %tmp12, 0 + %tmp14 = add i32 0, %tmp9 + %tmp15 = xor i32 %tmp14, 0 + %tmp16 = add i32 0, %tmp11 + %tmp17 = xor i32 %tmp16, 0 + %tmp18 = add i32 %tmp13, %tmp4 + %tmp19 = add i32 %tmp18, 0 + %tmp20 = add i32 %tmp19, %tmp15 + %tmp21 = add i32 %tmp20, %tmp17 + %tmp22 = sub i32 0, 0 + %tmp23 = add i32 0, 0 + %tmp24 = sub i32 undef, 0 + %tmp25 = add nsw i32 %tmp23, undef + %tmp26 = add nsw i32 %tmp24, %tmp22 + %tmp27 = sub nsw i32 undef, %tmp24 + %tmp28 = add i32 0, %tmp25 + %tmp29 = xor i32 %tmp28, 0 + %tmp30 = add i32 0, %tmp26 + %tmp31 = xor i32 %tmp30, 0 + %tmp32 = add i32 0, %tmp27 + %tmp33 = xor i32 %tmp32, 0 + %tmp34 = add i32 %tmp31, %tmp21 + %tmp35 = add i32 %tmp34, %tmp29 + %tmp36 = add i32 %tmp35, 0 + %tmp37 = add i32 %tmp36, %tmp33 + %tmp38 = sub nsw i32 0, undef + %tmp39 = add i32 0, %tmp38 + %tmp40 = xor i32 %tmp39, 0 + %tmp41 = add i32 0, %tmp37 + %tmp42 = add i32 %tmp41, 0 + %tmp43 = add i32 %tmp42, %tmp40 + %tmp64 = zext i32 %tmp43 to i64 + ret i64 %tmp64 +} From 6793c63e885179c8b7f7c90117fb1bc948e7c02c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 22 May 2022 13:16:19 -0400 Subject: [PATCH 228/908] [InstCombine] add tests for icmp of zext i1; NFC --- .../test/Transforms/InstCombine/icmp-range.ll | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll 
b/llvm/test/Transforms/InstCombine/icmp-range.ll index b67356675eba66..53253b4b8b4023 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -171,6 +171,78 @@ define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readon ret i1 %rval } +define i1 @ugt_zext(i1 %b, i8 %x) { +; CHECK-LABEL: @ugt_zext( +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[Z]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + %r = icmp ugt i8 %z, %x + ret i1 %r +} + +define <2 x i1> @ult_zext(<2 x i1> %b, <2 x i8> %p) { +; CHECK-LABEL: @ult_zext( +; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[P:%.*]], [[P]] +; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[X]], [[Z]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %x = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization + %z = zext <2 x i1> %b to <2 x i8> + %r = icmp ult <2 x i8> %x, %z + ret <2 x i1> %r +} + +define i1 @uge_zext(i1 %b, i8 %x) { +; CHECK-LABEL: @uge_zext( +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[Z]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + %r = icmp uge i8 %z, %x + ret i1 %r +} + +define i1 @ule_zext(i1 %b, i8 %p) { +; CHECK-LABEL: @ule_zext( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[P:%.*]], [[P]] +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %p, %p ; thwart complexity-based canonicalization + %z = zext i1 %b to i8 + %r = icmp ule i8 %x, %z + ret i1 %r +} + +define i1 @ugt_zext_use(i1 %b, i8 %x) { +; CHECK-LABEL: @ugt_zext_use( +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: call void @use(i8 [[Z]]) +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[Z]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + call void @use(i8 %z) + %r = 
icmp ugt i8 %z, %x + ret i1 %r +} + +define i1 @ult_zext_not_i1(i2 %b, i8 %x) { +; CHECK-LABEL: @ult_zext_not_i1( +; CHECK-NEXT: [[Z:%.*]] = zext i2 [[B:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[Z]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i2 %b to i8 + %r = icmp ult i8 %x, %z + ret i1 %r +} + define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 @@ -184,6 +256,48 @@ define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) { ret i1 %r } +define i1 @zext_ult_zext(i1 %b, i8 %p) { +; CHECK-LABEL: @zext_ult_zext( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[P:%.*]], [[P]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X]], [[TMP1]] +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %p, %p ; thwart complexity-based canonicalization + %z = zext i1 %b to i16 + %zx = zext i8 %x to i16 + %r = icmp ult i16 %zx, %z + ret i1 %r +} + +define i1 @zext_ugt_zext(i1 %b, i4 %x) { +; CHECK-LABEL: @zext_ugt_zext( +; CHECK-NEXT: [[ZX:%.*]] = zext i4 [[X:%.*]] to i8 +; CHECK-NEXT: call void @use(i8 [[ZX]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i4 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[TMP1]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i1 %b to i8 + %zx = zext i4 %x to i8 + call void @use(i8 %zx) + %r = icmp ugt i8 %z, %zx + ret i1 %r +} + +define i1 @sub_ult_zext_not_i1(i2 %b, i8 %x, i8 %y) { +; CHECK-LABEL: @sub_ult_zext_not_i1( +; CHECK-NEXT: [[Z:%.*]] = zext i2 [[B:%.*]] to i8 +; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %z = zext i2 %b to i8 + %s = sub i8 %x, %y + %r = icmp ult i8 %s, %z + ret i1 %r +} + define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext_use1( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 From 1ebad988b1106e5cb35ea76813a4d3f66070b8f0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 23 May 2022 
09:26:43 -0400 Subject: [PATCH 229/908] [InstCombine] fold icmp of zext bool based on limited range X <u (zext i1 Y) --> (X == 0) && Y https://alive2.llvm.org/ce/z/avQDRY This is a generalization of 4069cccf3b4ff4a based on the post-commit suggestion. This also adds the i1 type check and tests that were missing from the earlier attempt; that commit caused several bot fails and was reverted. Differential Revision: https://reviews.llvm.org/D126171 --- .../InstCombine/InstCombineCompares.cpp | 21 ++++++++++ .../test/Transforms/InstCombine/icmp-range.ll | 41 +++++++++++++------ 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 4f20a0699ec50c..f88585929248a5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5631,6 +5631,24 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return nullptr; } +/// If one operand of an icmp is effectively a bool (value range of {0,1}), +/// then try to reduce patterns based on that limit. +static Instruction *foldICmpUsingBoolRange(ICmpInst &I, + InstCombiner::BuilderTy &Builder) { + Value *X, *Y; + ICmpInst::Predicate Pred; + + // X must be 0 and bool must be true for "ULT": + // X <u (zext i1 Y) --> (X == 0) && Y + if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_ZExt(m_Value(Y))))) && + Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULT) + return BinaryOperator::CreateAnd(Builder.CreateIsNull(X), Y); + + // TODO: Handle the related pattern with UGE/sext. 
+ + return nullptr; +} + llvm::Optional> InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C) { @@ -6058,6 +6076,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpWithDominatingICmp(I)) return Res; + if (Instruction *Res = foldICmpUsingBoolRange(I, Builder)) + return Res; + if (Instruction *Res = foldICmpUsingKnownBits(I)) return Res; diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 53253b4b8b4023..d55787ee7c5ee3 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -173,8 +173,8 @@ define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readon define i1 @ugt_zext(i1 %b, i8 %x) { ; CHECK-LABEL: @ugt_zext( -; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[Z]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[R]] ; %z = zext i1 %b to i8 @@ -185,8 +185,8 @@ define i1 @ugt_zext(i1 %b, i8 %x) { define <2 x i1> @ult_zext(<2 x i1> %b, <2 x i8> %p) { ; CHECK-LABEL: @ult_zext( ; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[P:%.*]], [[P]] -; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[X]], [[Z]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X]], zeroinitializer +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %x = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization @@ -195,6 +195,8 @@ define <2 x i1> @ult_zext(<2 x i1> %b, <2 x i8> %p) { ret <2 x i1> %r } +; negative test - need ult/ugt + define i1 @uge_zext(i1 %b, i8 %x) { ; CHECK-LABEL: @uge_zext( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 @@ -206,6 +208,8 @@ define i1 @uge_zext(i1 %b, i8 %x) { ret i1 %r } +; negative test - need ult/ugt + 
define i1 @ule_zext(i1 %b, i8 %p) { ; CHECK-LABEL: @ule_zext( ; CHECK-NEXT: [[X:%.*]] = mul i8 [[P:%.*]], [[P]] @@ -219,6 +223,8 @@ define i1 @ule_zext(i1 %b, i8 %p) { ret i1 %r } +; negative test - extra use + define i1 @ugt_zext_use(i1 %b, i8 %x) { ; CHECK-LABEL: @ugt_zext_use( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 @@ -232,6 +238,8 @@ define i1 @ugt_zext_use(i1 %b, i8 %x) { ret i1 %r } +; negative test - must be zext of i1 + define i1 @ult_zext_not_i1(i2 %b, i8 %x) { ; CHECK-LABEL: @ult_zext_not_i1( ; CHECK-NEXT: [[Z:%.*]] = zext i2 [[B:%.*]] to i8 @@ -243,11 +251,12 @@ define i1 @ult_zext_not_i1(i2 %b, i8 %x) { ret i1 %r } +; sub is eliminated + define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext( -; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 -; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[S]], [[Z]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[R]] ; %z = zext i1 %b to i8 @@ -259,8 +268,8 @@ define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) { define i1 @zext_ult_zext(i1 %b, i8 %p) { ; CHECK-LABEL: @zext_ult_zext( ; CHECK-NEXT: [[X:%.*]] = mul i8 [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i8 -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X]], [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[R]] ; %x = mul i8 %p, %p ; thwart complexity-based canonicalization @@ -270,12 +279,14 @@ define i1 @zext_ult_zext(i1 %b, i8 %p) { ret i1 %r } +; match and fold even if both sides are zexts (from different source types) + define i1 @zext_ugt_zext(i1 %b, i4 %x) { ; CHECK-LABEL: @zext_ugt_zext( ; CHECK-NEXT: [[ZX:%.*]] = zext i4 [[X:%.*]] to i8 ; CHECK-NEXT: call void @use(i8 [[ZX]]) -; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i4 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[TMP1]], [[X]] +; 
CHECK-NEXT: [[TMP1:%.*]] = icmp eq i4 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret i1 [[R]] ; %z = zext i1 %b to i8 @@ -285,6 +296,8 @@ define i1 @zext_ugt_zext(i1 %b, i4 %x) { ret i1 %r } +; negative test - must be zext of i1 + define i1 @sub_ult_zext_not_i1(i2 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext_not_i1( ; CHECK-NEXT: [[Z:%.*]] = zext i2 [[B:%.*]] to i8 @@ -298,6 +311,8 @@ define i1 @sub_ult_zext_not_i1(i2 %b, i8 %x, i8 %y) { ret i1 %r } +; negative test - extra use (but we could try harder to fold this) + define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @sub_ult_zext_use1( ; CHECK-NEXT: [[Z:%.*]] = zext i1 [[B:%.*]] to i8 @@ -315,10 +330,10 @@ define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) { define <2 x i1> @zext_ugt_sub_use2(<2 x i1> %b, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @zext_ugt_sub_use2( -; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> ; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: call void @use_vec(<2 x i8> [[S]]) -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[S]], [[Z]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %z = zext <2 x i1> %b to <2 x i8> From 3e0be5610ff0e9d5bb15f2872bac8cd17bfcc34a Mon Sep 17 00:00:00 2001 From: Stephen Long Date: Mon, 23 May 2022 07:00:54 -0700 Subject: [PATCH 230/908] [MSVC, ARM64] Add __writex18 intrinsics https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170 void __writex18byte(unsigned long, unsigned char) void __writex18word(unsigned long, unsigned short) void __writex18dword(unsigned long, unsigned long) void __writex18qword(unsigned long, unsigned __int64) Given the lack of documentation of the intrinsics, we chose to align the offset with just `CharUnits::One()` when calling `IRBuilderBase::CreateAlignedStore()`. 
Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D126023 --- clang/include/clang/Basic/BuiltinsAArch64.def | 5 ++ clang/lib/CodeGen/CGBuiltin.cpp | 25 +++++++ clang/lib/Headers/intrin.h | 5 ++ .../test/CodeGen/arm64-microsoft-intrinsics.c | 68 +++++++++++++++++++ 4 files changed, 103 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index cebd1c07fbcc9d..a04b48dd128e3c 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -251,6 +251,11 @@ TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", ALL_MS_LANGUAGE TARGET_HEADER_BUILTIN(__break, "vi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__writex18byte, "vULiUc", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__writex18word, "vULiUs", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__writex18dword, "vULiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__writex18qword, "vULiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2ee734550dc15f..6168ba938db4f0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9952,6 +9952,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return HigherBits; } + if (BuiltinID == AArch64::BI__writex18byte || + BuiltinID == AArch64::BI__writex18word || + BuiltinID == AArch64::BI__writex18dword || + BuiltinID == AArch64::BI__writex18qword) { + llvm::Type *IntTy = ConvertType(E->getArg(1)->getType()); + + // Read x18 as i8* + LLVMContext &Context = CGM.getLLVMContext(); + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + 
llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); + llvm::Value *X18 = Builder.CreateCall(F, Metadata); + X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0)); + + // Store val at x18 + offset + Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); + Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); + Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One()); + return Store; + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (Optional MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 07fcae36020d7a..dbc5159853ddf7 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -562,6 +562,11 @@ __int64 __mulh(__int64 __a, __int64 __b); unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b); void __break(int); + +void __writex18byte(unsigned long offset, unsigned char data); +void __writex18word(unsigned long offset, unsigned short data); +void __writex18dword(unsigned long offset, unsigned long data); +void __writex18qword(unsigned long offset, unsigned __int64 data); #endif /*----------------------------------------------------------------------------*\ diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index ecf271bae58085..a9b1d444553f58 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -119,5 +119,73 @@ unsigned __int64 check__getReg(void) { // CHECK-MSVC: call i64 @llvm.read_register.i64(metadata ![[MD2:.*]]) // CHECK-MSVC: call i64 @llvm.read_register.i64(metadata ![[MD3:.*]]) + +void check__writex18byte(unsigned long offset, unsigned char data) { + __writex18byte(offset, data); +} + 
+// CHECK-MSVC: %[[DATA_ADDR:.*]] = alloca i8, align 1 +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i8 %data, i8* %[[DATA_ADDR]], align 1 +// CHECK-MSVC: store i32 %offset, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[DATA:.*]] = load i8, i8* %[[DATA_ADDR]], align 1 +// CHECK-MSVC: store i8 %[[DATA]], i8* %[[PTR]], align 1 + +void check__writex18word(unsigned long offset, unsigned short data) { + __writex18word(offset, data); +} + +// CHECK-MSVC: %[[DATA_ADDR:.*]] = alloca i16, align 2 +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i16 %data, i16* %[[DATA_ADDR]], align 2 +// CHECK-MSVC: store i32 %offset, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[BITCAST_PTR:.*]] = bitcast i8* %[[PTR]] to i16* +// CHECK-MSVC: %[[DATA:.*]] = load i16, i16* %[[DATA_ADDR]], align 2 +// CHECK-MSVC: store i16 %[[DATA]], i16* %[[BITCAST_PTR]], align 1 + +void check__writex18dword(unsigned long offset, unsigned long data) { + __writex18dword(offset, data); +} + +// CHECK-MSVC: %[[DATA_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i32 %data, i32* %[[DATA_ADDR]], align 4 +// CHECK-MSVC: store i32 %offset, 
i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[BITCAST_PTR:.*]] = bitcast i8* %[[PTR]] to i32* +// CHECK-MSVC: %[[DATA:.*]] = load i32, i32* %[[DATA_ADDR]], align 4 +// CHECK-MSVC: store i32 %[[DATA]], i32* %[[BITCAST_PTR]], align 1 + +void check__writex18qword(unsigned long offset, unsigned __int64 data) { + __writex18qword(offset, data); +} + +// CHECK-MSVC: %[[DATA_ADDR:.*]] = alloca i64, align 8 +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i64 %data, i64* %[[DATA_ADDR]], align 8 +// CHECK-MSVC: store i32 %offset, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[BITCAST_PTR:.*]] = bitcast i8* %[[PTR]] to i64* +// CHECK-MSVC: %[[DATA:.*]] = load i64, i64* %[[DATA_ADDR]], align 8 +// CHECK-MSVC: store i64 %[[DATA]], i64* %[[BITCAST_PTR]], align 1 + // CHECK-MSVC: ![[MD2]] = !{!"x18"} // CHECK-MSVC: ![[MD3]] = !{!"sp"} From 2ac5ebedeac438f26bb0439c25a0069fd72914b0 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 4 Aug 2021 10:58:37 -0700 Subject: [PATCH 231/908] [SLP]Do not emit extract elements for insertelements users, replace with shuffles directly. SLP vectorizer emits extracts for externally used vectorized scalars and estimates the cost for each such extract. 
But in many cases these scalars are input for insertelement instructions, forming buildvector, and instead of extractelement/insertelement pair we can emit/cost estimate shuffle(s) cost and generate series of shuffles, which can be further optimized. Tested using test-suite (+SPEC2017), the tests passed, SLP was able to generate/vectorize more instructions in many cases and it allowed reducing the number of re-vectorization attempts (where we could try to vectorize buildvector insertelements again and again). Differential Revision: https://reviews.llvm.org/D107966 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 308 ++++++++++++++++++ .../SLPVectorizer/AArch64/loadorder.ll | 8 +- .../AArch64/transpose-inseltpoison.ll | 2 +- .../SLPVectorizer/AArch64/transpose.ll | 2 +- .../SLPVectorizer/AArch64/tsc-s116.ll | 18 +- .../X86/buildvector-same-lane-insert.ll | 19 +- .../SLPVectorizer/X86/buildvector-shuffle.ll | 3 +- .../SLPVectorizer/X86/cmp-as-alternate-ops.ll | 16 +- .../SLPVectorizer/X86/crash_7zip.ll | 14 +- .../SLPVectorizer/X86/crash_bullet3.ll | 10 +- .../SLPVectorizer/X86/crash_cmpop.ll | 2 +- .../X86/crash_exceed_scheduling.ll | 2 +- .../SLPVectorizer/X86/crash_lencod.ll | 9 +- .../X86/crash_scheduling-inseltpoison.ll | 13 +- .../SLPVectorizer/X86/crash_scheduling.ll | 13 +- .../SLPVectorizer/X86/horizontal-list.ll | 11 +- .../SLPVectorizer/X86/horizontal-minmax.ll | 17 +- .../SLPVectorizer/X86/insert-shuffle.ll | 14 +- .../X86/jumbled-load-multiuse.ll | 7 +- .../SLPVectorizer/X86/jumbled-load.ll | 17 +- .../SLPVectorizer/X86/jumbled_store_crash.ll | 15 +- .../X86/load-merge-inseltpoison.ll | 5 +- .../SLPVectorizer/X86/load-merge.ll | 5 +- .../Transforms/SLPVectorizer/X86/lookahead.ll | 32 +- .../SLPVectorizer/X86/ordering-bug.ll | 7 +- llvm/test/Transforms/SLPVectorizer/X86/phi.ll | 60 ++-- .../SLPVectorizer/X86/pr42022-inseltpoison.ll | 14 +- .../Transforms/SLPVectorizer/X86/pr42022.ll | 14 +- .../SLPVectorizer/X86/reduction-same-vals.ll | 16 +-
.../X86/remark_extract_broadcast.ll | 10 +- .../X86/reorder_diamond_match.ll | 25 +- .../X86/reordered-top-scalars.ll | 15 +- .../X86/vec_list_bias-inseltpoison.ll | 12 +- .../SLPVectorizer/X86/vec_list_bias.ll | 12 +- .../X86/vectorize-widest-phis.ll | 16 +- 35 files changed, 479 insertions(+), 284 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e366044353b1f9..fc0e7d86151103 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6668,6 +6668,23 @@ static bool isFirstInsertElement(const InsertElementInst *IE1, llvm_unreachable("Two different buildvectors not expected."); } +namespace { +/// Returns incoming Value *, if the requested type is Value * too, or a default +/// value, otherwise. +struct ValueSelect { + template + static typename std::enable_if::value, Value *>::type + get(Value *V) { + return V; + } + template + static typename std::enable_if::value, U>::type + get(Value *) { + return U(); + } +}; +} // namespace + /// Does the analysis of the provided shuffle masks and performs the requested /// actions on the vectors with the given shuffle masks. It tries to do it in /// several steps. @@ -6700,6 +6717,9 @@ static T *performExtractsShuffleAction( else Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; } + auto *V = ValueSelect::get(Base); + assert((!V || GetVF(V) == Mask.size()) && + "Expected base vector of VF number of elements."); Prev = Action(Mask, {nullptr, Res.first}); } else if (ShuffleMask.size() == 1) { // Base is undef and only 1 vector is shuffled - perform the action only for @@ -8105,6 +8125,17 @@ Value *BoUpSLP::vectorizeTree() { return vectorizeTree(ExternallyUsedValues); } +namespace { +/// Data type for handling buildvector sequences with the reused scalars from +/// other tree entries. +struct ShuffledInsertData { + /// List of insertelements to be replaced by shuffles. 
+ SmallVector InsertElements; + /// The parent vectors and shuffle mask for the given list of inserts. + MapVector> ValueMasks; +}; +} // namespace + Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. @@ -8138,6 +8169,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); + SmallVector ShuffledInserts; + // Maps vector instruction to original insertelement instruction + DenseMap VectorToInsertElement; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -8177,6 +8211,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { assert(isa(Scalar->getType()) && isa(Scalar) && "In-tree scalar of vector type is not insertelement?"); + auto *IE = cast(Scalar); + VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; // If User == nullptr, the Scalar is used as extra arg. Generate @@ -8205,6 +8241,65 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { continue; } + if (auto *VU = dyn_cast(User)) { + // Skip if the scalar is another vector op or Vec is not an instruction. + if (!Scalar->getType()->isVectorTy() && isa(Vec)) { + if (auto *FTy = dyn_cast(User->getType())) { + Optional InsertIdx = getInsertIndex(VU); + if (InsertIdx) { + auto *It = + find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) { + // Checks if 2 insertelements are from the same buildvector. 
+ InsertElementInst *VecInsert = Data.InsertElements.front(); + return areTwoInsertFromSameBuildVector(VU, VecInsert); + }); + unsigned Idx = *InsertIdx; + if (It == ShuffledInserts.end()) { + (void)ShuffledInserts.emplace_back(); + It = std::next(ShuffledInserts.begin(), + ShuffledInserts.size() - 1); + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + // Find the insertvector, vectorized in tree, if any. + Value *Base = VU; + while (auto *IEBase = dyn_cast(Base)) { + if (IEBase != User && + (!IEBase->hasOneUse() || + getInsertIndex(IEBase).getValueOr(Idx) == Idx)) + break; + // Build the mask for the vectorized insertelement instructions. + if (const TreeEntry *E = getTreeEntry(IEBase)) { + do { + IEBase = cast(Base); + int IEIdx = *getInsertIndex(IEBase); + assert(Mask[Idx] == UndefMaskElem && + "InsertElementInstruction used already."); + Mask[IEIdx] = IEIdx; + Base = IEBase->getOperand(0); + } while (E == getTreeEntry(Base)); + break; + } + Base = cast(Base)->getOperand(0); + // After the vectorization the def-use chain has changed, need + // to look through original insertelement instructions, if they + // get replaced by vector instructions. + auto It = VectorToInsertElement.find(Base); + if (It != VectorToInsertElement.end()) + Base = It->second; + } + } + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + Mask[Idx] = ExternalUse.Lane; + It->InsertElements.push_back(cast(User)); + continue; + } + } + } + } + // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. if (auto *VecI = dyn_cast(Vec)) { @@ -8240,6 +8335,219 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } + // Checks if the mask is an identity mask. 
+ auto &&IsIdentityMask = [](ArrayRef Mask, FixedVectorType *VecTy) { + int Limit = Mask.size(); + return VecTy->getNumElements() == Mask.size() && + all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && + ShuffleVectorInst::isIdentityMask(Mask); + }; + // Tries to combine 2 different masks into single one. + auto &&CombineMasks = [](SmallVectorImpl &Mask, ArrayRef ExtMask) { + SmallVector NewMask(ExtMask.size(), UndefMaskElem); + for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) { + if (ExtMask[I] == UndefMaskElem) + continue; + NewMask[I] = Mask[ExtMask[I]]; + } + Mask.swap(NewMask); + }; + // Peek through shuffles, trying to simplify the final shuffle code. + auto &&PeekThroughShuffles = + [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl &Mask, + bool CheckForLengthChange = false) { + while (auto *SV = dyn_cast(V)) { + // Exit if not a fixed vector type or changing size shuffle. + if (!isa(SV->getType()) || + (CheckForLengthChange && SV->changesLength())) + break; + // Exit if the identity or broadcast mask is found. + if (IsIdentityMask(Mask, cast(SV->getType())) || + SV->isZeroEltSplat()) + break; + bool IsOp1Undef = isUndefVector(SV->getOperand(0)); + bool IsOp2Undef = isUndefVector(SV->getOperand(1)); + if (!IsOp1Undef && !IsOp2Undef) + break; + SmallVector ShuffleMask(SV->getShuffleMask().begin(), + SV->getShuffleMask().end()); + CombineMasks(ShuffleMask, Mask); + Mask.swap(ShuffleMask); + if (IsOp2Undef) + V = SV->getOperand(0); + else + V = SV->getOperand(1); + } + }; + // Smart shuffle instruction emission, walks through shuffles trees and + // tries to find the best matching vector for the actual shuffle + // instruction. + auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles, + &CombineMasks](Value *V1, Value *V2, + ArrayRef Mask) -> Value * { + assert(V1 && "Expected at least one vector value."); + if (V2 && !isUndefVector(V2)) { + // Peek through shuffles. 
+ Value *Op1 = V1; + Value *Op2 = V2; + int VF = + cast(V1->getType())->getElementCount().getKnownMinValue(); + SmallVector CombinedMask1(Mask.size(), UndefMaskElem); + SmallVector CombinedMask2(Mask.size(), UndefMaskElem); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (Mask[I] < VF) + CombinedMask1[I] = Mask[I]; + else + CombinedMask2[I] = Mask[I] - VF; + } + Value *PrevOp1; + Value *PrevOp2; + do { + PrevOp1 = Op1; + PrevOp2 = Op2; + PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true); + PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true); + // Check if we have 2 resizing shuffles - need to peek through operands + // again. + if (auto *SV1 = dyn_cast(Op1)) + if (auto *SV2 = dyn_cast(Op2)) + if (SV1->getOperand(0)->getType() == + SV2->getOperand(0)->getType() && + SV1->getOperand(0)->getType() != SV1->getType() && + isUndefVector(SV1->getOperand(1)) && + isUndefVector(SV2->getOperand(1))) { + Op1 = SV1->getOperand(0); + Op2 = SV2->getOperand(0); + SmallVector ShuffleMask1(SV1->getShuffleMask().begin(), + SV1->getShuffleMask().end()); + CombineMasks(ShuffleMask1, CombinedMask1); + CombinedMask1.swap(ShuffleMask1); + SmallVector ShuffleMask2(SV2->getShuffleMask().begin(), + SV2->getShuffleMask().end()); + CombineMasks(ShuffleMask2, CombinedMask2); + CombinedMask2.swap(ShuffleMask2); + } + } while (PrevOp1 != Op1 || PrevOp2 != Op2); + VF = cast(Op1->getType()) + ->getElementCount() + .getKnownMinValue(); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (CombinedMask2[I] != UndefMaskElem) { + assert(CombinedMask1[I] == UndefMaskElem && + "Expected undefined mask element"); + CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); + } + } + Value *Vec = Builder.CreateShuffleVector( + Op1, Op1 == Op2 ? 
PoisonValue::get(Op1->getType()) : Op2, + CombinedMask1); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + if (isa(V1)) + return PoisonValue::get(FixedVectorType::get( + cast(V1->getType())->getElementType(), Mask.size())); + Value *Op = V1; + SmallVector CombinedMask(Mask.begin(), Mask.end()); + PeekThroughShuffles(Op, CombinedMask); + if (!isa(Op->getType()) || + !IsIdentityMask(CombinedMask, cast(Op->getType()))) { + Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + return Op; + }; + + auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef Mask) { + unsigned VF = Mask.size(); + unsigned VecVF = cast(Vec->getType())->getNumElements(); + if (VF != VecVF) { + if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); })) { + Vec = CreateShuffle(Vec, nullptr, Mask); + return std::make_pair(Vec, true); + } + SmallVector ResizeMask(VF, UndefMaskElem); + for (unsigned I = 0; I < VF; ++I) { + if (Mask[I] != UndefMaskElem) + ResizeMask[Mask[I]] = Mask[I]; + } + Vec = CreateShuffle(Vec, nullptr, ResizeMask); + } + + return std::make_pair(Vec, false); + }; + // Perform shuffling of the vectorize tree entries for better handling of + // external extracts. + for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) { + // Find the first and the last instruction in the list of insertelements. 
+ sort(ShuffledInserts[I].InsertElements, isFirstInsertElement); + InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front(); + InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back(); + Builder.SetInsertPoint(LastInsert); + auto Vector = ShuffledInserts[I].ValueMasks.takeVector(); + Value *NewInst = performExtractsShuffleAction( + makeMutableArrayRef(Vector.data(), Vector.size()), + FirstInsert->getOperand(0), + [](Value *Vec) { + return cast(Vec->getType()) + ->getElementCount() + .getKnownMinValue(); + }, + ResizeToVF, + [FirstInsert, &CreateShuffle](ArrayRef Mask, + ArrayRef Vals) { + assert((Vals.size() == 1 || Vals.size() == 2) && + "Expected exactly 1 or 2 input values."); + if (Vals.size() == 1) { + // Do not create shuffle if the mask is a simple identity + // non-resizing mask. + if (Mask.size() != cast(Vals.front()->getType()) + ->getNumElements() || + !ShuffleVectorInst::isIdentityMask(Mask)) + return CreateShuffle(Vals.front(), nullptr, Mask); + return Vals.front(); + } + return CreateShuffle(Vals.front() ? Vals.front() + : FirstInsert->getOperand(0), + Vals.back(), Mask); + }); + auto It = ShuffledInserts[I].InsertElements.rbegin(); + // Rebuild buildvector chain. 
+ InsertElementInst *II = nullptr; + if (It != ShuffledInserts[I].InsertElements.rend()) + II = *It; + SmallVector Inserts; + while (It != ShuffledInserts[I].InsertElements.rend()) { + assert(II && "Must be an insertelement instruction."); + if (*It == II) + ++It; + else + Inserts.push_back(cast(II)); + II = dyn_cast(II->getOperand(0)); + } + for (Instruction *II : reverse(Inserts)) { + II->replaceUsesOfWith(II->getOperand(0), NewInst); + if (auto *NewI = dyn_cast(NewInst)) + if (II->getParent() == NewI->getParent() && II->comesBefore(NewI)) + II->moveAfter(NewI); + NewInst = II; + } + LastInsert->replaceAllUsesWith(NewInst); + for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) { + IE->replaceUsesOfWith(IE->getOperand(1), + PoisonValue::get(IE->getOperand(1)->getType())); + eraseInstruction(IE); + } + CSEBlocks.insert(LastInsert->getParent()); + } + // For each vectorized value: for (auto &TEPtr : VectorizableTree) { TreeEntry *Entry = TEPtr.get(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5dc03ab1a3cd1f..68c8555bd6d435 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1312,19 +1312,19 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP56]] ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> 
[[TMP62]], <16 x i32> [[TMP63]], <16 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = sub nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP72]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP74:%.*]] = add nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = sub nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 12a2cd2a6834fe..148169fb64de42 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; 
CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index 03a29f48a739e7..f5079fab51a6ab 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll index aab848ba73ddbe..e25fd9b206334e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -26,19 +26,15 @@ define void @s116_modified(float* %a) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x 
float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[GEP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4 ; CHECK-NEXT: ret void ; %gep0 = getelementptr inbounds float, float* %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll index 
77174151e96350..281169f7a69fea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -10,12 +10,9 @@ define void @test() { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: ret void ; @@ -46,14 +43,10 @@ define void @test1() { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1 -; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 
x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1 -; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP9]], ptr null, align 4 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds float, ptr undef, i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll index 3b0ec888de661c..2e0b0fb45901c4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -50,8 +50,7 @@ define void @test(float %a) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll index 4463f94b1c557c..36347b4843fe76 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll @@ -52,16 +52,12 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) { ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> 
[[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 -; CHECK-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT4:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 -; CHECK-NEXT: [[RETVAL_SROA_0_4_VEC_INSERT7:%.*]] = insertelement <2 x float> [[RETVAL_SROA_0_0_VEC_INSERT4]], float [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 -; CHECK-NEXT: [[RETVAL_SROA_7_8_VEC_INSERT11:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP6]], i32 3 -; CHECK-NEXT: [[RETVAL_SROA_7_12_VEC_INSERT13:%.*]] = insertelement <2 x float> [[RETVAL_SROA_7_8_VEC_INSERT11]], float [[TMP10]], i64 1 -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[RETVAL_SROA_0_4_VEC_INSERT7]], 0 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[RETVAL_SROA_7_12_VEC_INSERT13]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer ; entry: diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll index c9590ae76f405d..402211b4ef73eb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll @@ -13,21 +13,19 @@ define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145. ; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4 ; CHECK-NEXT: br label [[DO_BODY66_I:%.*]] ; CHECK: do.body66.i: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> , i32 [[TMP2]], i32 1 ; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]] ; CHECK: if.else.i: -; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], undef +; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef ; CHECK-NEXT: br label [[DO_COND_I]] ; CHECK: do.cond.i: -; CHECK-NEXT: [[TMP5]] = phi <2 x i32> [ [[TMP4]], [[IF_ELSE_I]] ], [ [[TMP3]], [[DO_BODY66_I]] ] +; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ] ; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]] ; CHECK: do.end1006.i: -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]] +; 
CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll index 30abc6f9fbe87a..9b183969001c80 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -36,17 +36,15 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class. ; CHECK: if.then325: ; CHECK-NEXT: br label [[IF_END327]] ; CHECK: if.end327: -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> , float [[TMP4]], i32 0 ; CHECK-NEXT: br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]] ; CHECK: if.then329: ; CHECK-NEXT: br label [[IF_END332]] ; CHECK: if.end332: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[IF_THEN329]] ], [ [[TMP5]], [[IF_END327]] ], [ , [[IF_THEN291]] ] -; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ , [[IF_THEN291]] ] +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP7]], <2 x float>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4 ; CHECK-NEXT: br 
label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index ef1ed33ffcb995..0344c5a82fbed5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -32,7 +32,7 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 ; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 94739340c8b5ab..0ab2cf94db8272 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -35,7 +35,7 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef ; CHECK-NEXT: switch i32 undef, label 
[[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll index fa8c8a14e891ce..e87a5190d47216 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -129,11 +129,10 @@ define fastcc void @dct36(double* %inbuf) { ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index bbd71825f96c6a..1c7cd3fd265d58 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -20,18 +20,15 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; 
CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index cd05b940be221f..6b9fdb23714fb1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -20,18 +20,15 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label 
[[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index f446f3682e580e..bafc67c9ea31f1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -896,12 +896,11 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 1 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[MUL]], i32 0 ; THRESHOLD-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x float> -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <2 x float> , float [[TMP6]], i32 0 -; THRESHOLD-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP7]] -; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 -; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP9]], [[TMP10]] +; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> , <2 x float> [[TMP5]], <2 x i32> +; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP6]] +; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 +; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[TMP2]], [[OP_RDX2]] ; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 7c4aed1453c97f..801b4863e7c19e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1100,15 +1100,14 @@ define i32 
@maxi8_wrong_parent(i32) { ; THRESH-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] ; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i1> poison, i1 [[OP_RDX]], i32 0 ; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i1> [[TMP11]], i1 [[TMP5]], i32 1 -; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 -; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP3]], i32 1 -; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 -; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP4]], i32 1 -; THRESH-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP14]], <2 x i32> [[TMP16]] -; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 -; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 -; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP18]], i32 [[TMP19]] +; THRESH-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <2 x i32> +; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> +; THRESH-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP2]], <2 x i32> +; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP15]] +; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 +; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1 +; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP17]], i32 [[TMP18]] ; THRESH-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP8]], [[OP_RDX3]] ; THRESH-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP8]], i32 [[OP_RDX3]] ; THRESH-NEXT: ret i32 [[OP_RDX5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll index 5a9273d1e5b3e9..b64c0e4e35a235 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -19,16 +19,10 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) { ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[VEC2:%.*]] = insertelement <2 x float> [[VEC1]], float [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[VEC3:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[VEC4:%.*]] = insertelement <2 x float> [[VEC3]], float [[TMP13]], i32 1 -; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VEC2]], 0 -; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[VEC4]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0 +; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index d785f8e76798b5..011ce7352079ca 
100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,10 +9,9 @@ define i32 @fn1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll index 31c5d5b07f0e0f..85aa8aed5f5a59 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -58,18 +58,11 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> 
[[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll index 6bbde0957743ce..78572209687b87 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -39,14 +39,13 @@ define dso_local void @j() local_unnamed_addr { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 1 ; CHECK-NEXT: store float [[TMP15]], float* @f, align 4 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> , float [[CONV19]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2 -; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP11]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x 
float> [[TMP11]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> [[SHUFFLE]], <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP19]], <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = fptosi <4 x float> [[TMP20]] to <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP22]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll index 3d7d632cca3d63..6beff0162ffd26 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -59,9 +59,8 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr 
inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll index 509c37f5da28c0..dd08e7983d70cb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -59,9 +59,8 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index 36b867d4a148fb..faf86fbcafb9d5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -722,15 +722,11 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = 
insertelement <2 x double> poison, double [[TMP5]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] -; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 -; SSE-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 -; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] ; SSE-NEXT: ret double [[ADD3]] ; ; AVX-LABEL: @splat_loads( @@ -791,17 +787,13 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2, ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] -; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 -; SSE-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP10]], [[TMP11]] -; SSE-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 0 -; SSE-NEXT: 
[[TMP14:%.*]] = extractelement <2 x double> [[TMP12]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP13]], [[TMP14]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] +; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll index 464288ef340c80..fc18281c3f35ed 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll @@ -29,10 +29,9 @@ define void @f(i1 %x) #0 { ; CHECK: if.then: ; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]] -; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index 
df767259b4f477..c6725da8a98693 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -151,7 +151,7 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00 @@ -163,25 +163,22 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP17]], -; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> 
[[TMP12]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], +; CHECK-NEXT: [[TMP16]] = fadd <4 x float> [[TMP6]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 0 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP21]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: @@ -244,27 +241,20 @@ define float @sort_phi_type(float* nocapture readonly %A) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> 
[[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 
3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll index 8ab5fb7c7ceec6..595b5c9d1079b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll @@ -10,16 +10,10 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; 
CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll index 3177f8557315dc..e97a3a4521482b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll @@ -10,16 +10,10 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; 
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll index 1fd2f30bf93cc9..aa93abaf4d9cbd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll @@ -10,18 +10,10 @@ define i64 @test() { ; CHECK: bb3: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[TMP65:%.*]] = sext 
i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP65]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index 7ab7f3b67634b3..f71e214fe89382 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,12 +9,10 @@ define void @fextr(i16* %ptr) { ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll index 3315e71ae2aa60..fc1a673d818f92 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll @@ -12,22 +12,15 @@ define void @test() { ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]] ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x 
i32> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 16 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i8, i8* undef, i64 4 diff 
--git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll index 9bbe2d5d673c76..bb0a39b7b6fb8e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll @@ -7,21 +7,18 @@ define i32 @test(i32* %isec) { ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]] ; CHECK: block1: ; CHECK-NEXT: br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]] ; CHECK: block2: ; CHECK-NEXT: br label [[BLOCK3]] ; CHECK: block3: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP7]] -; CHECK-NEXT: ret i32 [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP2]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: %arrayidx10 = getelementptr inbounds i32, i32* %isec, 
i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 297aa493ed0715..0a07e03bac72c5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -49,15 +49,9 @@ define void @test(i32* nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index b0a406e6a8f514..3cf139ea0469b8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -49,15 +49,9 @@ define void @test(i32* nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index 109c27e4f4f4e5..f2fbf1dfc756a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,7 +12,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ 
[[TMP18:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: @@ -23,17 +23,13 @@ define void @foo() { ; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float> -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: From a3a85fe59f489b7531e2852c1c573a26b18703b9 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 23 May 2022 16:10:20 +0200 Subject: [PATCH 232/908] 
[mlir] Add RSqrt tp ComplexOps.td. Differential Revision: https://reviews.llvm.org/D126202 --- .../mlir/Dialect/Complex/IR/ComplexOps.td | 19 +++++++++++++++++++ mlir/test/Dialect/Complex/ops.mlir | 13 +++++++++++++ 2 files changed, 32 insertions(+) diff --git a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td index a87af6f4fabfeb..3073d4e08866f5 100644 --- a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td +++ b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td @@ -387,6 +387,25 @@ def ReOp : ComplexUnaryOp<"re", let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// RsqrtOp +//===----------------------------------------------------------------------===// + +def RsqrtOp : ComplexUnaryOp<"rsqrt", [SameOperandsAndResultType]> { + let summary = "complex reciprocal of square root"; + let description = [{ + The `rsqrt` operation computes reciprocal of square root. + + Example: + + ```mlir + %a = complex.rsqrt %b : complex + ``` + }]; + + let results = (outs Complex:$result); +} + //===----------------------------------------------------------------------===// // SignOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Complex/ops.mlir b/mlir/test/Dialect/Complex/ops.mlir index 6c2ed8bc9dfeec..f0f80e08029744 100644 --- a/mlir/test/Dialect/Complex/ops.mlir +++ b/mlir/test/Dialect/Complex/ops.mlir @@ -61,5 +61,18 @@ func.func @ops(%f: f32) { // CHECK: complex.sub %[[C]], %[[C]] : complex %diff = complex.sub %complex, %complex : complex + + // CHECK: complex.tanh %[[C]] : complex + %tanh = complex.tanh %complex : complex + + // CHECK: complex.pow %[[C]], %[[C]] : complex + %pow = complex.pow %complex, %complex : complex + + // CHECK: complex.sqrt %[[C]] : complex + %sqrt = complex.sqrt %complex : complex + + // CHECK: complex.rsqrt %[[C]] : complex + %rsqrt = complex.rsqrt %complex : complex + return } 
From 9293539064aead8d1822e181b66fd5590c062a1d Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 23 May 2022 15:18:34 +0100 Subject: [PATCH 233/908] [TableGen] Remove an untrue statement from the docs You can't use foreach in a record body. This was a mistake in the documentation dating from when it was first written in D85838. --- llvm/docs/TableGen/ProgRef.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index e7e39f8f3c2733..9853a7c76307ed 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -1224,8 +1224,6 @@ The statement list establishes an inner scope. Variables local to a values do not carry over from one iteration to the next. Foreach loops may be nested. -The ``foreach`` statement can also be used in a record :token:`Body`. - .. Note that the productions involving RangeList and RangePiece have precedence over the more generic value parsing based on the first token. From f3eeefe4490efb6e9821e87ddf51f2d60ba435f6 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 23 May 2022 16:29:02 +0200 Subject: [PATCH 234/908] [mlir] Add Expm1 tp ComplexOps.td. 
Differential Revision: https://reviews.llvm.org/D126206 --- .../mlir/Dialect/Complex/IR/ComplexOps.td | 25 +++++++++++++++++++ mlir/test/Dialect/Complex/ops.mlir | 3 +++ 2 files changed, 28 insertions(+) diff --git a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td index 3073d4e08866f5..4a7377e7fe6437 100644 --- a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td +++ b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td @@ -222,6 +222,31 @@ def ExpOp : ComplexUnaryOp<"exp", [SameOperandsAndResultType]> { let results = (outs Complex:$result); } +//===----------------------------------------------------------------------===// +// Expm1Op +//===----------------------------------------------------------------------===// + +def Expm1Op : ComplexUnaryOp<"expm1", [SameOperandsAndResultType]> { + let summary = "computes exponential of a complex number minus 1"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `complex.expm1` ssa-use `:` type + ``` + + complex.expm1(x) := complex.exp(x) - 1 + + Example: + + ```mlir + %a = complex.expm1 %b : complex + ``` + }]; + + let results = (outs Complex:$result); +} + //===----------------------------------------------------------------------===// // ImOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Complex/ops.mlir b/mlir/test/Dialect/Complex/ops.mlir index f0f80e08029744..1b302af74b9c7b 100644 --- a/mlir/test/Dialect/Complex/ops.mlir +++ b/mlir/test/Dialect/Complex/ops.mlir @@ -38,6 +38,9 @@ func.func @ops(%f: f32) { // CHECK: complex.exp %[[C]] : complex %exp = complex.exp %complex : complex + // CHECK: complex.expm1 %[[C]] : complex + %expm1 = complex.expm1 %complex : complex + // CHECK: complex.log %[[C]] : complex %log = complex.log %complex : complex From b050686c4b30c53486955097d8678d43f97fef85 Mon Sep 17 00:00:00 2001 From: PeixinQiao Date: Mon, 23 May 2022 22:50:06 +0800 Subject: [PATCH 
235/908] [NFC][flang] Change the OpenMP atomic read/write test cases Remove the integration tests and rename the file. Reviewed By: shraiysh, NimishMishra Differential Revision: https://reviews.llvm.org/D126169 --- flang/test/Lower/OpenMP/atomic-read.f90 | 46 +++++++++++++++ flang/test/Lower/OpenMP/atomic-write.f90 | 38 ++++++++++++ flang/test/Lower/OpenMP/atomic01.f90 | 74 ------------------------ flang/test/Lower/OpenMP/atomic02.f90 | 64 -------------------- 4 files changed, 84 insertions(+), 138 deletions(-) create mode 100644 flang/test/Lower/OpenMP/atomic-read.f90 create mode 100644 flang/test/Lower/OpenMP/atomic-write.f90 delete mode 100644 flang/test/Lower/OpenMP/atomic01.f90 delete mode 100644 flang/test/Lower/OpenMP/atomic02.f90 diff --git a/flang/test/Lower/OpenMP/atomic-read.f90 b/flang/test/Lower/OpenMP/atomic-read.f90 new file mode 100644 index 00000000000000..36ad61ff0e9015 --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-read.f90 @@ -0,0 +1,46 @@ +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s + +! 
This test checks the lowering of atomic read + +!CHECK: func @_QQmain() { +!CHECK: %[[VAR_A:.*]] = fir.address_of(@_QFEa) : !fir.ref> +!CHECK: %[[VAR_B:.*]] = fir.address_of(@_QFEb) : !fir.ref> +!CHECK: %[[VAR_C:.*]] = fir.alloca !fir.logical<4> {bindc_name = "c", uniq_name = "_QFEc"} +!CHECK: %[[VAR_D:.*]] = fir.alloca !fir.logical<4> {bindc_name = "d", uniq_name = "_QFEd"} +!CHECK: %[[VAR_E:.*]] = fir.address_of(@_QFEe) : !fir.ref> +!CHECK: %[[VAR_F:.*]] = fir.address_of(@_QFEf) : !fir.ref> +!CHECK: %[[VAR_G:.*]] = fir.alloca f32 {bindc_name = "g", uniq_name = "_QFEg"} +!CHECK: %[[VAR_H:.*]] = fir.alloca f32 {bindc_name = "h", uniq_name = "_QFEh"} +!CHECK: %[[VAR_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} +!CHECK: %[[VAR_Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} +!CHECK: omp.atomic.read %[[VAR_X]] = %[[VAR_Y]] memory_order(acquire) hint(uncontended) : !fir.ref +!CHECK: omp.atomic.read %[[VAR_A]] = %[[VAR_B]] memory_order(relaxed) hint(none) : !fir.ref> +!CHECK: omp.atomic.read %[[VAR_C]] = %[[VAR_D]] memory_order(seq_cst) hint(contended) : !fir.ref> +!CHECK: omp.atomic.read %[[VAR_E]] = %[[VAR_F]] hint(speculative) : !fir.ref> +!CHECK: omp.atomic.read %[[VAR_G]] = %[[VAR_H]] hint(nonspeculative) : !fir.ref +!CHECK: omp.atomic.read %[[VAR_G]] = %[[VAR_H]] : !fir.ref +!CHECK: return +!CHECK: } + +program OmpAtomic + + use omp_lib + integer :: x, y + character :: a, b + logical :: c, d + character(8) :: e, f + real g, h + !$omp atomic acquire read hint(omp_sync_hint_uncontended) + x = y + !$omp atomic relaxed read hint(omp_sync_hint_none) + a = b + !$omp atomic read seq_cst hint(omp_sync_hint_contended) + c = d + !$omp atomic read hint(omp_sync_hint_speculative) + e = f + !$omp atomic read hint(omp_sync_hint_nonspeculative) + g = h + !$omp atomic read + g = h +end program OmpAtomic + diff --git a/flang/test/Lower/OpenMP/atomic-write.f90 b/flang/test/Lower/OpenMP/atomic-write.f90 new file mode 100644 index 
00000000000000..1a9c264403563d --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-write.f90 @@ -0,0 +1,38 @@ +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s + +! This test checks the lowering of atomic write + +!CHECK: func @_QQmain() { +!CHECK: %[[VAR_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} +!CHECK: %[[VAR_Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} +!CHECK: %[[VAR_Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"} +!CHECK: %[[CONST_44:.*]] = arith.constant 44 : i32 +!CHECK: omp.atomic.write %[[VAR_X]] = %[[CONST_44]] hint(uncontended) memory_order(seq_cst) : !fir.ref, i32 +!CHECK: %[[CONST_7:.*]] = arith.constant 7 : i32 +!CHECK: {{.*}} = fir.load %[[VAR_Y]] : !fir.ref +!CHECK: %[[VAR_7y:.*]] = arith.muli %[[CONST_7]], {{.*}} : i32 +!CHECK: omp.atomic.write %[[VAR_X]] = %[[VAR_7y]] memory_order(relaxed) : !fir.ref, i32 +!CHECK: %[[CONST_10:.*]] = arith.constant 10 : i32 +!CHECK: {{.*}} = fir.load %[[VAR_X]] : !fir.ref +!CHECK: {{.*}} = arith.muli %[[CONST_10]], {{.*}} : i32 +!CHECK: {{.*}} = fir.load %[[VAR_Z]] : !fir.ref +!CHECK: %[[CONST_2:.*]] = arith.constant 2 : i32 +!CHECK: {{.*}} = arith.divsi {{.*}}, %[[CONST_2]] : i32 +!CHECK: {{.*}} = arith.addi {{.*}}, {{.*}} : i32 +!CHECK: omp.atomic.write %[[VAR_Y]] = {{.*}} hint(speculative) memory_order(release) : !fir.ref, i32 +!CHECK: return +!CHECK: } + +program OmpAtomicWrite + use omp_lib + integer :: x, y, z + !$omp atomic seq_cst write hint(omp_sync_hint_uncontended) + x = 8*4 + 12 + + !$omp atomic write relaxed + x = 7 * y + + !$omp atomic write release hint(omp_sync_hint_speculative) + y = 10*x + z/2 +end program OmpAtomicWrite + diff --git a/flang/test/Lower/OpenMP/atomic01.f90 b/flang/test/Lower/OpenMP/atomic01.f90 deleted file mode 100644 index 082181d8ade965..00000000000000 --- a/flang/test/Lower/OpenMP/atomic01.f90 +++ /dev/null @@ -1,74 +0,0 @@ -! RUN: bbc -fopenmp -emit-fir %s -o - | \ -! RUN: FileCheck %s --check-prefix=FIRDialect -! 
RUN: bbc -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | \ -! RUN: FileCheck %s --check-prefix=LLVMDialect - -! This test checks the lowering of atomic read - -!FIRDialect: func @_QQmain() { -!FIRDialect: %[[VAR_A:.*]] = fir.address_of(@_QFEa) : !fir.ref> -!FIRDialect: %[[VAR_B:.*]] = fir.address_of(@_QFEb) : !fir.ref> -!FIRDialect: %[[VAR_C:.*]] = fir.alloca !fir.logical<4> {bindc_name = "c", uniq_name = "_QFEc"} -!FIRDialect: %[[VAR_D:.*]] = fir.alloca !fir.logical<4> {bindc_name = "d", uniq_name = "_QFEd"} -!FIRDialect: %[[VAR_E:.*]] = fir.address_of(@_QFEe) : !fir.ref> -!FIRDialect: %[[VAR_F:.*]] = fir.address_of(@_QFEf) : !fir.ref> -!FIRDialect: %[[VAR_G:.*]] = fir.alloca f32 {bindc_name = "g", uniq_name = "_QFEg"} -!FIRDialect: %[[VAR_H:.*]] = fir.alloca f32 {bindc_name = "h", uniq_name = "_QFEh"} -!FIRDialect: %[[VAR_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} -!FIRDialect: %[[VAR_Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} -!FIRDialect: omp.atomic.read %[[VAR_X]] = %[[VAR_Y]] memory_order(acquire) hint(uncontended) : !fir.ref -!FIRDialect: omp.atomic.read %[[VAR_A]] = %[[VAR_B]] memory_order(relaxed) hint(none) : !fir.ref> -!FIRDialect: omp.atomic.read %[[VAR_C]] = %[[VAR_D]] memory_order(seq_cst) hint(contended) : !fir.ref> -!FIRDialect: omp.atomic.read %[[VAR_E]] = %[[VAR_F]] hint(speculative) : !fir.ref> -!FIRDialect: omp.atomic.read %[[VAR_G]] = %[[VAR_H]] hint(nonspeculative) : !fir.ref -!FIRDialect: omp.atomic.read %[[VAR_G]] = %[[VAR_H]] : !fir.ref -!FIRDialect: return -!FIRDialect: } - -!LLVMDialect: llvm.func @_QQmain() { -!LLVMDialect: %[[LLVM_VAR_A:.*]] = llvm.mlir.addressof @_QFEa : !llvm.ptr> -!LLVMDialect: %[[LLVM_VAR_B:.*]] = llvm.mlir.addressof @_QFEb : !llvm.ptr> -!LLVMDialect: {{.*}} = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_C:.*]] = llvm.alloca {{.*}} x i32 {bindc_name = "c", in_type = !fir.logical<4>, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEc"} : (i64) 
-> !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_D:.*]] = llvm.alloca {{.*}} x i32 {bindc_name = "d", in_type = !fir.logical<4>, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEd"} : (i64) -> !llvm.ptr -!LLVMDialect: %[[LLVM_VAR_E:.*]] = llvm.mlir.addressof @_QFEe : !llvm.ptr> -!LLVMDialect: %[[LLVM_VAR_F:.*]] = llvm.mlir.addressof @_QFEf : !llvm.ptr> -!LLVMDialect: {{.*}} = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_G:.*]] = llvm.alloca {{.*}} x f32 {bindc_name = "g", in_type = f32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEg"} : (i64) -> !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_H:.*]] = llvm.alloca {{.*}} x f32 {bindc_name = "h", in_type = f32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEh"} : (i64) -> !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_X:.*]] = llvm.alloca {{.*}} x i32 {bindc_name = "x", in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEx"} : (i64) -> !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_Y:.*]] = llvm.alloca {{.*}} x i32 {bindc_name = "y", in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEy"} : (i64) -> !llvm.ptr -!LLVMDialect: omp.atomic.read %[[LLVM_VAR_X]] = %[[LLVM_VAR_Y]] memory_order(acquire) hint(uncontended) : !llvm.ptr -!LLVMDialect: omp.atomic.read %[[LLVM_VAR_A]] = %[[LLVM_VAR_B]] memory_order(relaxed) hint(none) : !llvm.ptr> -!LLVMDialect: omp.atomic.read %[[LLVM_VAR_C]] = %[[LLVM_VAR_D]] memory_order(seq_cst) hint(contended) : !llvm.ptr -!LLVMDialect: omp.atomic.read %[[LLVM_VAR_E]] = %[[LLVM_VAR_F]] hint(speculative) : !llvm.ptr> -!LLVMDialect: omp.atomic.read %[[LLVM_VAR_G]] = %[[LLVM_VAR_H]] hint(nonspeculative) : !llvm.ptr -!LLVMDialect: omp.atomic.read %[[LLVM_VAR_G]] = 
%[[LLVM_VAR_H]] : !llvm.ptr -!LLVMDialect: llvm.return -!LLVMDialect: } - -program OmpAtomic - - use omp_lib - integer :: x, y - character :: a, b - logical :: c, d - character(8) :: e, f - real g, h - !$omp atomic acquire read hint(omp_sync_hint_uncontended) - x = y - !$omp atomic relaxed read hint(omp_sync_hint_none) - a = b - !$omp atomic read seq_cst hint(omp_sync_hint_contended) - c = d - !$omp atomic read hint(omp_sync_hint_speculative) - e = f - !$omp atomic read hint(omp_sync_hint_nonspeculative) - g = h - !$omp atomic read - g = h -end program OmpAtomic diff --git a/flang/test/Lower/OpenMP/atomic02.f90 b/flang/test/Lower/OpenMP/atomic02.f90 deleted file mode 100644 index f5ef943e48e146..00000000000000 --- a/flang/test/Lower/OpenMP/atomic02.f90 +++ /dev/null @@ -1,64 +0,0 @@ -! RUN: bbc -fopenmp -emit-fir %s -o - | \ -! RUN: FileCheck %s --check-prefix=FIRDialect -! RUN: bbc -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | \ -! RUN: FileCheck %s --check-prefix=LLVMDialect - -! This test checks the lowering of atomic write - -!FIRDialect: func @_QQmain() { -!FIRDialect: %[[VAR_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} -!FIRDialect: %[[VAR_Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} -!FIRDialect: %[[VAR_Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"} -!FIRDialect: %[[CONST_44:.*]] = arith.constant 44 : i32 -!FIRDialect: omp.atomic.write %[[VAR_X]] = %[[CONST_44]] hint(uncontended) memory_order(seq_cst) : !fir.ref, i32 -!FIRDialect: %[[CONST_7:.*]] = arith.constant 7 : i32 -!FIRDialect: {{.*}} = fir.load %[[VAR_Y]] : !fir.ref -!FIRDialect: %[[VAR_7y:.*]] = arith.muli %[[CONST_7]], {{.*}} : i32 -!FIRDialect: omp.atomic.write %[[VAR_X]] = %[[VAR_7y]] memory_order(relaxed) : !fir.ref, i32 -!FIRDialect: %[[CONST_10:.*]] = arith.constant 10 : i32 -!FIRDialect: {{.*}} = fir.load %[[VAR_X]] : !fir.ref -!FIRDialect: {{.*}} = arith.muli %[[CONST_10]], {{.*}} : i32 -!FIRDialect: {{.*}} = fir.load %[[VAR_Z]] : 
!fir.ref -!FIRDialect: %[[CONST_2:.*]] = arith.constant 2 : i32 -!FIRDialect: {{.*}} = arith.divsi {{.*}}, %[[CONST_2]] : i32 -!FIRDialect: {{.*}} = arith.addi {{.*}}, {{.*}} : i32 -!FIRDialect: omp.atomic.write %[[VAR_Y]] = {{.*}} hint(speculative) memory_order(release) : !fir.ref, i32 -!FIRDialect: return -!FIRDialect: } - -!LLVMDialect: llvm.func @_QQmain() { -!LLVMDialect: %[[LLVM_VAR_2:.*]] = llvm.mlir.constant(2 : i32) : i32 -!LLVMDialect: %[[LLVM_VAR_10:.*]] = llvm.mlir.constant(10 : i32) : i32 -!LLVMDialect: %[[LLVM_VAR_7:.*]] = llvm.mlir.constant(7 : i32) : i32 -!LLVMDialect: %[[LLVM_VAR_44:.*]] = llvm.mlir.constant(44 : i32) : i32 -!LLVMDialect: %[[LLVM_VAR_1:.*]] = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_X:.*]] = llvm.alloca {{.*}} x i32 {bindc_name = "x", in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEx"} : (i64) -> !llvm.ptr -!LLVMDialect: %[[LLVM_VAR_1_SECOND:.*]] = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_Y:.*]] = llvm.alloca {{.*}} x i32 {bindc_name = "y", in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEy"} : (i64) -> !llvm.ptr -!LLVMDialect: %[[LLVM_VAR_1_THIRD:.*]] = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[LLVM_VAR_Z:.*]] = llvm.alloca {{.*}} x i32 {bindc_name = "z", in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEz"} : (i64) -> !llvm.ptr -!LLVMDialect: omp.atomic.write %[[LLVM_VAR_X]] = %[[LLVM_VAR_44]] hint(uncontended) memory_order(seq_cst) : !llvm.ptr, i32 -!LLVMDialect: {{.*}} = llvm.load %[[LLVM_VAR_Y]] : !llvm.ptr -!LLVMDialect: %[[LLVM_VAR_MUL_RESULT:.*]] = llvm.mul {{.*}}, %[[LLVM_VAR_7]] : i32 -!LLVMDialect: omp.atomic.write %[[LLVM_VAR_X]] = %[[LLVM_VAR_MUL_RESULT]] memory_order(relaxed) : !llvm.ptr, i32 -!LLVMDialect: {{.*}} = llvm.load %[[LLVM_VAR_X]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mul {{.*}}, %[[LLVM_VAR_10]] : i32 -!LLVMDialect: {{.*}} = llvm.load %[[LLVM_VAR_Z]] : 
!llvm.ptr -!LLVMDialect: {{.*}} = llvm.sdiv {{.*}}, %[[LLVM_VAR_2]] : i32 -!LLVMDialect: {{.*}} = llvm.add {{.*}} : i32 -!LLVMDialect: omp.atomic.write %[[LLVM_VAR_Y]] = {{.*}} hint(speculative) memory_order(release) : !llvm.ptr, i32 -!LLVMDialect: llvm.return -!LLVMDialect: } - -program OmpAtomicWrite - use omp_lib - integer :: x, y, z - !$omp atomic seq_cst write hint(omp_sync_hint_uncontended) - x = 8*4 + 12 - - !$omp atomic write relaxed - x = 7 * y - - !$omp atomic write release hint(omp_sync_hint_speculative) - y = 10*x + z/2 -end program OmpAtomicWrite From ec55f0bd5833ed3d64a49176a13075aee1965911 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Mon, 23 May 2022 16:53:17 +0200 Subject: [PATCH 236/908] [mlir][bufferization][NFC] Improve assembly format of AllocTensorOp No longer pass static dim sizes as an attribute. This was redundant and required extra checks in the verifier. This change also makes the op symmetrical to memref::AllocOp. Differential Revision: https://reviews.llvm.org/D126178 --- .../Bufferization/IR/BufferizationOps.td | 74 +++---------- .../Bufferization/IR/BufferizationOps.cpp | 103 ++++-------------- .../Transforms/OneShotModuleBufferize.cpp | 2 +- .../Transforms/InitTensorToAllocTensor.cpp | 4 +- ...ot-bufferize-alloc-tensor-elimination.mlir | 8 +- ...ne-shot-bufferize-allow-return-allocs.mlir | 2 +- .../one-shot-bufferize-partial.mlir | 2 +- .../Transforms/one-shot-bufferize.mlir | 6 +- ...-module-bufferize-allow-return-allocs.mlir | 2 +- .../one-shot-module-bufferize-analysis.mlir | 8 +- .../one-shot-module-bufferize-invalid.mlir | 10 +- .../Transforms/one-shot-module-bufferize.mlir | 10 +- .../Dialect/Bufferization/canonicalize.mlir | 4 +- mlir/test/Dialect/Bufferization/invalid.mlir | 22 +--- ...alysis-2fill-extract-matmul-all-perms.mlir | 48 ++++---- ...rize-analysis-init-tensor-elimination.mlir | 4 +- .../Dialect/Linalg/one-shot-bufferize.mlir | 6 +- .../SCF/one-shot-bufferize-analysis.mlir | 2 +- 
mlir/test/Dialect/SCF/one-shot-bufferize.mlir | 4 +- .../Dialect/Tensor/one-shot-bufferize.mlir | 4 +- .../Linalg/CPU/test-one-shot-bufferize.mlir | 12 +- 21 files changed, 109 insertions(+), 228 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td index 38a4d39d0ce2db..4ae773af33d7ca 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -38,15 +38,11 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor", decisions during One-Shot Bufferize. }]; - let arguments = - (ins Variadic:$sizes, I64ArrayAttr:$static_sizes); + let arguments = (ins Variadic:$dynamicSizes); let results = (outs AnyTensor:$result); - let assemblyFormat = [{ - custom($sizes, $static_sizes) attr-dict - `:` type($result) - }]; + let assemblyFormat = "`(`$dynamicSizes`)` attr-dict `:` type($result)"; let extraClassDeclaration = [{ LogicalResult bufferize(RewriterBase &rewriter, BufferizationState &state); @@ -56,81 +52,41 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor", return false; } - static StringRef getStaticSizesAttrName() { - return "static_sizes"; - } - RankedTensorType getType() { return getResult().getType().cast(); } - // Infer the shape of the result tensor given the static shapes - // and element type of the result tensor. - static Type inferResultType(ArrayRef staticSizes, Type elementType, - Attribute encoding = {}); - // Return true if the size of the tensor is dynamic at `idx` - bool isDynamicSize(unsigned idx) { - APInt v = *(static_sizes().getAsValueRange().begin() + idx); - return ShapedType::isDynamic(v.getSExtValue()); - } - - // Assert that the size of the result tensor is static at `idx` - // and return the shape. - int64_t getStaticSize(unsigned idx) { - assert(!isDynamicSize(idx) && "expected static size"); - APInt v = *(static_sizes(). 
- template getAsValueRange().begin() + idx); - return v.getSExtValue(); + bool isDynamicDim(unsigned idx) { + return getType().isDynamicDim(idx); } // Return the argument position that contains the dynamic size of // the tensor at dimension `idx`. Asserts that the shape is // dynamic at that `idx`. unsigned getIndexOfDynamicSize(unsigned idx) { - assert(isDynamicSize(idx) && "expected dynamic size"); + assert(isDynamicDim(idx) && "expected dynamic size"); + ArrayRef shape = getType().getShape(); return std::count_if( - static_sizes().getValue().begin(), - static_sizes().getValue().begin() + idx, - [&](Attribute attr) { - return ShapedType::isDynamic(attr.cast().getInt()); - }); + shape.begin(), shape.begin() + idx, + [&](int64_t size) { return ShapedType::isDynamic(size); }); } - // Return both static and dynamic sizes as a list of `OpFoldResult`. - SmallVector getMixedSizes(); - // Return the Value of the dynamic size of the tensor at dimension // `idx`. Asserts that the shape is dynamic at that `idx. Value getDynamicSize(unsigned idx) { return getOperand(getIndexOfDynamicSize(idx)); } - }]; - let builders = [ - OpBuilder<(ins "ValueRange":$shape, - "ArrayRef":$staticShape, "Type":$elementType), - [{ - build($_builder, $_state, - AllocTensorOp::inferResultType(staticShape, elementType), - shape, $_builder.getI64ArrayAttr(staticShape)); - }]>, - OpBuilder<(ins "ValueRange":$shape, "Type":$elementType), - [{ - SmallVector staticShape( - shape.size(), ShapedType::kDynamicSize); - build($_builder, $_state, shape, staticShape, elementType); - }]>, - OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType), - [{ - build($_builder, $_state, ValueRange{}, staticShape, elementType); - }]>, - OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType, - CArg<"ArrayRef", "{}">:$attrs)> - ]; + // Assert that the size of the result tensor is static at `idx` + // and return the shape. 
+ int64_t getStaticSize(unsigned idx) { + assert(!isDynamicDim(idx) && "expected static size"); + return getType().getShape()[idx]; + } + }]; let hasCanonicalizer = 1; - let hasCustomAssemblyFormat = 1; let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp index 00f0a74f73924a..d9eb0bb2118db7 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -12,6 +12,7 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/Matchers.h" using namespace mlir; using namespace mlir::bufferization; @@ -145,62 +146,14 @@ LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter, return success(); } -void AllocTensorOp::build(OpBuilder &b, OperationState &result, - ArrayRef sizes, Type elementType, - ArrayRef attrs) { - SmallVector dynamicSizes; - SmallVector staticSizes; - dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - auto resultType = RankedTensorType ::get(staticSizes, elementType); - build(b, result, resultType, dynamicSizes, b.getI64ArrayAttr(staticSizes)); - result.addAttributes(attrs); -} - LogicalResult AllocTensorOp::verify() { - RankedTensorType resultType = getType(); - SmallVector staticSizes = llvm::to_vector<4>(llvm::map_range( - static_sizes().cast(), - [](Attribute a) -> int64_t { return a.cast().getInt(); })); - - if (failed(verifyListOfOperandsOrIntegers( - *this, "sizes", resultType.getRank(), static_sizes(), sizes(), - ShapedType::isDynamic))) - return failure(); - - if (static_sizes().size() != static_cast(resultType.getRank())) - return emitError("expected ") << resultType.getRank() << " sizes values"; - - Type expectedType = AllocTensorOp::inferResultType( - staticSizes, resultType.getElementType(), resultType.getEncoding()); - if (resultType 
!= expectedType) { - return emitError("specified type ") - << resultType << " does not match the inferred type " - << expectedType; - } + if (getType().getNumDynamicDims() != + static_cast(dynamicSizes().size())) + return emitError("expected ") + << getType().getNumDynamicDims() << " dynamic sizes"; return success(); } -Type AllocTensorOp::inferResultType(ArrayRef staticSizes, - Type elementType, Attribute encoding) { - return RankedTensorType::get(staticSizes, elementType, encoding); -} - -SmallVector AllocTensorOp::getMixedSizes() { - SmallVector mixedSizes; - mixedSizes.reserve(getType().getRank()); - unsigned dynamicValIndex = 0; - for (Attribute attr : static_sizes()) { - auto intAttr = attr.cast(); - if (!ShapedType::isDynamic(intAttr.getInt())) { - mixedSizes.push_back(intAttr); - continue; - } - mixedSizes.push_back(sizes()[dynamicValIndex++]); - } - return mixedSizes; -} - namespace { /// Change the type of the result of a `bufferization.alloc_tensor` by making /// the result type statically sized along dimension that in the original @@ -208,46 +161,36 @@ namespace { /// `constant` op. For example: /// /// %c5 = arith.constant 5: index -/// %0 = bufferization.alloc_tensor [%arg0, %c5] : tensor +/// %0 = bufferization.alloc_tensor(%arg0, %c5) : tensor /// /// to /// -/// %0 = bufferization.alloc_tensor [%arg0, 5] : tensor +/// %0 = bufferization.alloc_tensor(%arg0) : tensor struct ReplaceStaticShapeDims : OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AllocTensorOp op, PatternRewriter &rewriter) const override { - SmallVector dynamicSizes; - SmallVector staticSizes; - for (unsigned i = 0, e = op.getType().getRank(); i != e; ++i) { - // If the size is already static, nothing to do. 
- if (!op.isDynamicSize(i)) { - staticSizes.push_back(op.getStaticSize(i)); + SmallVector newShape = llvm::to_vector(op.getType().getShape()); + SmallVector newDynamicSizes; + unsigned int dynValCounter = 0; + for (int64_t i = 0; i < op.getType().getRank(); ++i) { + if (!op.isDynamicDim(i)) continue; + Value value = op.dynamicSizes()[dynValCounter++]; + APInt intVal; + if (matchPattern(value, m_ConstantInt(&intVal))) { + newShape[i] = intVal.getSExtValue(); + } else { + newDynamicSizes.push_back(value); } - - // If the size is dynamic but defined using a `constant` op, get the - // constant value to find the static size to use. - unsigned operandNum = op.getIndexOfDynamicSize(i); - Value sizeOperand = op.getOperand(operandNum); - if (auto constantIndexOp = - sizeOperand.getDefiningOp()) { - staticSizes.push_back(constantIndexOp.value()); - continue; - } - - // Fallback case. Keep the size dynamic. - dynamicSizes.push_back(sizeOperand); - staticSizes.push_back(ShapedType::kDynamicSize); } - RankedTensorType newType = - RankedTensorType::get(staticSizes, op.getType().getElementType()); + RankedTensorType newType = RankedTensorType::get( + newShape, op.getType().getElementType(), op.getType().getEncoding()); if (newType == op.getType()) return failure(); auto newOp = - rewriter.create(op.getLoc(), newType, dynamicSizes, - rewriter.getI64ArrayAttr(staticSizes)); + rewriter.create(op.getLoc(), newType, newDynamicSizes); rewriter.replaceOpWithNewOp(op, op.getType(), newOp); return success(); } @@ -262,7 +205,7 @@ struct FoldDimOfAllocTensorOp : public OpRewritePattern { auto allocTensorOp = dimOp.source().getDefiningOp(); if (!allocTensorOp || !maybeConstantIndex) return failure(); - if (!allocTensorOp.isDynamicSize(*maybeConstantIndex)) + if (!allocTensorOp.getType().isDynamicDim(*maybeConstantIndex)) return failure(); rewriter.replaceOp(dimOp, allocTensorOp.getDynamicSize(*maybeConstantIndex)); @@ -280,7 +223,7 @@ LogicalResult AllocTensorOp::reifyResultShapes( 
OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) { auto shapes = llvm::to_vector<4>(llvm::map_range( llvm::seq(0, getType().getRank()), [&](int64_t dim) -> Value { - if (isDynamicSize(dim)) + if (isDynamicDim(dim)) return getDynamicSize(dim); return builder.create(getLoc(), getStaticSize(dim)); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index aa6672860ec287..fec5cb6b56a2b9 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -32,7 +32,7 @@ // Example: `foo` fails bufferization because %0 is not equivalent to any bbArg. // ``` // func @foo() -> tensor { -// %0 = linalg.alloc_tensor [...] : tensor +// %0 = bufferization.alloc_tensor(...) : tensor // return %0 : tensor // } // ``` diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp index e1d74e99242e3c..0f926f5a05281e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp @@ -23,8 +23,8 @@ struct InitTensorLoweringPattern : public OpRewritePattern { LogicalResult matchAndRewrite(InitTensorOp op, PatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp( - op, op.getMixedSizes(), op.getType().getElementType()); + rewriter.replaceOpWithNewOp(op, op.getType(), + op.sizes()); return success(); } }; diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir index 76c835f04e5f57..dd7dd54b3a7022 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir +++ 
b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir @@ -18,7 +18,7 @@ func.func @buffer_forwarding_conflict( // insert_slice. AllocTensorOp replaces the alloc_tensor with an out-of-place // extract_slice. // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - %a = bufferization.alloc_tensor[%sz] : tensor + %a = bufferization.alloc_tensor(%sz) : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor @@ -50,7 +50,7 @@ func.func @buffer_forwarding_no_conflict( // alloc_tensor itself does not alloc but forwards to the insert_slice. // InitTensorOp replaces the alloc_tensor with an inplace extract_slice. // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] - %a = bufferization.alloc_tensor[%sz] : tensor + %a = bufferization.alloc_tensor(%sz) : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor @@ -71,7 +71,7 @@ func.func @insertion_point_inside_loop(%t : tensor, %sz : index) -> (tens %c5 = arith.constant 5 : index // CHECK-NOT: memref.alloc - %blank = bufferization.alloc_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor() : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { @@ -102,7 +102,7 @@ func.func @insertion_point_outside_loop(%t : tensor, %sz : index, // CHECK-NOT: memref.alloc // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1] - %blank = bufferization.alloc_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor() : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir 
b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir index 56c75fefe3ce2d..9fff7f990b3917 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir @@ -16,7 +16,7 @@ func.func @buffer_not_deallocated(%t : tensor, %c : i1) -> tensor // CHECK-NOT: dealloc // CHECK: scf.yield %[[casted]] %sz = "test.some_op"() : () -> (index) - %0 = bufferization.alloc_tensor[%sz] : tensor + %0 = bufferization.alloc_tensor(%sz) : tensor scf.yield %0 : tensor } else { // CHECK: } else { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir index 06de89c0d8ef85..1eb1b4cac9f61e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -142,7 +142,7 @@ func.func @unknown_op_may_read(%v: vector<5xf32>) // bufferizes out-of-place. 
// CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32> // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> - %t1 = bufferization.alloc_tensor [10] : tensor<10xf32> + %t1 = bufferization.alloc_tensor() : tensor<10xf32> // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]] // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir index 74382aef3fa72e..e2cda814a8d154 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -44,7 +44,7 @@ func.func @return_tensor(%A : tensor, %v : vector<4xf32>) -> (tensor) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = bufferization.alloc_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor() : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -97,7 +97,7 @@ func.func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index) // CHECK-LABEL: func @copy_deallocated( func.func @copy_deallocated() -> tensor<10xf32> { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = bufferization.alloc_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor() : tensor<10xf32> // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]] // CHECK: memref.dealloc %[[alloc]] // CHECK: return %[[alloc_tensor]] @@ -111,7 +111,7 @@ func.func @copy_deallocated() -> tensor<10xf32> { func.func @select_different_tensors(%t: tensor, %sz: index, %c: i1) -> tensor { // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : memref // CHECK-DAG: %[[alloc:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref - %0 = bufferization.alloc_tensor [%sz] : tensor + %0 = bufferization.alloc_tensor(%sz) : tensor // A cast must be inserted because %t and %0 have different memref types. 
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref to memref diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir index 60b30cee6f6a7d..044392a2667529 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -17,7 +17,7 @@ // CHECK: %[[alloc:.*]] = memref.alloc // CHECK: return %[[alloc]] func.func @create_tensor() -> tensor<10xf32> { - %0 = bufferization.alloc_tensor [10] : tensor<10xf32> + %0 = bufferization.alloc_tensor() : tensor<10xf32> return %0 : tensor<10xf32> } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir index 3bade5c3bf062d..757417624e684d 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -682,7 +682,7 @@ func.func @matmul_on_tensors( %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -720,7 +720,7 @@ func.func @matmul_on_tensors( %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -1252,7 +1252,7 @@ func.func @write_to_same_alloc_tensor_in_place( %lb : index, %ub : index, %step : index, %sz: 
index, %sz2: index) -> (tensor) { - %B = bufferization.alloc_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor(%sz2) : tensor // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { @@ -1280,7 +1280,7 @@ func.func @write_to_same_alloc_tensor_out_of_place( %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32) -> (tensor) { - %B = bufferization.alloc_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor(%sz2) : tensor %C = tensor.insert %f into %B[%lb] : tensor // CHECK: scf.for {{.*}} { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir index 33983d19b9bca6..140f67b7c30241 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -60,7 +60,7 @@ func.func @scf_if_not_aliasing( scf.yield %t1 : tensor } else { // This buffer aliases. 
- %t2 = bufferization.alloc_tensor [%idx] : tensor + %t2 = bufferization.alloc_tensor(%idx) : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %t2 : tensor } @@ -221,7 +221,7 @@ func.func @unknown_op(%A : tensor<4xf32>) -> tensor<4xf32> func.func @mini_test_case1() -> tensor<10x20xf32> { %f0 = arith.constant 0.0 : f32 - %t = bufferization.alloc_tensor [10, 20] : tensor<10x20xf32> + %t = bufferization.alloc_tensor() : tensor<10x20xf32> %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %r : tensor<10x20xf32> @@ -274,7 +274,7 @@ func.func @call_to_unknown_tensor_returning_func(%t : tensor) { // ----- func.func @foo(%t : tensor<5xf32>) -> (tensor<5xf32>) { - %0 = bufferization.alloc_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor() : tensor<5xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %0 : tensor<5xf32> } @@ -291,7 +291,7 @@ func.func @call_to_func_returning_non_equiv_tensor(%t : tensor<5xf32>) { func.func @destination_passing_style_dominance_test_1(%cst : f32, %idx : index, %idx2 : index) -> f32 { %0 = scf.execute_region -> tensor { - %1 = bufferization.alloc_tensor [%idx] : tensor + %1 = bufferization.alloc_tensor(%idx) : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %1 : tensor } @@ -304,7 +304,7 @@ func.func @destination_passing_style_dominance_test_1(%cst : f32, %idx : index, func.func @destination_passing_style_dominance_test_2(%cst : f32, %idx : index, %idx2 : index) -> f32 { - %1 = bufferization.alloc_tensor [%idx] : tensor + %1 = bufferization.alloc_tensor(%idx) : tensor %0 = scf.execute_region -> tensor { // This YieldOp is in destination-passing style, thus no error. 
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index f6ba5c9f3f9a3e..d617a29c03642d 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -70,7 +70,7 @@ func.func private @external_func_with_return_val(tensor<4xi32>) -> f32 // CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map2a]]> { func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>) { - %t = bufferization.alloc_tensor [20, 10] : tensor<20x10xf32> + %t = bufferization.alloc_tensor() : tensor<20x10xf32> %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1] : tensor<20x10xf32> to tensor<2x?xf32> return %0 : tensor<2x?xf32> @@ -120,7 +120,7 @@ func.func @main(%t: tensor {bufferization.writable = false}) -> (f32) { // CHECK-LABEL: func @func_without_tensor_args func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = bufferization.alloc_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor() : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -456,9 +456,9 @@ func.func @main() { // CHECK-DAG: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cC:.*]] = memref.cast %[[C]] : memref to memref - %A = bufferization.alloc_tensor [64] : tensor<64xf32> - %B = bufferization.alloc_tensor [64] : tensor<64xf32> - %C = bufferization.alloc_tensor [] : tensor + %A = bufferization.alloc_tensor() : tensor<64xf32> + %B = bufferization.alloc_tensor() : tensor<64xf32> + %C = bufferization.alloc_tensor() : tensor // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>) // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) 
outs(%[[B]] : memref<64xf32>) diff --git a/mlir/test/Dialect/Bufferization/canonicalize.mlir b/mlir/test/Dialect/Bufferization/canonicalize.mlir index 949024369836d1..c41cebd77a9ae7 100644 --- a/mlir/test/Dialect/Bufferization/canonicalize.mlir +++ b/mlir/test/Dialect/Bufferization/canonicalize.mlir @@ -249,10 +249,10 @@ func.func @load_from_buffer_cast(%arg0: index, %arg1: index, func.func @alloc_tensor_canonicalize() -> (tensor<4x5x?xf32>) { %c6 = arith.constant 6 : index - %0 = bufferization.alloc_tensor [4, 5, %c6] : tensor<4x5x?xf32> + %0 = bufferization.alloc_tensor(%c6) : tensor<4x5x?xf32> return %0 : tensor<4x5x?xf32> } // CHECK: func @alloc_tensor_canonicalize -// CHECK: %[[T0:.+]] = bufferization.alloc_tensor [4, 5, 6] : tensor<4x5x6xf32> +// CHECK: %[[T0:.+]] = bufferization.alloc_tensor() : tensor<4x5x6xf32> // CHECK: %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32> // CHECK: return %[[T1]] diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir index f461f8a6e76772..9e732b9bc6e487 100644 --- a/mlir/test/Dialect/Bufferization/invalid.mlir +++ b/mlir/test/Dialect/Bufferization/invalid.mlir @@ -1,26 +1,8 @@ // RUN: mlir-opt %s -split-input-file -verify-diagnostics -func.func @alloc_tensor_err(%arg0 : index, %arg1 : index) -{ - // expected-error @+1 {{specified type 'tensor<4x?x?x5xf32>' does not match the inferred type 'tensor<4x5x?x?xf32>'}} - %1 = bufferization.alloc_tensor [4, 5, %arg0, %arg1] : tensor<4x?x?x5xf32> - return -} - -// ----- - -func.func @alloc_tensor_err(%arg0 : index) -{ - // expected-error @+1 {{expected 4 sizes values}} - %1 = bufferization.alloc_tensor [4, 5, %arg0] : tensor<4x?x?x5xf32> - return -} - -// ----- - func.func @alloc_tensor_err(%arg0 : index) { - // expected-error @+1 {{expected 2 dynamic sizes values}} - %1 = "bufferization.alloc_tensor"(%arg0) {static_sizes = [4, -1, -1, 5]} : (index) -> tensor<4x?x?x5xf32> + // expected-error @+1 {{expected 2 
dynamic sizes}} + %1 = bufferization.alloc_tensor(%arg0) : tensor<4x?x?x5xf32> return } diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir index b5e2ee8e1e1351..cbf8d97b40722a 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir @@ -15,7 +15,7 @@ func.func @fill_extract_matmul_1234( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -42,7 +42,7 @@ func.func @fill_extract_matmul_1243( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -69,7 +69,7 @@ func.func @fill_extract_matmul_1324( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -96,7 +96,7 @@ func.func @fill_extract_matmul_1342( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 
: f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -123,7 +123,7 @@ func.func @fill_extract_matmul_1423( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -150,7 +150,7 @@ func.func @fill_extract_matmul_1432( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -177,7 +177,7 @@ func.func @fill_extract_matmul_2134( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -204,7 +204,7 @@ func.func @fill_extract_matmul_2143( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) 
outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -231,7 +231,7 @@ func.func @fill_extract_matmul_2314( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -258,7 +258,7 @@ func.func @fill_extract_matmul_2341( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -285,7 +285,7 @@ func.func @fill_extract_matmul_2413( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -312,7 +312,7 @@ func.func @fill_extract_matmul_2431( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -339,7 +339,7 @@ func.func @fill_extract_matmul_3124( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : 
f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -366,7 +366,7 @@ func.func @fill_extract_matmul_3142( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -392,7 +392,7 @@ func.func @fill_extract_matmul_3214( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -419,7 +419,7 @@ func.func @fill_extract_matmul_3241( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -446,7 +446,7 @@ func.func @fill_extract_matmul_3412( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 
1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -473,7 +473,7 @@ func.func @fill_extract_matmul_3421( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -500,7 +500,7 @@ func.func @fill_extract_matmul_4123( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -527,7 +527,7 @@ func.func @fill_extract_matmul_4132( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -554,7 +554,7 @@ func.func @fill_extract_matmul_4213( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -581,7 +581,7 @@ func.func @fill_extract_matmul_4231( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = 
bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -608,7 +608,7 @@ func.func @fill_extract_matmul_4312( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -635,7 +635,7 @@ func.func @fill_extract_matmul_4321( %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir index 088b8c708540f2..18302e25ece144 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir @@ -10,7 +10,7 @@ func.func @buffer_forwarding_conflict(%arg0: tensor {bufferization.writab // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"] // Instead of allocating, share buffer with some inplace bufferization? 
- %0 = bufferization.alloc_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor(%arg1) : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] @@ -37,7 +37,7 @@ func.func @buffer_forwarding_no_conflict(%arg0: tensor {bufferization.wri // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"] // Instead of allocating, share buffer with some inplace bufferization? - %0 = bufferization.alloc_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor(%arg1) : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 72023f62d7b08e..7fad7d62dc9074 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -347,9 +347,9 @@ func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index %c8 = arith.constant 8 : index - %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor() : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> + %2 = bufferization.alloc_tensor() : tensor<1x6x8xf32> %3 = scf.for %arg3 = %c0 to %c32 step %c8 iter_args(%arg4 = %1) -> (tensor) { %4 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg3) %5 = tensor.insert_slice %2 into %arg4[%4,0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : @@ -370,7 +370,7 @@ func.func @do_not_copy_alloc_tensors(%f1: f32, %f2: f32, %idx: index) // CHECK-NOT: copy // CHECK: memref.store // CHECK: memref.store - %0 = bufferization.alloc_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor() : tensor<5xf32> %1 = tensor.insert %f1 into %0[%idx] : tensor<5xf32> %2 = tensor.insert %f2 into %0[%idx] : tensor<5xf32> return %1, %2 : 
tensor<5xf32>, tensor<5xf32> diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir index c427db7d9fbece..556c6fb0591fb3 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -583,7 +583,7 @@ func.func @write_to_same_tensor_in_loop_in_place( { // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - %B = bufferization.alloc_tensor [%sz] : tensor + %B = bufferization.alloc_tensor(%sz) : tensor %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 // The tensor.insert is in-place because the %B is defined inside the loop. diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir index 4ff9c9ea326fed..a3b57228beaf67 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -220,7 +220,7 @@ func.func @scf_if_non_equiv_yields( // CHECK: return %[[r]] func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 { %r = scf.execute_region -> (tensor) { - %t2 = bufferization.alloc_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor(%i) : tensor scf.yield %t2 : tensor } %f = tensor.extract %r[%j] : tensor @@ -274,7 +274,7 @@ func.func @scf_for_yield_non_equivalent( func.func @scf_for_yield_allocation(%t: tensor, %lb : index, %ub : index, %step : index) -> tensor { %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor { - %t2 = bufferization.alloc_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor(%i) : tensor scf.yield %t2 : tensor } diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir index 7a98300a1e66f9..1e24376c9434d4 100644 --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -179,9 +179,9 @@ 
func.func @rank_reducing( %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index - %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor() : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> + %2 = bufferization.alloc_tensor() : tensor<1x6x8xf32> %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor) { %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7) %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir index 731f7e5c5c0ff6..db5a539fdee300 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -16,7 +16,7 @@ func.func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: ten %c0 = arith.constant 0 : index %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor %1 = affine.apply #map0(%c0, %c64)[%c2] - %2 = bufferization.alloc_tensor [%1, 2] : tensor + %2 = bufferization.alloc_tensor(%1) : tensor %3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32> @@ -33,13 +33,13 @@ func.func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: ten // call @printMemrefF32(%B) : (tensor<*xf32>) -> () %4 = affine.apply #map0(%c0, %c64)[%c2] - %5 = bufferization.alloc_tensor [%4, 2] : tensor + %5 = bufferization.alloc_tensor(%4) : tensor %6 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %5) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg0[%arg3] 
[2] [1] : tensor<64xf32> to tensor<2xf32> %10 = tensor.cast %9 : tensor<2xf32> to tensor %11 = tensor.pad %10 low[%c0] high[%c0] { - ^bb0(%arg5: index): + ^bb0(%arg5: index): tensor.yield %cst : f32 } : tensor to tensor<2xf32> %12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor @@ -80,9 +80,9 @@ func.func @main() { %v1 = arith.constant 1.0 : f32 %v2 = arith.constant 2.0 : f32 - %A = bufferization.alloc_tensor [64] : tensor<64xf32> - %B = bufferization.alloc_tensor [64] : tensor<64xf32> - %C = bufferization.alloc_tensor [] : tensor + %A = bufferization.alloc_tensor() : tensor<64xf32> + %B = bufferization.alloc_tensor() : tensor<64xf32> + %C = bufferization.alloc_tensor() : tensor %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor From bb82f746129f7deee5e09bd1577ce55eb88c6f9f Mon Sep 17 00:00:00 2001 From: Jingu Kang Date: Mon, 23 May 2022 12:33:48 +0100 Subject: [PATCH 237/908] Revert "Revert "[AArch64] Set maximum VF with shouldMaximizeVectorBandwidth"" This reverts commit 42ebfa8269470e6b1fe2de996d3f1db6d142e16a. The commit from https://reviews.llvm.org/D125918 has fixed the stage 2 build failure. 
Differential Revision: https://reviews.llvm.org/D118979 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 11 +++++++---- .../llvm/Analysis/TargetTransformInfoImpl.h | 5 ++++- llvm/lib/Analysis/TargetTransformInfo.cpp | 5 +++-- .../AArch64/AArch64TargetTransformInfo.cpp | 6 ++++++ .../Target/AArch64/AArch64TargetTransformInfo.h | 2 ++ .../Target/Hexagon/HexagonTargetTransformInfo.h | 7 +++---- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 ++++- ...ctorization-factor-for-unprofitable-memops.ll | 11 ++++++----- .../AArch64/loop-vectorization-factors.ll | 6 +++--- .../AArch64/reduction-small-size.ll | 16 ++++++++-------- .../scalable-vectorization-cost-tuning.ll | 2 +- .../AArch64/scalable-vectorization.ll | 16 ++++++++-------- .../LoopVectorize/AArch64/sve-illegal-type.ll | 8 ++++---- 13 files changed, 59 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 4704697b40d2e2..2a4620e5abcacf 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -937,7 +937,8 @@ class TargetTransformInfo { /// creating vectors that span multiple vector registers. /// If false, the vectorization factor will be chosen based on the /// size of the widest element type. - bool shouldMaximizeVectorBandwidth() const; + /// \p K Register Kind for vectorization. + bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; /// \return The minimum vectorization factor for types of given element /// bit width, or 0 if there is no minimum VF. 
The returned value only @@ -1640,7 +1641,8 @@ class TargetTransformInfo::Concept { virtual unsigned getMinVectorRegisterBitWidth() const = 0; virtual Optional getMaxVScale() const = 0; virtual Optional getVScaleForTuning() const = 0; - virtual bool shouldMaximizeVectorBandwidth() const = 0; + virtual bool + shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0; virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const = 0; virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; @@ -2139,8 +2141,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { Optional getVScaleForTuning() const override { return Impl.getVScaleForTuning(); } - bool shouldMaximizeVectorBandwidth() const override { - return Impl.shouldMaximizeVectorBandwidth(); + bool shouldMaximizeVectorBandwidth( + TargetTransformInfo::RegisterKind K) const override { + return Impl.shouldMaximizeVectorBandwidth(K); } ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 383b91c7af13dc..044e9bc179a5cd 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -417,7 +417,10 @@ class TargetTransformInfoImplBase { Optional getMaxVScale() const { return None; } Optional getVScaleForTuning() const { return None; } - bool shouldMaximizeVectorBandwidth() const { return false; } + bool + shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { + return false; + } ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const { return ElementCount::get(0, IsScalable); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 3e4cacf17a40dc..ac5c56d5bd964a 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ 
b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -626,8 +626,9 @@ Optional TargetTransformInfo::getVScaleForTuning() const { return TTIImpl->getVScaleForTuning(); } -bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const { - return TTIImpl->shouldMaximizeVectorBandwidth(); +bool TargetTransformInfo::shouldMaximizeVectorBandwidth( + TargetTransformInfo::RegisterKind K) const { + return TTIImpl->shouldMaximizeVectorBandwidth(K); } ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index cf61d779d8120a..205d59f878d3ff 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -51,6 +51,12 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return (CallerBits & CalleeBits) == CalleeBits; } +bool AArch64TTIImpl::shouldMaximizeVectorBandwidth( + TargetTransformInfo::RegisterKind K) const { + assert(K != TargetTransformInfo::RGK_Scalar); + return K == TargetTransformInfo::RGK_FixedWidthVector; +} + /// Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 679ee5fecfbfb9..a84cc89a6bf424 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -135,6 +135,8 @@ class AArch64TTIImpl : public BasicTTIImplBase { return ST->getVScaleForTuning(); } + bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; + /// Try to return an estimate cost factor that can be used as a multiplier /// when scalarizing an operation for a vector with ElementCount \p VF. 
/// For scalable vectors this currently takes the most pessimistic view based diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 16245528b5f2e3..7bbaf7ae9cb260 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -86,12 +86,11 @@ class HexagonTTIImpl : public BasicTTIImplBase { unsigned getMinVectorRegisterBitWidth() const; ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; - bool shouldMaximizeVectorBandwidth() const { + bool + shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { return true; } - bool supportsEfficientVectorElementLoadStore() { - return false; - } + bool supportsEfficientVectorElementLoadStore() { return false; } bool hasBranchDivergence() { return false; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6a4159b06d577a..cc9e7425ee55d1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5216,9 +5216,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget( return ElementCount::getFixed(ClampedConstTripCount); } + TargetTransformInfo::RegisterKind RegKind = + ComputeScalableMaxVF ? 
TargetTransformInfo::RGK_ScalableVector + : TargetTransformInfo::RGK_FixedWidthVector; ElementCount MaxVF = MaxVectorElementCount; if (MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 && - TTI.shouldMaximizeVectorBandwidth())) { + TTI.shouldMaximizeVectorBandwidth(RegKind))) { auto MaxVectorElementCountMaxBW = ElementCount::get( PowerOf2Floor(WidestRegister.getKnownMinSize() / SmallestType), ComputeScalableMaxVF); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll index 371d209bafffed..a1ca0fea797275 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll @@ -4,11 +4,12 @@ ; are not profitable. ; Test with a loop that contains memory accesses of i8 and i32 types. The -; default maximum VF for NEON is 4. And while we don't have an instruction to -; load 4 x i8, vectorization might still be profitable. +; maximum VF for NEON is calculated by 128/size of smallest type in loop. +; And while we don't have an instruction to load 4 x i8, vectorization +; might still be profitable. define void @test_load_i8_store_i32(i8* noalias %src, i32* noalias %dst, i32 %off, i64 %N) { ; CHECK-LABEL: @test_load_i8_store_i32( -; CHECK: <4 x i8> +; CHECK: <16 x i8> ; entry: br label %loop @@ -32,7 +33,7 @@ exit: ; Same as test_load_i8_store_i32, but with types flipped for load and store. define void @test_load_i32_store_i8(i32* noalias %src, i8* noalias %dst, i32 %off, i64 %N) { ; CHECK-LABEL: @test_load_i32_store_i8( -; CHECK: <4 x i8> +; CHECK: <16 x i8> ; entry: br label %loop @@ -84,7 +85,7 @@ exit: ; vectorization factor. 
define void @test_load_i8_store_i64_large(i8* noalias %src, i64* noalias %dst, i64* noalias %dst.2, i64* noalias %dst.3, i64* noalias %dst.4, i64* noalias %dst.5, i64%off, i64 %off.2, i64 %N) { ; CHECK-LABEL: @test_load_i8_store_i64_large -; CHECK: <2 x i64> +; CHECK: <8 x i64> ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index e6e43375204da6..28eabe382dfbd1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -116,9 +116,9 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: @add_d( -; CHECK: load <4 x i16> -; CHECK: add nsw <4 x i32> -; CHECK: store <4 x i32> +; CHECK: load <8 x i16> +; CHECK: add nsw <8 x i32> +; CHECK: store <8 x i32> define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 { entry: %cmp7 = icmp sgt i32 %len, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll index a95c0aa6f375f7..071255c4f4f002 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll @@ -123,16 +123,16 @@ for.body: ; } ; ; CHECK: vector.body: -; CHECK: phi <8 x i16> -; CHECK: [[Ld1:%[a-zA-Z0-9.]+]] = load <8 x i8> -; CHECK: zext <8 x i8> [[Ld1]] to <8 x i16> -; CHECK: [[Ld2:%[a-zA-Z0-9.]+]] = load <8 x i8> -; CHECK: zext <8 x i8> [[Ld2]] to <8 x i16> -; CHECK: add <8 x i16> -; CHECK: add <8 x i16> +; CHECK: phi <16 x i16> +; CHECK: [[Ld1:%[a-zA-Z0-9.]+]] = load <16 x i8> +; CHECK: zext <16 x i8> [[Ld1]] to <16 x i16> +; CHECK: [[Ld2:%[a-zA-Z0-9.]+]] = load <16 x i8> +; CHECK: zext <16 x i8> [[Ld2]] to <16 x i16> +; CHECK: add <16 x i16> +; CHECK: add <16 x i16> ; ; CHECK: middle.block: -; 
CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> +; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> ; CHECK: zext i16 [[Rdx]] to i32 ; define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll index 27868480c23b5a..262236075f7cdf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll @@ -29,7 +29,7 @@ ; NEOVERSE-N2: LV: Vector loop of width vscale x 4 costs: 3 (assuming a minimum vscale of 1). ; VF-4: <4 x i32> -; VF-VSCALE4: +; VF-VSCALE4: <16 x i32> define void @test0(i32* %a, i8* %b, i32* %c) #0 { entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll index 0e96c62e2ad0bd..d15199662be087 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll @@ -9,9 +9,9 @@ define void @test0(i32* %a, i8* %b, i32* %c) #0 { ; CHECK: LV: Checking a loop in 'test0' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 -; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4 +; CHECK_SCALABLE_ON: LV: Selecting VF: 16 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF -; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4 +; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16 entry: @@ -40,9 +40,9 @@ exit: define void @test1(i32* %a, i8* %b) #0 { ; CHECK: LV: Checking a loop in 'test1' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = 
vscale x 4 -; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4 +; CHECK_SCALABLE_ON: LV: Selecting VF: 16 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF -; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4 +; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16 entry: @@ -72,9 +72,9 @@ exit: define void @test2(i32* %a, i8* %b) #0 { ; CHECK: LV: Checking a loop in 'test2' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2 -; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2 +; CHECK_SCALABLE_ON: LV: Selecting VF: 16 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF -; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4 +; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16 entry: @@ -104,9 +104,9 @@ exit: define void @test3(i32* %a, i8* %b) #0 { ; CHECK: LV: Checking a loop in 'test3' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1 -; CHECK_SCALABLE_ON: LV: Selecting VF: 4 +; CHECK_SCALABLE_ON: LV: Selecting VF: 16 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF -; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4 +; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1 ; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16 entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll index 4d0886f4d953da..43ef43c11507d3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll @@ -83,11 +83,11 @@ for.end: define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) { ; CHECK-LABEL: @uniform_store_i1 ; CHECK: vector.body -; CHECK: %[[GEP:.*]] = 
getelementptr inbounds i64, <2 x i64*> {{.*}}, i64 1 -; CHECK: %[[ICMP:.*]] = icmp eq <2 x i64*> %[[GEP]], %[[SPLAT:.*]] -; CHECK: %[[EXTRACT1:.*]] = extractelement <2 x i1> %[[ICMP]], i32 0 +; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <64 x i64*> {{.*}}, i64 1 +; CHECK: %[[ICMP:.*]] = icmp eq <64 x i64*> %[[GEP]], %[[SPLAT:.*]] +; CHECK: %[[EXTRACT1:.*]] = extractelement <64 x i1> %[[ICMP]], i32 0 ; CHECK: store i1 %[[EXTRACT1]], i1* %dst -; CHECK: %[[EXTRACT2:.*]] = extractelement <2 x i1> %[[ICMP]], i32 1 +; CHECK: %[[EXTRACT2:.*]] = extractelement <64 x i1> %[[ICMP]], i32 1 ; CHECK: store i1 %[[EXTRACT2]], i1* %dst ; CHECK-NOT: vscale entry: From 131249cd1fb46efe100ae6c2d54d7ce95d09e422 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 23 May 2022 17:24:19 +0200 Subject: [PATCH 238/908] [InstCombine] Add tests for recursive and/or of icmp folds (NFC) Add variations with bitwise and logical and/or, as well as commuted operands. --- .../Transforms/InstCombine/and-or-icmps.ll | 517 ++++++++++++++++++ 1 file changed, 517 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index ad6bf63d584ca0..8ba8586dcbb5c9 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -1276,3 +1276,520 @@ define i1 @is_ascii_alphabetic_inverted(i32 %char) { %logical = select i1 %cmp1, i1 %cmp2, i1 false ret i1 %logical } + +define i1 @bitwise_and_bitwise_and_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_and_bitwise_and_icmps( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c1, %c2 + %and2 = and i1 %and1, %c3 + ret i1 %and2 +} + +define i1 @bitwise_and_bitwise_and_icmps_comm1(i8 %x, i8 %y) { +; 
CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c1, %c2 + %and2 = and i1 %c3, %and1 + ret i1 %and2 +} + +define i1 @bitwise_and_bitwise_and_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c2, %c1 + %and2 = and i1 %and1, %c3 + ret i1 %and2 +} + +define i1 @bitwise_and_bitwise_and_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm3( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c2, %c1 + %and2 = and i1 %c3, %and1 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_and_logical_and_icmps( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c1, i1 %c2, i1 false + %and2 = and i1 %and1, %c3 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_icmps_comm1(i8 %x, i8 %y) { +; CHECK-LABEL: 
@bitwise_and_logical_and_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[C3]], [[AND1]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c1, i1 %c2, i1 false + %and2 = and i1 %c3, %and1 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_and_logical_and_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c2, i1 %c1, i1 false + %and2 = and i1 %and1, %c3 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_and_logical_and_icmps_comm3( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[C3]], [[AND1]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c2, i1 %c1, i1 false + %and2 = and i1 %c3, %and1 + ret i1 %and2 +} + +define i1 @logical_and_bitwise_and_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_bitwise_and_icmps( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: 
ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c1, %c2 + %and2 = select i1 %and1, i1 %c3, i1 false + ret i1 %and2 +} + +define i1 @logical_and_bitwise_and_icmps_comm1(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_bitwise_and_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i1 [[C1]], [[C2]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[C3]], i1 [[AND1]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c1, %c2 + %and2 = select i1 %c3, i1 %and1, i1 false + ret i1 %and2 +} + +define i1 @logical_and_bitwise_and_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_bitwise_and_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c2, %c1 + %and2 = select i1 %and1, i1 %c3, i1 false + ret i1 %and2 +} + +define i1 @logical_and_bitwise_and_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_bitwise_and_icmps_comm3( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i1 [[C2]], [[C1]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[C3]], i1 [[AND1]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = and i1 %c2, %c1 + %and2 = select i1 %c3, i1 %and1, i1 false + ret i1 %and2 +} + +define i1 @logical_and_logical_and_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_logical_and_icmps( +; CHECK-NEXT: [[C1:%.*]] 
= icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[C3]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c1, i1 %c2, i1 false + %and2 = select i1 %and1, i1 %c3, i1 false + ret i1 %and2 +} + +define i1 @logical_and_logical_and_icmps_comm1(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_logical_and_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C3]], i1 [[C1]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[TMP1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c1, i1 %c2, i1 false + %and2 = select i1 %c3, i1 %and1, i1 false + ret i1 %and2 +} + +define i1 @logical_and_logical_and_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_logical_and_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c2, i1 %c1, i1 false + %and2 = select i1 %and1, i1 %c3, i1 false + ret i1 %and2 +} + +define i1 @logical_and_logical_and_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_and_logical_and_icmps_comm3( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[TMP1]], i1 
[[C1]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp slt i8 %x, 1 + %c3 = icmp sgt i8 %x, -1 + %and1 = select i1 %c2, i1 %c1, i1 false + %and2 = select i1 %c3, i1 %and1, i1 false + ret i1 %and2 +} + +define i1 @bitwise_or_bitwise_or_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_bitwise_or_icmps( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c1, %c2 + %or2 = or i1 %or1, %c3 + ret i1 %or2 +} + +define i1 @bitwise_or_bitwise_or_icmps_comm1(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_bitwise_or_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c1, %c2 + %or2 = or i1 %c3, %or1 + ret i1 %or2 +} + +define i1 @bitwise_or_bitwise_or_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_bitwise_or_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c2, %c1 + %or2 = or i1 %or1, %c3 + ret i1 %or2 +} + +define i1 @bitwise_or_bitwise_or_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_bitwise_or_icmps_comm3( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c2, %c1 
+ %or2 = or i1 %c3, %or1 + ret i1 %or2 +} + +define i1 @bitwise_or_logical_or_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_logical_or_icmps( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] +; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c1, i1 true, i1 %c2 + %or2 = or i1 %or1, %c3 + ret i1 %or2 +} + +define i1 @bitwise_or_logical_or_icmps_comm1(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] +; CHECK-NEXT: [[OR2:%.*]] = or i1 [[C3]], [[OR1]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c1, i1 true, i1 %c2 + %or2 = or i1 %c3, %or1 + ret i1 %or2 +} + +define i1 @bitwise_or_logical_or_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, i1 [[C1]] +; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c2, i1 true, i1 %c1 + %or2 = or i1 %or1, %c3 + ret i1 %or2 +} + +define i1 @bitwise_or_logical_or_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm3( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 
+; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, i1 [[C1]] +; CHECK-NEXT: [[OR2:%.*]] = or i1 [[C3]], [[OR1]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c2, i1 true, i1 %c1 + %or2 = or i1 %c3, %or1 + ret i1 %or2 +} + +define i1 @logical_or_bitwise_or_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_bitwise_or_icmps( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c1, %c2 + %or2 = select i1 %or1, i1 true, i1 %c3 + ret i1 %or2 +} + +define i1 @logical_or_bitwise_or_icmps_comm1(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_bitwise_or_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[C1]], [[C2]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[C3]], i1 true, i1 [[OR1]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c1, %c2 + %or2 = select i1 %c3, i1 true, i1 %or1 + ret i1 %or2 +} + +define i1 @logical_or_bitwise_or_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_bitwise_or_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c2, %c1 + %or2 = select i1 %or1, i1 true, i1 %c3 + ret i1 %or2 +} + +define i1 @logical_or_bitwise_or_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_bitwise_or_icmps_comm3( +; 
CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[C2]], [[C1]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[C3]], i1 true, i1 [[OR1]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = or i1 %c2, %c1 + %or2 = select i1 %c3, i1 true, i1 %or1 + ret i1 %or2 +} + +define i1 @logical_or_logical_or_icmps(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_logical_or_icmps( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[OR1]], i1 true, i1 [[C3]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c1, i1 true, i1 %c2 + %or2 = select i1 %or1, i1 true, i1 %c3 + ret i1 %or2 +} + +define i1 @logical_or_logical_or_icmps_comm1(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_logical_or_icmps_comm1( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C3]], i1 true, i1 [[C1]] +; CHECK-NEXT: [[OR2:%.*]] = or i1 [[TMP1]], [[C2]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c1, i1 true, i1 %c2 + %or2 = select i1 %c3, i1 true, i1 %or1 + ret i1 %or2 +} + +define i1 @logical_or_logical_or_icmps_comm2(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_logical_or_icmps_comm2( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, i1 
[[C1]] +; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c2, i1 true, i1 %c1 + %or2 = select i1 %or1, i1 true, i1 %c3 + ret i1 %or2 +} + +define i1 @logical_or_logical_or_icmps_comm3(i8 %x, i8 %y) { +; CHECK-LABEL: @logical_or_logical_or_icmps_comm3( +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[C1]] +; CHECK-NEXT: ret i1 [[OR2]] +; + %c1 = icmp eq i8 %y, 42 + %c2 = icmp sge i8 %x, 1 + %c3 = icmp sle i8 %x, -1 + %or1 = select i1 %c2, i1 true, i1 %c1 + %or2 = select i1 %c3, i1 true, i1 %or1 + ret i1 %or2 +} + From f45c1e436e47f18cbef83b1dffa15fb7f234b357 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 23 May 2022 17:29:33 +0200 Subject: [PATCH 239/908] [InstCombine] Change operand order in recursive and/or of icmps fold The order obviously doesn't matter for bitwise and/or, but would matter for logical and/or, so change it to preserve the original order. 
--- .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 8 ++++---- llvm/test/Transforms/InstCombine/and-or-icmps.ll | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index f442cf10bc243c..107a57f409a92f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1995,7 +1995,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(X, Res)); } if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) @@ -2003,7 +2003,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(X, Res)); } } @@ -2794,7 +2794,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false)) - return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(X, Res)); } if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) @@ -2802,7 +2802,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false)) - return 
replaceInstUsesWith(I, Builder.CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(X, Res)); } } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 8ba8586dcbb5c9..7f0792da52f63e 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -1281,7 +1281,7 @@ define i1 @bitwise_and_bitwise_and_icmps(i8 %x, i8 %y) { ; CHECK-LABEL: @bitwise_and_bitwise_and_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp eq i8 %y, 42 @@ -1296,7 +1296,7 @@ define i1 @bitwise_and_bitwise_and_icmps_comm1(i8 %x, i8 %y) { ; CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp eq i8 %y, 42 @@ -1409,7 +1409,7 @@ define i1 @logical_and_bitwise_and_icmps(i8 %x, i8 %y) { ; CHECK-LABEL: @logical_and_bitwise_and_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp eq i8 %y, 42 @@ -1539,7 +1539,7 @@ define i1 @bitwise_or_bitwise_or_icmps(i8 %x, i8 %y) { ; CHECK-LABEL: @bitwise_or_bitwise_or_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp eq i8 %y, 42 @@ -1554,7 +1554,7 @@ define i1 
@bitwise_or_bitwise_or_icmps_comm1(i8 %x, i8 %y) { ; CHECK-LABEL: @bitwise_or_bitwise_or_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp eq i8 %y, 42 @@ -1667,7 +1667,7 @@ define i1 @logical_or_bitwise_or_icmps(i8 %x, i8 %y) { ; CHECK-LABEL: @logical_or_bitwise_or_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp eq i8 %y, 42 From 89e663c4f83a6736fc74a01ec48cb4f01210f86f Mon Sep 17 00:00:00 2001 From: Richard Date: Fri, 20 May 2022 17:19:11 -0600 Subject: [PATCH 240/908] [clang-tidy] Improve add_new_check.py to recognize more checks When looking for whether or not a check provides fixits, the script examines the implementation of the check. Some checks are not implemented in source files that correspond one-to-one with the check name, e.g. cert-dcl21-cpp. So if we can't find the check implementation directly from the check name, open up the corresponding module file and look for the class name that is registered with the check. Then consult the file corresponding to the class name. Some checks are derived from a base class that implements fixits. So if we can't find fixits in the implementation file for a check, scrape out the name of its base class. If it's not ClangTidyCheck, then consult the base class implementation to look for fixit support. 
Differential Revision: https://reviews.llvm.org/D126134 Fixes #55630 --- clang-tools-extra/clang-tidy/add_new_check.py | 107 +++++++++++++++--- .../docs/clang-tidy/checks/list.rst | 22 ++-- 2 files changed, 105 insertions(+), 24 deletions(-) diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py index fc839744863143..2e8684ca756dc2 100644 --- a/clang-tools-extra/clang-tidy/add_new_check.py +++ b/clang-tools-extra/clang-tidy/add_new_check.py @@ -158,12 +158,17 @@ def write_implementation(module_path, module, namespace, check_name_camel): 'namespace': namespace}) -# Modifies the module to include the new check. -def adapt_module(module_path, module, check_name, check_name_camel): +# Returns the source filename that implements the module. +def get_module_filename(module_path, module): modulecpp = list(filter( lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))[0] - filename = os.path.join(module_path, modulecpp) + return os.path.join(module_path, modulecpp) + + +# Modifies the module to include the new check. +def adapt_module(module_path, module, check_name, check_name_camel): + filename = get_module_filename(module_path, module) with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() @@ -320,24 +325,100 @@ def update_checks_list(clang_tidy_path): os.listdir(docs_dir))) doc_files.sort() + # We couldn't find the source file from the check name, so try to find the + # class name that corresponds to the check in the module file. 
+ def filename_from_module(module_name, check_name): + module_path = os.path.join(clang_tidy_path, module_name) + if not os.path.isdir(module_path): + return '' + module_file = get_module_filename(module_path, module_name) + if not os.path.isfile(module_file): + return '' + with io.open(module_file, 'r') as f: + code = f.read() + full_check_name = module_name + '-' + check_name + name_pos = code.find('"' + full_check_name + '"') + if name_pos == -1: + return '' + stmt_end_pos = code.find(';', name_pos) + if stmt_end_pos == -1: + return '' + stmt_start_pos = code.rfind(';', 0, name_pos) + if stmt_start_pos == -1: + stmt_start_pos = code.rfind('{', 0, name_pos) + if stmt_start_pos == -1: + return '' + stmt = code[stmt_start_pos+1:stmt_end_pos] + matches = re.search('registerCheck<([^>:]*)>\(\s*"([^"]*)"\s*\)', stmt) + if matches and matches[2] == full_check_name: + class_name = matches[1] + if '::' in class_name: + parts = class_name.split('::') + class_name = parts[-1] + class_path = os.path.join(clang_tidy_path, module_name, '..', *parts[0:-1]) + else: + class_path = os.path.join(clang_tidy_path, module_name) + return get_actual_filename(class_path, class_name + '.cpp') + + return '' + + # Examine code looking for a c'tor definition to get the base class name. + def get_base_class(code, check_file): + check_class_name = os.path.splitext(os.path.basename(check_file))[0] + ctor_pattern = check_class_name + '\([^:]*\)\s*:\s*([A-Z][A-Za-z0-9]*Check)\(' + matches = re.search('\s+' + check_class_name + '::' + ctor_pattern, code) + + # The constructor might be inline in the header. 
+ if not matches: + header_file = os.path.splitext(check_file)[0] + '.h' + if not os.path.isfile(header_file): + return '' + with io.open(header_file, encoding='utf8') as f: + code = f.read() + matches = re.search(' ' + ctor_pattern, code) + + if matches and matches[1] != 'ClangTidyCheck': + return matches[1] + return '' + + # Some simple heuristics to figure out if a check has an autofix or not. + def has_fixits(code): + for needle in ['FixItHint', 'ReplacementText', 'fixit', + 'TransformerClangTidyCheck']: + if needle in code: + return True + return False + + # Try to figure out of the check supports fixits. def has_auto_fix(check_name): dirname, _, check_name = check_name.partition('-') - checker_code = get_actual_filename(os.path.join(clang_tidy_path, dirname), + check_file = get_actual_filename(os.path.join(clang_tidy_path, dirname), get_camel_check_name(check_name) + '.cpp') - if not os.path.isfile(checker_code): + if not os.path.isfile(check_file): # Some older checks don't end with 'Check.cpp' - checker_code = get_actual_filename(os.path.join(clang_tidy_path, dirname), + check_file = get_actual_filename(os.path.join(clang_tidy_path, dirname), get_camel_name(check_name) + '.cpp') - if not os.path.isfile(checker_code): - return '' + if not os.path.isfile(check_file): + # Some checks aren't in a file based on the check name. + check_file = filename_from_module(dirname, check_name) + if not check_file or not os.path.isfile(check_file): + return '' - with io.open(checker_code, encoding='utf8') as f: + with io.open(check_file, encoding='utf8') as f: code = f.read() - for needle in ['FixItHint', 'ReplacementText', 'fixit', 'TransformerClangTidyCheck']: - if needle in code: - # Some simple heuristics to figure out if a checker has an autofix or not. 
- return ' "Yes"' + if has_fixits(code): + return ' "Yes"' + + base_class = get_base_class(code, check_file) + if base_class: + base_file = os.path.join(clang_tidy_path, dirname, base_class + '.cpp') + if os.path.isfile(base_file): + with io.open(base_file, encoding='utf8') as f: + code = f.read() + if has_fixits(code): + return ' "Yes"' + return '' def process_doc(doc_file): diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index cfab8a01e42d30..a099e4bdfbc8a3 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -37,19 +37,19 @@ Clang-Tidy Checks `altera-struct-pack-align `_, "Yes" `altera-unroll-loops `_, `android-cloexec-accept `_, "Yes" - `android-cloexec-accept4 `_, + `android-cloexec-accept4 `_, "Yes" `android-cloexec-creat `_, "Yes" `android-cloexec-dup `_, "Yes" - `android-cloexec-epoll-create `_, - `android-cloexec-epoll-create1 `_, - `android-cloexec-fopen `_, - `android-cloexec-inotify-init `_, - `android-cloexec-inotify-init1 `_, - `android-cloexec-memfd-create `_, - `android-cloexec-open `_, + `android-cloexec-epoll-create `_, "Yes" + `android-cloexec-epoll-create1 `_, "Yes" + `android-cloexec-fopen `_, "Yes" + `android-cloexec-inotify-init `_, "Yes" + `android-cloexec-inotify-init1 `_, "Yes" + `android-cloexec-memfd-create `_, "Yes" + `android-cloexec-open `_, "Yes" `android-cloexec-pipe `_, "Yes" - `android-cloexec-pipe2 `_, - `android-cloexec-socket `_, + `android-cloexec-pipe2 `_, "Yes" + `android-cloexec-socket `_, "Yes" `android-comparison-in-temp-failure-retry `_, `boost-use-to-string `_, "Yes" `bugprone-argument-comment `_, "Yes" @@ -105,7 +105,7 @@ Clang-Tidy Checks `bugprone-terminating-continue `_, "Yes" `bugprone-throw-keyword-missing `_, `bugprone-too-small-loop-variable `_, - `bugprone-unchecked-optional-access `_, "Yes" + `bugprone-unchecked-optional-access `_, `bugprone-undefined-memory-manipulation `_, 
`bugprone-undelegated-constructor `_, `bugprone-unhandled-exception-at-new `_, From 02d3499a46cc0f5b52af5f94c951acaad2a6ccdc Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Sat, 21 May 2022 21:30:01 -0700 Subject: [PATCH 241/908] NFC: Silence two warnings for unused bufferization symbols in release mode. Differential Revision: https://reviews.llvm.org/D126182 --- mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp | 2 ++ .../Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 0ef56d2a5ab4dd..c4ce22c4539fa5 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -389,6 +389,8 @@ class BufferizationRewriter : public IRRewriter { DenseSet &toMemrefOps; /// The bufferization options. + /// Used for debug modes. + LLVM_ATTRIBUTE_UNUSED const BufferizationOptions &options; }; } // namespace diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index fec5cb6b56a2b9..c9b941c6a34ce2 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -105,6 +105,8 @@ static FuncAnalysisState &getFuncAnalysisState(AnalysisState &state) { } /// Return the state (phase) of analysis of the FuncOp. +/// Used for debug modes. 
+LLVM_ATTRIBUTE_UNUSED static FuncOpAnalysisState getFuncOpAnalysisState(const AnalysisState &state, func::FuncOp funcOp) { const FuncAnalysisState &funcState = getFuncAnalysisState(state); From a1dcfb75ea8c31dd39edb6bdab6f54cde81cad85 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Tue, 22 Mar 2022 10:49:08 -0700 Subject: [PATCH 242/908] [clang] Module global init mangling C++20 modules require emission of an initializer function, which is called by importers of the module. This implements the mangling for that function. It is the one place the ABI exposes partition names in symbols -- but fortunately only needed by other TUs of that same module. Reviewed By: bruno Differential Revision: https://reviews.llvm.org/D122741 --- clang/include/clang/AST/Mangle.h | 2 ++ clang/lib/AST/ItaniumMangle.cpp | 31 ++++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/AST/Mangle.h b/clang/include/clang/AST/Mangle.h index 9bca97a611d08a..96cc8c90a8e83b 100644 --- a/clang/include/clang/AST/Mangle.h +++ b/clang/include/clang/AST/Mangle.h @@ -199,6 +199,8 @@ class ItaniumMangleContext : public MangleContext { virtual void mangleDynamicStermFinalizer(const VarDecl *D, raw_ostream &) = 0; + virtual void mangleModuleInitializer(const Module *Module, raw_ostream &) = 0; + // This has to live here, otherwise the CXXNameMangler won't have access to // it. 
virtual DiscriminatorOverrideTy getDiscriminatorOverride() const = 0; diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index b380e02fc8f7d0..a9416397c90d6f 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -130,6 +130,8 @@ class ItaniumMangleContextImpl : public ItaniumMangleContext { void mangleLambdaSig(const CXXRecordDecl *Lambda, raw_ostream &) override; + void mangleModuleInitializer(const Module *Module, raw_ostream &) override; + bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { // Lambda closure types are already numbered. if (isLambda(ND)) @@ -438,7 +440,7 @@ class CXXNameMangler { void mangleType(QualType T); void mangleNameOrStandardSubstitution(const NamedDecl *ND); void mangleLambdaSig(const CXXRecordDecl *Lambda); - void mangleModuleNamePrefix(StringRef Name); + void mangleModuleNamePrefix(StringRef Name, bool IsPartition = false); private: @@ -1057,8 +1059,8 @@ void CXXNameMangler::mangleModuleName(const NamedDecl *ND) { // ::= // ::= // ::= W -// ::= W P # not (yet) needed -void CXXNameMangler::mangleModuleNamePrefix(StringRef Name) { +// ::= W P +void CXXNameMangler::mangleModuleNamePrefix(StringRef Name, bool IsPartition) { // ::= S _ auto It = ModuleSubstitutions.find(Name); if (It != ModuleSubstitutions.end()) { @@ -1072,10 +1074,14 @@ void CXXNameMangler::mangleModuleNamePrefix(StringRef Name) { auto Parts = Name.rsplit('.'); if (Parts.second.empty()) Parts.second = Parts.first; - else - mangleModuleNamePrefix(Parts.first); + else { + mangleModuleNamePrefix(Parts.first, IsPartition); + IsPartition = false; + } Out << 'W'; + if (IsPartition) + Out << 'P'; Out << Parts.second.size() << Parts.second; ModuleSubstitutions.insert({Name, SeqID++}); } @@ -6533,6 +6539,21 @@ void ItaniumMangleContextImpl::mangleLambdaSig(const CXXRecordDecl *Lambda, Mangler.mangleLambdaSig(Lambda); } +void ItaniumMangleContextImpl::mangleModuleInitializer(const Module *M, + raw_ostream 
&Out) { + // ::= GI # module initializer function + CXXNameMangler Mangler(*this, Out); + Mangler.getStream() << "_ZGI"; + Mangler.mangleModuleNamePrefix(M->getPrimaryModuleInterfaceName()); + if (M->isModulePartition()) { + // The partition needs including, as partitions can have them too. + auto Partition = M->Name.find(':'); + Mangler.mangleModuleNamePrefix( + StringRef(&M->Name[Partition + 1], M->Name.size() - Partition - 1), + /*IsPartition*/ true); + } +} + ItaniumMangleContext *ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags, bool IsAux) { From 210c4e7fc887accafbeec9facfc70753e35d2e21 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Mon, 23 May 2022 18:10:12 +0200 Subject: [PATCH 243/908] [mlir][bufferization] Fix Python bindings Differential Revision: https://reviews.llvm.org/D126179 --- mlir/python/CMakeLists.txt | 9 ++++ .../mlir/dialects/_bufferization_ops_ext.py | 33 +++----------- mlir/python/mlir/dialects/bufferization.py | 5 +++ .../mlir/python/BUILD.bazel | 44 +++++++++++++++++++ 4 files changed, 65 insertions(+), 26 deletions(-) create mode 100644 mlir/python/mlir/dialects/bufferization.py diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index 3e14a197476de9..d280bf105aef07 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -63,6 +63,15 @@ declare_mlir_dialect_python_bindings( SOURCES_GLOB dialects/async_dialect/*.py DIALECT_NAME async_dialect) +declare_mlir_dialect_python_bindings( + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/BufferizationOps.td + SOURCES + dialects/bufferization.py + dialects/_bufferization_ops_ext.py + DIALECT_NAME bufferization) + declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" diff --git a/mlir/python/mlir/dialects/_bufferization_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_ops_ext.py index 
2414c8b7476be2..c720844af2ebd0 100644 --- a/mlir/python/mlir/dialects/_bufferization_ops_ext.py +++ b/mlir/python/mlir/dialects/_bufferization_ops_ext.py @@ -5,7 +5,7 @@ try: from typing import Sequence, Union from ..ir import * - from ._ods_common import get_default_loc_context as _get_default_loc_context + from ._ods_common import get_default_loc_context from typing import Any, List, Union except ImportError as e: @@ -16,36 +16,17 @@ class AllocTensorOp: """Extends the bufferization.alloc_tensor op.""" def __init__(self, - sizes: Union[Sequence[int], Sequence[Value]], - element_type: Type, + tensor_type: Type, + dynamic_sizes: Sequence[Value], *, loc=None, ip=None): - """Constructs an `alloc_tensor` with either static or dynamic sizes.""" + """Constructs an `alloc_tensor` with static and/or dynamic sizes.""" context = get_default_loc_context(loc) - operands = [] - attributes = {} - # TODO: Refactor the AllocTensorOp to take an element type attribute and - # then use normal result type inference, unifying the Python and C++ side - # with a standard mechanism (versus stashing that in builders). - if sizes and isinstance(sizes[0], Value): - # Dynamic sizes. - operands.extend(sizes) - static_size_ints = [-1] * len(sizes) - result_type = RankedTensorType.get(static_size_ints, element_type) - else: - # Static sizes. 
- result_type = RankedTensorType.get(sizes, element_type) - static_size_ints = sizes - - i64_type = IntegerType.get_signless(64) - attributes["static_sizes"] = ArrayAttr.get( - [IntegerAttr.get(i64_type, s) for s in static_size_ints], - context=context) op = self.build_generic( - results=[result_type], - operands=operands, - attributes=attributes, + results=[tensor_type], + operands=dynamic_sizes, + attributes={}, loc=loc, ip=ip) OpView.__init__(self, op) diff --git a/mlir/python/mlir/dialects/bufferization.py b/mlir/python/mlir/dialects/bufferization.py new file mode 100644 index 00000000000000..2121122f12764e --- /dev/null +++ b/mlir/python/mlir/dialects/bufferization.py @@ -0,0 +1,5 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from ._bufferization_ops_gen import * diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index 18436f2d6698c7..c3cba2f044cf46 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -252,6 +252,50 @@ filegroup( ], ) +##---------------------------------------------------------------------------## +# Bufferization dialect. 
+##---------------------------------------------------------------------------## + +td_library( + name = "BufferizationOpsPyTdFiles", + srcs = [ + "//mlir:include/mlir/Bindings/Python/Attributes.td", + ], + includes = ["../include"], + deps = [ + "//mlir:BufferizableOpInterfaceTdFiles", + "//mlir:BufferizationOpsTdFiles", + "//mlir:OpBaseTdFiles", + ], +) + +gentbl_filegroup( + name = "BufferizationOpsPyGen", + tbl_outs = [ + ( + [ + "-gen-python-op-bindings", + "-bind-dialect=bufferization", + ], + "mlir/dialects/_bufferization_ops_gen.py", + ), + ], + tblgen = "//mlir:mlir-tblgen", + td_file = "mlir/dialects/BufferizationOps.td", + deps = [ + ":BufferizationOpsPyTdFiles", + ], +) + +filegroup( + name = "BufferizationOpsPyFiles", + srcs = [ + "mlir/dialects/_bufferization_ops_ext.py", + "mlir/dialects/bufferization.py", + ":BufferizationOpsPyGen", + ], +) + ##---------------------------------------------------------------------------## # ControlFlow dialect. ##---------------------------------------------------------------------------## From c30a8c80837608ccb190aecb84a723ca00dd87df Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 23 May 2022 09:07:54 -0700 Subject: [PATCH 244/908] [lldb] Fix should_skip_simulator_test decorator Currently simulator tests get skipped when the reported platform is macosx rather than darwin. Update the decorator to match both. 
--- lldb/packages/Python/lldbsuite/test/decorators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 8f636024abe916..467530fdfd7e74 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -378,8 +378,8 @@ def apple_simulator_test(platform): The SDK identifiers for simulators are iphonesimulator, appletvsimulator, watchsimulator """ def should_skip_simulator_test(): - if lldbplatformutil.getHostPlatform() != 'darwin': - return "simulator tests are run only on darwin hosts" + if lldbplatformutil.getHostPlatform() not in ['darwin', 'macosx']: + return "simulator tests are run only on darwin hosts." try: DEVNULL = open(os.devnull, 'w') output = subprocess.check_output(["xcodebuild", "-showsdks"], stderr=DEVNULL).decode("utf-8") From 598c5ddba6b0fd1e7226ffd4dc34e44ec7c7c513 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Mon, 23 May 2022 18:37:26 +0200 Subject: [PATCH 245/908] [mlir][bufferize] Support fully dynamic layout maps in BufferResultsToOutParams Also fixes integration of the pass into One-Shot Bufferize and adds additional test cases. BufferResultsToOutParams can be used with "identity-layout-map" and "fully-dynamic-layout-map". "infer-layout-map" is not supported. 
Differential Revision: https://reviews.llvm.org/D125636 --- .../Transforms/BufferResultsToOutParams.cpp | 65 +++++++++--- .../Bufferization/Transforms/Bufferize.cpp | 25 +++-- .../one-shot-module-bufferize-out-params.mlir | 99 ++++++++++++++++++- 3 files changed, 162 insertions(+), 27 deletions(-) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp index c9018280ea23fc..4e189b4fd89d66 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp @@ -15,20 +15,50 @@ using namespace mlir; +/// Return `true` if the given MemRef type has a fully dynamic layout. +static bool hasFullyDynamicLayoutMap(MemRefType type) { + int64_t offset; + SmallVector strides; + if (failed(getStridesAndOffset(type, strides, offset))) + return false; + if (!llvm::all_of(strides, [](int64_t stride) { + return ShapedType::isDynamicStrideOrOffset(stride); + })) + return false; + if (!ShapedType::isDynamicStrideOrOffset(offset)) + return false; + return true; +} + +/// Return `true` if the given MemRef type has a static identity layout (i.e., +/// no layout). +static bool hasStaticIdentityLayout(MemRefType type) { + return type.getLayout().isIdentity(); +} + // Updates the func op and entry block. // // Any args appended to the entry block are added to `appendedEntryArgs`. -static void updateFuncOp(func::FuncOp func, - SmallVectorImpl &appendedEntryArgs) { +static LogicalResult +updateFuncOp(func::FuncOp func, + SmallVectorImpl &appendedEntryArgs) { auto functionType = func.getFunctionType(); // Collect information about the results will become appended arguments. 
SmallVector erasedResultTypes; BitVector erasedResultIndices(functionType.getNumResults()); for (const auto &resultType : llvm::enumerate(functionType.getResults())) { - if (resultType.value().isa()) { + if (auto memrefType = resultType.value().dyn_cast()) { + if (!hasStaticIdentityLayout(memrefType) && + !hasFullyDynamicLayoutMap(memrefType)) { + // Only buffers with static identity layout can be allocated. These can + // be casted to memrefs with fully dynamic layout map. Other layout maps + // are not supported. + return func->emitError() + << "cannot create out param for result with unsupported layout"; + } erasedResultIndices.set(resultType.index()); - erasedResultTypes.push_back(resultType.value()); + erasedResultTypes.push_back(memrefType); } } @@ -51,10 +81,12 @@ static void updateFuncOp(func::FuncOp func, // Add the new arguments to the entry block if the function is not external. if (func.isExternal()) - return; + return success(); Location loc = func.getLoc(); for (Type type : erasedResultTypes) appendedEntryArgs.push_back(func.front().addArgument(type, loc)); + + return success(); } // Updates all ReturnOps in the scope of the given func::FuncOp by either @@ -66,7 +98,7 @@ static void updateReturnOps(func::FuncOp func, SmallVector copyIntoOutParams; SmallVector keepAsReturnOperands; for (Value operand : op.getOperands()) { - if (operand.getType().isa()) + if (operand.getType().isa()) copyIntoOutParams.push_back(operand); else keepAsReturnOperands.push_back(operand); @@ -88,7 +120,7 @@ static LogicalResult updateCalls(ModuleOp module) { SmallVector replaceWithNewCallResults; SmallVector replaceWithOutParams; for (OpResult result : op.getResults()) { - if (result.getType().isa()) + if (result.getType().isa()) replaceWithOutParams.push_back(result); else replaceWithNewCallResults.push_back(result); @@ -96,14 +128,24 @@ static LogicalResult updateCalls(ModuleOp module) { SmallVector outParams; OpBuilder builder(op); for (Value memref : replaceWithOutParams) 
{ - if (!memref.getType().cast().hasStaticShape()) { + if (!memref.getType().cast().hasStaticShape()) { op.emitError() << "cannot create out param for dynamically shaped result"; didFail = true; return; } - Value outParam = builder.create( - op.getLoc(), memref.getType().cast()); + auto memrefType = memref.getType().cast(); + auto allocType = + MemRefType::get(memrefType.getShape(), memrefType.getElementType(), + AffineMap(), memrefType.getMemorySpaceAsInt()); + Value outParam = builder.create(op.getLoc(), allocType); + if (!hasStaticIdentityLayout(memrefType)) { + // Layout maps are already checked in `updateFuncOp`. + assert(hasFullyDynamicLayoutMap(memrefType) && + "layout map not supported"); + outParam = + builder.create(op.getLoc(), memrefType, outParam); + } memref.replaceAllUsesWith(outParam); outParams.push_back(outParam); } @@ -126,7 +168,8 @@ LogicalResult mlir::bufferization::promoteBufferResultsToOutParams(ModuleOp module) { for (auto func : module.getOps()) { SmallVector appendedEntryArgs; - updateFuncOp(func, appendedEntryArgs); + if (failed(updateFuncOp(func, appendedEntryArgs))) + return failure(); if (func.isExternal()) continue; updateReturnOps(func, appendedEntryArgs); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index c4ce22c4539fa5..9fee93edee1a01 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -307,19 +307,18 @@ bufferization::finalizeBuffers(Operation *op, &hasLeakingAllocs))) return failure(); - if (hasLeakingAllocs) { - // Promote returned buffers to "out" parameters. - // TODO: Pass options to support custom dealloc ops. 
- if (options.promoteBufferResultsToOutParams && isa(op) && - failed(promoteBufferResultsToOutParams(cast(op)))) - return failure(); - - // Create deallocation ops for all "leaking buffers" and all buffer - // allocations that were added during the above promotion process. - // TODO: Pass options to support custom dealloc ops. - if (options.createDeallocs && failed(deallocateBuffers(op))) - return failure(); - } + // Promote returned buffers to "out" parameters. + // TODO: Pass options to support custom dealloc ops. + if (options.promoteBufferResultsToOutParams && isa(op) && + failed(promoteBufferResultsToOutParams(cast(op)))) + return failure(); + + // Create deallocation ops for all "leaking buffers" and all buffer + // allocations that were added during the above promotion process. + // TODO: Pass options to support custom dealloc ops. + if (hasLeakingAllocs && options.createDeallocs && + failed(deallocateBuffers(op))) + return failure(); // Deallocate all remaining buffers at the end of their parent blocks. 
if (failed(createAllocDeallocOps(op, options))) diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir index 517f71b0aef417..236b961f78a6b4 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir @@ -1,11 +1,19 @@ -// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs promote-buffer-results-to-out-params" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs promote-buffer-results-to-out-params function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs promote-buffer-results-to-out-params function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT +// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=infer-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE + +// Note: function-boundary-type-conversion=infer-layout-map with +// promote-buffer-results-to-out-params is an unsupported combination. // Note: This bufferization is not very efficient yet, but it works. // CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> // CHECK-LABEL: func @callee( // CHECK-SAME: %[[arg0:.*]]: memref<5xf32, #[[$map1]]>, -// CHECK-SAME: %[[arg1:.*]]: memref<5xf32>) { +// CHECK-SAME: %[[arg1:.*]]: memref<5xf32, #[[$map1]]>) { +// This alloc is not needed, but it is inserted due to the out-of-place +// bufferization of the tensor.insert. With a better layering of the out param +// promotion pass, this alloc could be avoided. 
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32> // CHECK: memref.copy %[[arg0]], %[[alloc]] // CHECK: memref.store %{{.*}}, %[[alloc]] @@ -13,21 +21,46 @@ // CHECK: memref.dealloc %[[alloc]] // CHECK: return // CHECK: } + +// CHECK-NO-LAYOUT-LABEL: func @callee( +// CHECK-NO-LAYOUT-SAME: %[[arg0:.*]]: memref<5xf32>, +// CHECK-NO-LAYOUT-SAME: %[[arg1:.*]]: memref<5xf32>) { +// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32> +// CHECK-NO-LAYOUT: memref.copy %[[arg0]], %[[alloc]] +// CHECK-NO-LAYOUT: memref.store {{.*}}, %[[alloc]] +// CHECK-NO-LAYOUT: memref.copy %[[alloc]], %[[arg1]] +// CHECK-NO-LAYOUT: memref.dealloc %[[alloc]] + +// CHECK-BASELINE: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> +// CHECK-BASELINE-LABEL: func @callee( +// CHECK-BASELINE-SAME: %[[arg0:.*]]: memref<5xf32, #[[$map1]]>) -> memref<5xf32> { +// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32> +// CHECK-BASELINE: memref.copy %[[arg0]], %[[alloc]] +// CHECK-BASELINE: memref.store {{.*}}, %[[alloc]] +// CHECK-BASELINE: return %[[alloc]] func.func @callee(%t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) { %c0 = arith.constant 0 : index %cst = arith.constant 8.0 : f32 + // This must bufferize out-of-place. %1 = tensor.insert %cst into %t[%c0] : tensor<5xf32> + // Instead of returning %1, copy into new out param. %t will disappear + // entirely because the buffer is equivalent to a bbArg. 
return %t, %1 : tensor<5xf32>, tensor<5xf32> } // CHECK: func @main(%[[arg0:.*]]: memref<5xf32, #[[$map1]]>) -> (f32, f32) { // CHECK: %[[alloc:.*]] = memref.alloc() : memref<5xf32> -// CHECK: call @callee(%[[arg0]], %[[alloc]]) +// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<5xf32> to memref<5xf32, #[[$map1]]> +// CHECK: call @callee(%[[arg0]], %[[casted]]) // CHECK: %[[l1:.*]] = memref.load %[[arg0]] // CHECK: %[[l2:.*]] = memref.load %[[alloc]] // CHECK: memref.dealloc %[[alloc]] // CHECK: return %[[l1]], %[[l2]] // CHECK: } + +// CHECK-NO-LAYOUT-LABEL: func @main(%{{.*}}: memref<5xf32>) -> (f32, f32) { +// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<5xf32> +// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]]) func.func @main(%t: tensor<5xf32>) -> (f32, f32) { %c0 = arith.constant 0 : index %0, %1 = func.call @callee(%t) @@ -37,3 +70,63 @@ func.func @main(%t: tensor<5xf32>) -> (f32, f32) { return %2, %3 : f32, f32 } +// ----- + +// CHECK: #[[$map2a:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> +// CHECK: #[[$map2b:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 20 + s0 + d1)> +// CHECK-LABEL: func @callee( +// CHECK-SAME: %{{.*}}: index, +// CHECK-SAME: %[[r:.*]]: memref<2x5xf32, #[[$map2a]]>) { +// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32> +// CHECK: %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, #[[$map2b]]> +// CHECK: memref.copy %[[subview]], %[[r]] +// CHECK: memref.dealloc %[[alloc]] + +// CHECK-NO-LAYOUT-LABEL: func @callee( +// CHECK-NO-LAYOUT-SAME: %{{.*}}: index, +// CHECK-NO-LAYOUT-SAME: %[[r:.*]]: memref<2x5xf32>) { +// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32> +// CHECK-NO-LAYOUT: %[[subview:.*]] = memref.subview %[[alloc]] +// Note: This alloc is not needed, but it is inserted before the returned buffer +// is promoted to an out param to reconcile mismatching layout maps on return +// value and function 
signature. +// CHECK-NO-LAYOUT: %[[alloc2:.*]] = memref.alloc() : memref<2x5xf32> +// CHECK-NO-LAYOUT: memref.copy %[[subview]], %[[alloc2]] +// CHECK-NO-LAYOUT: memref.dealloc %[[alloc]] +// CHECK-NO-LAYOUT: memref.copy %[[alloc2]], %[[r]] +// CHECK-NO-LAYOUT: memref.dealloc %[[alloc2]] + +// CHECK-BASELINE: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 20 + s0 + d1)> +// CHECK-BASELINE-LABEL: func @callee( +// CHECK-BASELINE-SAME: %{{.*}}: index) -> memref<2x5xf32, #[[$map2]]> { +// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32> +// CHECK-BASELINE: %[[subview:.*]] = memref.subview %[[alloc]] +// CHECK-BASELINE: return %[[subview]] +func.func @callee(%idx: index) -> tensor<2x5xf32> { + %0 = bufferization.alloc_tensor() : tensor<10x20xf32> + %1 = tensor.extract_slice %0[%idx, %idx][2, 5][1, 1] : tensor<10x20xf32> to tensor<2x5xf32> + return %1 : tensor<2x5xf32> +} + +// CHECK: func @main( +// CHECK: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32> +// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<2x5xf32> to memref<2x5xf32, #[[$map2a]]> +// CHECK: call @callee(%{{.*}}, %[[casted]]) +// CHECK: memref.load %[[alloc]] +// CHECK: memref.dealloc %[[alloc]] + +// CHECK-NO-LAYOUT: func @main( +// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32> +// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]]) +// CHECK-NO-LAYOUT: memref.load %[[alloc]] +// CHECK-NO-LAYOUT: memref.dealloc + +// CHECK-BASELINE: func @main( +// CHECK-BASELINE: %[[call:.*]] = call @callee +// CHECK-BASELINE: memref.load %[[call]] +func.func @main(%idx: index) -> f32 { + %c0 = arith.constant 0 : index + %0 = func.call @callee(%idx) : (index) -> (tensor<2x5xf32>) + %1 = tensor.extract %0[%c0, %c0] : tensor<2x5xf32> + return %1 : f32 +} From 334f63e7c39f298611d27a2ae27d31e4431be10f Mon Sep 17 00:00:00 2001 From: Christopher Bate Date: Fri, 20 May 2022 14:41:55 -0600 Subject: [PATCH 246/908] [mlir][NvGpuToNVVM] Fix missing i4 support for 
nvgpu.mma.sync This change adds missing support for the i4 data type. Tests are added to ensure proper lowering of an nvgpu.mma.sync operation targeting the 16x8x64xi4 and 16x8x32xi4 MMA variants in the NVVM dialect. Differential Revision: https://reviews.llvm.org/D126092 --- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 7 +++ .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 48 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index 5152beaa5f61e0..ccf85915e49fab 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -145,7 +145,9 @@ static SmallVector unpackOperandVector(RewriterBase &rewriter, Type f64Ty = rewriter.getF64Type(); Type f32Ty = rewriter.getF32Type(); Type i8Ty = rewriter.getI8Type(); + Type i4Ty = rewriter.getIntegerType(4); Type i8x4Ty = LLVM::getFixedVectorType(i8Ty, 4); + Type i4x8Ty = LLVM::getFixedVectorType(i4Ty, 8); Type f32x1Ty = LLVM::getFixedVectorType(f32Ty, 1); auto arrayTy = operand.getType().cast(); @@ -156,6 +158,7 @@ static SmallVector unpackOperandVector(RewriterBase &rewriter, // For 4xi8 vectors, the intrinsic expects these to be provided as i32 // scalar types. 
if (arrayTy.getElementType() == i8x4Ty || + arrayTy.getElementType() == i4x8Ty || (arrayTy.getElementType() == f32x1Ty && operandPtxType == NVVM::MMATypes::tf32)) { result.push_back( @@ -281,6 +284,10 @@ struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern { ptxTypeA = NVVM::MMATypes::s8; ptxTypeB = NVVM::MMATypes::s8; overflow = NVVM::MMAIntOverflow::satfinite; + } else if (aType.getElementType().isInteger(4)) { + ptxTypeA = NVVM::MMATypes::s4; + ptxTypeB = NVVM::MMATypes::s4; + overflow = NVVM::MMAIntOverflow::satfinite; } else if (aType.getElementType().isF16()) { ptxTypeA = NVVM::MMATypes::f16; ptxTypeB = NVVM::MMATypes::f16; diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 249cb48944c88d..7bd02b7413117b 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -102,6 +102,54 @@ func.func @m16n8k32_int8(%arg0: vector<4x4xi8>, %arg1: vector<2x4xi8>, %arg2: ve // ----- +// CHECK-LABEL: @m16n8k32_i4 +func.func @m16n8k32_i4(%arg0: vector<2x8xi4>, %arg1: vector<1x8xi4>, %arg2: vector<2x2xi32>) -> vector<2x2xi32> { + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<1 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + // CHECK: [[d:%.+]] = nvvm.mma.sync + // CHECK-SAME: intOverflowBehavior = #nvvm.mma_int_overflow + // CHECK-SAME: multiplicandAPtxType = #nvvm.mma_type + // CHECK-SAME: multiplicandBPtxType = 
#nvvm.mma_type + // CHECK-SAME: shape = {k = 32 : i32, m = 16 : i32, n = 8 : i32} + %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 32]} : (vector<2x8xi4>, vector<1x8xi4>, vector<2x2xi32>) -> vector<2x2xi32> + return %d : vector<2x2xi32> +} + +// ----- + +// CHECK-LABEL: @m16n8k64_i4 +func.func @m16n8k64_i4(%arg0: vector<4x8xi4>, %arg1: vector<2x8xi4>, %arg2: vector<2x2xi32>) -> vector<2x2xi32> { + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<4 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<4 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<4 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<4 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<8xi4>> + // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32 + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + // CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + // CHECK: [[d:%.+]] = nvvm.mma.sync + // CHECK-SAME: intOverflowBehavior = #nvvm.mma_int_overflow + // CHECK-SAME: multiplicandAPtxType = #nvvm.mma_type + // CHECK-SAME: multiplicandBPtxType = #nvvm.mma_type + // CHECK-SAME: shape = {k = 64 : i32, m = 16 : i32, n = 8 : i32} + %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 64]} : (vector<4x8xi4>, vector<2x8xi4>, vector<2x2xi32>) -> vector<2x2xi32> + return %d : vector<2x2xi32> +} + +// ----- + // CHECK-LABEL: @m8n8k4_f64 func.func @m8n8k4_f64(%arg0: 
vector<1x1xf64>, %arg1: vector<1x1xf64>, %arg2: vector<1x2xf64>) -> vector<1x2xf64> { // CHECK: llvm.extractvalue From 82c85bf38e9d0f5c5f81a6e8e7ba9359d9dba906 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Mon, 23 May 2022 18:49:45 +0200 Subject: [PATCH 247/908] [mlir][bufferize][NFC] Update One-Shot Bufferize pass documentation Differential Revision: https://reviews.llvm.org/D125637 --- .../Bufferization/Transforms/Passes.td | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td index 22ddda8bd8ca8e..9d99e960e3083f 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -175,11 +175,13 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> { example, `tensor.generate` is not in destination-passing style and always results in a new buffer allocation. - One-Shot Bufferize deallocates all buffers that it allocates. Returning or - yielding newly allocated buffers from a block can lead to bad performance - because additional buffer copies would be inserted. By default, such IR is - rejected by One-Shot Bufferize. If performance is not important, such IR can - be allowed with `allow-return-allocs=1`. + One-Shot Bufferize deallocates all buffers that it allocates. Yielding newly + allocated buffers from a block can lead to bad performance because + additional buffer copies are often needed to make sure that every buffer + allocation is also deallocated again. By default, such IR is rejected by + One-Shot Bufferize. Such IR can be allowed with `allow-return-allocs`. + Note that new buffer allocations that are returned from a function can + currently not be deallocated and leak. 
One-Shot Bufferize will by default reject IR that contains non-bufferizable op, i.e., ops that do not implemement BufferizableOpInterface. Such IR can @@ -207,13 +209,6 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> { behavior can be overridden with `unknown-type-conversion`. Valid values are `fully-dynamic-layout-map` and `identity-layout-map`. - Layout maps on function signatures can be controlled with a separate - `function-boundary-type-conversion` option, which can be set to - `infer-layout-map` in addition to the two possible values mentioned above. - When layout maps are referred, function return types may be more precise. - Function argument types cannot be inferred and have fully dynamic layout - maps in that case. - For testing/debugging purposes, `test-analysis-only=1 print-conflicts=1` prints analysis results and explains why an OpOperand was decided to bufferize out-of-place. This is useful for understanding why One-Shot @@ -224,13 +219,30 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> { and supports only simple cases at the moment. In particular: * Recursive or circular function call graphs are not supported. - * If a newly allocated buffer is returned from a function (with - `allow-return-allocs`), the buffer will never be deallocated and leak. - Such IR needs special handling, e.g., allocation hoisting or reference - counting. + * When a returned tensor can be proven to be equivalent to a tensor function + argument, the return value disappears. Instead, the buffer of the tensor + argument is modified in-place. + * Returning non-equivalent tensors is forbidden by default and must be + explicitly activated with `allow-return-allocs`. If such a tensor happens + to bufferize to a new memory allocation, this buffer will never be + deallocated and leak. Such IR needs special handling, e.g., allocation + hoisting or reference counting. 
+ * Non-equivalent returned tensors of fully static size can be promoted to + function arguments with `promote-buffer-results-to-out-params`. In that + case, buffers for such tensors are allocated at each call site. Instead of + returning a buffer, the buffer contents are copied into these allocations. * External functions (without bodies) that return a tensor are not supported. * Function with multiple blocks or multiple ReturnOps are not supported. + * Layout maps on function signatures can be controlled with a separate + `function-boundary-type-conversion` option, which is similar to + `unknown-type-conversion` but supports an additional `infer-layout-map` + option. `fully-dynamic-layout-map` and `identity-layout-map` ensure that + function signatures bufferize to easily predictable types, potentially at + the cost of additional casts and copies, respectively. When layout maps + are inferred, function return types may be more precise, but less + predictable. Function argument types cannot be inferred and always have + fully dynamic layout maps with `infer-layout-map`. One-Shot Bufferize implements the following contract around function calls: The buffer of function arguments is always writable (unless annotated with From 7085cb6011d4593f39c6c3369d1e29ff08edc514 Mon Sep 17 00:00:00 2001 From: Christopher Bate Date: Tue, 17 May 2022 15:42:47 -0600 Subject: [PATCH 248/908] [mlir][NvGpuToNVVM] Fix byte size calculation in async copy lowering AsyncCopyOp lowering converted "size in elements" to "size in bytes" assuming the element type size is at least one byte. This removes that restriction, allowing for types such as i4 and b1 to be handled correctly. 
Differential Revision: https://reviews.llvm.org/D125838 --- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 2 +- .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index ccf85915e49fab..7ee7dce361f30a 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -381,7 +381,7 @@ struct NVGPUAsyncCopyLowering scrPtr); int64_t numElements = adaptor.numElements().getZExtValue(); int64_t sizeInBytes = - (dstMemrefType.getElementTypeBitWidth() / 8) * numElements; + (dstMemrefType.getElementTypeBitWidth() * numElements) / 8; // bypass L1 is only supported for byte sizes of 16, we drop the hint // otherwise. UnitAttr bypassL1 = sizeInBytes == 16 ? adaptor.bypassL1Attr() : UnitAttr(); diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 7bd02b7413117b..8a8d6d5bca06df 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -267,3 +267,28 @@ func.func @async_cp( return } +// ----- + +// CHECK-LABEL: @async_cp_i4( +// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index) +func.func @async_cp_i4( + %src: memref<128x64xi4>, %dst: memref<128x128xi4, 3>, %i : index) -> !nvgpu.device.async.token { + // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64 + // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(128 : index) : i64 + // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64 + // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[IDX1]] : i64 + // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI1]]] : (!llvm.ptr, i64) -> !llvm.ptr + // CHECK-DAG: 
%[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr to !llvm.ptr + // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK-DAG: %[[S2:.*]] = llvm.mlir.constant(64 : index) : i64 + // CHECK-DAG: %[[FI2:.*]] = llvm.mul %[[IDX1]], %[[S2]] : i64 + // CHECK-DAG: %[[FI3:.*]] = llvm.add %[[FI2]], %[[IDX1]] : i64 + // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI3]]] : (!llvm.ptr, i64) -> !llvm.ptr + // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr to !llvm.ptr + // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr to !llvm.ptr + // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16 + %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i], 32 : memref<128x64xi4> to memref<128x128xi4, 3> + return %0 : !nvgpu.device.async.token +} + From 224a8653c98ec39bc8e636523f59b07008faaccf Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 23 May 2022 09:58:54 -0700 Subject: [PATCH 249/908] [llvm-nm][docs] Document -W and -U Latest GNU nm (milestone: 2.39) has added -W/--no-weak and changed -U to mean --defined-only (instead of --unicode=). The changes match our semantics. Close #55297 Reviewed by: jhenderson, keith Differential Revision: https://reviews.llvm.org/D126133 --- llvm/docs/CommandGuide/llvm-nm.rst | 4 ++-- llvm/tools/llvm-nm/Opts.td | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-nm.rst b/llvm/docs/CommandGuide/llvm-nm.rst index 8fc2b214cf05a6..d75cf2e80a3e44 100644 --- a/llvm/docs/CommandGuide/llvm-nm.rst +++ b/llvm/docs/CommandGuide/llvm-nm.rst @@ -145,7 +145,7 @@ OPTIONS Show all symbols, even those usually suppressed. -.. option:: --defined-only +.. option:: --defined-only, -U Print only symbols defined in this file. @@ -197,7 +197,7 @@ OPTIONS Show symbols in the order encountered. -.. option:: --no-weak +.. 
option:: --no-weak, -W Don't print weak symbols. diff --git a/llvm/tools/llvm-nm/Opts.td b/llvm/tools/llvm-nm/Opts.td index 6cb530db72f4df..60ac134269b3bf 100644 --- a/llvm/tools/llvm-nm/Opts.td +++ b/llvm/tools/llvm-nm/Opts.td @@ -77,7 +77,7 @@ def : F<"r", "Alias for --reverse-sort">, Alias; def : F<"S", "Alias for --print-size">, Alias; def : JoinedOrSeparate<["-"], "t">, HelpText<"Alias for --radix">, Alias, MetaVarName<"">; def : F<"u", "Alias for --undefined-only">, Alias; -def : F<"U", "Deprecated alias for --defined-only">, Alias, Flags<[HelpHidden]>; +def : F<"U", "Alias for --defined-only">, Alias; def : F<"v", "Alias for --numeric-sort">, Alias; def : F<"V", "Alias for --version">, Alias; -def : F<"W", "Deprecated alias for --no-weak">, Alias, Flags<[HelpHidden]>; +def : F<"W", "Alias for --no-weak">, Alias; From 46c1f77e160a63726e312c0550157ee451bacd46 Mon Sep 17 00:00:00 2001 From: Jeffrey Tan Date: Tue, 17 May 2022 09:17:26 -0700 Subject: [PATCH 250/908] Add [opt] suffix to optimized stack frame in lldb-vscode To help user identify optimized code This diff adds a "[opt]" suffix to optimized stack frames in lldb-vscode. This provides consistent experience as command line lldb. It also adds a new "optimized" attribute to DAP stack frame object so that it is easy to identify from telemetry than parsing trailing "[opt]". 
Differential Revision: https://reviews.llvm.org/D126013 --- .../API/tools/lldb-vscode/optimized/Makefile | 3 ++ .../optimized/TestVSCode_optimized.py | 35 +++++++++++++++++++ .../API/tools/lldb-vscode/optimized/main.cpp | 16 +++++++++ lldb/tools/lldb-vscode/JSONUtils.cpp | 14 +++++++- 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 lldb/test/API/tools/lldb-vscode/optimized/Makefile create mode 100644 lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py create mode 100644 lldb/test/API/tools/lldb-vscode/optimized/main.cpp diff --git a/lldb/test/API/tools/lldb-vscode/optimized/Makefile b/lldb/test/API/tools/lldb-vscode/optimized/Makefile new file mode 100644 index 00000000000000..685b2606b55a12 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/optimized/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp +CXXFLAGS_EXTRAS := -O3 -glldb +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py b/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py new file mode 100644 index 00000000000000..862fe632a51bca --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py @@ -0,0 +1,35 @@ +""" +Test lldb-vscode variables/stackTrace request for optimized code +""" + +from __future__ import print_function + +import unittest2 +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase + + +class TestVSCode_optimized(lldbvscode_testcase.VSCodeTestCaseBase): + mydir = TestBase.compute_mydir(__file__) + + @skipIfWindows + @skipIfRemote + def test_stack_frame_name(self): + ''' Test optimized frame has special name suffix. 
+ ''' + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = 'main.cpp' + breakpoint_line = line_number(source, '// breakpoint 1') + lines = [breakpoint_line] + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.assertEqual(len(breakpoint_ids), len(lines), + "expect correct number of breakpoints") + self.continue_to_breakpoints(breakpoint_ids) + leaf_frame = self.vscode.get_stackFrame(frameIndex=0) + self.assertTrue(leaf_frame['name'].endswith(' [opt]')) + parent_frame = self.vscode.get_stackFrame(frameIndex=1) + self.assertTrue(parent_frame['name'].endswith(' [opt]')) diff --git a/lldb/test/API/tools/lldb-vscode/optimized/main.cpp b/lldb/test/API/tools/lldb-vscode/optimized/main.cpp new file mode 100644 index 00000000000000..24ace4e4196f55 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/optimized/main.cpp @@ -0,0 +1,16 @@ +#include +#include + +int foo(int x, int y) { + printf("Got input %d, %d\n", x, y); + return x + y + 3; // breakpoint 1 +} + +int main(int argc, char const *argv[]) { + int optimized = argc > 1 ? 
std::stoi(argv[1]) : 0; + + printf("argc: %d, optimized: %d\n", argc, optimized); + int result = foo(argc, 20); + printf("result: %d\n", result); // breakpoint 2 + return 0; +} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 55d734e65fb5d1..cd8d8861d98a17 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -751,7 +751,19 @@ llvm::json::Value CreateStackFrame(lldb::SBFrame &frame) { llvm::json::Object object; int64_t frame_id = MakeVSCodeFrameID(frame); object.try_emplace("id", frame_id); - EmplaceSafeString(object, "name", frame.GetFunctionName()); + + std::string frame_name; + const char *func_name = frame.GetFunctionName(); + if (func_name) + frame_name = func_name; + else + frame_name = ""; + bool is_optimized = frame.GetFunction().GetIsOptimized(); + if (is_optimized) + frame_name += " [opt]"; + EmplaceSafeString(object, "name", frame_name); + object.try_emplace("optimized", is_optimized); + int64_t disasm_line = 0; object.try_emplace("source", CreateSource(frame, disasm_line)); From 0fe220a33179723b7b366c2b398bac3c3b4216ec Mon Sep 17 00:00:00 2001 From: Jeffrey Tan Date: Tue, 17 May 2022 09:21:10 -0700 Subject: [PATCH 251/908] Show error message for optimized variables This fixes an issue that optimized variable error message is not shown to end users in lldb-vscode. 
Differential Revision: https://reviews.llvm.org/D126014 --- .../optimized/TestVSCode_optimized.py | 19 +++++++++++++++++++ lldb/tools/lldb-vscode/JSONUtils.cpp | 5 ++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py b/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py index 862fe632a51bca..a5e40d03abe44d 100644 --- a/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py +++ b/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py @@ -33,3 +33,22 @@ def test_stack_frame_name(self): self.assertTrue(leaf_frame['name'].endswith(' [opt]')) parent_frame = self.vscode.get_stackFrame(frameIndex=1) self.assertTrue(parent_frame['name'].endswith(' [opt]')) + + @skipIfWindows + @skipIfRemote + def test_optimized_variable(self): + ''' Test optimized variable value contains error. + ''' + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = 'main.cpp' + breakpoint_line = line_number(source, '// breakpoint 2') + lines = [breakpoint_line] + # Set breakpoint in the thread function so we can step the threads + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.assertEqual(len(breakpoint_ids), len(lines), + "expect correct number of breakpoints") + self.continue_to_breakpoints(breakpoint_ids) + optimized_variable = self.vscode.get_local_variable('optimized') + + self.assertTrue(optimized_variable['value'].startswith('"; + } else if (!value.empty()) { strm << value; if (!summary.empty()) strm << ' ' << summary; From bc2fe4a0d675e7559de5734df0a4a40170389acc Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 23 May 2022 10:10:08 -0700 Subject: [PATCH 252/908] [RISCV] Add basic fault-first load coverage for VSETVLI insertion Simplified version of a test taken from D123581. 
--- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 267a57c41c1ecc..4e0364e41eb74b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -493,6 +493,34 @@ entry: ret i64 %vl } +; Fault first loads can modify VL. +; TODO: The first and third VSETVLIs are redundant here. +define @vleNff(i64* %str, i64 %n, i64 %x) { +; CHECK-LABEL: vleNff: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vadd.vx v8, v8, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 0, i64 2) + %1 = bitcast i64* %str to * + %2 = tail call { , i64 } @llvm.riscv.vleff.nxv1i64.i64( undef, * %1, i64 %0) + %3 = extractvalue { , i64 } %2, 0 + %4 = extractvalue { , i64 } %2, 1 + %5 = tail call @llvm.riscv.vadd.nxv1i64.i64.i64( %3, %3, i64 %x, i64 %4) + ret %5 +} + +declare { , i64 } @llvm.riscv.vleff.nxv1i64.i64( + , * nocapture, i64) + +declare @llvm.riscv.vmseq.nxv1i64.i64.i64( + , i64, i64) + declare @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( , , @@ -501,6 +529,12 @@ declare @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( i64, i64); +declare @llvm.riscv.vadd.nxv1i64.i64.i64( + , + , + i64, + i64); + declare @llvm.riscv.vfadd.mask.nxv1f64.f64( , , From dbd1ba28a3a435c87eb2a977028ea22e9aabf148 Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Mon, 23 May 2022 10:43:12 -0700 Subject: [PATCH 253/908] [PS5] Disable a test, same as PS4 --- clang/test/Driver/nostdincxx.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/nostdincxx.cpp b/clang/test/Driver/nostdincxx.cpp index dad40eaeddaf80..0d0204fb7366e6 100644 --- 
a/clang/test/Driver/nostdincxx.cpp +++ b/clang/test/Driver/nostdincxx.cpp @@ -5,5 +5,5 @@ // CHECK: file not found #include -// MSVC and PS4 have C++ headers in the same directory as C headers. -// UNSUPPORTED: ms-sdk, ps4 +// MSVC, PS4, PS5 have C++ headers in the same directory as C headers. +// UNSUPPORTED: ms-sdk, ps4, ps5 From 2f2ca30d0abacda7ffd2dd71680e66161cffcbe3 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 23 May 2022 19:53:40 +0200 Subject: [PATCH 254/908] Fix an unused variable warning in no-asserts build mode --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fc0e7d86151103..4df7aaca736235 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6718,6 +6718,7 @@ static T *performExtractsShuffleAction( Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; } auto *V = ValueSelect::get(Base); + (void)V; assert((!V || GetVF(V) == Mask.size()) && "Expected base vector of VF number of elements."); Prev = Action(Mask, {nullptr, Res.first}); From 0da230ff44399f74dc9bf05c8f212e7fc22079fa Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Wed, 18 May 2022 16:31:49 -0700 Subject: [PATCH 255/908] [lldb] Improve formatting of dlopen error messages (NFC) Ensure there's a space between "utility" and "function", and also makes it easier to grep/search for "utility function". While making this change, I also re-formatted the other dlopen error messages (with clang-format). This fixes other instances of spaces missing between words, and makes each of these strings fit a single line, making them greppable. 
Differential Revision: https://reviews.llvm.org/D126078 --- .../Plugins/Platform/POSIX/PlatformPOSIX.cpp | 65 ++++++++++--------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp index 9a2f0972f99936..fb0d39ea6f866c 100644 --- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp +++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp @@ -613,9 +613,9 @@ PlatformPOSIX::MakeLoadImageUtilityFunction(ExecutionContext &exe_ctx, std::move(expr), dlopen_wrapper_name, eLanguageTypeC_plus_plus, exe_ctx); if (!utility_fn_or_error) { std::string error_str = llvm::toString(utility_fn_or_error.takeError()); - error.SetErrorStringWithFormat("dlopen error: could not create utility" - "function: %s", - error_str.c_str()); + error.SetErrorStringWithFormat( + "dlopen error: could not create utility function: %s", + error_str.c_str()); return nullptr; } std::unique_ptr dlopen_utility_func_up = @@ -650,8 +650,9 @@ PlatformPOSIX::MakeLoadImageUtilityFunction(ExecutionContext &exe_ctx, do_dlopen_function = dlopen_utility_func_up->MakeFunctionCaller( clang_void_pointer_type, arguments, exe_ctx.GetThreadSP(), utility_error); if (utility_error.Fail()) { - error.SetErrorStringWithFormat("dlopen error: could not make function" - "caller: %s", utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not make function caller: %s", + utility_error.AsCString()); return nullptr; } @@ -717,8 +718,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, permissions, utility_error); if (path_addr == LLDB_INVALID_ADDRESS) { - error.SetErrorStringWithFormat("dlopen error: could not allocate memory" - "for path: %s", utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not allocate memory for path: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -730,8 +732,9 @@ uint32_t 
PlatformPOSIX::DoLoadImage(lldb_private::Process *process, process->WriteMemory(path_addr, path.c_str(), path_len, utility_error); if (utility_error.Fail()) { - error.SetErrorStringWithFormat("dlopen error: could not write path string:" - " %s", utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not write path string: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -742,8 +745,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, permissions, utility_error); if (utility_error.Fail()) { - error.SetErrorStringWithFormat("dlopen error: could not allocate memory" - "for path: %s", utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not allocate memory for path: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -791,9 +795,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, permissions, utility_error); if (path_array_addr == LLDB_INVALID_ADDRESS) { - error.SetErrorStringWithFormat("dlopen error: could not allocate memory" - "for path array: %s", - utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not allocate memory for path array: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -807,8 +811,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, path_array.size(), utility_error); if (utility_error.Fail()) { - error.SetErrorStringWithFormat("dlopen error: could not write path array:" - " %s", utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not write path array: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } // Now make spaces in the target for the buffer. 
We need to add one for @@ -819,9 +824,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, permissions, utility_error); if (buffer_addr == LLDB_INVALID_ADDRESS) { - error.SetErrorStringWithFormat("dlopen error: could not allocate memory" - "for buffer: %s", - utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not allocate memory for buffer: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -844,9 +849,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, func_args_addr, arguments, diagnostics)) { - error.SetErrorStringWithFormat("dlopen error: could not write function " - "arguments: %s", - diagnostics.GetString().c_str()); + error.SetErrorStringWithFormat( + "dlopen error: could not write function arguments: %s", + diagnostics.GetString().c_str()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -886,9 +891,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, ExpressionResults results = do_dlopen_function->ExecuteFunction( exe_ctx, &func_args_addr, options, diagnostics, return_value); if (results != eExpressionCompleted) { - error.SetErrorStringWithFormat("dlopen error: failed executing " - "dlopen wrapper function: %s", - diagnostics.GetString().c_str()); + error.SetErrorStringWithFormat( + "dlopen error: failed executing dlopen wrapper function: %s", + diagnostics.GetString().c_str()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -896,8 +901,9 @@ uint32_t PlatformPOSIX::DoLoadImage(lldb_private::Process *process, lldb::addr_t token = process->ReadPointerFromMemory(return_addr, utility_error); if (utility_error.Fail()) { - error.SetErrorStringWithFormat("dlopen error: could not read the return " - "struct: %s", utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not read the return struct: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } @@ -920,8 +926,9 @@ uint32_t 
PlatformPOSIX::DoLoadImage(lldb_private::Process *process, lldb::addr_t error_addr = process->ReadPointerFromMemory(return_addr + addr_size, utility_error); if (utility_error.Fail()) { - error.SetErrorStringWithFormat("dlopen error: could not read error string: " - "%s", utility_error.AsCString()); + error.SetErrorStringWithFormat( + "dlopen error: could not read error string: %s", + utility_error.AsCString()); return LLDB_INVALID_IMAGE_TOKEN; } From 4f1e64b54f59dd4c303d04b62926f35bde5a2c79 Mon Sep 17 00:00:00 2001 From: Stephen Long Date: Mon, 23 May 2022 07:01:55 -0700 Subject: [PATCH 256/908] [MSVC, ARM64] Add __readx18 intrinsics https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170 unsigned char __readx18byte(unsigned long) unsigned short __readx18word(unsigned long) unsigned long __readx18dword(unsigned long) unsigned __int64 __readx18qword(unsigned long) Given the lack of documentation of the intrinsics, we chose to align the offset with just `CharUnits::One()` when calling `IRBuilderBase::CreateAlignedLoad()` Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D126024 --- clang/include/clang/Basic/BuiltinsAArch64.def | 5 ++ clang/lib/CodeGen/CGBuiltin.cpp | 24 ++++++++ clang/lib/Headers/intrin.h | 5 ++ .../test/CodeGen/arm64-microsoft-intrinsics.c | 59 +++++++++++++++++++ 4 files changed, 93 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index a04b48dd128e3c..65ab4fcced9aed 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -256,6 +256,11 @@ TARGET_HEADER_BUILTIN(__writex18word, "vULiUs", "nh", "intrin.h", ALL_MS_LANGUA TARGET_HEADER_BUILTIN(__writex18dword, "vULiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__writex18qword, "vULiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__readx18byte, "UcULi", "nh", "intrin.h", ALL_MS_LANGUAGES, 
"") +TARGET_HEADER_BUILTIN(__readx18word, "UsULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__readx18dword, "ULiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__readx18qword, "ULLiULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6168ba938db4f0..bdc638299c4bde 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9977,6 +9977,30 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Store; } + if (BuiltinID == AArch64::BI__readx18byte || + BuiltinID == AArch64::BI__readx18word || + BuiltinID == AArch64::BI__readx18dword || + BuiltinID == AArch64::BI__readx18qword) { + llvm::Type *IntTy = ConvertType(E->getType()); + + // Read x18 as i8* + LLVMContext &Context = CGM.getLLVMContext(); + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); + llvm::Value *X18 = Builder.CreateCall(F, Metadata); + X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0)); + + // Load x18 + offset + Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); + Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); + Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0)); + LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One()); + return Load; + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. 
if (Optional MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index dbc5159853ddf7..de68b07491c6c5 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -567,6 +567,11 @@ void __writex18byte(unsigned long offset, unsigned char data); void __writex18word(unsigned long offset, unsigned short data); void __writex18dword(unsigned long offset, unsigned long data); void __writex18qword(unsigned long offset, unsigned __int64 data); + +unsigned char __readx18byte(unsigned long offset); +unsigned short __readx18word(unsigned long offset); +unsigned long __readx18dword(unsigned long offset); +unsigned __int64 __readx18qword(unsigned long offset); #endif /*----------------------------------------------------------------------------*\ diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index a9b1d444553f58..1ad5233bb4c816 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -187,5 +187,64 @@ void check__writex18qword(unsigned long offset, unsigned __int64 data) { // CHECK-MSVC: %[[DATA:.*]] = load i64, i64* %[[DATA_ADDR]], align 8 // CHECK-MSVC: store i64 %[[DATA]], i64* %[[BITCAST_PTR]], align 1 +unsigned char check__readx18byte(unsigned long offset) { + return __readx18byte(offset); +} + +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i32 %offset, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[RETVAL:.*]] = load i8, i8* %[[PTR]], align 1 +// CHECK-MSVC: 
ret i8 %[[RETVAL]] + +unsigned short check__readx18word(unsigned long offset) { + return __readx18word(offset); +} + +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i32 %offset, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[BITCAST_PTR:.*]] = bitcast i8* %[[PTR]] to i16* +// CHECK-MSVC: %[[RETVAL:.*]] = load i16, i16* %[[BITCAST_PTR]], align 1 +// CHECK-MSVC: ret i16 %[[RETVAL]] + +unsigned long check__readx18dword(unsigned long offset) { + return __readx18dword(offset); +} + +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i32 %offset, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[BITCAST_PTR:.*]] = bitcast i8* %[[PTR]] to i32* +// CHECK-MSVC: %[[RETVAL:.*]] = load i32, i32* %[[BITCAST_PTR]], align 1 +// CHECK-MSVC: ret i32 %[[RETVAL]] + +unsigned __int64 check__readx18qword(unsigned long offset) { + return __readx18qword(offset); +} + +// CHECK-MSVC: %[[OFFSET_ADDR:.*]] = alloca i32, align 4 +// CHECK-MSVC: store i32 %offset, i32* %[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[X18:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2]]) +// CHECK-MSVC: %[[X18_AS_PTR:.*]] = inttoptr i64 %[[X18]] to i8* +// CHECK-MSVC: %[[OFFSET:.*]] = load i32, i32* 
%[[OFFSET_ADDR]], align 4 +// CHECK-MSVC: %[[ZEXT_OFFSET:.*]] = zext i32 %[[OFFSET]] to i64 +// CHECK-MSVC: %[[PTR:.*]] = getelementptr i8, i8* %[[X18_AS_PTR]], i64 %[[ZEXT_OFFSET]] +// CHECK-MSVC: %[[BITCAST_PTR:.*]] = bitcast i8* %[[PTR]] to i64* +// CHECK-MSVC: %[[RETVAL:.*]] = load i64, i64* %[[BITCAST_PTR]], align 1 +// CHECK-MSVC: ret i64 %[[RETVAL]] + // CHECK-MSVC: ![[MD2]] = !{!"x18"} // CHECK-MSVC: ![[MD3]] = !{!"sp"} From 760298adc264f9c1029d93ab38711c131e19a2f4 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Mon, 23 May 2022 11:00:22 -0700 Subject: [PATCH 257/908] [lldb] Specify arguments of `image list` Register positional argument details in `CommandObjectTargetModulesList`. I recently learned that `image list` takes a module name, but the help info does not indicate this. With this change, `help image list` will show that it accepts zero or more module names. This makes it easier to get info about specific modules, without having to find/grep through the full image list. Reviewed By: DavidSpickett Differential Revision: https://reviews.llvm.org/D125154 --- lldb/source/Commands/CommandObjectTarget.cpp | 7 +++++-- lldb/test/API/commands/help/TestHelp.py | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index f42588b673a46c..7789698c1d6787 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -50,6 +50,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Timer.h" +#include "lldb/lldb-enumerations.h" #include "lldb/lldb-private-enumerations.h" #include "llvm/ADT/ScopeExit.h" @@ -2901,8 +2902,10 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { CommandObjectTargetModulesList(CommandInterpreter &interpreter) : CommandObjectParsed( interpreter, "target modules list", - "List current executable and dependent shared library 
images.", - "target modules list []") {} + "List current executable and dependent shared library images.") { + CommandArgumentData module_arg{eArgTypeShlibName, eArgRepeatStar}; + m_arguments.push_back({module_arg}); + } ~CommandObjectTargetModulesList() override = default; diff --git a/lldb/test/API/commands/help/TestHelp.py b/lldb/test/API/commands/help/TestHelp.py index e3363e870e5db9..09e7a32a854ca6 100644 --- a/lldb/test/API/commands/help/TestHelp.py +++ b/lldb/test/API/commands/help/TestHelp.py @@ -104,6 +104,13 @@ def test_help_image_du_line_should_work(self): self.expect("help image du line", substrs=[ 'Dump the line table for one or more compilation units']) + @no_debug_info_test + def test_help_image_list_shows_positional_args(self): + """Command 'help image list' should describe positional args.""" + # 'image' is an alias for 'target modules'. + self.expect("help image list", substrs=[ + ' [...]']) + @no_debug_info_test def test_help_target_variable_syntax(self): """Command 'help target variable' should display ...""" From 75eb0576debda4343fea35b1e2f01d8cbea12ab7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 22 May 2022 22:32:16 -0700 Subject: [PATCH 258/908] [AArch64] Add test case for pr55644. 
NFC --- llvm/test/CodeGen/AArch64/pr55644.ll | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/pr55644.ll diff --git a/llvm/test/CodeGen/AArch64/pr55644.ll b/llvm/test/CodeGen/AArch64/pr55644.ll new file mode 100644 index 00000000000000..ac98de13c6f07d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr55644.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-apple-darwin | FileCheck %s + +define i1 @f(i2 %0) { +; CHECK-LABEL: f: +; CHECK: ; %bb.0: +; CHECK-NEXT: sbfx w8, w0, #0, #2 +; CHECK-NEXT: add w8, w8, w8 +; CHECK-NEXT: lsl w9, w8, #30 +; CHECK-NEXT: cmp w8, w9, asr #30 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %2 = call { i2, i1 } @llvm.smul.with.overflow.i2(i2 %0, i2 -2) + %3 = extractvalue { i2, i1 } %2, 1 + ret i1 %3 +} + +declare { i2, i1 } @llvm.smul.with.overflow.i2(i2, i2) From 569d8945f311e76b25925ff4b4257734711d9e2f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 22 May 2022 22:38:04 -0700 Subject: [PATCH 259/908] [DAGCombiner][AArch64] Don't fold (smulo x, 2) -> (saddo x, x) if VT is i2. If the VT is i2, then 2 is really -2. Test has not been committed yet, but diff shows the change. Fixes PR55644. Differential Revision: https://reviews.llvm.org/D126213 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++- llvm/test/CodeGen/AArch64/pr55644.ll | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 958f2dbc614596..61d23aeec06eaa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4913,7 +4913,9 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); // (mulo x, 2) -> (addo x, x) - if (N1C && N1C->getAPIntValue() == 2) + // FIXME: This needs a freeze. 
+ if (N1C && N1C->getAPIntValue() == 2 && + (!IsSigned || VT.getScalarSizeInBits() > 2)) return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, N->getVTList(), N0, N0); diff --git a/llvm/test/CodeGen/AArch64/pr55644.ll b/llvm/test/CodeGen/AArch64/pr55644.ll index ac98de13c6f07d..00d29d700cea18 100644 --- a/llvm/test/CodeGen/AArch64/pr55644.ll +++ b/llvm/test/CodeGen/AArch64/pr55644.ll @@ -5,9 +5,10 @@ define i1 @f(i2 %0) { ; CHECK-LABEL: f: ; CHECK: ; %bb.0: ; CHECK-NEXT: sbfx w8, w0, #0, #2 -; CHECK-NEXT: add w8, w8, w8 -; CHECK-NEXT: lsl w9, w8, #30 -; CHECK-NEXT: cmp w8, w9, asr #30 +; CHECK-NEXT: lsl w8, w8, #1 +; CHECK-NEXT: neg w9, w8 +; CHECK-NEXT: lsl w9, w9, #30 +; CHECK-NEXT: cmn w8, w9, asr #30 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %2 = call { i2, i1 } @llvm.smul.with.overflow.i2(i2 %0, i2 -2) From e8c20d995bed4b0f7d3579daef1f215633eaf5ee Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 23 May 2022 13:31:00 -0400 Subject: [PATCH 260/908] [IR] add and use pattern match specialization for sqrt intrinsic; NFC This was included in D126190 originally, but it's independent and a useful change for readability. 
--- llvm/include/llvm/IR/PatternMatch.h | 5 +++++ llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++-- .../InstCombine/InstCombineMulDivRem.cpp | 15 ++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 5009feebc6357d..9c1834a2c32485 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -2212,6 +2212,11 @@ m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { return m_Intrinsic(Op0, Op1, Op2); } +template +inline typename m_Intrinsic_Ty::Ty m_Sqrt(const Opnd0 &Op0) { + return m_Intrinsic(Op0); +} + //===----------------------------------------------------------------------===// // Matchers for two-operands operators with the operators in either order // diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 5241c0c90d0056..9cfddc06d97e71 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5290,8 +5290,8 @@ static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, // 2. Ignore non-zero negative numbers because sqrt would produce NAN. // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0. 
Value *X; - if (Op0 == Op1 && match(Op0, m_Intrinsic(m_Value(X))) && - FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros()) + if (Op0 == Op1 && match(Op0, m_Sqrt(m_Value(X))) && FMF.allowReassoc() && + FMF.noNaNs() && FMF.noSignedZeros()) return X; return nullptr; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index f0ed930bdb11c2..ea40f088c130f9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -529,9 +529,8 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { // sqrt(X) * sqrt(Y) -> sqrt(X * Y) // nnan disallows the possibility of returning a number if both operands are // negative (in that case, we should return NaN). - if (I.hasNoNaNs() && - match(Op0, m_OneUse(m_Intrinsic(m_Value(X)))) && - match(Op1, m_OneUse(m_Intrinsic(m_Value(Y))))) { + if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) && + match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) { Value *XY = Builder.CreateFMulFMF(X, Y, &I); Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I); return replaceInstUsesWith(I, Sqrt); @@ -545,11 +544,11 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { // has the necessary (reassoc) fast-math-flags. 
if (I.hasNoSignedZeros() && match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Intrinsic(m_Value(X))) && Op1 == X) + match(Y, m_Sqrt(m_Value(X))) && Op1 == X) return BinaryOperator::CreateFDivFMF(X, Y, &I); if (I.hasNoSignedZeros() && match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && - match(Y, m_Intrinsic(m_Value(X))) && Op0 == X) + match(Y, m_Sqrt(m_Value(X))) && Op0 == X) return BinaryOperator::CreateFDivFMF(X, Y, &I); // Like the similar transform in instsimplify, this requires 'nsz' because @@ -558,14 +557,12 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { Op0->hasNUses(2)) { // Peek through fdiv to find squaring of square root: // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y - if (match(Op0, m_FDiv(m_Value(X), - m_Intrinsic(m_Value(Y))))) { + if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) { Value *XX = Builder.CreateFMulFMF(X, X, &I); return BinaryOperator::CreateFDivFMF(XX, Y, &I); } // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X) - if (match(Op0, m_FDiv(m_Intrinsic(m_Value(Y)), - m_Value(X)))) { + if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) { Value *XX = Builder.CreateFMulFMF(X, X, &I); return BinaryOperator::CreateFDivFMF(Y, XX, &I); } From 806e8a1c8e54b6f8563eae0c0ed6b719b7eb6089 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Mon, 23 May 2022 11:18:15 -0700 Subject: [PATCH 261/908] [Sanitizer][Darwin] Add SANITIZER_DRIVERKIT platform macro --- compiler-rt/lib/sanitizer_common/sanitizer_platform.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 91c0c5233f7946..edf32843189c3c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -85,6 +85,11 @@ # else # define SANITIZER_IOSSIM 0 # endif +# if TARGET_OS_DRIVERKIT +# define SANITIZER_DRIVERKIT 1 +# else +# define SANITIZER_DRIVERKIT 0 +# endif 
#else # define SANITIZER_APPLE 0 # define SANITIZER_MAC SANITIZER_APPLE @@ -93,6 +98,7 @@ # define SANITIZER_TVOS 0 # define SANITIZER_IOSSIM 0 # define SANITIZER_OSX 0 +# define SANITIZER_DRIVERKIT 0 #endif #if defined(_WIN32) From bedd3ee88152584b1c32c276b0ff38bb891f2812 Mon Sep 17 00:00:00 2001 From: natashaknk Date: Mon, 23 May 2022 10:58:33 -0700 Subject: [PATCH 262/908] [mlir][tosa] Change tosa.depthwise_conv2d's ending reshape to a collapse. TOSAs depthwise_conv2d operation includes a reshape to include the implicit x1 dimension. Reviewed By: rsuderman Differential Revision: https://reviews.llvm.org/D126212 --- .../TosaToLinalg/TosaToLinalgNamed.cpp | 38 ++++++++++++++----- .../TosaToLinalg/tosa-to-linalg-named.mlir | 12 +++--- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp index 4c3547a47eb2e6..b929587bafe89c 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -144,6 +144,19 @@ static SmallVector inferDynamicDimsForConv( return filteredDims; } +// Creates a map to collapse the last dimension of the Depthwise convolution op +// due to a shape mismatch +static void createDepthwiseConvCollapseMap( + int64_t outputRank, SmallVector &reassociationMap, + OpBuilder &rewriter) { + reassociationMap.resize(outputRank); + for (int i = 0; i < outputRank; i++) { + reassociationMap[i].push_back(rewriter.getAffineDimExpr(i)); + } + reassociationMap[outputRank - 1].push_back( + rewriter.getAffineDimExpr(outputRank)); +} + namespace { class ConvConverter : public OpConversionPattern { @@ -331,6 +344,7 @@ class DepthwiseConvConverter ShapedType weightTy = weight.getType().cast(); ShapedType biasTy = bias.getType().cast(); ShapedType resultTy = op->getResult(0).getType().cast(); + int64_t resultRank = resultTy.getRank(); Type inputETy = inputTy.getElementType(); Type 
resultETy = resultTy.getElementType(); @@ -410,10 +424,10 @@ class DepthwiseConvConverter // Broadcast the initial value to the output tensor before convolving. SmallVector indexingMaps; indexingMaps.push_back(AffineMap::get( - /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, + /*dimCount=*/resultRank, /*symbolCount=*/0, {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank)); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank)); Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); Value initTensor = rewriter.create( @@ -432,14 +446,18 @@ class DepthwiseConvConverter loc, linalgConvTy, ValueRange{input, weight}, ValueRange{zeroTensor}, strideAttr, dilationAttr) .getResult(0); - Value convReshape = rewriter.create( - loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); + + SmallVector reassociationMap; + createDepthwiseConvCollapseMap(resultRank, reassociationMap, rewriter); + Value convReshape = rewriter.create( + loc, resultTy, conv, reassociationMap); + Value result = rewriter .create( loc, resultTy, ValueRange({bias, convReshape}), biasInitTensor, indexingMaps, - getNParallelLoopsAttrs(resultTy.getRank()), + getNParallelLoopsAttrs(resultRank), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { Value added = nestedBuilder.create( @@ -457,14 +475,16 @@ class DepthwiseConvConverter loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal}, ValueRange{zeroTensor}, strideAttr, dilationAttr) .getResult(0); - Value convReshape = rewriter.create( - loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); + SmallVector reassociationMap; + createDepthwiseConvCollapseMap(resultRank, reassociationMap, rewriter); + Value convReshape = rewriter.create( + loc, resultTy, conv, 
reassociationMap); Value result = rewriter .create( loc, resultTy, ValueRange({bias, convReshape}), biasInitTensor, indexingMaps, - getNParallelLoopsAttrs(resultTy.getRank()), + getNParallelLoopsAttrs(resultRank), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { Value added = nestedBuilder.create( diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir index b6060c1fa03be6..8ecffb11d3fe71 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -477,7 +477,7 @@ func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 @@ -501,7 +501,7 @@ func.func @depthwise_conv_dyn(%arg0 : tensor, %arg1 : tensor<3x1x3x // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[OUT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 5, 33] // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, 
tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor) - // CHECK: %[[COLLAPSED:.+]] = "tosa.reshape"(%[[DEPTH]]) {new_shape = [-1, 5, 5, 33]} + // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) outs(%[[OUT]] : tensor) { // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32 @@ -523,7 +523,7 @@ func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3 // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 @@ -551,7 +551,7 @@ func.func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3 // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, 
tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 12, 12, 512]} + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 @@ -575,7 +575,7 @@ func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 10, 10, 512]} + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 @@ -596,7 +596,7 @@ func.func @depthwise_conv2d_dyn_w_h(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor<3x // CHECK: tensor.yield %cst : f32 // CHECK: } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32> // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} 
ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%22 : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> - // CHECK: %[[RESHAPED:.+]] = "tosa.reshape"(%[[CONV]]) {new_shape = [2, -1, -1, 15]} : (tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x15xf32> + // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[CONV]] {{\[}}[0], [1], [2], [3, 4]] %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 2, 3, 4], dilation = [2, 1], stride = [1, 2]} : (tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>, tensor<15xf32>) -> tensor<2x?x?x15xf32> return } From f0a61c2ce2afec200967c59d45181c7a9fbe2c3f Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Mon, 23 May 2022 11:29:10 -0700 Subject: [PATCH 263/908] Remove `friend` classes from TypeCategoryMap As far as I can tell, the only thing those friend classes access is the `ValueSP` typedef. Given that this is a map-ish class, with "Map" in its name, it doesn't seem like a stretch to make `KeyType`, `ValueType` and `ValueSP` public. More so when the public methods of the class have `KeyType` and `ValueSP` arguments and clearly `ValueSP` needs to be accessed from the outside. `friend` complicates local reasoning about who can access private members, which is valuable in a class like this that has every method locking a mutex to prevent concurrent access. 
Differential Revision: https://reviews.llvm.org/D126103 --- lldb/include/lldb/DataFormatters/TypeCategoryMap.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lldb/include/lldb/DataFormatters/TypeCategoryMap.h b/lldb/include/lldb/DataFormatters/TypeCategoryMap.h index 4dbca29db066d4..45dbb306aa758c 100644 --- a/lldb/include/lldb/DataFormatters/TypeCategoryMap.h +++ b/lldb/include/lldb/DataFormatters/TypeCategoryMap.h @@ -23,13 +23,13 @@ namespace lldb_private { class TypeCategoryMap { private: - typedef ConstString KeyType; - typedef TypeCategoryImpl ValueType; - typedef ValueType::SharedPointer ValueSP; typedef std::list ActiveCategoriesList; typedef ActiveCategoriesList::iterator ActiveCategoriesIterator; public: + typedef ConstString KeyType; + typedef TypeCategoryImpl ValueType; + typedef ValueType::SharedPointer ValueSP; typedef std::map MapType; typedef MapType::iterator MapIterator; typedef std::function ForEachCallback; @@ -103,9 +103,6 @@ class TypeCategoryMap { ActiveCategoriesList &active_list() { return m_active_categories; } std::recursive_mutex &mutex() { return m_map_mutex; } - - friend class FormattersContainer; - friend class FormatManager; }; } // namespace lldb_private From eebc1fb772c56df9958fb916b41ff3c329fae145 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 20 May 2022 23:31:13 +0200 Subject: [PATCH 264/908] [libc++] Granularize parts of <type_traits> `<type_traits>` is quite a large header, so I'll granularize it in a few steps. 
Reviewed By: ldionne, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D124755 --- libcxx/include/CMakeLists.txt | 25 + libcxx/include/__type_traits/add_pointer.h | 41 ++ libcxx/include/__type_traits/conditional.h | 31 + libcxx/include/__type_traits/decay.h | 65 ++ libcxx/include/__type_traits/enable_if.h | 32 + libcxx/include/__type_traits/is_array.h | 52 ++ libcxx/include/__type_traits/is_base_of.h | 32 + libcxx/include/__type_traits/is_const.h | 45 ++ libcxx/include/__type_traits/is_convertible.h | 108 ++++ .../include/__type_traits/is_floating_point.h | 37 ++ libcxx/include/__type_traits/is_function.h | 40 ++ libcxx/include/__type_traits/is_integral.h | 74 +++ .../is_member_function_pointer.h | 64 ++ .../__type_traits/is_member_object_pointer.h | 46 ++ .../include/__type_traits/is_null_pointer.h | 41 ++ libcxx/include/__type_traits/is_reference.h | 70 +++ .../__type_traits/is_reference_wrapper.h | 31 + .../include/__type_traits/is_referenceable.h | 35 ++ libcxx/include/__type_traits/is_same.h | 44 ++ libcxx/include/__type_traits/is_void.h | 45 ++ libcxx/include/__type_traits/is_volatile.h | 45 ++ libcxx/include/__type_traits/remove_const.h | 28 + libcxx/include/__type_traits/remove_cv.h | 30 + libcxx/include/__type_traits/remove_extent.h | 34 ++ .../include/__type_traits/remove_reference.h | 31 + .../include/__type_traits/remove_volatile.h | 28 + libcxx/include/cstddef | 29 +- libcxx/include/module.modulemap | 29 +- libcxx/include/type_traits | 563 +----------------- libcxx/test/libcxx/private_headers.verify.cpp | 25 + 30 files changed, 1232 insertions(+), 568 deletions(-) create mode 100644 libcxx/include/__type_traits/add_pointer.h create mode 100644 libcxx/include/__type_traits/conditional.h create mode 100644 libcxx/include/__type_traits/decay.h create mode 100644 libcxx/include/__type_traits/enable_if.h create mode 100644 libcxx/include/__type_traits/is_array.h create mode 100644 libcxx/include/__type_traits/is_base_of.h 
create mode 100644 libcxx/include/__type_traits/is_const.h create mode 100644 libcxx/include/__type_traits/is_convertible.h create mode 100644 libcxx/include/__type_traits/is_floating_point.h create mode 100644 libcxx/include/__type_traits/is_function.h create mode 100644 libcxx/include/__type_traits/is_integral.h create mode 100644 libcxx/include/__type_traits/is_member_function_pointer.h create mode 100644 libcxx/include/__type_traits/is_member_object_pointer.h create mode 100644 libcxx/include/__type_traits/is_null_pointer.h create mode 100644 libcxx/include/__type_traits/is_reference.h create mode 100644 libcxx/include/__type_traits/is_reference_wrapper.h create mode 100644 libcxx/include/__type_traits/is_referenceable.h create mode 100644 libcxx/include/__type_traits/is_same.h create mode 100644 libcxx/include/__type_traits/is_void.h create mode 100644 libcxx/include/__type_traits/is_volatile.h create mode 100644 libcxx/include/__type_traits/remove_const.h create mode 100644 libcxx/include/__type_traits/remove_cv.h create mode 100644 libcxx/include/__type_traits/remove_extent.h create mode 100644 libcxx/include/__type_traits/remove_reference.h create mode 100644 libcxx/include/__type_traits/remove_volatile.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index d7aa09e2c8580c..74336ed19703e6 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -431,8 +431,33 @@ set(files __threading_support __tree __tuple + __type_traits/add_pointer.h + __type_traits/conditional.h + __type_traits/decay.h + __type_traits/enable_if.h __type_traits/integral_constant.h + __type_traits/is_array.h + __type_traits/is_base_of.h __type_traits/is_callable.h + __type_traits/is_const.h + __type_traits/is_convertible.h + __type_traits/is_floating_point.h + __type_traits/is_function.h + __type_traits/is_integral.h + __type_traits/is_member_function_pointer.h + __type_traits/is_member_object_pointer.h + __type_traits/is_null_pointer.h + 
__type_traits/is_reference.h + __type_traits/is_reference_wrapper.h + __type_traits/is_referenceable.h + __type_traits/is_same.h + __type_traits/is_void.h + __type_traits/is_volatile.h + __type_traits/remove_const.h + __type_traits/remove_cv.h + __type_traits/remove_extent.h + __type_traits/remove_reference.h + __type_traits/remove_volatile.h __undef_macros __utility/as_const.h __utility/auto_cast.h diff --git a/libcxx/include/__type_traits/add_pointer.h b/libcxx/include/__type_traits/add_pointer.h new file mode 100644 index 00000000000000..aea2490c5d0cc8 --- /dev/null +++ b/libcxx/include/__type_traits/add_pointer.h @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_ADD_POINTER_H +#define _LIBCPP___TYPE_TRAITS_ADD_POINTER_H + +#include <__config> +#include <__type_traits/is_referenceable.h> +#include <__type_traits/is_same.h> +#include <__type_traits/remove_cv.h> +#include <__type_traits/remove_reference.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template ::value || + _IsSame::type, void>::value> +struct __add_pointer_impl + {typedef _LIBCPP_NODEBUG typename remove_reference<_Tp>::type* type;}; +template struct __add_pointer_impl<_Tp, false> + {typedef _LIBCPP_NODEBUG _Tp type;}; + +template struct _LIBCPP_TEMPLATE_VIS add_pointer + {typedef _LIBCPP_NODEBUG typename __add_pointer_impl<_Tp>::type type;}; + +#if _LIBCPP_STD_VER > 11 +template using add_pointer_t = typename add_pointer<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_ADD_POINTER_H diff --git 
a/libcxx/include/__type_traits/conditional.h b/libcxx/include/__type_traits/conditional.h new file mode 100644 index 00000000000000..ef182a9ed6550e --- /dev/null +++ b/libcxx/include/__type_traits/conditional.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_CONDITIONAL_H +#define _LIBCPP___TYPE_TRAITS_CONDITIONAL_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template + struct _LIBCPP_TEMPLATE_VIS conditional {typedef _If type;}; +template + struct _LIBCPP_TEMPLATE_VIS conditional {typedef _Then type;}; + +#if _LIBCPP_STD_VER > 11 +template using conditional_t = typename conditional<_Bp, _If, _Then>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_CONDITIONAL_H diff --git a/libcxx/include/__type_traits/decay.h b/libcxx/include/__type_traits/decay.h new file mode 100644 index 00000000000000..d47ad03fe0082e --- /dev/null +++ b/libcxx/include/__type_traits/decay.h @@ -0,0 +1,65 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_DECAY_H +#define _LIBCPP___TYPE_TRAITS_DECAY_H + +#include <__config> +#include <__type_traits/add_pointer.h> +#include <__type_traits/conditional.h> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_array.h> +#include <__type_traits/is_function.h> +#include <__type_traits/is_referenceable.h> +#include <__type_traits/remove_cv.h> +#include <__type_traits/remove_extent.h> +#include <__type_traits/remove_reference.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct __decay { + typedef _LIBCPP_NODEBUG typename remove_cv<_Up>::type type; +}; + +template +struct __decay<_Up, true> { +public: + typedef _LIBCPP_NODEBUG typename conditional + < + is_array<_Up>::value, + typename remove_extent<_Up>::type*, + typename conditional + < + is_function<_Up>::value, + typename add_pointer<_Up>::type, + typename remove_cv<_Up>::type + >::type + >::type type; +}; + +template +struct _LIBCPP_TEMPLATE_VIS decay +{ +private: + typedef _LIBCPP_NODEBUG typename remove_reference<_Tp>::type _Up; +public: + typedef _LIBCPP_NODEBUG typename __decay<_Up, __is_referenceable<_Up>::value>::type type; +}; + +#if _LIBCPP_STD_VER > 11 +template using decay_t = typename decay<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_DECAY_H diff --git a/libcxx/include/__type_traits/enable_if.h b/libcxx/include/__type_traits/enable_if.h new file mode 100644 index 00000000000000..d7ccd6fd0368fb --- /dev/null +++ b/libcxx/include/__type_traits/enable_if.h @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_ENABLE_IF_H +#define _LIBCPP___TYPE_TRAITS_ENABLE_IF_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS enable_if {}; +template struct _LIBCPP_TEMPLATE_VIS enable_if {typedef _Tp type;}; + +template using __enable_if_t _LIBCPP_NODEBUG = typename enable_if<_Bp, _Tp>::type; + +#if _LIBCPP_STD_VER > 11 +template using enable_if_t = typename enable_if<_Bp, _Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_ENABLE_IF_H diff --git a/libcxx/include/__type_traits/is_array.h b/libcxx/include/__type_traits/is_array.h new file mode 100644 index 00000000000000..766d2a20302876 --- /dev/null +++ b/libcxx/include/__type_traits/is_array.h @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_ARRAY_H +#define _LIBCPP___TYPE_TRAITS_IS_ARRAY_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// TODO: Clang incorrectly reports that __is_array is true for T[0]. +// Re-enable the branch once https://llvm.org/PR54705 is fixed. 
+#if __has_keyword(__is_array) && 0 + +template +struct _LIBCPP_TEMPLATE_VIS is_array : _BoolConstant<__is_array(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_array_v = __is_array(_Tp); +#endif + +#else + +template struct _LIBCPP_TEMPLATE_VIS is_array + : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_array<_Tp[]> + : public true_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_array<_Tp[_Np]> + : public true_type {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_array_v = is_array<_Tp>::value; +#endif + +#endif // __has_keyword(__is_array) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_ARRAY_H diff --git a/libcxx/include/__type_traits/is_base_of.h b/libcxx/include/__type_traits/is_base_of.h new file mode 100644 index 00000000000000..b944a65d2823b7 --- /dev/null +++ b/libcxx/include/__type_traits/is_base_of.h @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_BASE_OF_H +#define _LIBCPP___TYPE_TRAITS_IS_BASE_OF_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct _LIBCPP_TEMPLATE_VIS is_base_of + : public integral_constant {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_base_of_v = is_base_of<_Bp, _Dp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_BASE_OF_H diff --git a/libcxx/include/__type_traits/is_const.h b/libcxx/include/__type_traits/is_const.h new file mode 100644 index 00000000000000..5501832f560bab --- /dev/null +++ b/libcxx/include/__type_traits/is_const.h @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_CONST_H +#define _LIBCPP___TYPE_TRAITS_IS_CONST_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_const) + +template +struct _LIBCPP_TEMPLATE_VIS is_const : _BoolConstant<__is_const(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_const_v = __is_const(_Tp); +#endif + +#else + +template struct _LIBCPP_TEMPLATE_VIS is_const : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_const<_Tp const> : public true_type {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_const_v = is_const<_Tp>::value; +#endif + +#endif // __has_keyword(__is_const) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_CONST_H diff --git a/libcxx/include/__type_traits/is_convertible.h b/libcxx/include/__type_traits/is_convertible.h new file mode 100644 index 00000000000000..884e808e2ae4d1 --- /dev/null +++ b/libcxx/include/__type_traits/is_convertible.h @@ -0,0 +1,108 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_CONVERTIBLE_H +#define _LIBCPP___TYPE_TRAITS_IS_CONVERTIBLE_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_array.h> +#include <__type_traits/is_function.h> +#include <__type_traits/is_void.h> +#include <__type_traits/remove_reference.h> +#include <__utility/declval.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_feature(is_convertible_to) && !defined(_LIBCPP_USE_IS_CONVERTIBLE_FALLBACK) + +template struct _LIBCPP_TEMPLATE_VIS is_convertible + : public integral_constant {}; + +#else // __has_feature(is_convertible_to) + +namespace __is_convertible_imp +{ +template void __test_convert(_Tp); + +template +struct __is_convertible_test : public false_type {}; + +template +struct __is_convertible_test<_From, _To, + decltype(__is_convertible_imp::__test_convert<_To>(declval<_From>()))> : public true_type +{}; + +template ::value, + bool _IsFunction = is_function<_Tp>::value, + bool _IsVoid = is_void<_Tp>::value> + struct __is_array_function_or_void {enum {value = 0};}; +template struct __is_array_function_or_void<_Tp, true, false, false> {enum {value = 1};}; +template struct __is_array_function_or_void<_Tp, false, true, false> {enum {value = 2};}; +template struct __is_array_function_or_void<_Tp, false, false, true> {enum {value = 3};}; +} + +template ::type>::value> +struct __is_convertible_check +{ + static const size_t __v = 0; +}; + +template +struct __is_convertible_check<_Tp, 0> +{ + static const size_t __v = sizeof(_Tp); +}; + +template ::value, + unsigned _T2_is_array_function_or_void = __is_convertible_imp::__is_array_function_or_void<_T2>::value> +struct __is_convertible + : public integral_constant::value + > +{}; + +template struct 
__is_convertible<_T1, _T2, 0, 1> : public false_type {}; +template struct __is_convertible<_T1, _T2, 1, 1> : public false_type {}; +template struct __is_convertible<_T1, _T2, 2, 1> : public false_type {}; +template struct __is_convertible<_T1, _T2, 3, 1> : public false_type {}; + +template struct __is_convertible<_T1, _T2, 0, 2> : public false_type {}; +template struct __is_convertible<_T1, _T2, 1, 2> : public false_type {}; +template struct __is_convertible<_T1, _T2, 2, 2> : public false_type {}; +template struct __is_convertible<_T1, _T2, 3, 2> : public false_type {}; + +template struct __is_convertible<_T1, _T2, 0, 3> : public false_type {}; +template struct __is_convertible<_T1, _T2, 1, 3> : public false_type {}; +template struct __is_convertible<_T1, _T2, 2, 3> : public false_type {}; +template struct __is_convertible<_T1, _T2, 3, 3> : public true_type {}; + +template struct _LIBCPP_TEMPLATE_VIS is_convertible + : public __is_convertible<_T1, _T2> +{ + static const size_t __complete_check1 = __is_convertible_check<_T1>::__v; + static const size_t __complete_check2 = __is_convertible_check<_T2>::__v; +}; + +#endif // __has_feature(is_convertible_to) + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_convertible_v = is_convertible<_From, _To>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_CONVERTIBLE_H diff --git a/libcxx/include/__type_traits/is_floating_point.h b/libcxx/include/__type_traits/is_floating_point.h new file mode 100644 index 00000000000000..d93e5d99d07b8b --- /dev/null +++ b/libcxx/include/__type_traits/is_floating_point.h @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_FLOATING_POINT_H +#define _LIBCPP___TYPE_TRAITS_IS_FLOATING_POINT_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/remove_cv.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct __libcpp_is_floating_point : public false_type {}; +template <> struct __libcpp_is_floating_point : public true_type {}; +template <> struct __libcpp_is_floating_point : public true_type {}; +template <> struct __libcpp_is_floating_point : public true_type {}; + +template struct _LIBCPP_TEMPLATE_VIS is_floating_point + : public __libcpp_is_floating_point::type> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_FLOATING_POINT_H diff --git a/libcxx/include/__type_traits/is_function.h b/libcxx/include/__type_traits/is_function.h new file mode 100644 index 00000000000000..23ba9cbb0dd630 --- /dev/null +++ b/libcxx/include/__type_traits/is_function.h @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_FUNCTIONAL_H +#define _LIBCPP___TYPE_TRAITS_IS_FUNCTIONAL_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_const.h> +#include <__type_traits/is_reference.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS is_function + : public _BoolConstant< +#ifdef __clang__ + __is_function(_Tp) +#else + !(is_reference<_Tp>::value || is_const::value) +#endif + > {}; + + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_function_v = is_function<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_FUNCTIONAL_H diff --git a/libcxx/include/__type_traits/is_integral.h b/libcxx/include/__type_traits/is_integral.h new file mode 100644 index 00000000000000..7df964f992851f --- /dev/null +++ b/libcxx/include/__type_traits/is_integral.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_INTEGRAL_H +#define _LIBCPP___TYPE_TRAITS_IS_INTEGRAL_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/remove_cv.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct __libcpp_is_integral { enum { value = 0 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +#endif +#ifndef _LIBCPP_HAS_NO_CHAR8_T +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +#endif +#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +#endif +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +template <> struct __libcpp_is_integral { enum { value = 1 }; }; +#ifndef _LIBCPP_HAS_NO_INT128 +template <> struct __libcpp_is_integral<__int128_t> { enum { value = 1 }; }; +template <> struct __libcpp_is_integral<__uint128_t> { enum { value = 1 }; }; +#endif + +#if __has_keyword(__is_integral) + +template +struct _LIBCPP_TEMPLATE_VIS 
is_integral : _BoolConstant<__is_integral(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_integral_v = __is_integral(_Tp); +#endif + +#else + +template struct _LIBCPP_TEMPLATE_VIS is_integral + : public _BoolConstant<__libcpp_is_integral::type>::value> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_integral_v = is_integral<_Tp>::value; +#endif + +#endif // __has_keyword(__is_integral) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_INTEGRAL_H diff --git a/libcxx/include/__type_traits/is_member_function_pointer.h b/libcxx/include/__type_traits/is_member_function_pointer.h new file mode 100644 index 00000000000000..f18f3ebc5a3a63 --- /dev/null +++ b/libcxx/include/__type_traits/is_member_function_pointer.h @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_MEMBER_FUNCTION_POINTER_H +#define _LIBCPP___TYPE_TRAITS_IS_MEMBER_FUNCTION_POINTER_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_function.h> +#include <__type_traits/remove_cv.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct __libcpp_is_member_pointer { + enum { + __is_member = false, + __is_func = false, + __is_obj = false + }; +}; +template struct __libcpp_is_member_pointer<_Tp _Up::*> { + enum { + __is_member = true, + __is_func = is_function<_Tp>::value, + __is_obj = !__is_func, + }; +}; + +#if __has_keyword(__is_member_function_pointer) + +template +struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer + : _BoolConstant<__is_member_function_pointer(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_member_function_pointer_v = __is_member_function_pointer(_Tp); +#endif + +#else // __has_keyword(__is_member_function_pointer) + +template struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer + : public _BoolConstant< __libcpp_is_member_pointer::type>::__is_func > {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_member_function_pointer_v = is_member_function_pointer<_Tp>::value; +#endif + +#endif // __has_keyword(__is_member_function_pointer) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_MEMBER_FUNCTION_POINTER_H diff --git a/libcxx/include/__type_traits/is_member_object_pointer.h b/libcxx/include/__type_traits/is_member_object_pointer.h new file mode 100644 index 00000000000000..41f81d529388e2 --- /dev/null +++ b/libcxx/include/__type_traits/is_member_object_pointer.h @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// 
+// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_MEMBER_OBJECT_POINTER_H +#define _LIBCPP___TYPE_TRAITS_IS_MEMBER_OBJECT_POINTER_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_member_object_pointer) + +template +struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer + : _BoolConstant<__is_member_object_pointer(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_member_object_pointer_v = __is_member_object_pointer(_Tp); +#endif + +#else // __has_keyword(__is_member_object_pointer) + +template struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer + : public _BoolConstant< __libcpp_is_member_pointer::type>::__is_obj > {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_member_object_pointer_v = is_member_object_pointer<_Tp>::value; +#endif + +#endif // __has_keyword(__is_member_object_pointer) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_MEMBER_FUNCTION_POINTER_H diff --git a/libcxx/include/__type_traits/is_null_pointer.h b/libcxx/include/__type_traits/is_null_pointer.h new file mode 100644 index 00000000000000..f81cb9aadb2f11 --- /dev/null +++ b/libcxx/include/__type_traits/is_null_pointer.h @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_NULL_POINTER_H +#define _LIBCPP___TYPE_TRAITS_IS_NULL_POINTER_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/remove_cv.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct __is_nullptr_t_impl : public false_type {}; +template <> struct __is_nullptr_t_impl : public true_type {}; + +template struct _LIBCPP_TEMPLATE_VIS __is_nullptr_t + : public __is_nullptr_t_impl::type> {}; + +#if _LIBCPP_STD_VER > 11 +template struct _LIBCPP_TEMPLATE_VIS is_null_pointer + : public __is_nullptr_t_impl::type> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_null_pointer_v = is_null_pointer<_Tp>::value; +#endif +#endif // _LIBCPP_STD_VER > 11 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_NULL_POINTER_H diff --git a/libcxx/include/__type_traits/is_reference.h b/libcxx/include/__type_traits/is_reference.h new file mode 100644 index 00000000000000..8aa55c13ff83e9 --- /dev/null +++ b/libcxx/include/__type_traits/is_reference.h @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_REFERENCE_H +#define _LIBCPP___TYPE_TRAITS_IS_REFERENCE_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_lvalue_reference) && \ + __has_keyword(__is_rvalue_reference) && \ + __has_keyword(__is_reference) + +template +struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference : _BoolConstant<__is_lvalue_reference(_Tp)> { }; + +template +struct _LIBCPP_TEMPLATE_VIS is_rvalue_reference : _BoolConstant<__is_rvalue_reference(_Tp)> { }; + +template +struct _LIBCPP_TEMPLATE_VIS is_reference : _BoolConstant<__is_reference(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_reference_v = __is_reference(_Tp); +template +inline constexpr bool is_lvalue_reference_v = __is_lvalue_reference(_Tp); +template +inline constexpr bool is_rvalue_reference_v = __is_rvalue_reference(_Tp); +#endif + +#else // __has_keyword(__is_lvalue_reference) && etc... 
+ +template struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference<_Tp&> : public true_type {}; + +template struct _LIBCPP_TEMPLATE_VIS is_rvalue_reference : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_rvalue_reference<_Tp&&> : public true_type {}; + +template struct _LIBCPP_TEMPLATE_VIS is_reference : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_reference<_Tp&> : public true_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_reference<_Tp&&> : public true_type {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_reference_v = is_reference<_Tp>::value; + +template +inline constexpr bool is_lvalue_reference_v = is_lvalue_reference<_Tp>::value; + +template +inline constexpr bool is_rvalue_reference_v = is_rvalue_reference<_Tp>::value; +#endif + +#endif // __has_keyword(__is_lvalue_reference) && etc... + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_REFERENCE_H diff --git a/libcxx/include/__type_traits/is_reference_wrapper.h b/libcxx/include/__type_traits/is_reference_wrapper.h new file mode 100644 index 00000000000000..cd391bb07e78dc --- /dev/null +++ b/libcxx/include/__type_traits/is_reference_wrapper.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_REFERENCE_WRAPPER_H +#define _LIBCPP___TYPE_TRAITS_IS_REFERENCE_WRAPPER_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/remove_cv.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template class _LIBCPP_TEMPLATE_VIS reference_wrapper; + +template struct __is_reference_wrapper_impl : public false_type {}; +template struct __is_reference_wrapper_impl > : public true_type {}; +template struct __is_reference_wrapper + : public __is_reference_wrapper_impl::type> {}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_REFERENCE_WRAPPER_H diff --git a/libcxx/include/__type_traits/is_referenceable.h b/libcxx/include/__type_traits/is_referenceable.h new file mode 100644 index 00000000000000..6d98a3db997ea6 --- /dev/null +++ b/libcxx/include/__type_traits/is_referenceable.h @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_REFERENCEABLE_H +#define _LIBCPP___TYPE_TRAITS_IS_REFERENCEABLE_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_same.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +struct __two {char __lx[2];}; + +struct __is_referenceable_impl { + template static _Tp& __test(int); + template static __two __test(...); +}; + +template +struct __is_referenceable : integral_constant(0)), __two>::value> {}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_REFERENCEABLE_H diff --git a/libcxx/include/__type_traits/is_same.h b/libcxx/include/__type_traits/is_same.h new file mode 100644 index 00000000000000..6fa0afbc18cadf --- /dev/null +++ b/libcxx/include/__type_traits/is_same.h @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_SAME_H +#define _LIBCPP___TYPE_TRAITS_IS_SAME_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct _LIBCPP_TEMPLATE_VIS is_same : _BoolConstant<__is_same(_Tp, _Up)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_same_v = __is_same(_Tp, _Up); +#endif + +// _IsSame has the same effect as is_same but instantiates fewer types: +// is_same and is_same are guaranteed to be different types, but +// _IsSame and _IsSame are the same type (namely, false_type). +// Neither GCC nor Clang can mangle the __is_same builtin, so _IsSame +// mustn't be directly used anywhere that contributes to name-mangling +// (such as in a dependent return type). + +template +using _IsSame = _BoolConstant<__is_same(_Tp, _Up)>; + +template +using _IsNotSame = _BoolConstant; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_SAME_H diff --git a/libcxx/include/__type_traits/is_void.h b/libcxx/include/__type_traits/is_void.h new file mode 100644 index 00000000000000..29e68cc529dfa2 --- /dev/null +++ b/libcxx/include/__type_traits/is_void.h @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_VOID_H +#define _LIBCPP___TYPE_TRAITS_IS_VOID_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_void) + +template +struct _LIBCPP_TEMPLATE_VIS is_void : _BoolConstant<__is_void(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_void_v = __is_void(_Tp); +#endif + +#else + +template struct _LIBCPP_TEMPLATE_VIS is_void + : public is_same::type, void> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_void_v = is_void<_Tp>::value; +#endif + +#endif // __has_keyword(__is_void) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_VOID_H diff --git a/libcxx/include/__type_traits/is_volatile.h b/libcxx/include/__type_traits/is_volatile.h new file mode 100644 index 00000000000000..372703e7ced6a9 --- /dev/null +++ b/libcxx/include/__type_traits/is_volatile.h @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_VOLATILE_H +#define _LIBCPP___TYPE_TRAITS_IS_VOLATILE_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_volatile) + +template +struct _LIBCPP_TEMPLATE_VIS is_volatile : _BoolConstant<__is_volatile(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_volatile_v = __is_volatile(_Tp); +#endif + +#else + +template struct _LIBCPP_TEMPLATE_VIS is_volatile : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_volatile<_Tp volatile> : public true_type {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_volatile_v = is_volatile<_Tp>::value; +#endif + +#endif // __has_keyword(__is_volatile) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_VOLATILE_H diff --git a/libcxx/include/__type_traits/remove_const.h b/libcxx/include/__type_traits/remove_const.h new file mode 100644 index 00000000000000..8efc893e965a65 --- /dev/null +++ b/libcxx/include/__type_traits/remove_const.h @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_REMOVE_CONST_H +#define _LIBCPP___TYPE_TRAITS_REMOVE_CONST_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS remove_const {typedef _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_const {typedef _Tp type;}; +#if _LIBCPP_STD_VER > 11 +template using remove_const_t = typename remove_const<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_REMOVE_CONST_H diff --git a/libcxx/include/__type_traits/remove_cv.h b/libcxx/include/__type_traits/remove_cv.h new file mode 100644 index 00000000000000..ce1e4e45c6d13f --- /dev/null +++ b/libcxx/include/__type_traits/remove_cv.h @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_REMOVE_CV_H +#define _LIBCPP___TYPE_TRAITS_REMOVE_CV_H + +#include <__config> +#include <__type_traits/remove_const.h> +#include <__type_traits/remove_volatile.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS remove_cv +{typedef typename remove_volatile::type>::type type;}; +#if _LIBCPP_STD_VER > 11 +template using remove_cv_t = typename remove_cv<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_REMOVE_CV_H diff --git a/libcxx/include/__type_traits/remove_extent.h b/libcxx/include/__type_traits/remove_extent.h new file mode 100644 index 00000000000000..e353de3616168b --- /dev/null +++ b/libcxx/include/__type_traits/remove_extent.h @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_REMOVE_EXTENT_H +#define _LIBCPP___TYPE_TRAITS_REMOVE_EXTENT_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS remove_extent + {typedef _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_extent<_Tp[]> + {typedef _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_extent<_Tp[_Np]> + {typedef _Tp type;}; + +#if _LIBCPP_STD_VER > 11 +template using remove_extent_t = typename remove_extent<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_REMOVE_EXTENT_H diff --git a/libcxx/include/__type_traits/remove_reference.h b/libcxx/include/__type_traits/remove_reference.h new file mode 100644 index 00000000000000..a69e48dc584b0c --- /dev/null +++ b/libcxx/include/__type_traits/remove_reference.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_REMOVE_REFERENCE_H +#define _LIBCPP___TYPE_TRAITS_REMOVE_REFERENCE_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS remove_reference {typedef _LIBCPP_NODEBUG _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&> {typedef _LIBCPP_NODEBUG _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&&> {typedef _LIBCPP_NODEBUG _Tp type;}; + +#if _LIBCPP_STD_VER > 11 +template using remove_reference_t = typename remove_reference<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_REMOVE_REFERENCE_H diff --git a/libcxx/include/__type_traits/remove_volatile.h b/libcxx/include/__type_traits/remove_volatile.h new file mode 100644 index 00000000000000..79f64c46a27d79 --- /dev/null +++ b/libcxx/include/__type_traits/remove_volatile.h @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_REMOVE_VOLATILE_H +#define _LIBCPP___TYPE_TRAITS_REMOVE_VOLATILE_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS remove_volatile {typedef _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_volatile {typedef _Tp type;}; +#if _LIBCPP_STD_VER > 11 +template using remove_volatile_t = typename remove_volatile<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_REMOVE_VOLATILE_H diff --git a/libcxx/include/cstddef b/libcxx/include/cstddef index 2a8b686bf38885..2b0659ab8ee7db 100644 --- a/libcxx/include/cstddef +++ b/libcxx/include/cstddef @@ -35,6 +35,7 @@ Types: #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__type_traits/is_integral.h> #include #include @@ -52,34 +53,6 @@ using ::size_t _LIBCPP_USING_IF_EXISTS; using ::max_align_t _LIBCPP_USING_IF_EXISTS; #endif -template struct __libcpp_is_integral { enum { value = 0 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -#endif -#ifndef _LIBCPP_HAS_NO_CHAR8_T -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -#endif -#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -#endif -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct 
__libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -template <> struct __libcpp_is_integral { enum { value = 1 }; }; -#ifndef _LIBCPP_HAS_NO_INT128 -template <> struct __libcpp_is_integral<__int128_t> { enum { value = 1 }; }; -template <> struct __libcpp_is_integral<__uint128_t> { enum { value = 1 }; }; -#endif - _LIBCPP_END_NAMESPACE_STD #if _LIBCPP_STD_VER > 14 diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 87969556a0a728..d62e6ae4df47f4 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -982,8 +982,33 @@ module std [system] { export functional.__functional.unwrap_ref export * - module integral_constant { private header "__type_traits/integral_constant.h" } - module is_callable { private header "__type_traits/is_callable.h" } + module add_pointer { private header "__type_traits/add_pointer.h" } + module conditional { private header "__type_traits/conditional.h" } + module decay { private header "__type_traits/decay.h" } + module enable_if { private header "__type_traits/enable_if.h" } + module integral_constant { private header "__type_traits/integral_constant.h" } + module is_array { private header "__type_traits/is_array.h" } + module is_base_of { private header "__type_traits/is_base_of.h" } + module is_callable { private header "__type_traits/is_callable.h" } + module is_const { private header "__type_traits/is_const.h" } + module is_convertible { private header "__type_traits/is_convertible.h" } + module is_floating_point { private header "__type_traits/is_floating_point.h" } + module is_function { private header "__type_traits/is_function.h" } + module is_integral { 
private header "__type_traits/is_integral.h" } + module is_member_function_pointer { private header "__type_traits/is_member_function_pointer.h" } + module is_member_object_pointer { private header "__type_traits/is_member_object_pointer.h" } + module is_null_pointer { private header "__type_traits/is_null_pointer.h" } + module is_reference { private header "__type_traits/is_reference.h" } + module is_reference_wrapper { private header "__type_traits/is_reference_wrapper.h" } + module is_referenceable { private header "__type_traits/is_referenceable.h" } + module is_same { private header "__type_traits/is_same.h" } + module is_void { private header "__type_traits/is_void.h" } + module is_volatile { private header "__type_traits/is_volatile.h" } + module remove_const { private header "__type_traits/remove_const.h" } + module remove_cv { private header "__type_traits/remove_cv.h" } + module remove_extent { private header "__type_traits/remove_extent.h" } + module remove_reference { private header "__type_traits/remove_reference.h" } + module remove_volatile { private header "__type_traits/remove_volatile.h" } } module typeindex { header "typeindex" diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 9faa8d526aa705..b56f5395dd2611 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -418,8 +418,33 @@ namespace std */ #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__type_traits/add_pointer.h> +#include <__type_traits/conditional.h> +#include <__type_traits/decay.h> +#include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> +#include <__type_traits/is_array.h> +#include <__type_traits/is_base_of.h> #include <__type_traits/is_callable.h> +#include <__type_traits/is_const.h> +#include <__type_traits/is_convertible.h> +#include <__type_traits/is_floating_point.h> +#include <__type_traits/is_function.h> +#include <__type_traits/is_integral.h> 
+#include <__type_traits/is_member_function_pointer.h> +#include <__type_traits/is_member_object_pointer.h> +#include <__type_traits/is_null_pointer.h> +#include <__type_traits/is_reference.h> +#include <__type_traits/is_reference_wrapper.h> +#include <__type_traits/is_referenceable.h> +#include <__type_traits/is_same.h> +#include <__type_traits/is_void.h> +#include <__type_traits/is_volatile.h> +#include <__type_traits/remove_const.h> +#include <__type_traits/remove_cv.h> +#include <__type_traits/remove_extent.h> +#include <__type_traits/remove_reference.h> +#include <__type_traits/remove_volatile.h> #include <__utility/declval.h> #include #include @@ -431,18 +456,8 @@ namespace std _LIBCPP_BEGIN_NAMESPACE_STD template struct _LIBCPP_TEMPLATE_VIS pair; -template class _LIBCPP_TEMPLATE_VIS reference_wrapper; template struct _LIBCPP_TEMPLATE_VIS hash; -template struct _LIBCPP_TEMPLATE_VIS enable_if {}; -template struct _LIBCPP_TEMPLATE_VIS enable_if {typedef _Tp type;}; - -template using __enable_if_t _LIBCPP_NODEBUG = typename enable_if<_Bp, _Tp>::type; - -#if _LIBCPP_STD_VER > 11 -template using enable_if_t = typename enable_if<_Bp, _Tp>::type; -#endif - template struct _MetaBase; template <> struct _MetaBase { @@ -505,39 +520,8 @@ struct __void_t { typedef void type; }; template struct _LIBCPP_TEMPLATE_VIS __dependent_type : public _Tp {}; - -template - struct _LIBCPP_TEMPLATE_VIS conditional {typedef _If type;}; -template - struct _LIBCPP_TEMPLATE_VIS conditional {typedef _Then type;}; - -#if _LIBCPP_STD_VER > 11 -template using conditional_t = typename conditional<_Bp, _If, _Then>::type; -#endif - // is_same -template -struct _LIBCPP_TEMPLATE_VIS is_same : _BoolConstant<__is_same(_Tp, _Up)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_same_v = __is_same(_Tp, _Up); -#endif - -// _IsSame has the same effect as is_same but instantiates fewer types: -// is_same and is_same are guaranteed to be different types, but -// _IsSame and _IsSame are 
the same type (namely, false_type). -// Neither GCC nor Clang can mangle the __is_same builtin, so _IsSame -// mustn't be directly used anywhere that contributes to name-mangling -// (such as in a dependent return type). - -template -using _IsSame = _BoolConstant<__is_same(_Tp, _Up)>; - -template -using _IsNotSame = _BoolConstant; - template using __test_for_primary_template = __enable_if_t< _IsSame<_Tp, typename _Tp::__primary_template>::value @@ -547,148 +531,8 @@ using __is_primary_template = _IsValidExpansion< __test_for_primary_template, _Tp >; -// helper class - -struct __two {char __lx[2];}; - -// is_const - -#if __has_keyword(__is_const) - -template -struct _LIBCPP_TEMPLATE_VIS is_const : _BoolConstant<__is_const(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_const_v = __is_const(_Tp); -#endif - -#else - -template struct _LIBCPP_TEMPLATE_VIS is_const : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_const<_Tp const> : public true_type {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_const_v = is_const<_Tp>::value; -#endif - -#endif // __has_keyword(__is_const) - -// is_volatile - -#if __has_keyword(__is_volatile) - -template -struct _LIBCPP_TEMPLATE_VIS is_volatile : _BoolConstant<__is_volatile(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_volatile_v = __is_volatile(_Tp); -#endif - -#else - -template struct _LIBCPP_TEMPLATE_VIS is_volatile : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_volatile<_Tp volatile> : public true_type {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_volatile_v = is_volatile<_Tp>::value; -#endif - -#endif // __has_keyword(__is_volatile) - -// remove_const - -template struct _LIBCPP_TEMPLATE_VIS remove_const {typedef _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_const {typedef _Tp type;}; -#if _LIBCPP_STD_VER > 11 -template using remove_const_t = typename remove_const<_Tp>::type; -#endif - -// 
remove_volatile - -template struct _LIBCPP_TEMPLATE_VIS remove_volatile {typedef _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_volatile {typedef _Tp type;}; -#if _LIBCPP_STD_VER > 11 -template using remove_volatile_t = typename remove_volatile<_Tp>::type; -#endif - -// remove_cv - -template struct _LIBCPP_TEMPLATE_VIS remove_cv -{typedef typename remove_volatile::type>::type type;}; -#if _LIBCPP_STD_VER > 11 -template using remove_cv_t = typename remove_cv<_Tp>::type; -#endif - -// is_void - -#if __has_keyword(__is_void) - -template -struct _LIBCPP_TEMPLATE_VIS is_void : _BoolConstant<__is_void(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_void_v = __is_void(_Tp); -#endif - -#else - -template struct _LIBCPP_TEMPLATE_VIS is_void - : public is_same::type, void> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_void_v = is_void<_Tp>::value; -#endif - -#endif // __has_keyword(__is_void) - -// __is_nullptr_t - -template struct __is_nullptr_t_impl : public false_type {}; -template <> struct __is_nullptr_t_impl : public true_type {}; - -template struct _LIBCPP_TEMPLATE_VIS __is_nullptr_t - : public __is_nullptr_t_impl::type> {}; - -#if _LIBCPP_STD_VER > 11 -template struct _LIBCPP_TEMPLATE_VIS is_null_pointer - : public __is_nullptr_t_impl::type> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_null_pointer_v = is_null_pointer<_Tp>::value; -#endif -#endif // _LIBCPP_STD_VER > 11 - // is_integral -#if __has_keyword(__is_integral) - -template -struct _LIBCPP_TEMPLATE_VIS is_integral : _BoolConstant<__is_integral(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_integral_v = __is_integral(_Tp); -#endif - -#else - -template struct _LIBCPP_TEMPLATE_VIS is_integral - : public _BoolConstant<__libcpp_is_integral::type>::value> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_integral_v = is_integral<_Tp>::value; -#endif - -#endif // __has_keyword(__is_integral) 
- // [basic.fundamental] defines five standard signed integer types; // __int128_t is an extended signed integer type. // The signed and unsigned integer types, plus bool and the @@ -714,52 +558,6 @@ template <> struct __libcpp_is_unsigned_integer : public tru template <> struct __libcpp_is_unsigned_integer<__uint128_t> : public true_type {}; #endif -// is_floating_point -// implements __libcpp_floating_point - -template struct __libcpp_is_floating_point : public false_type {}; -template <> struct __libcpp_is_floating_point : public true_type {}; -template <> struct __libcpp_is_floating_point : public true_type {}; -template <> struct __libcpp_is_floating_point : public true_type {}; - -template struct _LIBCPP_TEMPLATE_VIS is_floating_point - : public __libcpp_is_floating_point::type> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value; -#endif - -// is_array - -// TODO: Clang incorrectly reports that __is_array is true for T[0]. -// Re-enable the branch once https://llvm.org/PR54705 is fixed. -#if __has_keyword(__is_array) && 0 - -template -struct _LIBCPP_TEMPLATE_VIS is_array : _BoolConstant<__is_array(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_array_v = __is_array(_Tp); -#endif - -#else - -template struct _LIBCPP_TEMPLATE_VIS is_array - : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_array<_Tp[]> - : public true_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_array<_Tp[_Np]> - : public true_type {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_array_v = is_array<_Tp>::value; -#endif - -#endif // __has_keyword(__is_array) - // is_pointer // Before AppleClang 12.0.5, __is_pointer didn't work for Objective-C types. 
@@ -797,55 +595,6 @@ inline constexpr bool is_pointer_v = is_pointer<_Tp>::value; #endif // __has_keyword(__is_pointer) -// is_reference - -#if __has_keyword(__is_lvalue_reference) && \ - __has_keyword(__is_rvalue_reference) && \ - __has_keyword(__is_reference) - -template -struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference : _BoolConstant<__is_lvalue_reference(_Tp)> { }; - -template -struct _LIBCPP_TEMPLATE_VIS is_rvalue_reference : _BoolConstant<__is_rvalue_reference(_Tp)> { }; - -template -struct _LIBCPP_TEMPLATE_VIS is_reference : _BoolConstant<__is_reference(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_reference_v = __is_reference(_Tp); -template -inline constexpr bool is_lvalue_reference_v = __is_lvalue_reference(_Tp); -template -inline constexpr bool is_rvalue_reference_v = __is_rvalue_reference(_Tp); -#endif - -#else // __has_keyword(__is_lvalue_reference) && etc... - -template struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference<_Tp&> : public true_type {}; - -template struct _LIBCPP_TEMPLATE_VIS is_rvalue_reference : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_rvalue_reference<_Tp&&> : public true_type {}; - -template struct _LIBCPP_TEMPLATE_VIS is_reference : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_reference<_Tp&> : public true_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_reference<_Tp&&> : public true_type {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_reference_v = is_reference<_Tp>::value; - -template -inline constexpr bool is_lvalue_reference_v = is_lvalue_reference<_Tp>::value; - -template -inline constexpr bool is_rvalue_reference_v = is_rvalue_reference<_Tp>::value; -#endif - -#endif // __has_keyword(__is_lvalue_reference) && etc... 
- // is_union #if __has_feature(is_union) || defined(_LIBCPP_COMPILER_GCC) @@ -891,61 +640,6 @@ template inline constexpr bool is_class_v = is_class<_Tp>::value; #endif -// is_function - -template struct _LIBCPP_TEMPLATE_VIS is_function - : public _BoolConstant< -#ifdef __clang__ - __is_function(_Tp) -#else - !(is_reference<_Tp>::value || is_const::value) -#endif - > {}; - - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_function_v = is_function<_Tp>::value; -#endif - -template struct __libcpp_is_member_pointer { - enum { - __is_member = false, - __is_func = false, - __is_obj = false - }; -}; -template struct __libcpp_is_member_pointer<_Tp _Up::*> { - enum { - __is_member = true, - __is_func = is_function<_Tp>::value, - __is_obj = !__is_func, - }; -}; - -#if __has_keyword(__is_member_function_pointer) - -template -struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer - : _BoolConstant<__is_member_function_pointer(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_member_function_pointer_v = __is_member_function_pointer(_Tp); -#endif - -#else // __has_keyword(__is_member_function_pointer) - -template struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer - : public _BoolConstant< __libcpp_is_member_pointer::type>::__is_func > {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_member_function_pointer_v = is_member_function_pointer<_Tp>::value; -#endif - -#endif // __has_keyword(__is_member_function_pointer) - // is_member_pointer #if __has_keyword(__is_member_pointer) @@ -970,31 +664,6 @@ inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value; #endif // __has_keyword(__is_member_pointer) -// is_member_object_pointer - -#if __has_keyword(__is_member_object_pointer) - -template -struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer - : _BoolConstant<__is_member_object_pointer(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_member_object_pointer_v = 
__is_member_object_pointer(_Tp); -#endif - -#else // __has_keyword(__is_member_object_pointer) - -template struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer - : public _BoolConstant< __libcpp_is_member_pointer::type>::__is_obj > {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_member_object_pointer_v = is_member_object_pointer<_Tp>::value; -#endif - -#endif // __has_keyword(__is_member_object_pointer) - // is_enum #if __has_feature(is_enum) || defined(_LIBCPP_COMPILER_GCC) @@ -1159,18 +828,6 @@ inline constexpr bool is_compound_v = is_compound<_Tp>::value; #endif // __has_keyword(__is_compound) -// __is_referenceable [defns.referenceable] - -struct __is_referenceable_impl { - template static _Tp& __test(int); - template static __two __test(...); -}; - -template -struct __is_referenceable : integral_constant(0)), __two>::value> {}; - - // add_const template struct _LIBCPP_TEMPLATE_VIS add_const { @@ -1200,16 +857,6 @@ template struct _LIBCPP_TEMPLATE_VIS add_cv { template using add_cv_t = typename add_cv<_Tp>::type; #endif -// remove_reference - -template struct _LIBCPP_TEMPLATE_VIS remove_reference {typedef _LIBCPP_NODEBUG _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&> {typedef _LIBCPP_NODEBUG _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&&> {typedef _LIBCPP_NODEBUG _Tp type;}; - -#if _LIBCPP_STD_VER > 11 -template using remove_reference_t = typename remove_reference<_Tp>::type; -#endif - // add_lvalue_reference template ::value> struct __add_lvalue_reference_impl { typedef _LIBCPP_NODEBUG _Tp type; }; @@ -1275,21 +922,6 @@ template using remove_pointer_t = typename remove_pointer<_Tp>::type // add_pointer -template ::value || - _IsSame::type, void>::value> -struct __add_pointer_impl - {typedef _LIBCPP_NODEBUG typename remove_reference<_Tp>::type* type;}; -template struct __add_pointer_impl<_Tp, false> - {typedef _LIBCPP_NODEBUG _Tp type;}; - -template struct _LIBCPP_TEMPLATE_VIS add_pointer - 
{typedef _LIBCPP_NODEBUG typename __add_pointer_impl<_Tp>::type type;}; - -#if _LIBCPP_STD_VER > 11 -template using add_pointer_t = typename add_pointer<_Tp>::type; -#endif - // type_identity template @@ -1422,19 +1054,6 @@ inline constexpr size_t extent_v = extent<_Tp, _Ip>::value; #endif // __has_keyword(__array_extent) -// remove_extent - -template struct _LIBCPP_TEMPLATE_VIS remove_extent - {typedef _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_extent<_Tp[]> - {typedef _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_extent<_Tp[_Np]> - {typedef _Tp type;}; - -#if _LIBCPP_STD_VER > 11 -template using remove_extent_t = typename remove_extent<_Tp>::type; -#endif - // remove_all_extents template struct _LIBCPP_TEMPLATE_VIS remove_all_extents @@ -1468,42 +1087,6 @@ inline constexpr bool is_unbounded_array_v = is_unbounded_array<_Tp>::value; #endif -// decay - -template -struct __decay { - typedef _LIBCPP_NODEBUG typename remove_cv<_Up>::type type; -}; - -template -struct __decay<_Up, true> { -public: - typedef _LIBCPP_NODEBUG typename conditional - < - is_array<_Up>::value, - typename remove_extent<_Up>::type*, - typename conditional - < - is_function<_Up>::value, - typename add_pointer<_Up>::type, - typename remove_cv<_Up>::type - >::type - >::type type; -}; - -template -struct _LIBCPP_TEMPLATE_VIS decay -{ -private: - typedef _LIBCPP_NODEBUG typename remove_reference<_Tp>::type _Up; -public: - typedef _LIBCPP_NODEBUG typename __decay<_Up, __is_referenceable<_Up>::value>::type type; -}; - -#if _LIBCPP_STD_VER > 11 -template using decay_t = typename decay<_Tp>::type; -#endif - // is_abstract template struct _LIBCPP_TEMPLATE_VIS is_abstract @@ -1540,17 +1123,6 @@ inline constexpr bool is_aggregate_v = is_aggregate<_Tp>::value; #endif // _LIBCPP_STD_VER > 14 -// is_base_of - -template -struct _LIBCPP_TEMPLATE_VIS is_base_of - : public integral_constant {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_base_of_v = is_base_of<_Bp, 
_Dp>::value; -#endif - // __is_core_convertible // [conv.general]/3 says "E is convertible to T" whenever "T t=E;" is well-formed. @@ -1566,87 +1138,6 @@ struct __is_core_convertible<_Tp, _Up, decltype( static_cast(0) ( static_cast<_Tp(*)()>(0)() ) )> : public true_type {}; -// is_convertible - -#if __has_feature(is_convertible_to) && !defined(_LIBCPP_USE_IS_CONVERTIBLE_FALLBACK) - -template struct _LIBCPP_TEMPLATE_VIS is_convertible - : public integral_constant {}; - -#else // __has_feature(is_convertible_to) - -namespace __is_convertible_imp -{ -template void __test_convert(_Tp); - -template -struct __is_convertible_test : public false_type {}; - -template -struct __is_convertible_test<_From, _To, - decltype(__is_convertible_imp::__test_convert<_To>(declval<_From>()))> : public true_type -{}; - -template ::value, - bool _IsFunction = is_function<_Tp>::value, - bool _IsVoid = is_void<_Tp>::value> - struct __is_array_function_or_void {enum {value = 0};}; -template struct __is_array_function_or_void<_Tp, true, false, false> {enum {value = 1};}; -template struct __is_array_function_or_void<_Tp, false, true, false> {enum {value = 2};}; -template struct __is_array_function_or_void<_Tp, false, false, true> {enum {value = 3};}; -} - -template ::type>::value> -struct __is_convertible_check -{ - static const size_t __v = 0; -}; - -template -struct __is_convertible_check<_Tp, 0> -{ - static const size_t __v = sizeof(_Tp); -}; - -template ::value, - unsigned _T2_is_array_function_or_void = __is_convertible_imp::__is_array_function_or_void<_T2>::value> -struct __is_convertible - : public integral_constant::value - > -{}; - -template struct __is_convertible<_T1, _T2, 0, 1> : public false_type {}; -template struct __is_convertible<_T1, _T2, 1, 1> : public false_type {}; -template struct __is_convertible<_T1, _T2, 2, 1> : public false_type {}; -template struct __is_convertible<_T1, _T2, 3, 1> : public false_type {}; - -template struct __is_convertible<_T1, _T2, 0, 2> : public 
false_type {}; -template struct __is_convertible<_T1, _T2, 1, 2> : public false_type {}; -template struct __is_convertible<_T1, _T2, 2, 2> : public false_type {}; -template struct __is_convertible<_T1, _T2, 3, 2> : public false_type {}; - -template struct __is_convertible<_T1, _T2, 0, 3> : public false_type {}; -template struct __is_convertible<_T1, _T2, 1, 3> : public false_type {}; -template struct __is_convertible<_T1, _T2, 2, 3> : public false_type {}; -template struct __is_convertible<_T1, _T2, 3, 3> : public true_type {}; - -template struct _LIBCPP_TEMPLATE_VIS is_convertible - : public __is_convertible<_T1, _T2> -{ - static const size_t __complete_check1 = __is_convertible_check<_T1>::__v; - static const size_t __complete_check2 = __is_convertible_check<_T2>::__v; -}; - -#endif // __has_feature(is_convertible_to) - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_convertible_v = is_convertible<_From, _To>::value; -#endif - // is_nothrow_convertible #if _LIBCPP_STD_VER > 17 @@ -3349,10 +2840,6 @@ template inline constexpr bool is_trivial_v = is_trivial<_Tp>::value; #endif -template struct __is_reference_wrapper_impl : public false_type {}; -template struct __is_reference_wrapper_impl > : public true_type {}; -template struct __is_reference_wrapper - : public __is_reference_wrapper_impl::type> {}; #ifndef _LIBCPP_CXX03_LANG diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index 057aaa4e11ad96..d85a7e99fa81f9 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -441,8 +441,33 @@ END-SCRIPT #include <__thread/poll_with_backoff.h> // expected-error@*:* {{use of private header from outside its module: '__thread/poll_with_backoff.h'}} #include <__thread/timed_backoff_policy.h> // expected-error@*:* {{use of private header from outside its module: '__thread/timed_backoff_policy.h'}} #include <__tuple> // expected-error@*:* {{use of 
private header from outside its module: '__tuple'}} +#include <__type_traits/add_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/add_pointer.h'}} +#include <__type_traits/conditional.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/conditional.h'}} +#include <__type_traits/decay.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/decay.h'}} +#include <__type_traits/enable_if.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/enable_if.h'}} #include <__type_traits/integral_constant.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/integral_constant.h'}} +#include <__type_traits/is_array.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_array.h'}} +#include <__type_traits/is_base_of.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_base_of.h'}} #include <__type_traits/is_callable.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_callable.h'}} +#include <__type_traits/is_const.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_const.h'}} +#include <__type_traits/is_convertible.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_convertible.h'}} +#include <__type_traits/is_floating_point.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_floating_point.h'}} +#include <__type_traits/is_function.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_function.h'}} +#include <__type_traits/is_integral.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_integral.h'}} +#include <__type_traits/is_member_function_pointer.h> // expected-error@*:* {{use of 
private header from outside its module: '__type_traits/is_member_function_pointer.h'}} +#include <__type_traits/is_member_object_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_member_object_pointer.h'}} +#include <__type_traits/is_null_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_null_pointer.h'}} +#include <__type_traits/is_reference.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_reference.h'}} +#include <__type_traits/is_reference_wrapper.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_reference_wrapper.h'}} +#include <__type_traits/is_referenceable.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_referenceable.h'}} +#include <__type_traits/is_same.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_same.h'}} +#include <__type_traits/is_void.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_void.h'}} +#include <__type_traits/is_volatile.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_volatile.h'}} +#include <__type_traits/remove_const.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_const.h'}} +#include <__type_traits/remove_cv.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_cv.h'}} +#include <__type_traits/remove_extent.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_extent.h'}} +#include <__type_traits/remove_reference.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_reference.h'}} +#include <__type_traits/remove_volatile.h> // expected-error@*:* {{use of private header from outside its module: 
'__type_traits/remove_volatile.h'}} #include <__utility/as_const.h> // expected-error@*:* {{use of private header from outside its module: '__utility/as_const.h'}} #include <__utility/auto_cast.h> // expected-error@*:* {{use of private header from outside its module: '__utility/auto_cast.h'}} #include <__utility/cmp.h> // expected-error@*:* {{use of private header from outside its module: '__utility/cmp.h'}} From b177a90ce7b590dfce6479142f46fd1b9554a3b3 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 12 May 2022 15:46:18 +0200 Subject: [PATCH 265/908] [libc++] Always enable the ranges concepts The ranges concepts were already available in libc++13, so we shouldn't guard them with `_LIBCPP_HAS_NO_INCOMPLETE_RANGES`. Fixes https://github.com/llvm/llvm-project/issues/54765 Reviewed By: #libc, ldionne Spies: ldionne, libcxx-commits Differential Revision: https://reviews.llvm.org/D124011 --- libcxx/include/__ranges/concepts.h | 4 ---- libcxx/include/__ranges/data.h | 4 ++-- libcxx/include/__ranges/size.h | 4 ++-- .../libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp | 3 +-- .../move.sentinel/concept_conformance.compile.pass.cpp | 1 - libcxx/test/std/ranges/range.access/data.pass.cpp | 1 - libcxx/test/std/ranges/range.access/size.pass.cpp | 1 - .../range.req/range.refinements/common_range.compile.pass.cpp | 1 - .../range.req/range.refinements/subsumption.compile.pass.cpp | 1 - 9 files changed, 5 insertions(+), 15 deletions(-) diff --git a/libcxx/include/__ranges/concepts.h b/libcxx/include/__ranges/concepts.h index 241f90c0a1d17c..87df1d18baf8a3 100644 --- a/libcxx/include/__ranges/concepts.h +++ b/libcxx/include/__ranges/concepts.h @@ -68,8 +68,6 @@ namespace ranges { template using range_rvalue_reference_t = iter_rvalue_reference_t>; -#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) - // [range.sized] template concept sized_range = range<_Tp> && requires(_Tp& __t) { ranges::size(__t); }; @@ -135,8 +133,6 @@ namespace ranges { (is_lvalue_reference_v<_Tp> 
|| (movable> && !__is_std_initializer_list>)))); -#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) - } // namespace ranges #endif // _LIBCPP_STD_VER > 17 diff --git a/libcxx/include/__ranges/data.h b/libcxx/include/__ranges/data.h index 2f24d89b9c4573..35d1af1e648ff2 100644 --- a/libcxx/include/__ranges/data.h +++ b/libcxx/include/__ranges/data.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if _LIBCPP_STD_VER > 17 // [range.prim.data] @@ -99,7 +99,7 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#endif // _LIBCPP_STD_VER > 17 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index 514c554ef4f0b6..b8326683139681 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if _LIBCPP_STD_VER > 17 namespace ranges { template @@ -128,7 +128,7 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#endif // _LIBCPP_STD_VER > 17 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp b/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp index 0d151073ca4810..3dd6b20cc37332 100644 --- a/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp @@ -25,7 +25,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { - int output_range; int data; int size; int prev; @@ -37,5 +36,5 @@ namespace ranges { int filter_view; int join_view; int views; // this entire namespace should be absent -} +} // namespace ranges _LIBCPP_END_NAMESPACE_STD diff --git 
a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp index ce584244b34e4e..88e0f77ea39fab 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-incomplete-ranges // diff --git a/libcxx/test/std/ranges/range.access/data.pass.cpp b/libcxx/test/std/ranges/range.access/data.pass.cpp index be9c9eec86b7fe..357e52a1462751 100644 --- a/libcxx/test/std/ranges/range.access/data.pass.cpp +++ b/libcxx/test/std/ranges/range.access/data.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-incomplete-ranges // std::ranges::data diff --git a/libcxx/test/std/ranges/range.access/size.pass.cpp b/libcxx/test/std/ranges/range.access/size.pass.cpp index 42706d9d006fe5..05681733f37d92 100644 --- a/libcxx/test/std/ranges/range.access/size.pass.cpp +++ b/libcxx/test/std/ranges/range.access/size.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-incomplete-ranges // std::ranges::size diff --git a/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp index 0d29c834339b67..a546cfb6ccd7cc 100644 --- a/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp +++ 
b/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-incomplete-ranges // template // concept common_range; diff --git a/libcxx/test/std/ranges/range.req/range.refinements/subsumption.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.refinements/subsumption.compile.pass.cpp index 244492c65500fe..23d447fb34b623 100644 --- a/libcxx/test/std/ranges/range.req/range.refinements/subsumption.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.req/range.refinements/subsumption.compile.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-incomplete-ranges // template // concept input_iterator; From 570390580567ccc940c141e9ee4e16fd31ace176 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 20 May 2022 17:11:58 +0200 Subject: [PATCH 266/908] [libc++] Assume that push_macro and pop_macro are available All compilers that libc++ supports support `push_macro` and `pop_macro`. So let's remove it. 
Reviewed By: ldionne, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D126073 --- libcxx/CMakeLists.txt | 3 --- libcxx/include/__config | 31 ++----------------------------- libcxx/include/__undef_macros | 21 ++------------------- 3 files changed, 4 insertions(+), 51 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 7dd32b13e3bece..a182a76dda782c 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -634,9 +634,6 @@ function(cxx_add_warning_flags target) if (LIBCXX_ENABLE_PEDANTIC) target_add_compile_flags_if_supported(${target} PRIVATE -pedantic) endif() - if (LIBCXX_DISABLE_MACRO_CONFLICT_WARNINGS) - target_compile_definitions(${target} PRIVATE -D_LIBCPP_DISABLE_MACRO_CONFLICT_WARNINGS) - endif() endfunction() # Exception flags ============================================================= diff --git a/libcxx/include/__config b/libcxx/include/__config index b6dd32c5c320e8..62b367167637f9 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1235,35 +1235,8 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( #define _LIBCPP_HAS_NO_CXX20_COROUTINES #endif -#if defined(_LIBCPP_COMPILER_IBM) -#define _LIBCPP_HAS_NO_PRAGMA_PUSH_POP_MACRO -#endif - -#if defined(_LIBCPP_HAS_NO_PRAGMA_PUSH_POP_MACRO) -# define _LIBCPP_PUSH_MACROS -# define _LIBCPP_POP_MACROS -#else - // Don't warn about macro conflicts when we can restore them at the - // end of the header. 
-# ifndef _LIBCPP_DISABLE_MACRO_CONFLICT_WARNINGS -# define _LIBCPP_DISABLE_MACRO_CONFLICT_WARNINGS -# endif -# if defined(_LIBCPP_COMPILER_MSVC) -# define _LIBCPP_PUSH_MACROS \ - __pragma(push_macro("min")) \ - __pragma(push_macro("max")) -# define _LIBCPP_POP_MACROS \ - __pragma(pop_macro("min")) \ - __pragma(pop_macro("max")) -# else -# define _LIBCPP_PUSH_MACROS \ - _Pragma("push_macro(\"min\")") \ - _Pragma("push_macro(\"max\")") -# define _LIBCPP_POP_MACROS \ - _Pragma("pop_macro(\"min\")") \ - _Pragma("pop_macro(\"max\")") -# endif -#endif // defined(_LIBCPP_HAS_NO_PRAGMA_PUSH_POP_MACRO) +# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") +# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") #ifndef _LIBCPP_NO_AUTO_LINK # if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY) diff --git a/libcxx/include/__undef_macros b/libcxx/include/__undef_macros index 40b2b64d0a6fa4..3bacbbacb502a0 100644 --- a/libcxx/include/__undef_macros +++ b/libcxx/include/__undef_macros @@ -7,27 +7,10 @@ // //===----------------------------------------------------------------------===// - #ifdef min -#if !defined(_LIBCPP_DISABLE_MACRO_CONFLICT_WARNINGS) -#if defined(_LIBCPP_WARNING) -_LIBCPP_WARNING("macro min is incompatible with C++. Try #define NOMINMAX " - "before any Windows header. #undefing min") -#else -#warning: macro min is incompatible with C++. #undefing min -#endif -#endif -#undef min +# undef min #endif #ifdef max -#if !defined(_LIBCPP_DISABLE_MACRO_CONFLICT_WARNINGS) -#if defined(_LIBCPP_WARNING) -_LIBCPP_WARNING("macro max is incompatible with C++. Try #define NOMINMAX " - "before any Windows header. #undefing max") -#else -#warning: macro max is incompatible with C++. 
#undefing max -#endif -#endif -#undef max +# undef max #endif From 19e21d91bc174559a5aa0653a5bfe74d8cba93ad Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sun, 22 May 2022 13:34:22 +0200 Subject: [PATCH 267/908] [libc++] Add auto to the list of required extensions in C++03 We use `auto` in C++03, so we shouldn't say that we aren't. Reviewed By: ldionne, #libc Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D126165 --- libcxx/docs/DesignDocs/ExtendedCXX03Support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/docs/DesignDocs/ExtendedCXX03Support.rst b/libcxx/docs/DesignDocs/ExtendedCXX03Support.rst index e9e3fc4d23032f..213cf2dab01e15 100644 --- a/libcxx/docs/DesignDocs/ExtendedCXX03Support.rst +++ b/libcxx/docs/DesignDocs/ExtendedCXX03Support.rst @@ -27,12 +27,12 @@ libc++ expects Clang to provide are: * Alias templates * defaulted and deleted Functions. * reference qualified Functions +* ``auto`` There are also features that Clang *does not* provide as an extension in C++03 mode. These include: * ``constexpr`` and ``noexcept`` -* ``auto`` * Trailing return types. * ``>>`` without a space. 
From 40f7fca3d91fa4a480a7f8477a207a6a1a4a2496 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sun, 22 May 2022 13:43:37 +0200 Subject: [PATCH 268/908] [libc++] Add ranges::max_element to the synopsis and ADL-proof the __min_element_impl calls Reviewed By: ldionne, #libc Spies: sstefan1, libcxx-commits Differential Revision: https://reviews.llvm.org/D126167 --- libcxx/include/__algorithm/ranges_max_element.h | 4 ++-- libcxx/include/algorithm | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__algorithm/ranges_max_element.h b/libcxx/include/__algorithm/ranges_max_element.h index 2ec464a52d8b6b..d8d7242e176bb7 100644 --- a/libcxx/include/__algorithm/ranges_max_element.h +++ b/libcxx/include/__algorithm/ranges_max_element.h @@ -36,7 +36,7 @@ struct __fn { _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); }; - return __min_element_impl(__first, __last, __comp_lhs_rhs_swapped, __proj); + return ranges::__min_element_impl(__first, __last, __comp_lhs_rhs_swapped, __proj); } template operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); }; - return __min_element_impl(ranges::begin(__r), ranges::end(__r), __comp_lhs_rhs_swapped, __proj); + return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp_lhs_rhs_swapped, __proj); } }; } // namespace __max_element diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 1a75dc0dacb22b..a95fa769f024a5 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -45,6 +45,15 @@ namespace ranges { indirect_strict_weak_order, Proj>> Comp = ranges::less> // since C++20 constexpr borrowed_iterator_t min_element(R&& r, Comp comp = {}, Proj proj = {}); + 
template S, class Proj = identity, + indirect_strict_weak_order> Comp = ranges::less> + constexpr I ranges::max_element(I first, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template, Proj>> Comp = ranges::less> + constexpr borrowed_iterator_t ranges::max_element(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + + template S1, input_iterator I2, sentinel_for<_I2> S2, class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> requires indirectly_comparable From 7ffc99bedcd23943d758c0b33e7ac87cf7974d1f Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 23 May 2022 18:52:16 +0000 Subject: [PATCH 269/908] [gn build] Port eebc1fb772c5 --- .../gn/secondary/libcxx/include/BUILD.gn | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 44c9b052bea067..7c07e1cb44c17d 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -487,8 +487,33 @@ if (current_toolchain == default_toolchain) { "__threading_support", "__tree", "__tuple", + "__type_traits/add_pointer.h", + "__type_traits/conditional.h", + "__type_traits/decay.h", + "__type_traits/enable_if.h", "__type_traits/integral_constant.h", + "__type_traits/is_array.h", + "__type_traits/is_base_of.h", "__type_traits/is_callable.h", + "__type_traits/is_const.h", + "__type_traits/is_convertible.h", + "__type_traits/is_floating_point.h", + "__type_traits/is_function.h", + "__type_traits/is_integral.h", + "__type_traits/is_member_function_pointer.h", + "__type_traits/is_member_object_pointer.h", + "__type_traits/is_null_pointer.h", + "__type_traits/is_reference.h", + "__type_traits/is_reference_wrapper.h", + "__type_traits/is_referenceable.h", + "__type_traits/is_same.h", + "__type_traits/is_void.h", + "__type_traits/is_volatile.h", + "__type_traits/remove_const.h", + "__type_traits/remove_cv.h", + 
"__type_traits/remove_extent.h", + "__type_traits/remove_reference.h", + "__type_traits/remove_volatile.h", "__undef_macros", "__utility/as_const.h", "__utility/auto_cast.h", From 1962389979be0b755969311bd2630b4a1c15ac2c Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Mon, 23 May 2022 11:32:38 -0700 Subject: [PATCH 270/908] [Sanitizer][Darwin] Add explanation for Apple platform macros Differential Revision: https://reviews.llvm.org/D126229 --- compiler-rt/lib/sanitizer_common/sanitizer_platform.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index edf32843189c3c..66141dc0911454 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -55,6 +55,13 @@ # define SANITIZER_SOLARIS 0 #endif +// - SANITIZER_APPLE: all Apple code +// - TARGET_OS_OSX: macOS +// - SANITIZER_IOS: devices (iOS and iOS-like) +// - SANITIZER_WATCHOS +// - SANITIZER_TVOS +// - SANITIZER_IOSSIM: simulators (iOS and iOS-like) +// - SANITIZER_DRIVERKIT #if defined(__APPLE__) # define SANITIZER_APPLE 1 // SANITIZER_MAC will be deprecated/removed in the future @@ -93,11 +100,11 @@ #else # define SANITIZER_APPLE 0 # define SANITIZER_MAC SANITIZER_APPLE +# define SANITIZER_OSX 0 # define SANITIZER_IOS 0 # define SANITIZER_WATCHOS 0 # define SANITIZER_TVOS 0 # define SANITIZER_IOSSIM 0 -# define SANITIZER_OSX 0 # define SANITIZER_DRIVERKIT 0 #endif From 319a722f6fca365c8f71f457eac60bc3909988ee Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 23 May 2022 08:09:55 -0700 Subject: [PATCH 271/908] [SLP][NFC]Improve compile time, NFC. Builds UserIgnore list only once as a SmallDenseSet without rebuilding it between the runs, iterate over gathers instead of the list of reduction ops, do some checks in the buildTree_rec only if the corresponding containers are not empty.
--- .../Transforms/Vectorize/SLPVectorizer.cpp | 113 ++++++++++++------ 1 file changed, 78 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4df7aaca736235..beb54dc7822caa 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -895,7 +895,10 @@ class BoUpSLP { /// Construct a vectorizable tree that starts at \p Roots, ignoring users for /// the purpose of scheduling and extraction in the \p UserIgnoreLst. void buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst = None); + const SmallDenseSet &UserIgnoreLst); + + /// Construct a vectorizable tree that starts at \p Roots. + void buildTree(ArrayRef Roots); /// Builds external uses of the vectorized scalars, i.e. the list of /// vectorized scalars to be extracted, their lanes and their scalar users. \p @@ -916,6 +919,7 @@ class BoUpSLP { } MinBWs.clear(); InstrElementSize.clear(); + UserIgnoreList = nullptr; } unsigned getTreeSize() const { return VectorizableTree.size(); } @@ -2073,8 +2077,8 @@ class BoUpSLP { AnalyzedReductionVals.clear(); } /// Checks if the given value is gathered in one of the nodes. - bool isGathered(Value *V) const { - return MustGather.contains(V); + bool isAnyGathered(const SmallDenseSet &Vals) const { + return any_of(MustGather, [&](Value *V) { return Vals.contains(V); }); } ~BoUpSLP(); @@ -3192,7 +3196,7 @@ class BoUpSLP { void scheduleBlock(BlockScheduling *BS); /// List of users to ignore during scheduling and that don't need extracting. - SmallPtrSet UserIgnoreList; + const SmallDenseSet *UserIgnoreList = nullptr; /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of /// sorted SmallVectors of unsigned. @@ -4119,7 +4123,7 @@ void BoUpSLP::buildExternalUses( } // Ignore users in the user ignore list. 
- if (UserIgnoreList.contains(UserInst)) + if (UserIgnoreList && UserIgnoreList->contains(UserInst)) continue; LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " @@ -4276,10 +4280,16 @@ BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const { } void BoUpSLP::buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst) { + const SmallDenseSet &UserIgnoreLst) { + deleteTree(); + UserIgnoreList = &UserIgnoreLst; + if (!allSameType(Roots)) + return; + buildTree_rec(Roots, 0, EdgeInfo()); +} + +void BoUpSLP::buildTree(ArrayRef Roots) { deleteTree(); - UserIgnoreList.clear(); - UserIgnoreList.insert(UserIgnoreLst.begin(), UserIgnoreLst.end()); if (!allSameType(Roots)) return; buildTree_rec(Roots, 0, EdgeInfo()); @@ -4595,12 +4605,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // the same block. // Don't vectorize ephemeral values. - for (Value *V : VL) { - if (EphValues.count(V)) { - LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V - << ") is ephemeral.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); - return; + if (!EphValues.empty()) { + for (Value *V : VL) { + if (EphValues.count(V)) { + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V + << ") is ephemeral.\n"); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + return; + } } } @@ -4638,13 +4650,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // The reduction nodes (stored in UserIgnoreList) also should stay scalar. 
- for (Value *V : VL) { - if (UserIgnoreList.contains(V)) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndicies); - return; + if (UserIgnoreList && !UserIgnoreList->empty()) { + for (Value *V : VL) { + if (UserIgnoreList && UserIgnoreList->contains(V)) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); + if (TryToFindDuplicates(S)) + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + return; + } } } @@ -8570,7 +8584,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); // It is legal to delete users in the ignorelist. - assert((getTreeEntry(U) || UserIgnoreList.contains(U) || + assert((getTreeEntry(U) || + (UserIgnoreList && UserIgnoreList->contains(U)) || (isa_and_nonnull(U) && isDeleted(cast(U)))) && "Deleting out-of-tree value"); @@ -10689,6 +10704,8 @@ class HorizontalReduction { /// Attempt to vectorize the tree found by matchAssociativeReduction. Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { constexpr int ReductionLimit = 4; + constexpr unsigned RegMaxNumber = 4; + constexpr unsigned RedValsMaxNumber = 128; // If there are a sufficient number of reduction values, reduce // to a nearby power-of-2. We can safely generate oversized // vectors and rely on the backend to split them to legal sizes. @@ -10726,12 +10743,12 @@ class HorizontalReduction { // The reduction root is used as the insertion point for new instructions, // so set it as externally used to prevent it from being deleted. 
ExternallyUsedValues[ReductionRoot]; - SmallVector IgnoreList; + SmallDenseSet IgnoreList; for (ReductionOpsType &RdxOps : ReductionOps) for (Value *RdxOp : RdxOps) { if (!RdxOp) continue; - IgnoreList.push_back(RdxOp); + IgnoreList.insert(RdxOp); } bool IsCmpSelMinMax = isCmpSelMinMax(cast(ReductionRoot)); @@ -10793,7 +10810,12 @@ class HorizontalReduction { if (NumReducedVals < ReductionLimit) continue; - unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); + unsigned MaxVecRegSize = V.getMaxVecRegSize(); + unsigned EltSize = V.getVectorElementSize(Candidates[0]); + unsigned MaxElts = RegMaxNumber * PowerOf2Floor(MaxVecRegSize / EltSize); + + unsigned ReduxWidth = std::min( + PowerOf2Floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts)); unsigned Start = 0; unsigned Pos = Start; // Restarts vectorization attempt with lower vector factor. @@ -10803,10 +10825,7 @@ class HorizontalReduction { &CheckForReusedReductionOpsLocal, &PrevReduxWidth, &V, &IgnoreList](bool IgnoreVL = false) { - bool IsAnyRedOpGathered = - !IgnoreVL && any_of(IgnoreList, [&V](Value *RedOp) { - return V.isGathered(RedOp); - }); + bool IsAnyRedOpGathered = !IgnoreVL && V.isAnyGathered(IgnoreList); if (!CheckForReusedReductionOpsLocal && PrevReduxWidth == ReduxWidth) { // Check if any of the reduction ops are gathered. If so, worth // trying again with less number of reduction ops. @@ -10871,13 +10890,37 @@ class HorizontalReduction { LocalExternallyUsedValues[TrackedVals[V]]; }); } - for (unsigned Cnt = 0; Cnt < NumReducedVals; ++Cnt) { - if (Cnt >= Pos && Cnt < Pos + ReduxWidth) + // Number of uses of the candidates in the vector of values. 
+ SmallDenseMap NumUses; + for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) { + Value *V = Candidates[Cnt]; + if (NumUses.count(V) > 0) + continue; + NumUses[V] = std::count(VL.begin(), VL.end(), V); + } + for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) { + Value *V = Candidates[Cnt]; + if (NumUses.count(V) > 0) + continue; + NumUses[V] = std::count(VL.begin(), VL.end(), V); + } + // Gather externally used values. + SmallPtrSet Visited; + for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) { + Value *V = Candidates[Cnt]; + if (!Visited.insert(V).second) + continue; + unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V]; + if (NumOps != ReducedValsToOps.find(V)->second.size()) + LocalExternallyUsedValues[V]; + } + for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) { + Value *V = Candidates[Cnt]; + if (!Visited.insert(V).second) continue; - unsigned NumOps = VectorizedVals.lookup(Candidates[Cnt]) + - std::count(VL.begin(), VL.end(), Candidates[Cnt]); - if (NumOps != ReducedValsToOps.find(Candidates[Cnt])->second.size()) - LocalExternallyUsedValues[Candidates[Cnt]]; + unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V]; + if (NumOps != ReducedValsToOps.find(V)->second.size()) + LocalExternallyUsedValues[V]; } V.buildExternalUses(LocalExternallyUsedValues); From 84acdd32ca912bee94dfc78b718f223880c59334 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Mon, 23 May 2022 19:07:10 +0100 Subject: [PATCH 272/908] [SVEInstrFormats] Ensure scatter instructions are named consistently. 
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 6 +++--- llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 8 ++++---- llvm/lib/Target/AArch64/SVEInstrFormats.td | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 186026b853012d..eee713ec3857c1 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1123,9 +1123,9 @@ let Predicates = [HasSVE] in { // Scatters using scaled 64-bit offsets, e.g. // st1h z0.d, p0, [x0, z0.d, lsl #1] - defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>; - defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>; - defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>; + defm SST1H_D : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>; + defm SST1W_D : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>; + defm SST1D : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>; } // End HasSVE let Predicates = [HasSVEorStreamingSVE] in { diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index fa10d056b7f7e3..c40672e4a5f7d9 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -3554,7 +3554,7 @@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>; // [421] "st1b $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D, SST1B_D_SXTW, SST1B_D_UXTW, 
SST1B_S_SXTW, SST1B_S_UXTW)>; // [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>; @@ -3566,7 +3566,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>; // [425] "st1d $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D, SST1D_SCALED, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>; // [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]"; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>; @@ -3578,7 +3578,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>; // [429] "st1h $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D, SST1H_D_SCALED, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>; // [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]"; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>; @@ -3590,7 +3590,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>; // [433] "st1w $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D, SST1W_D_SCALED, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, 
SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>; // [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]"; def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index e9e97c71505f86..48af5a7b90654b 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -6028,25 +6028,25 @@ multiclass sve_mem_sst_sv_64_scaled msz, string asm, SDPatternOperator op, RegisterOperand zprext, ValueType vt> { - def _SCALED_REAL : sve_mem_sst_sv2; + def _SCALED : sve_mem_sst_sv2; def : InstAlias(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; + (!cast(NAME # _SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt), - (!cast(NAME # _SCALED_REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>; + (!cast(NAME # _SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>; } multiclass sve_mem_sst_sv_64_unscaled msz, string asm, SDPatternOperator op, ValueType vt> { - def _REAL : sve_mem_sst_sv2; + def NAME : sve_mem_sst_sv2; def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; + (!cast(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt), - (!cast(NAME # _REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; + (!cast(NAME) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; } class sve_mem_sst_vi opc, string asm, ZPRRegOp zprty, From 86617256864ebcbda03b6ce843deeb6a41a85800 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 23 May 2022 20:27:42 +0100 Subject: [PATCH 273/908] [AArch64] Add tests with free shuffles for indexed fma variants. 
The new tests contain examples where shuffles are free, because indexed fma instructions can be used. --- .../AArch64/sink-free-instructions.ll | 183 ++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll index 94164c08a4b382..244d2c35bbacbc 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll @@ -494,3 +494,186 @@ if.else: %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s3, <8 x i8> %s4) ret <8 x i16> %vmull1 } + +declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) + +define <8 x half> @sink_shufflevector_fma_v8f16(i1 %c, <8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: @sink_shufflevector_fma_v8f16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]]) +; CHECK-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], 
<8 x half> [[B]]) +; CHECK-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]]) +; CHECK-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]]) +; CHECK-NEXT: ret <8 x half> [[R_3]] +; CHECK: if.else: +; CHECK-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]]) +; CHECK-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]]) +; CHECK-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]]) +; CHECK-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]]) +; CHECK-NEXT: ret <8 x half> [[R_7]] +; +entry: + %s0 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer + %s1 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> + %s2 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> + %s3 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> + %s4 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> + %s5 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> + %s6 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> + %s7 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> + br i1 %c, label %if.then, label %if.else + +if.then: + %r.0 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %s0, <8 x half> %b) + %r.1 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.0, <8 x half> %s1, <8 x half> %b) + %r.2 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.1, <8 x half> %s2, <8 x half> %b) + %r.3 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.2, <8 x half> %s3, <8 x half> %b) + ret <8 x half> %r.3 + +if.else: + %r.4 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %s4, 
<8 x half> %b) + %r.5 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.4, <8 x half> %s5, <8 x half> %b) + %r.6 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.5, <8 x half> %s6, <8 x half> %b) + %r.7 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.6, <8 x half> %s7, <8 x half> %b) + ret <8 x half> %r.7 +} + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) + +define <4 x float> @sink_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @sink_shufflevector_fma_v4f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S0]], <4 x float> [[B]]) +; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[S1]], <4 x float> [[B]]) +; CHECK-NEXT: ret <4 x float> [[R_1]] +; CHECK: if.else: +; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[S2]], <4 x float> [[B]]) +; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[S3]], <4 x float> [[B]]) +; CHECK-NEXT: ret <4 x float> [[R_3]] +; +entry: + %s0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> zeroinitializer + %s1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + %s2 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + %s3 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + br i1 %c, label 
%if.then, label %if.else + +if.then: + %r.0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s0, <4 x float> %b) + %r.1 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %r.0, <4 x float> %s1, <4 x float> %b) + ret <4 x float> %r.1 + +if.else: + %r.2 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s2, <4 x float> %b) + %r.3 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %r.2, <4 x float> %s3, <4 x float> %b) + ret <4 x float> %r.3 +} + +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) + +define <2 x double> @sink_shufflevector_fma_v2f64(i1 %c, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @sink_shufflevector_fma_v2f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[S0]], <2 x double> [[B]]) +; CHECK-NEXT: ret <2 x double> [[R_0]] +; CHECK: if.else: +; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[S1]], <2 x double> [[B]]) +; CHECK-NEXT: ret <2 x double> [[R_1]] +; +entry: + %s0 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> zeroinitializer + %s1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> + br i1 %c, label %if.then, label %if.else + +if.then: + %r.0 = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %s0, <2 x double> %b) + ret <2 x double> %r.0 + +if.else: + %r.1 = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %s1, <2 x double> %b) + ret <2 x double> %r.1 +} + +define <4 x float> @do_not_sink_out_of_range_shufflevector_fma_v4f32(i1 %c, 
<8 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fma_v4f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S4]], <4 x float> [[B]]) +; CHECK-NEXT: ret <4 x float> [[R]] +; CHECK: if.else: +; CHECK-NEXT: ret <4 x float> zeroinitializer +; +entry: + %s4 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + br i1 %c, label %if.then, label %if.else + +if.then: + %r = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s4, <4 x float> %b) + ret <4 x float> %r + +if.else: + ret <4 x float> zeroinitializer +} + +declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>) + +define <5 x float> @do_not_sink_shufflevector_fma_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) { +; CHECK-LABEL: @do_not_sink_shufflevector_fma_v5f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> zeroinitializer +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[S0]], <5 x float> [[B]]) +; CHECK-NEXT: [[R_1:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]]) +; CHECK-NEXT: ret <5 x float> [[R_1]] +; CHECK: 
if.else: +; CHECK-NEXT: [[R_2:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]]) +; CHECK-NEXT: [[R_3:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]]) +; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[S4]], <5 x float> [[B]]) +; CHECK-NEXT: ret <5 x float> [[R_4]] +; +entry: + %s0 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> zeroinitializer + %s1 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> + %s2 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> + %s3 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> + %s4 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> + br i1 %c, label %if.then, label %if.else + +if.then: + %r.0 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %b, <5 x float> %s0, <5 x float> %b) + %r.1 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.0, <5 x float> %s1, <5 x float> %b) + ret <5 x float> %r.1 + +if.else: + %r.2 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %b, <5 x float> %s2, <5 x float> %b) + %r.3 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.2, <5 x float> %s3, <5 x float> %b) + %r.4 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.3, <5 x float> %s4, <5 x float> %b) + ret <5 x float> %r.4 +} From e7afa23366f5f94be968f84fb9ef14fe7156135b Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Mon, 23 May 2022 12:38:02 -0700 Subject: [PATCH 274/908] [mlir] Use 'native' instead of 'llvm_has_native_target' in the mlir tests The tests actually require the target triple to match the host, rather than just having the host in the list of available targets. This change removes `llvm_has_native_target` and instead uses the `native` feature from the lit configuration. 
Reviewed By: stellaraccident Differential Revision: https://reviews.llvm.org/D126011 --- mlir/test/CAPI/execution_engine.c | 4 ++-- mlir/test/lit.cfg.py | 6 ------ mlir/test/mlir-cpu-runner/lit.local.cfg | 4 ++-- mlir/test/python/execution_engine.py | 2 +- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c index dc9ef4b6841cb0..7420af463b855d 100644 --- a/mlir/test/CAPI/execution_engine.c +++ b/mlir/test/CAPI/execution_engine.c @@ -9,8 +9,8 @@ /* RUN: mlir-capi-execution-engine-test 2>&1 | FileCheck %s */ -/* REQUIRES: llvm_has_native_target -*/ +/* REQUIRES: native + */ #include "mlir-c/Conversion.h" #include "mlir-c/ExecutionEngine.h" diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index 3a2e2815e04e08..1d009afe997296 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -110,12 +110,6 @@ # it can be explicitly opted-in by prefixing the variable name with $ config.environment['FILECHECK_OPTS'] = "-enable-var-scope --allow-unused-prefixes=false" - -if config.native_target in config.targets_to_build: - config.available_features.add('llvm_has_native_target') - - - # Add the python path for both the source and binary tree. # Note that presently, the python sources come from the source tree and the # binaries come from the build tree. This should be unified to the build tree diff --git a/mlir/test/mlir-cpu-runner/lit.local.cfg b/mlir/test/mlir-cpu-runner/lit.local.cfg index 7dc3525ae0aef3..5951990786b682 100644 --- a/mlir/test/mlir-cpu-runner/lit.local.cfg +++ b/mlir/test/mlir-cpu-runner/lit.local.cfg @@ -8,7 +8,7 @@ if sys.platform == 'win32': if 'msan' in config.available_features: config.unsupported = True -# Requires the native target to be configured in. -if 'llvm_has_native_target' not in config.available_features: +# Requires native execution. 
+if 'native' not in config.available_features: config.unsupported = True diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py index 1093846d3e7b15..5eb0dffdc84560 100644 --- a/mlir/test/python/execution_engine.py +++ b/mlir/test/python/execution_engine.py @@ -1,5 +1,5 @@ # RUN: %PYTHON %s 2>&1 | FileCheck %s -# REQUIRES: llvm_has_native_target +# REQUIRES: native import gc, sys from mlir.ir import * from mlir.passmanager import * From 6cb3fdc278b95204baf60587fe2c25ff65f4e612 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 23 May 2022 15:36:35 -0400 Subject: [PATCH 275/908] [libc++] Remove duplicate tests for callable concepts This is essentially a revert of c7ad02009. Indeed, it seems that both 96dbdd75 and c7ad02009 were committed, but c7ad02009 seems to be only an older version of 96dbdd75's tests. --- .../equivalence_relation.pass.cpp | 60 ----------------- ...ence_relation.subsumption.compile.pass.cpp | 12 ++-- .../equivalence_relation.subsumption.pass.cpp | 60 ----------------- .../concept.predicate/predicate.pass.cpp | 65 ------------------- .../predicate.subsumption.pass.cpp | 32 --------- .../concept.relation/relation.pass.cpp | 58 ----------------- .../relation.subsumption.pass.cpp | 40 ------------ .../strict_weak_order.pass.cpp | 59 ----------------- ...ct_weak_order.subsumption.compile.pass.cpp | 15 ++--- .../strict_weak_order.subsumption.pass.cpp | 39 ----------- 10 files changed, 9 insertions(+), 431 deletions(-) delete mode 100644 libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.pass.cpp delete mode 100644 libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.pass.cpp delete mode 100644 libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.pass.cpp delete mode 100644 libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.subsumption.pass.cpp delete mode 100644 
libcxx/test/std/concepts/concepts.callable/concept.relation/relation.pass.cpp delete mode 100644 libcxx/test/std/concepts/concepts.callable/concept.relation/relation.subsumption.pass.cpp delete mode 100644 libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.pass.cpp delete mode 100644 libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.pass.cpp diff --git a/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.pass.cpp deleted file mode 100644 index fd370c3277786f..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.pass.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept equivalence_relation; - -#include - -static_assert(std::equivalence_relation); -static_assert(std::equivalence_relation); -static_assert(std::equivalence_relation); - -static_assert(!std::equivalence_relation); -static_assert(!std::equivalence_relation); -static_assert(!std::equivalence_relation); - -static_assert( - !std::equivalence_relation); -static_assert(!std::equivalence_relation); - -struct S1 {}; -static_assert(std::relation); -static_assert(std::relation); - -struct S2 {}; - -struct P1 { - bool operator()(S1, S1) const; -}; -static_assert(std::equivalence_relation); - -struct P2 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; -}; -static_assert(!std::equivalence_relation); - -struct P3 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; -}; -static_assert(!std::equivalence_relation); - -struct P4 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; - bool operator()(S2, S2) const; -}; -static_assert(std::equivalence_relation); - -int main(int, char**) { return 0; } diff --git a/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.compile.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.compile.pass.cpp index 69609bf6f6652a..8d09663bfdfc3f 100644 --- a/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.compile.pass.cpp +++ b/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.compile.pass.cpp @@ -37,10 +37,8 @@ constexpr bool check_equivalence_relation_subsumes_relation() { } // clang-format on -static_assert(check_equivalence_relation_subsumes_relation()); 
-static_assert(check_equivalence_relation_subsumes_relation()); +static_assert(check_equivalence_relation_subsumes_relation()); +static_assert(check_equivalence_relation_subsumes_relation()); static_assert(check_equivalence_relation_subsumes_relation()); static_assert(check_equivalence_relation_subsumes_relation()); @@ -58,10 +56,8 @@ constexpr bool check_relation_subsumes_equivalence_relation() { } // clang-format on -static_assert(check_relation_subsumes_equivalence_relation()); -static_assert(check_relation_subsumes_equivalence_relation()); +static_assert(check_relation_subsumes_equivalence_relation()); +static_assert(check_relation_subsumes_equivalence_relation()); static_assert(check_relation_subsumes_equivalence_relation()); static_assert(check_relation_subsumes_equivalence_relation()); diff --git a/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.pass.cpp deleted file mode 100644 index cc0337ed54f87b..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.equiv/equivalence_relation.subsumption.pass.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept equivalence_relation; - -#include - -// clang-format off -template -requires std::relation -constexpr bool check_subsumption() { return false; } - -template -requires std::equivalence_relation && true -constexpr bool check_subsumption() { return false; } - -template -requires std::equivalence_relation && true -constexpr bool check_subsumption() { return true; } -// clang-format on - -static_assert(check_subsumption()); -static_assert(check_subsumption()); - -struct S1 {}; -struct S2 {}; - -struct R { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; - bool operator()(S2, S2) const; -}; -static_assert(check_subsumption()); -static_assert(check_subsumption()); - -// clang-format off -template -requires std::relation && true -constexpr bool check_reverse_subsumption() { return true; } - -template -requires std::equivalence_relation -constexpr bool check_no_subsumption() { return false; } -// clang-format on - -static_assert(check_reverse_subsumption()); -static_assert(check_reverse_subsumption()); -static_assert(check_reverse_subsumption()); -static_assert(check_reverse_subsumption()); - -int main(int, char**) { return 0; } diff --git a/libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.pass.cpp deleted file mode 100644 index ce3ac8bcbc563f..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.pass.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept predicate; - -#include -#include - -static_assert(std::predicate); -static_assert(std::predicate); -static_assert(std::predicate); - -static_assert(!std::predicate); -static_assert(!std::predicate); -static_assert(!std::predicate); - -struct S {}; - -static_assert(!std::predicate); -static_assert(!std::predicate); -static_assert(std::predicate); -static_assert(std::predicate); -static_assert(std::predicate); -static_assert(!std::predicate); -static_assert(!std::predicate); - -static_assert(!std::predicate); -static_assert(!std::predicate); -static_assert(!std::predicate); -static_assert(std::predicate); - -struct Predicate { - bool operator()(int, double, char); -}; -static_assert(std::predicate); -static_assert(std::predicate); -static_assert(!std::predicate); -static_assert(!std::predicate); - -constexpr bool check_lambda(auto) { return false; } - -constexpr bool check_lambda(std::predicate auto) { return true; } - -static_assert(check_lambda([] { return std::true_type(); })); -static_assert(check_lambda([]() -> int* { return nullptr; })); - -struct boolean { - operator bool() const noexcept; -}; -static_assert(check_lambda([] { return boolean(); })); - -struct explicit_bool { - explicit operator bool() const noexcept; -}; -static_assert(!check_lambda([] { return explicit_bool(); })); - -int main(int, char**) { return 0; } diff --git a/libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.subsumption.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.subsumption.pass.cpp deleted file mode 100644 index f3806a8cf20ef2..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.predicate/predicate.subsumption.pass.cpp +++ /dev/null @@ -1,32 +0,0 @@ 
-//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept predicate; - -#include - -constexpr bool check_subsumption(std::regular_invocable auto) { - return false; -} - -// clang-format off -template -requires std::predicate && true -constexpr bool check_subsumption(F) -{ - return true; -} -// clang-format on - -static_assert(!check_subsumption([] {})); -static_assert(check_subsumption([] { return true; })); - -int main(int, char**) { return 0; } diff --git a/libcxx/test/std/concepts/concepts.callable/concept.relation/relation.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.relation/relation.pass.cpp deleted file mode 100644 index 74252863373d94..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.relation/relation.pass.cpp +++ /dev/null @@ -1,58 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept relation; - -#include - -static_assert(std::relation); -static_assert(std::relation); -static_assert(std::relation); - -static_assert(!std::relation); -static_assert(!std::relation); -static_assert(!std::relation); -static_assert(!std::relation); -static_assert(!std::relation); - -struct S1 {}; -static_assert(std::relation); -static_assert(std::relation); - -struct S2 {}; - -struct P1 { - bool operator()(S1, S1) const; -}; -static_assert(std::relation); - -struct P2 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; -}; -static_assert(!std::relation); - -struct P3 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; -}; -static_assert(!std::relation); - -struct P4 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; - bool operator()(S2, S2) const; -}; -static_assert(std::relation); - -int main(int, char**) { return 0; } diff --git a/libcxx/test/std/concepts/concepts.callable/concept.relation/relation.subsumption.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.relation/relation.subsumption.pass.cpp deleted file mode 100644 index f8a9e4abf843e6..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.relation/relation.subsumption.pass.cpp +++ /dev/null @@ -1,40 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept relation; - -#include - -// clang-format off -template -requires std::predicate && std::predicate && - std::predicate && std::predicate -constexpr bool check_subsumption() { return false; } - -template -requires std::relation && true -constexpr bool check_subsumption() { return true; } -// clang-format on - -static_assert(check_subsumption()); - -struct S1 {}; -struct S2 {}; - -struct R { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; - bool operator()(S2, S2) const; -}; -static_assert(check_subsumption()); - -int main(int, char**) { return 0; } diff --git a/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.pass.cpp deleted file mode 100644 index d4d3c9f8fc18b8..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.pass.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept strict_weak_order; - -#include - -static_assert(std::strict_weak_order); -static_assert(std::strict_weak_order); -static_assert(std::strict_weak_order); - -static_assert(!std::strict_weak_order); -static_assert(!std::strict_weak_order); -static_assert(!std::strict_weak_order); - -static_assert(!std::strict_weak_order); -static_assert(!std::strict_weak_order); - -struct S1 {}; -static_assert(std::strict_weak_order); -static_assert(std::strict_weak_order); - -struct S2 {}; - -struct P1 { - bool operator()(S1, S1) const; -}; -static_assert(std::strict_weak_order); - -struct P2 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; -}; -static_assert(!std::strict_weak_order); - -struct P3 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; -}; -static_assert(!std::strict_weak_order); - -struct P4 { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; - bool operator()(S2, S2) const; -}; -static_assert(std::strict_weak_order); - -int main(int, char**) { return 0; } diff --git a/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.compile.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.compile.pass.cpp index f94981686387ca..c03f31445951dd 100644 --- a/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.compile.pass.cpp +++ b/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.compile.pass.cpp @@ -37,10 +37,8 @@ constexpr bool check_strict_weak_order_subsumes_relation() { } // clang-format on -static_assert( - check_strict_weak_order_subsumes_relation()); 
-static_assert(check_strict_weak_order_subsumes_relation()); +static_assert(check_strict_weak_order_subsumes_relation()); +static_assert(check_strict_weak_order_subsumes_relation()); static_assert(check_strict_weak_order_subsumes_relation()); static_assert(check_strict_weak_order_subsumes_relation()); @@ -58,10 +56,8 @@ constexpr bool check_relation_subsumes_strict_weak_order() { } // clang-format on -static_assert( - check_relation_subsumes_strict_weak_order()); -static_assert(check_relation_subsumes_strict_weak_order()); +static_assert(check_relation_subsumes_strict_weak_order()); +static_assert(check_relation_subsumes_strict_weak_order()); static_assert(check_relation_subsumes_strict_weak_order()); static_assert(check_relation_subsumes_strict_weak_order()); @@ -79,6 +75,5 @@ constexpr bool check_strict_weak_order_subsumes_itself() { } // clang-format on -static_assert( - check_strict_weak_order_subsumes_itself()); +static_assert(check_strict_weak_order_subsumes_itself()); static_assert(check_strict_weak_order_subsumes_itself()); diff --git a/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.pass.cpp deleted file mode 100644 index 68b92d62b6a9cd..00000000000000 --- a/libcxx/test/std/concepts/concepts.callable/concept.strictweakorder/strict_weak_order.subsumption.pass.cpp +++ /dev/null @@ -1,39 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// template -// concept strict_weak_order; - -#include - -// clang-format off -template -requires std::relation -constexpr bool check_subsumption() { return false; } - -template -requires std::strict_weak_order && true -constexpr bool check_subsumption() { return true; } -// clang-format on - -static_assert(check_subsumption()); - -struct S1 {}; -struct S2 {}; - -struct R { - bool operator()(S1, S1) const; - bool operator()(S1, S2) const; - bool operator()(S2, S1) const; - bool operator()(S2, S2) const; -}; -static_assert(check_subsumption()); - -int main(int, char**) { return 0; } From 217531f12b4b97dadb80c66ab97c71e57ae0adcf Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Mon, 23 May 2022 12:49:20 -0700 Subject: [PATCH 276/908] [PS5] Make driver's PIC behavior match PS4 The new test is a copy of the corresponding PS4 test, with the triple etc updated, because there's currently no good way to make one lit test "iterate" with multiple targets. 
--- .../clang/Basic/DiagnosticDriverKinds.td | 4 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 13 ++- clang/test/Driver/ps5-pic.c | 106 ++++++++++++++++++ 3 files changed, 115 insertions(+), 8 deletions(-) create mode 100644 clang/test/Driver/ps5-pic.c diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 7961b006a9a00a..3cc9565e6e8b27 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -546,8 +546,8 @@ def warn_drv_unable_to_find_directory_expected : Warning< "unable to find %0 directory, expected to be in '%1'">, InGroup, DefaultIgnore; -def warn_drv_ps4_force_pic : Warning< - "option '%0' was ignored by the PS4 toolchain, using '-fPIC'">, +def warn_drv_ps_force_pic : Warning< + "option '%0' was ignored by the %1 toolchain, using '-fPIC'">, InGroup; def warn_drv_ps_sdk_dir : Warning< diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 33f3df80a2a2e9..45add8ad94a5f2 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1286,23 +1286,24 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC); } else { PIE = PIC = false; - if (EffectiveTriple.isPS4()) { + if (EffectiveTriple.isPS()) { Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ); StringRef Model = ModelArg ? ModelArg->getValue() : ""; if (Model != "kernel") { PIC = true; - ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic) - << LastPICArg->getSpelling(); + ToolChain.getDriver().Diag(diag::warn_drv_ps_force_pic) + << LastPICArg->getSpelling() + << (EffectiveTriple.isPS4() ? "PS4" : "PS5"); } } } } } - // Introduce a Darwin and PS4-specific hack. 
If the default is PIC, but the - // PIC level would've been set to level 1, force it back to level 2 PIC + // Introduce a Darwin and PS4/PS5-specific hack. If the default is PIC, but + // the PIC level would've been set to level 1, force it back to level 2 PIC // instead. - if (PIC && (Triple.isOSDarwin() || EffectiveTriple.isPS4())) + if (PIC && (Triple.isOSDarwin() || EffectiveTriple.isPS())) IsPICLevelTwo |= ToolChain.isPICDefault(); // This kernel flags are a trump-card: they will disable PIC/PIE diff --git a/clang/test/Driver/ps5-pic.c b/clang/test/Driver/ps5-pic.c new file mode 100644 index 00000000000000..0396122accf40d --- /dev/null +++ b/clang/test/Driver/ps5-pic.c @@ -0,0 +1,106 @@ +// REQUIRES: x86-registered-target + +// Test the driver's control over the PIC behavior for PS5 compiler. +// These consist of tests of the relocation model flags and the +// pic level flags passed to CC1. +// +// CHECK-NO-PIC: "-mrelocation-model" "static" +// CHECK-NO-PIC-NOT: "-pic-level" +// CHECK-NO-PIC-NOT: "-pic-is-pie" +// +// CHECK-DYNAMIC-NO-PIC2: unsupported option '-mdynamic-no-pic' +// CHECK-DYNAMIC-NO-PIC2: "-mrelocation-model" "dynamic-no-pic" +// +// CHECK-PIC2: "-mrelocation-model" "pic" +// CHECK-PIC2: "-pic-level" "2" +// +// CHECK-PIE2: "-mrelocation-model" "pic" +// CHECK-PIE2: "-pic-is-pie" +// +// CHECK-NOPIC-IGNORED: using '-fPIC' +// CHECK-NOPIC-IGNORED: "-mrelocation-model" "pic" +// CHECK-NOPIC-IGNORED: "-pic-level" "2" +// +// CHECK-DIAG-PIC: option '-fno-PIC' was ignored by the PS5 toolchain, using '-fPIC' +// CHECK-DIAG-PIE: option '-fno-PIE' was ignored by the PS5 toolchain, using '-fPIC' +// CHECK-DIAG-pic: option '-fno-pic' was ignored by the PS5 toolchain, using '-fPIC' +// CHECK-DIAG-pie: option '-fno-pie' was ignored by the PS5 toolchain, using '-fPIC' +// +// CHECK-STATIC-ERR: unsupported option '-static' for target 'PS5' + +// RUN: %clang -c %s -target x86_64-sie-ps5 -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: 
%clang -c %s -target x86_64-sie-ps5 -fpic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: %clang -c %s -target x86_64-sie-ps5 -fPIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpie -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIE2 +// RUN: %clang -c %s -target x86_64-sie-ps5 -fPIE -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIE2 +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpic -fno-pic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fPIC -fno-PIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpic -fno-PIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fPIC -fno-pic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpie -fno-pie -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fPIE -fno-PIE -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpie -fno-PIE -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fPIE -fno-pie -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpie -fno-pic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpic -fno-pie -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpic -fPIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: %clang -c %s -target x86_64-sie-ps5 -fPIC -fpic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: 
%clang -c %s -target x86_64-sie-ps5 -fpic -fPIE -fpie -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIE2 +// RUN: %clang -c %s -target x86_64-sie-ps5 -fpie -fPIC -fPIE -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIE2 +// +// Defaults change for PS5. +// RUN: %clang -c %s -target x86_64-sie-ps5 -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: %clang -c %s -target x86_64-sie-ps5 -fno-pic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// RUN: %clang -c %s -target x86_64-sie-ps5 -fno-PIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NOPIC-IGNORED +// +// Disregard any of the PIC-specific flags if we have a trump-card flag. +// RUN: %clang -c %s -target x86_64-sie-ps5 -mkernel -fPIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-PIC +// RUN: %clang -c %s -target x86_64-sie-ps5 -mdynamic-no-pic -fPIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-DYNAMIC-NO-PIC2 +// +// -static not supported at all. +// RUN: %clang -c %s -target x86_64-sie-ps5 -static -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-STATIC-ERR +// +// -fno-PIC etc. is obeyed if -mcmodel=kernel is also present. +// RUN: %clang -c %s -target x86_64-sie-ps5 -mcmodel=kernel -fno-PIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-PIC +// RUN: %clang -c %s -target x86_64-sie-ps5 -mcmodel=kernel -fno-PIE -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-PIC +// RUN: %clang -c %s -target x86_64-sie-ps5 -mcmodel=kernel -fno-pic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-PIC +// RUN: %clang -c %s -target x86_64-sie-ps5 -mcmodel=kernel -fno-pie -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-PIC +// +// Verify that we reflect the option the user specified, when we ignore it. 
+// RUN: %clang -c %s -target x86_64-sie-ps5 -fno-PIC -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-DIAG-PIC +// RUN: %clang -c %s -target x86_64-sie-ps5 -fno-PIE -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-DIAG-PIE +// RUN: %clang -c %s -target x86_64-sie-ps5 -fno-pic -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-DIAG-pic +// RUN: %clang -c %s -target x86_64-sie-ps5 -fno-pie -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-DIAG-pie From 12bae5f3e2d2f1a95f33cb22149d5d5642389eae Mon Sep 17 00:00:00 2001 From: Qunyan Mangus Date: Mon, 23 May 2022 12:48:06 -0700 Subject: [PATCH 277/908] Remove duplicate fields in RAGreedy RAGreedy has two fields of type RegisterClassInfo: one called RCI, and another called RegClassInfo inherited from its base class. RCI is initialized without first calling freezeReservedRegs, whereas RegClassInfo is initialized after that call. Therefore, if reserved register information changes between the last call to freezeReservedRegs and RAGreedy, the change is not picked up by RCI. Instead of keeping both fields in RAGreedy, remove RCI and use RegClassInfo. Also removed is the TRI field, which is already present in the base class. Reviewed By: MatzeB Differential Revision: https://reviews.llvm.org/D125926 --- llvm/lib/CodeGen/RegAllocGreedy.cpp | 12 ++++++------ llvm/lib/CodeGen/RegAllocGreedy.h | 2 -- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 0bcd2c567a50d0..d2e26197e94a4d 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -310,8 +310,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { // prevents excessive spilling in pathological cases. 
bool ReverseLocal = TRI->reverseLocalAssignment(); const TargetRegisterClass &RC = *MRI->getRegClass(Reg); - bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC)); + bool ForceGlobal = + !ReverseLocal && (Size / SlotIndex::InstrDist) > + (2 * RegClassInfo.getNumAllocatableRegs(&RC)); unsigned GlobalBit = 0; if (Stage == RS_Assign && !ForceGlobal && !LI->empty() && @@ -1444,7 +1445,8 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC, *MF); - unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + unsigned SuperRCNumAllocatableRegs = + RegClassInfo.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. // Otherwise, splitting just inserts uncoalescable copies that do not help @@ -1454,7 +1456,7 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, if (MI->isFullCopy() || SuperRCNumAllocatableRegs == getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, - TII, TRI, RCI)) { + TII, TRI, RegClassInfo)) { LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); continue; } @@ -2681,9 +2683,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; - TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - RCI.runOnMachineFunction(mf); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index ad810c25ec6e92..1e30e2e2308d7b 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -164,8 +164,6 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass, // Shortcuts to some useful interface. 
const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RCI; // analyses SlotIndexes *Indexes; From 97c3ef5c8a289ca54ca0c61c75fd00adab92b7c0 Mon Sep 17 00:00:00 2001 From: Sotiris Apostolakis Date: Mon, 23 May 2022 10:47:32 -0400 Subject: [PATCH 278/908] [SelectOpti][2/5] Select-to-branch base transformation This patch implements the actual transformation of selects to branches. It includes only the base transformation without any sinking. Depends on D120230 Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D122259 --- llvm/lib/CodeGen/SelectOptimize.cpp | 234 ++++++++++++++++++++++- llvm/test/CodeGen/X86/select-optimize.ll | 71 +++++++ 2 files changed, 301 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/X86/select-optimize.ll diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index a0260128b36b41..337d825b1dcd54 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,16 +10,40 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "select-optimize" + +STATISTIC(NumSelectsConverted, "Number of selects converted"); + namespace { class SelectOptimize : public FunctionPass { + const TargetMachine *TM = nullptr; + const 
TargetSubtargetInfo *TSI; + const TargetLowering *TLI = nullptr; + const LoopInfo *LI; + std::unique_ptr BFI; + std::unique_ptr BPI; + public: static char ID; SelectOptimize() : FunctionPass(ID) { @@ -28,16 +52,218 @@ class SelectOptimize : public FunctionPass { bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override {} + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + +private: + // Select groups consist of consecutive select instructions with the same + // condition. + using SelectGroup = SmallVector; + using SelectGroups = SmallVector; + + bool optimizeSelects(Function &F); + void convertProfitableSIGroups(SelectGroups &ProfSIGroups); + void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); + bool isSelectKindSupported(SelectInst *SI); }; } // namespace char SelectOptimize::ID = 0; -INITIALIZE_PASS(SelectOptimize, "select-optimize", "Optimize selects", false, - false) + +INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } bool SelectOptimize::runOnFunction(Function &F) { - llvm_unreachable("Unimplemented"); + TM = &getAnalysis().getTM(); + TSI = TM->getSubtargetImpl(F); + TLI = TSI->getTargetLowering(); + LI = &getAnalysis().getLoopInfo(); + BPI.reset(new BranchProbabilityInfo(F, *LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + + return optimizeSelects(F); +} + +bool SelectOptimize::optimizeSelects(Function &F) { + // Collect all the select groups. + SelectGroups SIGroups; + for (BasicBlock &BB : F) { + collectSelectGroups(BB, SIGroups); + } + + // Determine for which select groups it is profitable converting to branches. 
+ SelectGroups ProfSIGroups; + // For now assume that all select groups can be profitably converted to + // branches. + for (SelectGroup &ASI : SIGroups) { + ProfSIGroups.push_back(ASI); + } + + // Convert to branches the select groups that were deemed + // profitable-to-convert. + convertProfitableSIGroups(ProfSIGroups); + + // Code modified if at least one select group was converted. + return !ProfSIGroups.empty(); +} + +/// If \p isTrue is true, return the true value of \p SI, otherwise return +/// false value of \p SI. If the true/false value of \p SI is defined by any +/// select instructions in \p Selects, look through the defining select +/// instruction until the true/false value is not defined in \p Selects. +static Value * +getTrueOrFalseValue(SelectInst *SI, bool isTrue, + const SmallPtrSet &Selects) { + Value *V = nullptr; + for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); + DefSI = dyn_cast(V)) { + assert(DefSI->getCondition() == SI->getCondition() && + "The condition of DefSI does not match with SI"); + V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); + } + assert(V && "Failed to get select true/false value"); + return V; +} + +void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { + for (SelectGroup &ASI : ProfSIGroups) { + // TODO: eliminate the redundancy of logic transforming selects to branches + // by removing CodeGenPrepare::optimizeSelectInst and optimizing here + // selects for all cases (with and without profile information). 
+ + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // %cmp.frozen = freeze %cmp + // br i1 %cmp.frozen, label %select.end, label %select.false + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %start ], [ %d, %select.false ] + // + // %cmp should be frozen, otherwise it may introduce undefined behavior. + + // We split the block containing the select(s) into two blocks. + SelectInst *SI = ASI.front(); + SelectInst *LastSI = ASI.back(); + BasicBlock *StartBlock = SI->getParent(); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); + // Delete the unconditional branch that was just created by the split. + StartBlock->getTerminator()->eraseFromParent(); + + // Move any debug/pseudo instructions that were in-between the select + // group to the newly-created end block. + SmallVector DebugPseudoINS; + auto DIt = SI->getIterator(); + while (&*DIt != LastSI) { + if (DIt->isDebugOrPseudoInst()) + DebugPseudoINS.push_back(&*DIt); + DIt++; + } + for (auto DI : DebugPseudoINS) { + DI->moveBefore(&*EndBlock->getFirstInsertionPt()); + } + + // These are the new basic blocks for the conditional branch. + // For now, no instruction sinking to the true/false blocks. + // Thus both True and False blocks will be empty. + BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; + + // Use the 'false' side for a new input value to the PHI. 
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); + + // For the 'true' side the path originates from the start block from the + // point of view of the new PHI. + TrueBlock = StartBlock; + + // Insert the real conditional branch based on the original condition. + BasicBlock *TT, *FT; + TT = EndBlock; + FT = FalseBlock; + IRBuilder<> IB(SI); + auto *CondFr = + IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); + IB.CreateCondBr(CondFr, TT, FT, SI); + + SmallPtrSet INS; + INS.insert(ASI.begin(), ASI.end()); + // Use reverse iterator because later select may use the value of the + // earlier select, and we need to propagate value through earlier select + // to get the PHI operand. + for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { + SelectInst *SI = *It; + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); + PN->takeName(SI); + PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); + PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); + PN->setDebugLoc(SI->getDebugLoc()); + + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + INS.erase(SI); + ++NumSelectsConverted; + } + } +} + +void SelectOptimize::collectSelectGroups(BasicBlock &BB, + SelectGroups &SIGroups) { + BasicBlock::iterator BBIt = BB.begin(); + while (BBIt != BB.end()) { + Instruction *I = &*BBIt++; + if (SelectInst *SI = dyn_cast(I)) { + SelectGroup SIGroup; + SIGroup.push_back(SI); + while (BBIt != BB.end()) { + Instruction *NI = &*BBIt; + SelectInst *NSI = dyn_cast(NI); + if (NSI && SI->getCondition() == NSI->getCondition()) { + SIGroup.push_back(NSI); + } else if (!NI->isDebugOrPseudoInst()) { + // Debug/pseudo instructions should be skipped and not prevent the + // formation of a select group.
+ break; + } + ++BBIt; + } + + // If the select type is not supported, no point optimizing it. + // Instruction selection will take care of it. + if (!isSelectKindSupported(SI)) + continue; + + SIGroups.push_back(SIGroup); + } + } +} + +bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); + if (VectorCond) + return false; + TargetLowering::SelectSupportKind SelectKind; + if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + return TLI->isSelectSupported(SelectKind); } diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll new file mode 100644 index 00000000000000..300fb4de312db0 --- /dev/null +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s + +; Single select converted to branch +define i32 @single_select(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @single_select( +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 + ret i32 %sel +} + +; Select group converted to branch +define i32 @select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { +; CHECK-LABEL: @select_group( +; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; 
CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0 + call void @llvm.dbg.value(metadata i32 %sel1, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !3) + %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !0 + %add = add i32 %sel1, %sel2 + ret i32 %add +} + +; Select group with intra-group dependence converted to branch +define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { +; CHECK-LABEL: @select_group_intra_group( +; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[B]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret i32 [[SUB]] +; + %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0 + %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !0 + %sub = sub i32 %sel1, %sel2 + ret i32 %sub +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.module.flags = !{!6, !7} + +!0 = !{!"branch_weights", i32 1, i32 100} +!1 = !DIFile(filename: "test.c", directory: "/test") +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !5, splitDebugInlining: false, 
nameTableKind: None) +!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !2) +!4 = !DILocalVariable(name: "x", scope: !3) +!5 = !{} +!6 = !{i32 2, !"Dwarf Version", i32 4} +!7 = !{i32 1, !"Debug Info Version", i32 3} From cead4eceb01b935fae07bf4a7e91911b344d2fec Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Mon, 23 May 2022 13:11:01 -0700 Subject: [PATCH 279/908] [symbolizer] Parse DW_TAG_variable DIs to show line info for globals Currently, llvm-symbolizer doesn't like to parse .debug_info in order to show the line info for global variables. addr2line does this. In the future, I'm looking to migrate AddressSanitizer off of internal metadata over to using debuginfo, and this is predicated on being able to get the line info for global variables. This patch adds the requisite support for getting the line info from the .debug_info section for symbolizing global variables. This only happens when you ask for a global variable to be symbolized as data. 
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D123538 --- llvm/include/llvm/DebugInfo/DIContext.h | 4 + .../llvm/DebugInfo/DWARF/DWARFContext.h | 2 + llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 7 + llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 14 + llvm/include/llvm/DebugInfo/PDB/PDBContext.h | 2 + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 97 ++-- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 60 +++ llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 97 ++++ llvm/lib/DebugInfo/PDB/PDBContext.cpp | 7 + llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 4 + .../Symbolize/SymbolizableObjectFile.cpp | 8 + .../Symbolize/ELF/data-command-symtab.yaml | 3 + .../tools/llvm-symbolizer/data-location.yaml | 450 ++++++++++++++++++ llvm/test/tools/llvm-symbolizer/data.s | 3 + 14 files changed, 697 insertions(+), 61 deletions(-) create mode 100644 llvm/test/tools/llvm-symbolizer/data-location.yaml diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h index a9f98588cf2ddf..9b278b69607383 100644 --- a/llvm/include/llvm/DebugInfo/DIContext.h +++ b/llvm/include/llvm/DebugInfo/DIContext.h @@ -114,6 +114,8 @@ struct DIGlobal { std::string Name; uint64_t Start = 0; uint64_t Size = 0; + std::string DeclFile; + uint64_t DeclLine = 0; DIGlobal() : Name(DILineInfo::BadString) {} }; @@ -239,6 +241,8 @@ class DIContext { virtual DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; + virtual DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) = 0; virtual DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 9749cb9cd059af..52ae538c55bb30 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ 
b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -360,6 +360,8 @@ class DWARFContext : public DIContext { DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) override; DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 4a4d105a2b23d3..149c5ef4e49392 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -280,6 +280,13 @@ class DWARFDie { /// \returns an iterator range for the attributes of the current DIE. iterator_range attributes() const; + /// Gets the type size (in bytes) for this DIE. + /// + /// \param PointerSize the pointer size of the containing CU. + /// \returns if this is a type DIE, or this DIE contains a DW_AT_type, returns + /// the size of the type. + Optional getTypeSize(uint64_t PointerSize); + class iterator; iterator begin() const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 0341344bc7b8ba..9188865b4d771c 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -9,6 +9,7 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -27,6 +28,7 @@ #include #include #include +#include #include #include @@ -240,6 +242,11 @@ class DWARFUnit { /// std::map::upper_bound for address range lookup. std::map> AddrDieMap; + /// Map from the location (interpreted DW_AT_location) of a DW_TAG_variable, + /// to the end address and the corresponding DIE. 
+ std::map> VariableDieMap; + DenseSet RootsParsedForVariables; + using die_iterator_range = iterator_range::iterator>; @@ -322,6 +329,9 @@ class DWARFUnit { /// Recursively update address to Die map. void updateAddressDieMap(DWARFDie Die); + /// Recursively update address to variable Die map. + void updateVariableDieMap(DWARFDie Die); + void setRangesSection(const DWARFSection *RS, uint64_t Base) { RangeSection = RS; RangeSectionBase = Base; @@ -436,6 +446,10 @@ class DWARFUnit { /// cleared. DWARFDie getSubroutineForAddress(uint64_t Address); + /// Returns variable DIE for the address provided. The pointer is alive as + /// long as parsed compile unit DIEs are not cleared. + DWARFDie getVariableForAddress(uint64_t Address); + /// getInlinedChainForAddress - fetches inlined chain for a given address. /// Returns empty chain if there is no subprogram containing address. The /// chain is valid as long as parsed compile unit DIEs are not cleared. diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h index 7b6793f0a639a0..3163c0a1dae034 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h @@ -45,6 +45,8 @@ namespace pdb { DILineInfo getLineInfoForAddress( object::SectionedAddress Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + DILineInfo + getLineInfoForDataAddress(object::SectionedAddress Address) override; DILineInfoTable getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 6c652dd74c8040..4422141ff3c978 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1037,7 +1037,25 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { // First, get the offset of the compile 
unit. uint64_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. - return getCompileUnitForOffset(CUOffset); + if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) + return OffsetCU; + + // Global variables are often not found by the above search, for one of two + // reasons: + // 1. .debug_aranges may not include global variables. On clang, it seems we + // put the globals in the aranges, but this isn't true for gcc. + // 2. Even if the global variable is in a .debug_arange, global variables + // may not be captured in the [start, end) addresses described by the + // parent compile unit. + // + // So, we walk the CU's and their child DI's manually, looking for the + // specific global variable. + for (std::unique_ptr &CU : compile_units()) { + if (DWARFDie Die = CU->getVariableForAddress(Address)) { + return static_cast(CU.get()); + } + } + return nullptr; } DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { @@ -1107,64 +1125,6 @@ static bool getFunctionNameAndStartLineForAddress( return FoundResult; } -static Optional getTypeSize(DWARFDie Type, uint64_t PointerSize) { - if (auto SizeAttr = Type.find(DW_AT_byte_size)) - if (Optional Size = SizeAttr->getAsUnsignedConstant()) - return Size; - - switch (Type.getTag()) { - case DW_TAG_pointer_type: - case DW_TAG_reference_type: - case DW_TAG_rvalue_reference_type: - return PointerSize; - case DW_TAG_ptr_to_member_type: { - if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) - if (BaseType.getTag() == DW_TAG_subroutine_type) - return 2 * PointerSize; - return PointerSize; - } - case DW_TAG_const_type: - case DW_TAG_immutable_type: - case DW_TAG_volatile_type: - case DW_TAG_restrict_type: - case DW_TAG_typedef: { - if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) - return getTypeSize(BaseType, PointerSize); - break; - } - case DW_TAG_array_type: { - DWARFDie BaseType = 
Type.getAttributeValueAsReferencedDie(DW_AT_type); - if (!BaseType) - return Optional(); - Optional BaseSize = getTypeSize(BaseType, PointerSize); - if (!BaseSize) - return Optional(); - uint64_t Size = *BaseSize; - for (DWARFDie Child : Type) { - if (Child.getTag() != DW_TAG_subrange_type) - continue; - - if (auto ElemCountAttr = Child.find(DW_AT_count)) - if (Optional ElemCount = - ElemCountAttr->getAsUnsignedConstant()) - Size *= *ElemCount; - if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) - if (Optional UpperBound = - UpperBoundAttr->getAsSignedConstant()) { - int64_t LowerBound = 0; - if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) - LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0); - Size *= *UpperBound - LowerBound + 1; - } - } - return Size; - } - default: - break; - } - return Optional(); -} - static Optional getExpressionFrameOffset(ArrayRef Expr, Optional FrameBaseReg) { @@ -1225,7 +1185,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, if (Optional Name = dwarf::toString(*NameAttr)) Local.Name = *Name; if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type)) - Local.Size = getTypeSize(Type, getCUAddrSize()); + Local.Size = Type.getTypeSize(getCUAddrSize()); if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) { if (const auto *LT = CU->getContext().getLineTableForUnit(CU)) LT->getFileNameByIndex( @@ -1266,7 +1226,6 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, DILineInfoSpecifier Spec) { DILineInfo Result; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); if (!CU) return Result; @@ -1281,6 +1240,22 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, Spec.FLIKind, Result); } } + + return Result; +} + +DILineInfo +DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + DILineInfo Result; + 
DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; + + if (DWARFDie Die = CU->getVariableForAddress(Address.Address)) { + Result.FileName = Die.getDeclFile(FileLineInfoKind::AbsoluteFilePath); + Result.Line = Die.getDeclLine(); + } + return Result; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 265096c7e3fba8..b375fc5ecbe079 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -1099,6 +1099,66 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } +Optional DWARFDie::getTypeSize(uint64_t PointerSize) { + if (auto SizeAttr = find(DW_AT_byte_size)) + if (Optional Size = SizeAttr->getAsUnsignedConstant()) + return Size; + + switch (getTag()) { + case DW_TAG_pointer_type: + case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: + return PointerSize; + case DW_TAG_ptr_to_member_type: { + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + if (BaseType.getTag() == DW_TAG_subroutine_type) + return 2 * PointerSize; + return PointerSize; + } + case DW_TAG_const_type: + case DW_TAG_immutable_type: + case DW_TAG_volatile_type: + case DW_TAG_restrict_type: + case DW_TAG_typedef: { + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + return BaseType.getTypeSize(PointerSize); + break; + } + case DW_TAG_array_type: { + DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type); + if (!BaseType) + return None; + Optional BaseSize = BaseType.getTypeSize(PointerSize); + if (!BaseSize) + return None; + uint64_t Size = *BaseSize; + for (DWARFDie Child : *this) { + if (Child.getTag() != DW_TAG_subrange_type) + continue; + + if (auto ElemCountAttr = Child.find(DW_AT_count)) + if (Optional ElemCount = + ElemCountAttr->getAsUnsignedConstant()) + Size *= *ElemCount; + if (auto UpperBoundAttr = 
Child.find(DW_AT_upper_bound)) + if (Optional UpperBound = + UpperBoundAttr->getAsSignedConstant()) { + int64_t LowerBound = 0; + if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) + LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0); + Size *= *UpperBound - LowerBound + 1; + } + } + return Size; + } + default: + if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) + return BaseType.getTypeSize(PointerSize); + break; + } + return None; +} + /// Helper to dump a DIE with all of its parents, but no siblings. static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent, DIDumpOptions DumpOpts, unsigned Depth = 0) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index 3f554e3feeb6bc..3879cf1aabab1f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -18,11 +19,13 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFListTable.h" #include "llvm/DebugInfo/DWARF/DWARFObject.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Path.h" @@ -746,6 +749,100 @@ DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { return R->second.second; } +void DWARFUnit::updateVariableDieMap(DWARFDie Die) { + for 
(DWARFDie Child : Die) { + if (isType(Child.getTag())) + continue; + updateVariableDieMap(Child); + } + + if (Die.getTag() != DW_TAG_variable) + return; + + Expected Locations = + Die.getLocations(DW_AT_location); + if (!Locations) { + // Missing DW_AT_location is fine here. + consumeError(Locations.takeError()); + return; + } + + uint64_t Address = UINT64_MAX; + + for (const DWARFLocationExpression &Location : *Locations) { + uint8_t AddressSize = getAddressByteSize(); + DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize); + DWARFExpression Expr(Data, AddressSize); + auto It = Expr.begin(); + if (It == Expr.end()) + continue; + + // Match exactly the main sequence used to describe global variables: + // `DW_OP_addr[x] [+ DW_OP_plus_uconst]`. Currently, this is the sequence + // that LLVM produces for DILocalVariables and DIGlobalVariables. If, in + // future, the DWARF producer (`DwarfCompileUnit::addLocationAttribute()` is + // a good starting point) is extended to use further expressions, this code + // needs to be updated. + uint64_t LocationAddr; + if (It->getCode() == dwarf::DW_OP_addr) { + LocationAddr = It->getRawOperand(0); + } else if (It->getCode() == dwarf::DW_OP_addrx) { + uint64_t DebugAddrOffset = It->getRawOperand(0); + if (auto Pointer = getAddrOffsetSectionItem(DebugAddrOffset)) { + LocationAddr = Pointer->Address; + } + } else { + continue; + } + + // Read the optional 2nd operand, a DW_OP_plus_uconst. + if (++It != Expr.end()) { + if (It->getCode() != dwarf::DW_OP_plus_uconst) + continue; + + LocationAddr += It->getRawOperand(0); + + // Probe for a 3rd operand, if it exists, bail. + if (++It != Expr.end()) + continue; + } + + Address = LocationAddr; + break; + } + + // Get the size of the global variable. If all else fails (i.e. the global has + // no type), then we use a size of one to still allow symbolization of the + // exact address. 
+ uint64_t GVSize = 1; + if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type)) + if (Optional Size = Die.getTypeSize(getAddressByteSize())) + GVSize = *Size; + + if (Address != UINT64_MAX) + VariableDieMap[Address] = {Address + GVSize, Die}; +} + +DWARFDie DWARFUnit::getVariableForAddress(uint64_t Address) { + extractDIEsIfNeeded(false); + + auto RootDie = getUnitDIE(); + + auto RootLookup = RootsParsedForVariables.insert(RootDie.getOffset()); + if (RootLookup.second) + updateVariableDieMap(RootDie); + + auto R = VariableDieMap.upper_bound(Address); + if (R == VariableDieMap.begin()) + return DWARFDie(); + + // upper_bound's previous item contains Address. + --R; + if (Address >= R->second.first) + return DWARFDie(); + return R->second.second; +} + void DWARFUnit::getInlinedChainForAddress(uint64_t Address, SmallVectorImpl &InlinedChain) { diff --git a/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/llvm/lib/DebugInfo/PDB/PDBContext.cpp index 0444093d7622fb..e600fb7385f133 100644 --- a/llvm/lib/DebugInfo/PDB/PDBContext.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBContext.cpp @@ -64,6 +64,13 @@ DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address, return Result; } +DILineInfo +PDBContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + // Unimplemented. S_GDATA and S_LDATA in CodeView (used to describe global + // variables) aren't capable of carrying line information. 
+ return DILineInfo(); +} + DILineInfoTable PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address, uint64_t Size, diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index 496c8149782e8f..877380213f215e 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -206,6 +206,10 @@ void PlainPrinterBase::print(const Request &Request, const DIGlobal &Global) { Name = DILineInfo::Addr2LineBadString; OS << Name << "\n"; OS << Global.Start << " " << Global.Size << "\n"; + if (Global.DeclFile.empty()) + OS << "??:?\n"; + else + OS << Global.DeclFile << ":" << Global.DeclLine << "\n"; printFooter(); } diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index fcff531895a2af..d8ee9264b64f41 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -327,6 +327,14 @@ DIGlobal SymbolizableObjectFile::symbolizeData( std::string FileName; getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, FileName); + Res.DeclFile = FileName; + + // Try and get a better filename:lineno pair from the debuginfo, if present. + DILineInfo DL = DebugInfoContext->getLineInfoForDataAddress(ModuleOffset); + if (DL.Line != 0) { + Res.DeclFile = DL.FileName; + Res.DeclLine = DL.Line; + } return Res; } diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml index 984e444b2fda9f..83af3111c5dd63 100644 --- a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml @@ -7,12 +7,15 @@ # CHECK: func # CHECK-NEXT: 4096 1 +# CHECK-NEXT: ??:? # CHECK-EMPTY: # CHECK-NEXT: data # CHECK-NEXT: 8192 2 +# CHECK-NEXT: ??:? 
# CHECK-EMPTY: # CHECK-NEXT: notype # CHECK-NEXT: 8194 3 +# CHECK-NEXT: ??:? # CHECK-EMPTY: --- !ELF diff --git a/llvm/test/tools/llvm-symbolizer/data-location.yaml b/llvm/test/tools/llvm-symbolizer/data-location.yaml new file mode 100644 index 00000000000000..54f7d9be44a1ab --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/data-location.yaml @@ -0,0 +1,450 @@ +## Show that when "DATA" is used with an address, it forces the found location +## to be symbolized as data, including the source information. + +# RUN: yaml2obj %s -o %t.so + +# RUN: llvm-symbolizer 'DATA 0x304d0' 'DATA 0x304d1' 'DATA 0x304d3' \ +# RUN: 'DATA 0x304c0' 'DATA 0x304c8' 'DATA 0x304d4' 'DATA 0x304dc' \ +# RUN: 'DATA 0x304d8' --obj=%t.so | FileCheck %s + +# CHECK: bss_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:1 +# CHECK-EMPTY: + +## Check that lookups in the middle of the symbol are also resolved correctly. +# CHECK: bss_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:1 +# CHECK-EMPTY: +# CHECK: bss_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:1 +# CHECK-EMPTY: + +## Now, the remainder of the symbols. +# CHECK-NEXT: data_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:2 +# CHECK-EMPTY: +# CHECK-NEXT: str +# CHECK-NEXT: {{[0-9]+}} 8 +# CHECK-NEXT: /tmp/file.cpp:4 +# CHECK-EMPTY: +# CHECK-NEXT: f()::function_global +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:8 +# CHECK-EMPTY: + +## Including the one that includes an addend. +# CHECK-NEXT: alpha +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:12 +# CHECK-EMPTY: +# CHECK-NEXT: beta +# CHECK-NEXT: {{[0-9]+}} 4 +# CHECK-NEXT: /tmp/file.cpp:13 +# CHECK-EMPTY: + +## Ensure there's still a global that's offset-based. 
+# RUN: llvm-dwarfdump --debug-info %t.so | FileCheck %s --check-prefix=OFFSET + +# OFFSET: DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4) + +################################################################################ +## File below was generated using: +## +## $ clang++ -g -O3 /tmp/file.cpp -shared -fuse-ld=lld -nostdlib \ +## -target aarch64-linux-gnuabi -mllvm -global-merge-ignore-single-use \ +## -o /tmp/file.so +## +## With /tmp/file.cpp as: +## 1: int bss_global; +## 2: int data_global = 2; +## 3: +## 4: const char* str = +## 5: "12345678"; +## 6: +## 7: int* f() { +## 8: static int function_global; +## 9: return &function_global; +## 10: } +## 11: +## 12: static int alpha; +## 13: static int beta; +## 14: int *f(bool b) { return beta ? &alpha : &beta; } +## 15: +## +## ... then, one can get the offsets using `nm`, like: +## $ nm out.so | grep bss_global +## 00000000000038fc B bss_global +## +## Note the use of the aarch64 target (with -nostdlib in order to allow linkage +## without libraries for cross-compilation) as well as -O3 and +## -global-merge-ignore-single-use. This is a specific combination that makes +## the compiler emit the `alpha` global variable with a more complex +## DW_AT_location than just a DW_OP_addr/DW_OP_addrx. In this instance, it +## outputs a `DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4)`. +## +## Ideally, this would be tested by invoking clang directly on a C source file, +## but unfortunately there's no way to do that for LLVM tests. The other option +## is to compile IR to an objfile, but llvm-symbolizer doesn't understand that +## two symbols can have the same address in different sections. In the code +## above, for example, we'd have bss_global at .bss+0x0, and data_global at +## .data+0x0, and so the symbolizer would only print one of them. Hence, we have +## the ugly dso-to-yaml blob below. +## +## For now, constant strings don't have a debuginfo entry, and so can't be +## symbolized correctly. 
In future (if D123534 gets merged), this can be updated +## to include a check that llvm-symbolizer can also symbolize constant strings, +## like `str` above (basically that &"12345678" should be symbolizable) +## to the specific line. Then, you can find the address of the constant string +## from the relocation: +## +## $ nm out.so | grep str +## 00000000000038c0 D str +## $ llvm-objdump -R out.so | grep 38c0 +## 00000000000038c0 R_X86_64_RELATIVE *ABS*+0x4f8 # <-- 0x4f8 +################################################################################ + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_AARCH64 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ PF_R ] + VAddr: 0x40 + Align: 0x8 + - Type: PT_LOAD + Flags: [ PF_R ] + FirstSec: .dynsym + LastSec: .eh_frame + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + FirstSec: .text + LastSec: .text + VAddr: 0x103E4 + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x20410 + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .data + LastSec: .bss + VAddr: 0x304C0 + Align: 0x10000 + - Type: PT_DYNAMIC + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x20410 + Align: 0x8 + - Type: PT_GNU_RELRO + Flags: [ PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x20410 + - Type: PT_GNU_EH_FRAME + Flags: [ PF_R ] + FirstSec: .eh_frame_hdr + LastSec: .eh_frame_hdr + VAddr: 0x37C + Align: 0x4 + - Type: PT_GNU_STACK + Flags: [ PF_W, PF_R ] + Align: 0x0 +Sections: + - Name: .dynsym + Type: SHT_DYNSYM + Flags: [ SHF_ALLOC ] + Address: 0x238 + Link: .dynstr + AddressAlign: 0x8 + - Name: .gnu.hash + Type: SHT_GNU_HASH + Flags: [ SHF_ALLOC ] + Address: 0x2C8 + Link: .dynsym + AddressAlign: 0x8 + Header: + SymNdx: 0x1 + Shift2: 0x1A + BloomFilter: [ 0x400188002180000C ] + HashBuckets: [ 0x1 ] + HashValues: [ 0xEE8502A, 0xEE85016, 0xC033991C, 0x61F7372E, 0xB88AB7F ] + - Name: 
.hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Address: 0x2F8 + Link: .dynsym + AddressAlign: 0x4 + Bucket: [ 5, 0, 4, 0, 3, 0 ] + Chain: [ 0, 0, 0, 1, 2, 0 ] + - Name: .dynstr + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] + Address: 0x330 + AddressAlign: 0x1 + - Name: .rela.dyn + Type: SHT_RELA + Flags: [ SHF_ALLOC ] + Address: 0x358 + Link: .dynsym + AddressAlign: 0x8 + Relocations: + - Offset: 0x304C8 + Type: R_AARCH64_RELATIVE + Addend: 880 + - Name: .rodata + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ] + Address: 0x370 + AddressAlign: 0x1 + EntSize: 0x1 + Content: '313233343536373800' + - Name: .eh_frame_hdr + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x37C + AddressAlign: 0x4 + Content: 011B033B18000000020000006800010034000000740001004C000000 + - Name: .eh_frame + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x398 + AddressAlign: 0x8 + Content: 1400000000000000017A5200017C1E011B0C1F0000000000140000001C0000002C0001000C00000000000000000000001400000034000000200001001C000000000000000000000000000000 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x103E4 + AddressAlign: 0x4 + Content: 0001009000501391C0035FD60801009008611391E90308AA2A4540B85F0100710001899AC0035FD6 + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x20410 + Link: .dynstr + AddressAlign: 0x8 + Entries: + - Tag: DT_RELA + Value: 0x358 + - Tag: DT_RELASZ + Value: 0x18 + - Tag: DT_RELAENT + Value: 0x18 + - Tag: DT_RELACOUNT + Value: 0x1 + - Tag: DT_SYMTAB + Value: 0x238 + - Tag: DT_SYMENT + Value: 0x18 + - Tag: DT_STRTAB + Value: 0x330 + - Tag: DT_STRSZ + Value: 0x28 + - Tag: DT_GNU_HASH + Value: 0x2C8 + - Tag: DT_HASH + Value: 0x2F8 + - Tag: DT_NULL + Value: 0x0 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x304C0 + AddressAlign: 0x8 + Content: '02000000000000000000000000000000' + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 
0x304D0 + AddressAlign: 0x4 + Size: 0x10 + - Name: .debug_abbrev + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 011101252513050325721710171B25111B120673170000023400032549133F193A0B3B0B0218000003240003253E0B0B0B0000040F004913000005260049130000062E01111B120640187A196E2503253A0B3B0B49133F190000073400032549133A0B3B0B02180000083400032549133A0B3B0B02186E25000009050003253A0B3B0B4913000000 + - Name: .debug_info + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: AB0000000500010800000000010021000108000000000000000205280000000800000002032E000000000102A1000304050402052E000000000202A101020648000000000402A102044D00000005520000000307080106050C000000016F0D0E0007A500000007082E000000000802A1030008092E000000000D02A1040A080B2E000000000C04A10423040C06061C000000016F0F0E000EA50000000910000EAA00000000042E0000000311020100 + - Name: .debug_str_offsets + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 4C00000005000000A2000000000000002C00000059000000280000001C00000072000000640000008C0000008700000069000000140000007B0000009C0000001A0000000E0000008500000076000000 + - Name: .comment + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 4C696E6B65723A204C4C442031352E302E300000636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420306462616566363162353666306566306162306366333865613932666663316633356265653366662900 + - Name: .debug_line + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 620000000500080037000000010101FB0E0D00010101010000000100000101011F010E00000003011F020F051E0100000000006C97BBE59F7DC6A9EA956633431DA63E0400000902E4030100000000001805030A140500BF05190A0105120608740204000101 + - Name: .debug_line_str + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 2F746D702F66696C652E637070002F7573722F6C6F63616C2F676F6F676C652F686F6D652F6D69746368702F6C6C766D2D6275696C642F6F707400 +Symbols: + - Name: file.cpp + Type: 
STT_FILE + Index: SHN_ABS + - Name: '$x.0' + Section: .text + Value: 0x103E4 + - Name: _ZZ1fvE15function_global + Type: STT_OBJECT + Section: .bss + Value: 0x304D4 + Size: 0x4 + - Name: '$d.1' + Section: .bss + Value: 0x304D0 + - Name: '$d.2' + Section: .data + Value: 0x304C0 + - Name: '$d.3' + Section: .rodata + Value: 0x370 + - Name: '$d.4' + Section: .debug_abbrev + - Name: '$d.5' + Section: .debug_info + - Name: '$d.6' + Section: .debug_str_offsets + - Name: '$d.7' + Section: .debug_str + Value: 0xA2 + - Name: '$d.8' + Section: .debug_addr + - Name: _ZL4beta + Type: STT_OBJECT + Section: .bss + Value: 0x304D8 + Size: 0x4 + - Name: _ZL5alpha + Type: STT_OBJECT + Section: .bss + Value: 0x304DC + Size: 0x4 + - Name: '$d.9' + Section: .comment + Value: 0x13 + - Name: '$d.10' + Section: .eh_frame + Value: 0x398 + - Name: '$d.11' + Section: .debug_line + - Name: '$d.12' + Section: .debug_line_str + Value: 0xE + - Name: _DYNAMIC + Section: .dynamic + Value: 0x20410 + Other: [ STV_HIDDEN ] + - Name: _Z1fv + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103E4 + Size: 0xC + - Name: _Z1fb + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103F0 + Size: 0x1C + - Name: bss_global + Type: STT_OBJECT + Section: .bss + Binding: STB_GLOBAL + Value: 0x304D0 + Size: 0x4 + - Name: data_global + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x304C0 + Size: 0x4 + - Name: str + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x304C8 + Size: 0x8 +DynamicSymbols: + - Name: _Z1fv + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103E4 + Size: 0xC + - Name: _Z1fb + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x103F0 + Size: 0x1C + - Name: bss_global + Type: STT_OBJECT + Section: .bss + Binding: STB_GLOBAL + Value: 0x304D0 + Size: 0x4 + - Name: data_global + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x304C0 + Size: 0x4 + - Name: str + Type: STT_OBJECT + 
Section: .data + Binding: STB_GLOBAL + Value: 0x304C8 + Size: 0x8 +DWARF: + debug_str: + - '/tmp/file.cpp' + - _Z1fb + - alpha + - f + - data_global + - int + - '/usr/local/google/home/mitchp/llvm-build/opt' + - bss_global + - char + - _ZL4beta + - str + - bool + - _ZL5alpha + - b + - beta + - function_global + - _Z1fv + - 'clang version 15.0.0 (https://github.com/llvm/llvm-project.git 0dbaef61b56f0ef0ab0cf38ea92ffc1f35bee3ff)' + debug_addr: + - Length: 0x3C + Version: 0x5 + AddressSize: 0x8 + Entries: + - Address: 0x304D0 + - Address: 0x304C0 + - Address: 0x304C8 + - Address: 0x304D4 + - Address: 0x304D8 + - Address: 0x103E4 + - Address: 0x103F0 +... diff --git a/llvm/test/tools/llvm-symbolizer/data.s b/llvm/test/tools/llvm-symbolizer/data.s index e8039f146dbd14..cc9503c59141a9 100644 --- a/llvm/test/tools/llvm-symbolizer/data.s +++ b/llvm/test/tools/llvm-symbolizer/data.s @@ -7,9 +7,12 @@ # CHECK: d1 # CHECK-NEXT: 0 8 +# CHECK-NEXT: ??:? # CHECK-EMPTY: # CHECK-NEXT: d2 # CHECK-NEXT: 8 4 +# CHECK-NEXT: ??:? +# CHECK-EMPTY: d1: .quad 0x1122334455667788 From 40b9a2616e459ce271fe14aedd915cc841021dd7 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sun, 22 May 2022 08:56:30 +0900 Subject: [PATCH 280/908] emitStringLiteralDef: Return earlier here. NFC. 
Differential Revision: https://reviews.llvm.org/D126135 --- llvm/utils/TableGen/SequenceToOffsetTable.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/utils/TableGen/SequenceToOffsetTable.h b/llvm/utils/TableGen/SequenceToOffsetTable.h index 41cdefdb194988..a3d1e185cb2c9c 100644 --- a/llvm/utils/TableGen/SequenceToOffsetTable.h +++ b/llvm/utils/TableGen/SequenceToOffsetTable.h @@ -170,18 +170,18 @@ class SequenceToOffsetTable { /// `EmitLongStrLiterals` is false void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl) const { assert(Entries && "Call layout() before emitStringLiteralDef()"); - if (EmitLongStrLiterals) { - OS << "\n#ifdef __GNUC__\n" - << "#pragma GCC diagnostic push\n" - << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n" - << "#endif\n" - << Decl << " = {\n"; - } else { + if (!EmitLongStrLiterals) { OS << Decl << " = {\n"; emit(OS, printChar, "0"); OS << "\n};\n\n"; return; } + + OS << "\n#ifdef __GNUC__\n" + << "#pragma GCC diagnostic push\n" + << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n" + << "#endif\n" + << Decl << " = {\n"; for (auto I : Seqs) { OS << " /* " << I.second << " */ \""; for (auto C : I.first) { From a1d490319a9e2196786f6504e0c447ee0a1dad71 Mon Sep 17 00:00:00 2001 From: Jeffrey Tan Date: Mon, 23 May 2022 10:17:44 -0700 Subject: [PATCH 281/908] Fix lldb-vscode frame test failure Previous patch (https://reviews.llvm.org/D126013) added a new "optimized" attribute to DAP stack frame this caused some tests, like lldb-vscode/coreFile/TestVSCode_coreFile.py to fail because the tests explicitly check for all attributes. To fix the test failure I decided to remove this attribute. 
Differential Revision: https://reviews.llvm.org/D126225 --- lldb/tools/lldb-vscode/JSONUtils.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 1fb4c00a825552..4bc965e61b81e5 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -765,7 +765,6 @@ llvm::json::Value CreateStackFrame(lldb::SBFrame &frame) { if (is_optimized) frame_name += " [opt]"; EmplaceSafeString(object, "name", frame_name); - object.try_emplace("optimized", is_optimized); int64_t disasm_line = 0; object.try_emplace("source", CreateSource(frame, disasm_line)); From c8e087082927e289bae328dc6dd07fe02b3341b1 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sun, 22 May 2022 08:52:03 +0900 Subject: [PATCH 282/908] [TableGen] emitStringLiteralDef: Pad trailing '\0' at the end of char array. Fixup for https://reviews.llvm.org/D73044 String literal has an implicit terminator '\0'. This commit adjusts char array to long literal. This causes difference of artifacts between -long-string-literals=true and false. 
Differential Revision: https://reviews.llvm.org/D126136 --- llvm/utils/TableGen/SequenceToOffsetTable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/TableGen/SequenceToOffsetTable.h b/llvm/utils/TableGen/SequenceToOffsetTable.h index a3d1e185cb2c9c..1b3451c24cb084 100644 --- a/llvm/utils/TableGen/SequenceToOffsetTable.h +++ b/llvm/utils/TableGen/SequenceToOffsetTable.h @@ -173,7 +173,7 @@ class SequenceToOffsetTable { if (!EmitLongStrLiterals) { OS << Decl << " = {\n"; emit(OS, printChar, "0"); - OS << "\n};\n\n"; + OS << " 0\n};\n\n"; return; } From 8725dc5e2f73e94740e9e74744074cb026bb607b Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Mon, 23 May 2022 21:47:22 +0000 Subject: [PATCH 283/908] [libc][docs] Use same formatting for headers in source_layout utils looks different from the other directory names in the docs, see https://libc.llvm.org/source_layout.html#the-utils-directory Differential revision: https://reviews.llvm.org/D126211 --- libc/docs/source_layout.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/docs/source_layout.rst b/libc/docs/source_layout.rst index a767239bbe43a3..960a060d9b7b75 100644 --- a/libc/docs/source_layout.rst +++ b/libc/docs/source_layout.rst @@ -84,8 +84,8 @@ toplevel ``libc`` directory itself. A test for, say the ``mmap`` function, lives in the directory ``test/src/sys/mman/`` as implementation of ``mmap`` lives in ``src/sys/mman``. -The `utils` directory ---------------------- +The ``utils`` directory +----------------------- This directory contains utilities used by other parts of the llvm-libc system. 
See the `README` files, in the sub-directories within this directory, to learn From 21c060c497fd699581e00646af520ff556fa35f3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 23 May 2022 15:09:26 -0700 Subject: [PATCH 284/908] github: Switch release PR repository to llvm/llvm-project-release-prs As discussed in https://discourse.llvm.org/t/creating-a-new-repository-for-release-branch-pull-requests/61339 Reviewed By: asl Differential Revision: https://reviews.llvm.org/D125851 --- llvm/utils/git/github-automation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 3164371d8c6ae2..7dbcf02a820b6d 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -272,7 +272,7 @@ def execute_command(self) -> bool: release_workflow_parser.add_argument('--issue-number', type=int, required=True, help='The issue number to update') release_workflow_parser.add_argument('--branch-repo-token', type=str, help='GitHub authentication token to use for the repository where new branches will be pushed. Defaults to TOKEN.') -release_workflow_parser.add_argument('--branch-repo', type=str, default='llvmbot/llvm-project', +release_workflow_parser.add_argument('--branch-repo', type=str, default='llvm/llvm-project-release-prs', help='The name of the repo where new branches will be pushed (e.g. llvm/llvm-project)') release_workflow_parser.add_argument('sub_command', type=str, choices=['print-release-branch', 'auto'], help='Print to stdout the name of the release branch ISSUE_NUMBER should be backported to') From 24239e246c78c48dabdc042e5c40f40b2a28947b Mon Sep 17 00:00:00 2001 From: Jamie Schmeiser Date: Thu, 7 Oct 2021 15:02:19 -0400 Subject: [PATCH 285/908] Add new hidden option -print-on-crash that prints out IR that caused opt pipeline to crash A new hidden option -print-on-crash that prints the IR as it was upon entering the last pass when there is a crash. 
The IR is saved in its print form before each pass is started and a signal handler is registered. If the compilation crashes, the signal handler will print the saved IR to dbgs(). This option can be modified using -print-module-scope to get the IR for the complete module. Note that this option only works with the new pass manager. Reviewed By: yrouban Differential Revision: https://reviews.llvm.org/D86657 --- .../llvm/Passes/StandardInstrumentations.h | 20 +++++++ llvm/lib/Passes/PassBuilder.cpp | 11 ++++ llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Passes/StandardInstrumentations.cpp | 54 +++++++++++++++++++ llvm/test/Other/print-on-crash.ll | 30 +++++++++++ 5 files changed, 116 insertions(+) create mode 100644 llvm/test/Other/print-on-crash.ll diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 4438bdb00717cb..32ecc9ec5fb006 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -480,6 +480,25 @@ class DotCfgChangeReporter : public ChangeReporter> { std::unique_ptr HTML; }; +// Print IR on crash. +class PrintCrashIRInstrumentation { +public: + PrintCrashIRInstrumentation() + : SavedIR("*** Dump of IR Before Last Pass Unknown ***") {} + ~PrintCrashIRInstrumentation(); + void registerCallbacks(PassInstrumentationCallbacks &PIC); + void reportCrashIR(); + +protected: + std::string SavedIR; + +private: + // The crash reporter that will report on a crash. + static PrintCrashIRInstrumentation *CrashReporter; + // Crash handler registered when print-on-crash is specified. + static void SignalHandler(void *); +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). 
class StandardInstrumentations { @@ -493,6 +512,7 @@ class StandardInstrumentations { PseudoProbeVerifier PseudoProbeVerification; InLineChangePrinter PrintChangedDiff; DotCfgChangeReporter WebsiteChangeReporter; + PrintCrashIRInstrumentation PrintCrashIR; VerifyInstrumentation Verify; bool VerifyEach; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 52feab025f1286..42fde3752724ea 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -375,6 +375,17 @@ bool shouldPopulateClassToPassNames() { !printAfterPasses().empty(); } +// A pass for testing -print-on-crash. +// DO NOT USE THIS EXCEPT FOR TESTING! +class TriggerCrashPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &, ModuleAnalysisManager &) { + abort(); + return PreservedAnalyses::all(); + } + static StringRef name() { return "TriggerCrashPass"; } +}; + } // namespace PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index bb839c7cee6ff0..aa43c96cb1fe18 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -114,6 +114,7 @@ MODULE_PASS("strip-debug-declare", StripDebugDeclarePass()) MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass()) MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) +MODULE_PASS("trigger-crash", TriggerCrashPass()) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("view-callgraph", CallGraphViewerPass()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass()) diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 83c8239bc9b36c..4cd2a1ad51e364 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -28,12 +28,14 @@ #include "llvm/IR/PrintPasses.h" #include 
"llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Program.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/Signals.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -165,6 +167,12 @@ static cl::opt DotCfgDir( cl::desc("Generate dot files into specified directory for changed IRs"), cl::Hidden, cl::init("./")); +// An option to print the IR that was being processed when a pass crashes. +static cl::opt + PrintCrashIR("print-on-crash", + cl::desc("Print the last form of the IR before crash"), + cl::init(false), cl::Hidden); + namespace { // Perform a system based diff between \p Before and \p After, using @@ -2115,6 +2123,51 @@ StandardInstrumentations::StandardInstrumentations( ChangePrinter::PrintChangedDotCfgVerbose), Verify(DebugLogging), VerifyEach(VerifyEach) {} +PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = + nullptr; + +void PrintCrashIRInstrumentation::reportCrashIR() { dbgs() << SavedIR; } + +void PrintCrashIRInstrumentation::SignalHandler(void *) { + // Called by signal handlers so do not lock here + // Is the PrintCrashIRInstrumentation still alive? 
+ if (!CrashReporter) + return; + + assert(PrintCrashIR && "Did not expect to get here without option set."); + CrashReporter->reportCrashIR(); +} + +PrintCrashIRInstrumentation::~PrintCrashIRInstrumentation() { + if (!CrashReporter) + return; + + assert(PrintCrashIR && "Did not expect to get here without option set."); + CrashReporter = nullptr; +} + +void PrintCrashIRInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!PrintCrashIR || CrashReporter) + return; + + sys::AddSignalHandler(SignalHandler, nullptr); + CrashReporter = this; + + PIC.registerBeforeNonSkippedPassCallback([this](StringRef PassID, Any IR) { + SavedIR.clear(); + raw_string_ostream OS(SavedIR); + OS << formatv("*** Dump of {0}IR Before Last Pass {1}", + llvm::forcePrintModuleIR() ? "Module " : "", PassID); + if (!isInteresting(IR, PassID)) { + OS << " Filtered Out ***\n"; + return; + } + OS << " Started ***\n"; + unwrapAndPrint(OS, IR); + }); +} + void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC, FunctionAnalysisManager *FAM) { PrintIR.registerCallbacks(PIC); @@ -2130,6 +2183,7 @@ void StandardInstrumentations::registerCallbacks( Verify.registerCallbacks(PIC); PrintChangedDiff.registerCallbacks(PIC); WebsiteChangeReporter.registerCallbacks(PIC); + PrintCrashIR.registerCallbacks(PIC); } template class ChangeReporter; diff --git a/llvm/test/Other/print-on-crash.ll b/llvm/test/Other/print-on-crash.ll new file mode 100644 index 00000000000000..640ddb2b34ab59 --- /dev/null +++ b/llvm/test/Other/print-on-crash.ll @@ -0,0 +1,30 @@ +; A test that the hidden option -print-on-crash properly sets a signal handler +; which gets called when a pass crashes. The trigger-crash pass asserts. + +; RUN: not --crash opt -print-on-crash -passes=trigger-crash < %s 2>&1 | FileCheck %s --check-prefix=CHECK_SIMPLE + +; A test that the signal handler set by the hidden option -print-on-crash +; is not called when no pass crashes. 
+ +; RUN: opt -print-on-crash -passes="default<O2>" < %s 2>&1 | FileCheck %s --check-prefix=CHECK_NO_CRASH + +; RUN: not --crash opt -print-on-crash -print-module-scope -passes=trigger-crash < %s 2>&1 | FileCheck %s --check-prefix=CHECK_MODULE + +; RUN: not --crash opt -print-on-crash -print-module-scope -passes=trigger-crash -filter-passes=TriggerCrashPass < %s 2>&1 | FileCheck %s --check-prefix=CHECK_MODULE + +; RUN: not --crash opt -print-on-crash -print-module-scope -passes=trigger-crash -filter-passes=blah < %s 2>&1 | FileCheck %s --check-prefix=CHECK_FILTERED + +; CHECK_SIMPLE: *** Dump of IR Before Last Pass {{.*}} Started *** +; CHECK_SIMPLE: @main +; CHECK_SIMPLE: entry: +; CHECK_NO_CRASH-NOT: *** Dump of IR +; CHECK_MODULE: *** Dump of Module IR Before Last Pass {{.*}} Started *** +; CHECK_MODULE: ; ModuleID = {{.*}} +; CHECK_FILTERED: *** Dump of Module IR Before Last Pass {{.*}} Filtered Out *** + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + ret i32 0 +} From 4f89ff3fc71b0b2adfeb74b900e9a2a90ef80174 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 23 May 2022 15:56:35 -0700 Subject: [PATCH 286/908] [test][clang] Move -O3 in command line --- clang/test/CodeGen/sanitizer-module-constructor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGen/sanitizer-module-constructor.c b/clang/test/CodeGen/sanitizer-module-constructor.c index 8c7f78dee9bfd9..abc579c6fe23d8 100644 --- a/clang/test/CodeGen/sanitizer-module-constructor.c +++ b/clang/test/CodeGen/sanitizer-module-constructor.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=address -emit-llvm -O3 -fdebug-pass-manager -o - %s 2>&1 | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=thread -emit-llvm -O3 -fdebug-pass-manager -o - %s 2>&1 | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=memory -emit-llvm -O3 -fdebug-pass-manager -o - %s 2>&1 | 
FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=address -O3 -emit-llvm -fdebug-pass-manager -o - %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=thread -O3 -emit-llvm -fdebug-pass-manager -o - %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=memory -O3 -emit-llvm -fdebug-pass-manager -o - %s 2>&1 | FileCheck %s // This is regression test for PR42877 From 63d69a21b7a7c07c0004823fb68a58a439adbc84 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 16 May 2022 10:09:28 +0000 Subject: [PATCH 287/908] Apply clang-tidy fixes for performance-unnecessary-value-param in Utils.cpp (NFC) --- mlir/include/mlir/Dialect/SCF/Utils/Utils.h | 2 +- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h index 95df92e2edc6c3..09032cb679a8c9 100644 --- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h @@ -54,7 +54,7 @@ using NewYieldValueFn = std::function( OpBuilder &b, Location loc, ArrayRef newBBArgs)>; scf::ForOp replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop, ValueRange newIterOperands, - NewYieldValueFn newYieldValuesFn); + const NewYieldValueFn &newYieldValuesFn); /// Outline a region with a single block into a new FuncOp. 
/// Assumes the FuncOp result types is the type of the yielded operands of the diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index d4c96e51d549aa..0910cb31f13cb7 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -36,9 +36,10 @@ struct LoopParams { }; } // namespace -scf::ForOp mlir::replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop, - ValueRange newIterOperands, - NewYieldValueFn newYieldValuesFn) { +scf::ForOp +mlir::replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop, + ValueRange newIterOperands, + const NewYieldValueFn &newYieldValuesFn) { // Create a new loop before the existing one, with the extra operands. OpBuilder::InsertionGuard g(builder); builder.setInsertionPoint(loop); From f38765a813e6f17fc8ca478b771c1eb13135145b Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 16 May 2022 10:24:43 +0000 Subject: [PATCH 288/908] Apply clang-tidy fixes for modernize-use-override in SparseTensorUtils.cpp (NFC) --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 339476d4553864..c46ad463c3cfdc 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -896,9 +896,9 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { uint64_t rank, const uint64_t *perm) : Base(tensor, rank, perm) {} - ~SparseTensorEnumerator() final override = default; + ~SparseTensorEnumerator() final = default; - void forallElements(ElementConsumer yield) final override { + void forallElements(ElementConsumer yield) final { forallElements(yield, 0, 0); } From 994a1841eb304d445a9d02025a19a09a645b47af Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 16 May 2022 10:33:00 +0000 Subject: [PATCH 289/908] Apply clang-tidy fixes for 
modernize-use-bool-literals in Parser.cpp (NFC) --- mlir/lib/Parser/Parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index 06b2810a2ec1e2..7f21100cec6ed7 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -189,7 +189,7 @@ InFlightDiagnostic Parser::emitWrongTokenError(const Twine &message) { StringRef startOfBuffer(bufferStart, curPtr - bufferStart); // Back up over entirely blank lines. - while (1) { + while (true) { // Back up until we see a \n, but don't look past the buffer start. startOfBuffer = startOfBuffer.rtrim(" \t"); From e5d8fb690e350e5d4d4c807988f04157b2ba4a57 Mon Sep 17 00:00:00 2001 From: Hendrik Greving Date: Mon, 16 May 2022 07:34:04 -0700 Subject: [PATCH 290/908] [BasicBlockUtils] Add corner case test for loop metadata. Adds a test to expose #55416. Differential Revision: https://reviews.llvm.org/D125696 --- .../LoopSimplify/update_latch_md2.ll | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 llvm/test/Transforms/LoopSimplify/update_latch_md2.ll diff --git a/llvm/test/Transforms/LoopSimplify/update_latch_md2.ll b/llvm/test/Transforms/LoopSimplify/update_latch_md2.ll new file mode 100644 index 00000000000000..488860ad190c71 --- /dev/null +++ b/llvm/test/Transforms/LoopSimplify/update_latch_md2.ll @@ -0,0 +1,36 @@ +; Tests loop-simplify does not move the loop metadata, because +; the loopexit block is not the latch of the loop _bb6. + +; FIXME(#55416): The metadata should not move. 
+ +; RUN: opt < %s -passes=loop-simplify -S | FileCheck %s +; CHECK-LABEL: loop.header.loopexit: +; CHECK: br label %loop.header, !llvm.loop !0 +; CHECK-LABEL: loop.latch: +; CHECK-NOT: br i1 %p, label %loop.latch, label %loop.header.loopexit, !llvm.loop !0 + +define void @func(i1 %p) { +entry: + br label %loop.header + +loop.header: + br i1 %p, label %bb1, label %exit + +bb1: + br i1 %p, label %bb2, label %bb3 + +bb2: + br label %bb3 + +bb3: + br label %loop.latch + +loop.latch: + br i1 %p, label %loop.latch, label %loop.header, !llvm.loop !0 + +exit: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.mustprogress"} From 4f93d5cc1d47c6b54a948712b6181dc55d028c88 Mon Sep 17 00:00:00 2001 From: Hendrik Greving Date: Fri, 13 May 2022 10:53:13 -0700 Subject: [PATCH 291/908] [BasicBlockUtils] Do not move loop metadata if outer loop header. Fixes a bug preventing moving the loop's metadata to an outer loop's header, which happens if the loop's exit is also the header of an outer loop. Adjusts test for above. Fixes #55416. Differential Revision: https://reviews.llvm.org/D125574 --- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 6 +++++- llvm/test/Transforms/LoopSimplify/update_latch_md2.ll | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index fd92a39d325936..e9983ff8217616 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -1162,7 +1162,11 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef Preds, if (NewLatch != OldLatch) { MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop"); NewLatch->getTerminator()->setMetadata("llvm.loop", MD); - OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr); + // It's still possible that OldLatch is the latch of another inner loop, + // in which case we do not remove the metadata. 
+ Loop *IL = LI->getLoopFor(OldLatch); + if (IL && IL->getLoopLatch() != OldLatch) + OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr); } } diff --git a/llvm/test/Transforms/LoopSimplify/update_latch_md2.ll b/llvm/test/Transforms/LoopSimplify/update_latch_md2.ll index 488860ad190c71..1d54e868890e03 100644 --- a/llvm/test/Transforms/LoopSimplify/update_latch_md2.ll +++ b/llvm/test/Transforms/LoopSimplify/update_latch_md2.ll @@ -1,13 +1,11 @@ ; Tests loop-simplify does not move the loop metadata, because ; the loopexit block is not the latch of the loop _bb6. -; FIXME(#55416): The metadata should not move. - ; RUN: opt < %s -passes=loop-simplify -S | FileCheck %s ; CHECK-LABEL: loop.header.loopexit: ; CHECK: br label %loop.header, !llvm.loop !0 ; CHECK-LABEL: loop.latch: -; CHECK-NOT: br i1 %p, label %loop.latch, label %loop.header.loopexit, !llvm.loop !0 +; CHECK: br i1 %p, label %loop.latch, label %loop.header.loopexit, !llvm.loop !0 define void @func(i1 %p) { entry: From 74f98419770041deb983b4f736ebdc06de4ee2ac Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 20 May 2022 14:39:33 -0700 Subject: [PATCH 292/908] [lld][WebAssembly] Allow use of statically allocated TLS region. It turns out we were already allocating static address space for TLS data along with the non-TLS static data, but this space was going unused/ignored. With this change, we include the TLS segment in `__wasm_init_memory` (which does the work of loading the passive segments into memory when a module is first loaded). We also set the `__tls_base` global to point to the start of this segment. This means that the runtime can use this static copy of the TLS data for the first/primary thread if it chooses, rather than doing a runtime allocation prior to calling `__wasm_init_tls`. Practically speaking, this will allow emscripten to avoid dynamic allocation of TLS region on the main thread. 
Differential Revision: https://reviews.llvm.org/D126107 --- lld/test/wasm/data-segments.ll | 15 +++++++- lld/test/wasm/tls_init_symbols.s | 6 ++-- lld/wasm/Writer.cpp | 51 +++++++++++++++++++++------ llvm/include/llvm/BinaryFormat/Wasm.h | 1 + 4 files changed, 60 insertions(+), 13 deletions(-) diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll index 25b45982fe8bc0..bfba82ee29cf2b 100644 --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -128,7 +128,7 @@ ; PASSIVE-PIC-NEXT: Locals: ; PASSIVE32-PIC-NEXT: - Type: I32 ; PASSIVE64-PIC-NEXT: - Type: I64 -; PASSIVE-PIC-NEXT: Count: 1 +; PASSIVE-PIC-NEXT: Count: 2 ; PASSIVE-PIC-NEXT: Body: {{.*}} ; PASSIVE-PIC-NEXT: - Index: 3 ; PASSIVE-PIC-NEXT: Locals: [] @@ -178,6 +178,19 @@ ; DIS-NEXT: # 2: down to label0 ; DIS-NEXT: end +; NOPIC-DIS-NEXT: [[PTR]].const 1024 +; NOPIC-DIS-NEXT: [[PTR]].const 1024 +; NOPIC-DIS-NEXT: global.set 1 +; PIC-DIS-NEXT: [[PTR]].const 0 +; PIC-DIS-NEXT: global.get 1 +; PIC-DIS-NEXT: [[PTR]].add +; PIC-DIS-NEXT: local.tee 1 +; PIC-DIS-NEXT: global.set {{\d*}} +; PIC-DIS-NEXT: local.get 1 +; DIS-NEXT: i32.const 0 +; DIS-NEXT: i32.const 4 +; DIS-NEXT: memory.init 0, 0 + ; NOPIC-DIS-NEXT: [[PTR]].const 1028 ; PIC-DIS-NEXT: [[PTR]].const 4 ; PIC-DIS-NEXT: global.get 1 diff --git a/lld/test/wasm/tls_init_symbols.s b/lld/test/wasm/tls_init_symbols.s index 1470e0658e008b..601766d681d853 100644 --- a/lld/test/wasm/tls_init_symbols.s +++ b/lld/test/wasm/tls_init_symbols.s @@ -42,8 +42,10 @@ tls_sym: # CHECK-NEXT: - Index: 1 # CHECK-NEXT: Name: __wasm_init_tls # CHECK-NEXT: - Index: 2 -# CHECK-NEXT: Name: __wasm_apply_global_tls_relocs +# CHECK-NEXT: Name: __wasm_init_memory # CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Name: __wasm_apply_global_tls_relocs +# CHECK-NEXT: - Index: 4 # CHECK-NEXT: Name: _start # DIS: <__wasm_init_tls>: @@ -53,7 +55,7 @@ tls_sym: # DIS-NEXT: i32.const 0 # DIS-NEXT: i32.const 4 # DIS-NEXT: memory.init 0, 0 -# DIS-NEXT: call 2 +# 
DIS-NEXT: call 3 # DIS-NEXT: end # DIS: <__wasm_apply_global_tls_relocs>: diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index fa5b8d5db0e802..9e3e32c183f19e 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -218,6 +218,7 @@ void Writer::writeSections() { } static void setGlobalPtr(DefinedGlobal *g, uint64_t memoryPtr) { + LLVM_DEBUG(dbgs() << "setGlobalPtr " << g->getName() << " -> " << memoryPtr << "\n"); g->global->setPointerValue(memoryPtr); } @@ -276,13 +277,15 @@ void Writer::layoutMemory() { memoryPtr, seg->size, seg->alignment)); if (!config->relocatable && seg->isTLS()) { - if (config->sharedMemory) { + if (WasmSym::tlsSize) { auto *tlsSize = cast(WasmSym::tlsSize); setGlobalPtr(tlsSize, seg->size); - + } + if (WasmSym::tlsAlign) { auto *tlsAlign = cast(WasmSym::tlsAlign); setGlobalPtr(tlsAlign, int64_t{1} << seg->alignment); - } else if (WasmSym::tlsBase) { + } + if (!config->sharedMemory && WasmSym::tlsBase) { auto *tlsBase = cast(WasmSym::tlsBase); setGlobalPtr(tlsBase, memoryPtr); } @@ -970,9 +973,6 @@ static void createFunction(DefinedFunction *func, StringRef bodyContent) { } bool Writer::needsPassiveInitialization(const OutputSegment *segment) { - // TLS segments are initialized separately - if (segment->isTLS()) - return false; // If bulk memory features is supported then we can perform bss initialization // (via memory.fill) during `__wasm_init_memory`. if (config->importMemory && !segment->requiredInBinary()) @@ -1002,6 +1002,11 @@ void Writer::createSyntheticInitFunctions() { "__wasm_init_memory", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_init_memory")); WasmSym::initMemory->markLive(); + if (config->sharedMemory) { + // This global is assigned during __wasm_init_memory in the shared memory + // case. 
+ WasmSym::tlsBase->markLive(); + } } if (config->sharedMemory && out.globalSec->needsTLSRelocations()) { @@ -1126,7 +1131,7 @@ void Writer::createInitMemoryFunction() { // With PIC code we cache the flag address in local 0 if (config->isPic) { writeUleb128(os, 1, "num local decls"); - writeUleb128(os, 1, "local count"); + writeUleb128(os, 2, "local count"); writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type"); writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); @@ -1177,10 +1182,31 @@ void Writer::createInitMemoryFunction() { if (config->isPic) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), - "memory_base"); + "__memory_base"); writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "i32.add"); } + + // When we initialize the TLS segment we also set the `__tls_base` + // global. This allows the runtime to use this static copy of the + // TLS data for the first/main thread. + if (config->sharedMemory && s->isTLS()) { + if (config->isPic) { + // Cache the result of the addition in local 1 + writeU8(os, WASM_OPCODE_LOCAL_TEE, "local.tee"); + writeUleb128(os, 1, "local 1"); + } else { + writePtrConst(os, s->startVA, is64, "destination address"); + } + writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET"); + writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), + "__tls_base"); + if (config->isPic) { + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.tee"); + writeUleb128(os, 1, "local 1"); + } + } + if (s->isBss) { writeI32Const(os, 0, "fill value"); writeI32Const(os, s->size, "memory region size"); @@ -1243,6 +1269,10 @@ void Writer::createInitMemoryFunction() { for (const OutputSegment *s : segments) { if (needsPassiveInitialization(s) && !s->isBss) { + // The TLS region should not be dropped since it is needed + // during the initialization of each thread (__wasm_init_tls). 
+ if (config->sharedMemory && s->isTLS()) + continue; // data.drop instruction writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); writeUleb128(os, WASM_OPCODE_DATA_DROP, "data.drop"); @@ -1440,7 +1470,6 @@ void Writer::createInitTLSFunction() { writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "global index"); - WasmSym::tlsBase->markLive(); // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend op. writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); @@ -1623,8 +1652,10 @@ void Writer::run() { } } - if (WasmSym::initTLS && WasmSym::initTLS->isLive()) + if (WasmSym::initTLS && WasmSym::initTLS->isLive()) { + log("-- createInitTLSFunction"); createInitTLSFunction(); + } if (errorCount()) return; diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 23773b1af9d700..2419a06fbe96c6 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -283,6 +283,7 @@ enum : unsigned { WASM_OPCODE_CALL = 0x10, WASM_OPCODE_LOCAL_GET = 0x20, WASM_OPCODE_LOCAL_SET = 0x21, + WASM_OPCODE_LOCAL_TEE = 0x22, WASM_OPCODE_GLOBAL_GET = 0x23, WASM_OPCODE_GLOBAL_SET = 0x24, WASM_OPCODE_I32_STORE = 0x36, From e141e719e166665ac7313eece4cb1c8e82b9ab45 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 11 May 2022 13:15:59 -0700 Subject: [PATCH 293/908] [flang] Fix character length calculation for Unicode component The character length value in the derived type component information table entry is already in units of characters, not bytes, so don't divide by the per-character byte size. 
Differential Revision: https://reviews.llvm.org/D126139 --- flang/runtime/type-info.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/runtime/type-info.cpp b/flang/runtime/type-info.cpp index 8f66aeab407935..ecc2bf9e28d911 100644 --- a/flang/runtime/type-info.cpp +++ b/flang/runtime/type-info.cpp @@ -91,7 +91,7 @@ void Component::EstablishDescriptor(Descriptor &descriptor, if (cat == TypeCategory::Character) { std::size_t lengthInChars{0}; if (auto length{characterLen_.GetValue(&container)}) { - lengthInChars = static_cast(*length / kind_); + lengthInChars = static_cast(*length); } else { RUNTIME_CHECK( terminator, characterLen_.genre() == Value::Genre::Deferred); From b8feeba0b407e703385738af7e415d0c8972a420 Mon Sep 17 00:00:00 2001 From: Xeonacid Date: Tue, 24 May 2022 02:58:23 +0200 Subject: [PATCH 294/908] [RISCV] Make old JIT ExecutionEngine tests unsupported Make old JIT ExecutionEngine tests unsupported for RISCV, like many other architectures included. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D126188 --- llvm/test/ExecutionEngine/lit.local.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/ExecutionEngine/lit.local.cfg b/llvm/test/ExecutionEngine/lit.local.cfg index d5c6decc7ef5b4..3338d19e967f1c 100644 --- a/llvm/test/ExecutionEngine/lit.local.cfg +++ b/llvm/test/ExecutionEngine/lit.local.cfg @@ -1,4 +1,4 @@ -if config.root.native_target in ['Sparc', 'PowerPC', 'AArch64', 'ARM64', 'SystemZ', 'Hexagon']: +if config.root.native_target in ['Sparc', 'PowerPC', 'AArch64', 'ARM64', 'SystemZ', 'Hexagon', 'RISCV']: config.unsupported = True # ExecutionEngine tests are not expected to pass in a cross-compilation setup. 
From 9f33dd733ff507f4ebbf155f8e4c3be52a1e8406 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 11 May 2022 14:13:50 -0700 Subject: [PATCH 295/908] [flang] Allow global scope names that clash with intrinsic modules Intrinsic module names are not in the user's namespace, so they are free to declare global names that conflict with intrinsic modules. Differential Revision: https://reviews.llvm.org/D126140 --- flang/lib/Semantics/mod-file.cpp | 35 ++++++++++++++++++++------- flang/lib/Semantics/resolve-names.cpp | 4 --- flang/test/Semantics/modfile49.f90 | 19 +++++++++++++++ 3 files changed, 45 insertions(+), 13 deletions(-) create mode 100644 flang/test/Semantics/modfile49.f90 diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index d7e6efcb3b3982..466a49f9ab4d70 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -911,6 +911,8 @@ static bool VerifyHeader(llvm::ArrayRef content) { Scope *ModFileReader::Read(const SourceName &name, std::optional isIntrinsic, Scope *ancestor, bool silent) { std::string ancestorName; // empty for module + Symbol *notAModule{nullptr}; + bool fatalError{false}; if (ancestor) { if (auto *scope{ancestor->FindSubmodule(name)}) { return scope; @@ -920,7 +922,14 @@ Scope *ModFileReader::Read(const SourceName &name, if (!isIntrinsic.value_or(false)) { auto it{context_.globalScope().find(name)}; if (it != context_.globalScope().end()) { - return it->second->scope(); + Scope *scope{it->second->scope()}; + if (scope->kind() == Scope::Kind::Module) { + return scope; + } else { + notAModule = scope->symbol(); + // USE, NON_INTRINSIC global name isn't a module? 
+ fatalError = isIntrinsic.has_value(); + } } } if (isIntrinsic.value_or(true)) { @@ -935,7 +944,8 @@ Scope *ModFileReader::Read(const SourceName &name, options.isModuleFile = true; options.features.Enable(common::LanguageFeature::BackslashEscapes); options.features.Enable(common::LanguageFeature::OpenMP); - if (!isIntrinsic.value_or(false)) { + if (!isIntrinsic.value_or(false) && !notAModule) { + // Scan non-intrinsic module directories options.searchDirectories = context_.searchDirectories(); // If a directory is in both lists, the intrinsic module directory // takes precedence. @@ -950,14 +960,21 @@ Scope *ModFileReader::Read(const SourceName &name, } } auto path{ModFileName(name, ancestorName, context_.moduleFileSuffix())}; - const auto *sourceFile{parsing.Prescan(path, options)}; - if (parsing.messages().AnyFatalError()) { + const auto *sourceFile{fatalError ? nullptr : parsing.Prescan(path, options)}; + if (fatalError || parsing.messages().AnyFatalError()) { if (!silent) { - for (auto &msg : parsing.messages().messages()) { - std::string str{msg.ToString()}; - Say(name, ancestorName, - parser::MessageFixedText{str.c_str(), str.size(), msg.severity()}, - path); + if (notAModule) { + // Module is not explicitly INTRINSIC, and there's already a global + // symbol of the same name that is not a module. 
+ context_.SayWithDecl( + *notAModule, name, "'%s' is not a module"_err_en_US, name); + } else { + for (auto &msg : parsing.messages().messages()) { + std::string str{msg.ToString()}; + Say(name, ancestorName, + parser::MessageFixedText{str.c_str(), str.size(), msg.severity()}, + path); + } } } return nullptr; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 062c016cc7381b..a31831301bd138 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -2934,10 +2934,6 @@ Scope *ModuleVisitor::FindModule(const parser::Name &name, if (!scope) { return nullptr; } - if (scope->kind() != Scope::Kind::Module) { - Say(name, "'%s' is not a module"_err_en_US); - return nullptr; - } if (DoesScopeContain(scope, currScope())) { // 14.2.2(1) Say(name, "Module '%s' cannot USE itself"_err_en_US); } diff --git a/flang/test/Semantics/modfile49.f90 b/flang/test/Semantics/modfile49.f90 new file mode 100644 index 00000000000000..030258098ff190 --- /dev/null +++ b/flang/test/Semantics/modfile49.f90 @@ -0,0 +1,19 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +subroutine foo +end +subroutine iso_fortran_env +end +subroutine bad1 + !ERROR: 'foo' is not a module + use foo +end +subroutine ok1 + use, intrinsic :: iso_fortran_env +end +subroutine ok2 + use iso_fortran_env +end +subroutine bad2 + !ERROR: 'iso_fortran_env' is not a module + use, non_intrinsic :: iso_fortran_env +end From 0377322897985294681bc57932f903a888f7fe68 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 11 May 2022 14:32:59 -0700 Subject: [PATCH 296/908] [flang] Ignore BIND(C) binding name conflicts of inner procedures The binding names of inner procedures with BIND(C) are not exposed to the loader and should be ignored for potential conflict errors. 
Differential Revision: https://reviews.llvm.org/D126141 --- flang/lib/Semantics/check-declarations.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 63272082520591..72816b6d33c1ee 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -1857,7 +1857,9 @@ void CheckHelper::CheckGenericOps(const Scope &scope) { static const std::string *DefinesBindCName(const Symbol &symbol) { const auto *subp{symbol.detailsIf()}; - if ((subp && !subp->isInterface()) || symbol.has()) { + if ((subp && !subp->isInterface() && + ClassifyProcedure(symbol) != ProcedureDefinitionClass::Internal) || + symbol.has()) { // Symbol defines data or entry point return symbol.GetBindName(); } else { From c0ec1036d6532143998fa393b5a469b4a5e964ff Mon Sep 17 00:00:00 2001 From: Vy Nguyen Date: Tue, 24 May 2022 07:59:18 +0700 Subject: [PATCH 297/908] [lld-macho][nfc] Run clang-format on lld/MachO/*.{h,cpp} - fixed inconsistent indents and spaces - prevent extraneous formatting changes in other patches Differential Revision: https://reviews.llvm.org/D126262 --- lld/MachO/ConcatOutputSection.cpp | 4 ++-- lld/MachO/InputFiles.cpp | 7 +++---- lld/MachO/InputSection.h | 3 ++- lld/MachO/MarkLive.cpp | 3 +-- lld/MachO/OutputSection.cpp | 4 +--- lld/MachO/SyntheticSections.cpp | 4 ++-- lld/MachO/Writer.cpp | 2 +- 7 files changed, 12 insertions(+), 15 deletions(-) diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index 73222eeef0b914..ed709a30ade4dc 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -328,8 +328,8 @@ void TextOutputSection::finalize() { std::to_string(thunkInfo.sequence++)); if (!isa(funcSym) || cast(funcSym)->isExternal()) { r.referent = thunkInfo.sym = symtab->addDefined( - thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0, - thunkSize, 
/*isWeakDef=*/false, /*isPrivateExtern=*/true, + thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0, thunkSize, + /*isWeakDef=*/false, /*isPrivateExtern=*/true, /*isThumb=*/false, /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); } else { diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index f0cd4269d282cb..05f491e1b7bae5 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -258,8 +258,8 @@ InputFile::InputFile(Kind kind, const InterfaceFile &interface) // used by the Mach-O format. static Optional getRecordSize(StringRef segname, StringRef name) { if (name == section_names::compactUnwind) { - if (segname == segment_names::ld) - return target->wordSize == 8 ? 32 : 20; + if (segname == segment_names::ld) + return target->wordSize == 8 ? 32 : 20; } if (config->icfLevel == ICFLevel::none) return {}; @@ -439,8 +439,7 @@ static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec, template void ObjFile::parseRelocations(ArrayRef sectionHeaders, - const SectionHeader &sec, - Section §ion) { + const SectionHeader &sec, Section §ion) { auto *buf = reinterpret_cast(mb.getBufferStart()); ArrayRef relInfos( reinterpret_cast(buf + sec.reloff), sec.nreloc); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index c4366c2ed34a52..e0184431d6d052 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -71,7 +71,8 @@ class InputSection { public: // is address assigned? bool isFinal = false; - // keep the address of the symbol(s) in this section unique in the final binary ? + // keep the address of the symbol(s) in this section unique in the final + // binary ? 
bool keepUnique = false; uint32_t align = 1; diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp index 9cae677ee03ac4..7239e30bbdb220 100644 --- a/lld/MachO/MarkLive.cpp +++ b/lld/MachO/MarkLive.cpp @@ -165,8 +165,7 @@ void MarkLiveImpl::markTransitively() { // contain other types of InputSections (due to S_ATTR_LIVE_SUPPORT), but // those entries should never be pushed onto the worklist. auto *isec = cast(getInputSection(entry)); - assert(isec->live && - "We mark as live when pushing onto the worklist!"); + assert(isec->live && "We mark as live when pushing onto the worklist!"); // Mark all symbols listed in the relocation table for this section. for (const Reloc &r : isec->relocs) { diff --git a/lld/MachO/OutputSection.cpp b/lld/MachO/OutputSection.cpp index 8d7a29c2916ca9..461d4f82b184a2 100644 --- a/lld/MachO/OutputSection.cpp +++ b/lld/MachO/OutputSection.cpp @@ -13,9 +13,7 @@ using namespace llvm; using namespace lld; using namespace lld::macho; -uint64_t OutputSection::getSegmentOffset() const { - return addr - parent->addr; -} +uint64_t OutputSection::getSegmentOffset() const { return addr - parent->addr; } void OutputSection::assignAddressesToStartEndSymbols() { for (Defined *d : sectionStartSymbols) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 7e1d92b3bad818..61597ccf9ebf58 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -704,8 +704,8 @@ uint32_t LazyBindingSection::encode(const Symbol &sym) { OutputSegment *dataSeg = in.lazyPointers->parent; os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | dataSeg->index); - uint64_t offset = in.lazyPointers->addr - dataSeg->addr + - sym.stubsIndex * target->wordSize; + uint64_t offset = + in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize; encodeULEB128(offset, os); encodeDylibOrdinal(ordinalForSymbol(sym), os); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 
ac36630569a45b..3fbf520657d123 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -990,7 +990,7 @@ void Writer::finalizeAddresses() { continue; // Other kinds of OutputSections have already been finalized. if (auto concatOsec = dyn_cast(osec)) - concatOsec->finalizeContents(); + concatOsec->finalizeContents(); } } From 8b42bc5662ca13c97746b7301bd503b2662dc444 Mon Sep 17 00:00:00 2001 From: Sotiris Apostolakis Date: Mon, 23 May 2022 16:26:09 -0400 Subject: [PATCH 298/908] [SelectOpti][3/5] Base Heuristics This patch adds the base heuristics for determining whether branches are more profitable than conditional moves. Base heuristics apply to all code apart from inner-most loops. Depends on D122259 Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D120231 --- llvm/lib/CodeGen/SelectOptimize.cpp | 295 ++++++++++++++++++++++- llvm/test/CodeGen/X86/select-optimize.ll | 228 +++++++++++++++--- 2 files changed, 485 insertions(+), 38 deletions(-) diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 337d825b1dcd54..5cf937335df969 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -15,37 +15,73 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Target/TargetMachine.h" +#include 
"llvm/Transforms/Utils/SizeOpts.h" +#include +#include +#include +#include +#include using namespace llvm; #define DEBUG_TYPE "select-optimize" +STATISTIC(NumSelectOptAnalyzed, + "Number of select groups considered for conversion to branch"); +STATISTIC(NumSelectConvertedExpColdOperand, + "Number of select groups converted due to expensive cold operand"); +STATISTIC(NumSelectConvertedHighPred, + "Number of select groups converted due to high-predictability"); +STATISTIC(NumSelectUnPred, + "Number of select groups not converted due to unpredictability"); +STATISTIC(NumSelectColdBB, + "Number of select groups not converted due to cold basic block"); STATISTIC(NumSelectsConverted, "Number of selects converted"); +static cl::opt ColdOperandThreshold( + "cold-operand-threshold", + cl::desc("Maximum frequency of path for an operand to be considered cold."), + cl::init(20), cl::Hidden); + +static cl::opt ColdOperandMaxCostMultiplier( + "cold-operand-max-cost-multiplier", + cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " + "slice of a cold operand to be considered inexpensive."), + cl::init(1), cl::Hidden); + namespace { class SelectOptimize : public FunctionPass { const TargetMachine *TM = nullptr; const TargetSubtargetInfo *TSI; const TargetLowering *TLI = nullptr; + const TargetTransformInfo *TTI = nullptr; const LoopInfo *LI; + DominatorTree *DT; std::unique_ptr BFI; std::unique_ptr BPI; + ProfileSummaryInfo *PSI; + OptimizationRemarkEmitter *ORE; public: static char ID; + SelectOptimize() : FunctionPass(ID) { initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); } @@ -53,8 +89,12 @@ class SelectOptimize : public FunctionPass { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); } private: @@ -63,9 +103,47 @@ class SelectOptimize : public FunctionPass { using SelectGroup = 
SmallVector; using SelectGroups = SmallVector; + // Converts select instructions of a function to conditional jumps when deemed + // profitable. Returns true if at least one select was converted. bool optimizeSelects(Function &F); + + // Heuristics for determining which select instructions can be profitably + // converted to branches. Separate heuristics for selects in inner-most loops + // and the rest of code regions (base heuristics for non-inner-most loop + // regions). + void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); + void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); + + // Converts to branches the select groups that were deemed + // profitable-to-convert. void convertProfitableSIGroups(SelectGroups &ProfSIGroups); + + // Splits selects of a given basic block into select groups. void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); + + // Determines for which select groups it is profitable converting to branches + // (base heuristics). + void findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups); + // Determines if a select group should be converted to a branch (base + // heuristics). + bool isConvertToBranchProfitableBase(const SmallVector &ASI); + + // Returns true if there are expensive instructions in the cold value + // operand's (if any) dependence slice of any of the selects of the given + // group. + bool hasExpensiveColdOperand(const SmallVector &ASI); + + // For a given source instruction, collect its backwards dependence slice + // consisting of instructions exclusively computed for producing the operands + // of the source instruction. + void getExclBackwardsSlice(Instruction *I, + SmallVector &Slice); + + // Returns true if the condition of the select is highly predictable. + bool isSelectHighlyPredictable(const SelectInst *SI); + + // Returns true if the target architecture supports lowering a given select. 
bool isSelectKindSupported(SelectInst *SI); }; } // namespace @@ -75,7 +153,11 @@ char SelectOptimize::ID = 0; INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, false) @@ -85,27 +167,37 @@ bool SelectOptimize::runOnFunction(Function &F) { TM = &getAnalysis().getTM(); TSI = TM->getSubtargetImpl(F); TLI = TSI->getTargetLowering(); + + // If none of the select types is supported then skip this pass. + // This is an optimization pass. Legality issues will be handled by + // instruction selection. + if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && + !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && + !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) + return false; + + TTI = &getAnalysis().getTTI(F); + DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + PSI = &getAnalysis().getPSI(); + ORE = &getAnalysis().getORE(); + + // When optimizing for size, selects are preferable over branches. + if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) + return false; return optimizeSelects(F); } bool SelectOptimize::optimizeSelects(Function &F) { - // Collect all the select groups. - SelectGroups SIGroups; - for (BasicBlock &BB : F) { - collectSelectGroups(BB, SIGroups); - } - // Determine for which select groups it is profitable converting to branches. SelectGroups ProfSIGroups; - // For now assume that all select groups can be profitably converted to - // branches. 
- for (SelectGroup &ASI : SIGroups) { - ProfSIGroups.push_back(ASI); - } + // Base heuristics apply only to non-loops and outer loops. + optimizeSelectsBase(F, ProfSIGroups); + // Separate heuristics for inner-most loops. + optimizeSelectsInnerLoops(F, ProfSIGroups); // Convert to branches the select groups that were deemed // profitable-to-convert. @@ -115,6 +207,25 @@ bool SelectOptimize::optimizeSelects(Function &F) { return !ProfSIGroups.empty(); } +void SelectOptimize::optimizeSelectsBase(Function &F, + SelectGroups &ProfSIGroups) { + // Collect all the select groups. + SelectGroups SIGroups; + for (BasicBlock &BB : F) { + // Base heuristics apply only to non-loops and outer loops. + Loop *L = LI->getLoopFor(&BB); + if (L && L->isInnermost()) + continue; + collectSelectGroups(BB, SIGroups); + } + + // Determine for which select groups it is profitable converting to branches. + findProfitableSIGroupsBase(SIGroups, ProfSIGroups); +} + +void SelectOptimize::optimizeSelectsInnerLoops(Function &F, + SelectGroups &ProfSIGroups) {} + /// If \p isTrue is true, return the true value of \p SI, otherwise return /// false value of \p SI. If the true/false value of \p SI is defined by any /// select instructions in \p Selects, look through the defining select @@ -256,6 +367,168 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB, } } +void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups) { + for (SelectGroup &ASI : SIGroups) { + ++NumSelectOptAnalyzed; + if (isConvertToBranchProfitableBase(ASI)) + ProfSIGroups.push_back(ASI); + } +} + +bool SelectOptimize::isConvertToBranchProfitableBase( + const SmallVector &ASI) { + SelectInst *SI = ASI.front(); + OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); + + // Skip cold basic blocks. Better to optimize for size for cold blocks. 
+ if (PSI->isColdBlock(SI->getParent(), BFI.get())) { + ++NumSelectColdBB; + ORmiss << "Not converted to branch because of cold basic block. "; + ORE->emit(ORmiss); + return false; + } + + // If unpredictable, branch form is less profitable. + if (SI->getMetadata(LLVMContext::MD_unpredictable)) { + ++NumSelectUnPred; + ORmiss << "Not converted to branch because of unpredictable branch. "; + ORE->emit(ORmiss); + return false; + } + + // If highly predictable, branch form is more profitable, unless a + // predictable select is inexpensive in the target architecture. + if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { + ++NumSelectConvertedHighPred; + OR << "Converted to branch because of highly predictable branch. "; + ORE->emit(OR); + return true; + } + + // Look for expensive instructions in the cold operand's (if any) dependence + // slice of any of the selects in the group. + if (hasExpensiveColdOperand(ASI)) { + ++NumSelectConvertedExpColdOperand; + OR << "Converted to branch because of expensive cold operand."; + ORE->emit(OR); + return true; + } + + ORmiss << "Not profitable to convert to branch (base heuristic)."; + ORE->emit(ORmiss); + return false; +} + +static InstructionCost divideNearest(InstructionCost Numerator, + uint64_t Denominator) { + return (Numerator + (Denominator / 2)) / Denominator; +} + +bool SelectOptimize::hasExpensiveColdOperand( + const SmallVector &ASI) { + bool ColdOperand = false; + uint64_t TrueWeight, FalseWeight, TotalWeight; + if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t MinWeight = std::min(TrueWeight, FalseWeight); + TotalWeight = TrueWeight + FalseWeight; + // Is there a path with frequency 100 * MinWeight; + } else if (PSI->hasProfileSummary()) { + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); + ORmiss << "Profile data available but missing branch-weights metadata for " + "select instruction. 
"; + ORE->emit(ORmiss); + } + if (!ColdOperand) + return false; + // Check if the cold path's dependence slice is expensive for any of the + // selects of the group. + for (SelectInst *SI : ASI) { + Instruction *ColdI = nullptr; + uint64_t HotWeight; + if (TrueWeight < FalseWeight) { + ColdI = dyn_cast(SI->getTrueValue()); + HotWeight = FalseWeight; + } else { + ColdI = dyn_cast(SI->getFalseValue()); + HotWeight = TrueWeight; + } + if (ColdI) { + SmallVector ColdSlice; + getExclBackwardsSlice(ColdI, ColdSlice); + InstructionCost SliceCost = 0; + for (auto *ColdII : ColdSlice) { + SliceCost += + TTI->getInstructionCost(ColdII, TargetTransformInfo::TCK_Latency); + } + // The colder the cold value operand of the select is the more expensive + // the cmov becomes for computing the cold value operand every time. Thus, + // the colder the cold operand is the more its cost counts. + // Get nearest integer cost adjusted for coldness. + InstructionCost AdjSliceCost = + divideNearest(SliceCost * HotWeight, TotalWeight); + if (AdjSliceCost >= + ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) + return true; + } + } + return false; +} + +// For a given source instruction, collect its backwards dependence slice +// consisting of instructions exclusively computed for the purpose of producing +// the operands of the source instruction. As an approximation +// (sufficiently-accurate in practice), we populate this set with the +// instructions of the backwards dependence slice that only have one-use and +// form an one-use chain that leads to the source instruction. +void SelectOptimize::getExclBackwardsSlice( + Instruction *I, SmallVector &Slice) { + SmallPtrSet Visited; + std::queue Worklist; + Worklist.push(I); + while (!Worklist.empty()) { + Instruction *II = Worklist.front(); + Worklist.pop(); + + // Avoid cycles. 
+ if (Visited.count(II)) + continue; + Visited.insert(II); + + if (!II->hasOneUse()) + continue; + + // Avoid considering instructions with less frequency than the source + // instruction (i.e., avoid colder code regions of the dependence slice). + if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) + continue; + + // Eligible one-use instruction added to the dependence slice. + Slice.push_back(II); + + // Explore all the operands of the current instruction to expand the slice. + for (unsigned k = 0; k < II->getNumOperands(); ++k) + if (auto *OpI = dyn_cast(II->getOperand(k))) + Worklist.push(OpI); + } +} + +bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Max = std::max(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + if (Sum != 0) { + auto Probability = BranchProbability::getBranchProbability(Max, Sum); + if (Probability > TTI->getPredictableBranchThreshold()) + return true; + } + } + return false; +} + bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); if (VectorCond) diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index 300fb4de312db0..99a194f033bb54 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -1,47 +1,133 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s -; Single select converted to branch -define i32 @single_select(i32 %a, i32 %b, i1 %cmp) { -; CHECK-LABEL: @single_select( +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Test base heuristic 1: +;; highly-biased selects assumed to be highly predictable, converted to branches 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; If a select is obviously predictable, turn it into a branch. +define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @weighted_select1( ; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: ; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; - %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 + ret i32 %sel +} + +; If a select is obviously predictable (reversed profile weights), +; turn it into a branch. +define i32 @weighted_select2(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @weighted_select2( +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16 + ret i32 %sel +} + +; Not obviously predictable select. +define i32 @weighted_select3(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @weighted_select3( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF18:![0-9]+]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17 + ret i32 %sel +} + +; Unpredictable select should not form a branch. 
+define i32 @unpred_select(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @unpred_select( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable !19 +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !unpredictable !20 + ret i32 %sel +} + +; Predictable select in function with optsize attribute should not form branch. +define i32 @weighted_select_optsize(i32 %a, i32 %b, i1 %cmp) optsize { +; CHECK-LABEL: @weighted_select_optsize( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 ret i32 %sel } -; Select group converted to branch -define i32 @select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { -; CHECK-LABEL: @select_group( +define i32 @weighted_select_pgso(i32 %a, i32 %b, i1 %cmp) !prof !14 { +; CHECK-LABEL: @weighted_select_pgso( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 + ret i32 %sel +} + +; If two selects in a row are predictable, turn them into branches. 
+define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 { +; CHECK-LABEL: @weighted_selects( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0 +; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP1]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]] +; CHECK: select.false2: +; CHECK-NEXT: br label [[SELECT_END1]] +; CHECK: select.end1: +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[B]], [[SELECT_END]] ], [ [[A]], [[SELECT_FALSE2]] ] +; CHECK-NEXT: ret i32 [[SEL1]] +; + %cmp = icmp ne i32 %a, 0 + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 + %cmp1 = icmp ne i32 %sel, 0 + %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !15 + ret i32 %sel1 +} + +; If select group predictable, turn it into a branch. 
+define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { +; CHECK-LABEL: @weighted_select_group( ; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: ; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] ; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[ADD]] ; - %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0 - call void @llvm.dbg.value(metadata i32 %sel1, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !3) - %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !0 + %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !15 + call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23) + %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !15 %add = add i32 %sel1, %sel2 ret i32 %add } -; Select group with intra-group dependence converted to branch +; Predictable select group with intra-group dependence converted to branch define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; CHECK-LABEL: @select_group_intra_group( ; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label 
[[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: @@ -50,22 +136,110 @@ define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[SUB]] ; - %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0 - %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !0 + %sel1 = select i1 %cmp, i32 %a, i32 %b,!prof !15 + %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !15 %sub = sub i32 %sel1, %sel2 ret i32 %sub } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Test base heuristic 2: +;; look for expensive instructions in the one-use slice of the cold path +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Select with cold one-use load value operand should form branch and +; sink load +define i32 @expensive_val_operand1(i32* nocapture %a, i32 %y, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand1( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %load = load i32, i32* %a, align 8 + %sel = select i1 %cmp, i32 %load, i32 %y, !prof !17 + ret i32 %sel +} + +; Expensive hot value operand and cheap cold value operand. 
+define i32 @expensive_val_operand2(i32* nocapture %a, i32 %x, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand2( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]], !prof [[PROF18]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %load = load i32, i32* %a, align 8 + %sel = select i1 %cmp, i32 %x, i32 %load, !prof !17 + ret i32 %sel +} + +; Cold value operand with load in its one-use dependence slice should result +; in a branch with a sunk dependence slice. +define i32 @expensive_val_operand3(i32* nocapture %a, i32 %b, i32 %y, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand3( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 +; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %load = load i32, i32* %a, align 8 + %x = add i32 %load, %b + %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17 + ret i32 %sel +} + +; Multiple uses of the load value operand. 
+define i32 @expensive_val_operand4(i32 %a, i32* nocapture %b, i32 %x, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand4( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[B:%.*]], align 4 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL]], [[LOAD]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %load = load i32, i32* %b, align 4 + %sel = select i1 %cmp, i32 %x, i32 %load + %add = add i32 %sel, %load + ret i32 %add +} + ; Function Attrs: nounwind readnone speculatable willreturn declare void @llvm.dbg.value(metadata, metadata, metadata) -!llvm.module.flags = !{!6, !7} - -!0 = !{!"branch_weights", i32 1, i32 100} -!1 = !DIFile(filename: "test.c", directory: "/test") -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !5, splitDebugInlining: false, nameTableKind: None) -!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !2) -!4 = !DILocalVariable(name: "x", scope: !3) -!5 = !{} -!6 = !{i32 2, !"Dwarf Version", i32 4} -!7 = !{i32 1, !"Debug Info Version", i32 3} +!llvm.module.flags = !{!0, !26, !27} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} +!15 = !{!"branch_weights", i32 1, i32 100} +!16 = !{!"branch_weights", i32 100, i32 1} +!17 = !{!"branch_weights", i32 1, i32 99} +!18 = !{!"branch_weights", i32 50, i32 50} +!19 = !{!"function_entry_count", i64 100} +!20 = !{} +!21 = !DIFile(filename: "test.c", directory: 
"/test") +!22 = distinct !DICompileUnit(language: DW_LANG_C99, file: !21, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !25, splitDebugInlining: false, nameTableKind: None) +!23 = distinct !DISubprogram(name: "test", scope: !21, file: !21, line: 1, unit: !22) +!24 = !DILocalVariable(name: "x", scope: !23) +!25 = !{} +!26 = !{i32 2, !"Dwarf Version", i32 4} +!27 = !{i32 1, !"Debug Info Version", i32 3} From d7ebb7461151fdb8dcf00a9a01d484355a0c6074 Mon Sep 17 00:00:00 2001 From: Sotiris Apostolakis Date: Mon, 23 May 2022 22:05:41 -0400 Subject: [PATCH 299/908] [SelectOpti][4/5] Loop Heuristics This patch adds the loop-level heuristics for determining whether branches are more profitable than conditional moves. These heuristics apply to only inner-most loops. Depends on D120231 Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D120232 --- llvm/lib/CodeGen/SelectOptimize.cpp | 348 ++++++++++++++++++++++- llvm/test/CodeGen/X86/select-optimize.ll | 293 +++++++++++++++++++ 2 files changed, 639 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 5cf937335df969..2194da77e0b4c0 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -30,6 +31,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/ScaledNumber.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SizeOpts.h" #include @@ -52,6 +54,8 @@ STATISTIC(NumSelectUnPred, "Number of select groups not converted due to unpredictability"); STATISTIC(NumSelectColdBB, "Number of select groups not converted due to cold basic block"); 
+STATISTIC(NumSelectConvertedLoop, + "Number of select groups converted due to loop-level analysis"); STATISTIC(NumSelectsConverted, "Number of selects converted"); static cl::opt ColdOperandThreshold( @@ -65,6 +69,31 @@ static cl::opt ColdOperandMaxCostMultiplier( "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden); +static cl::opt + GainGradientThreshold("select-opti-loop-gradient-gain-threshold", + cl::desc("Gradient gain threshold (%)."), + cl::init(25), cl::Hidden); + +static cl::opt + GainCycleThreshold("select-opti-loop-cycle-gain-threshold", + cl::desc("Minimum gain per loop (in cycles) threshold."), + cl::init(4), cl::Hidden); + +static cl::opt GainRelativeThreshold( + "select-opti-loop-relative-gain-threshold", + cl::desc( + "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), + cl::init(8), cl::Hidden); + +static cl::opt MispredictDefaultRate( + "mispredict-default-rate", cl::Hidden, cl::init(25), + cl::desc("Default mispredict rate (initialized to 25%).")); + +static cl::opt + DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, + cl::init(false), + cl::desc("Disable loop-level heuristics.")); + namespace { class SelectOptimize : public FunctionPass { @@ -78,6 +107,7 @@ class SelectOptimize : public FunctionPass { std::unique_ptr BPI; ProfileSummaryInfo *PSI; OptimizationRemarkEmitter *ORE; + TargetSchedModel TSchedModel; public: static char ID; @@ -103,6 +133,15 @@ class SelectOptimize : public FunctionPass { using SelectGroup = SmallVector; using SelectGroups = SmallVector; + using Scaled64 = ScaledNumber; + + struct CostInfo { + /// Predicated cost (with selects as conditional moves). + Scaled64 PredCost; + /// Non-predicated cost (with selects converted to branches). + Scaled64 NonPredCost; + }; + // Converts select instructions of a function to conditional jumps when deemed // profitable. Returns true if at least one select was converted. 
bool optimizeSelects(Function &F); @@ -122,9 +161,12 @@ class SelectOptimize : public FunctionPass { void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); // Determines for which select groups it is profitable converting to branches - // (base heuristics). + // (base and inner-most-loop heuristics). void findProfitableSIGroupsBase(SelectGroups &SIGroups, SelectGroups &ProfSIGroups); + void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups, + SelectGroups &ProfSIGroups); + // Determines if a select group should be converted to a branch (base // heuristics). bool isConvertToBranchProfitableBase(const SmallVector &ASI); @@ -143,6 +185,29 @@ class SelectOptimize : public FunctionPass { // Returns true if the condition of the select is highly predictable. bool isSelectHighlyPredictable(const SelectInst *SI); + // Loop-level checks to determine if a non-predicated version (with branches) + // of the given loop is more profitable than its predicated version. + bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]); + + // Computes instruction and loop-critical-path costs for both the predicated + // and non-predicated version of the given loop. + bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups, + DenseMap &InstCostMap, + CostInfo *LoopCost); + + // Returns a set of all the select instructions in the given select groups. + SmallPtrSet getSIset(const SelectGroups &SIGroups); + + // Returns the latency cost of a given instruction. + Optional computeInstCost(const Instruction *I); + + // Returns the misprediction cost of a given select when converted to branch. + Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost); + + // Returns the cost of a branch when the prediction is correct. + Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, + const SelectInst *SI); + // Returns true if the target architecture supports lowering a given select. 
bool isSelectKindSupported(SelectInst *SI); }; @@ -183,6 +248,7 @@ bool SelectOptimize::runOnFunction(Function &F) { BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); PSI = &getAnalysis().getPSI(); ORE = &getAnalysis().getORE(); + TSchedModel.init(TSI); // When optimizing for size, selects are preferable over branches. if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) @@ -224,7 +290,24 @@ void SelectOptimize::optimizeSelectsBase(Function &F, } void SelectOptimize::optimizeSelectsInnerLoops(Function &F, - SelectGroups &ProfSIGroups) {} + SelectGroups &ProfSIGroups) { + SmallVector Loops(LI->begin(), LI->end()); + // Need to check size on each iteration as we accumulate child loops. + for (unsigned long i = 0; i < Loops.size(); ++i) + for (Loop *ChildL : Loops[i]->getSubLoops()) + Loops.push_back(ChildL); + + for (Loop *L : Loops) { + if (!L->isInnermost()) + continue; + + SelectGroups SIGroups; + for (BasicBlock *BB : L->getBlocks()) + collectSelectGroups(*BB, SIGroups); + + findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups); + } +} /// If \p isTrue is true, return the true value of \p SI, otherwise return /// false value of \p SI. If the true/false value of \p SI is defined by any @@ -376,6 +459,53 @@ void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, } } +void SelectOptimize::findProfitableSIGroupsInnerLoops( + const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { + NumSelectOptAnalyzed += SIGroups.size(); + // For each select group in an inner-most loop, + // a branch is more preferable than a select/conditional-move if: + // i) conversion to branches for all the select groups of the loop satisfies + // loop-level heuristics including reducing the loop's critical path by + // some threshold (see SelectOptimize::checkLoopHeuristics); and + // ii) the total cost of the select group is cheaper with a branch compared + // to its predicated version. 
The cost is in terms of latency and the cost + // of a select group is the cost of its most expensive select instruction + // (assuming infinite resources and thus fully leveraging available ILP). + + DenseMap InstCostMap; + CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()}, + {Scaled64::getZero(), Scaled64::getZero()}}; + if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) || + !checkLoopHeuristics(L, LoopCost)) { + return; + } + + for (SelectGroup &ASI : SIGroups) { + // Assuming infinite resources, the cost of a group of instructions is the + // cost of the most expensive instruction of the group. + Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero(); + for (SelectInst *SI : ASI) { + SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost); + BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost); + } + if (BranchCost < SelectCost) { + OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front()); + OR << "Profitable to convert to branch (loop analysis). BranchCost=" + << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() + << ". "; + ORE->emit(OR); + ++NumSelectConvertedLoop; + ProfSIGroups.push_back(ASI); + } else { + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); + ORmiss << "Select is more profitable (loop analysis). BranchCost=" + << BranchCost.toString() + << ", SelectCost=" << SelectCost.toString() << ". "; + ORE->emit(ORmiss); + } + } +} + bool SelectOptimize::isConvertToBranchProfitableBase( const SmallVector &ASI) { SelectInst *SI = ASI.front(); @@ -529,6 +659,220 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { return false; } +bool SelectOptimize::checkLoopHeuristics(const Loop *L, + const CostInfo LoopCost[2]) { + // Loop-level checks to determine if a non-predicated version (with branches) + // of the loop is more profitable than its predicated version. 
+ + if (DisableLoopLevelHeuristics) + return true; + + OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", + L->getHeader()->getFirstNonPHI()); + + if (LoopCost[0].NonPredCost > LoopCost[0].PredCost || + LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { + ORmissL << "No select conversion in the loop due to no reduction of loop's " + "critical path. "; + ORE->emit(ORmissL); + return false; + } + + Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, + LoopCost[1].PredCost - LoopCost[1].NonPredCost}; + + // Profitably converting to branches need to reduce the loop's critical path + // by at least some threshold (absolute gain of GainCycleThreshold cycles and + // relative gain of 12.5%). + if (Gain[1] < Scaled64::get(GainCycleThreshold) || + Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) { + Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost; + ORmissL << "No select conversion in the loop due to small reduction of " + "loop's critical path. Gain=" + << Gain[1].toString() + << ", RelativeGain=" << RelativeGain.toString() << "%. "; + ORE->emit(ORmissL); + return false; + } + + // If the loop's critical path involves loop-carried dependences, the gradient + // of the gain needs to be at least GainGradientThreshold% (defaults to 25%). + // This check ensures that the latency reduction for the loop's critical path + // keeps decreasing with sufficient rate beyond the two analyzed loop + // iterations. + if (Gain[1] > Gain[0]) { + Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / + (LoopCost[1].PredCost - LoopCost[0].PredCost); + if (GradientGain < Scaled64::get(GainGradientThreshold)) { + ORmissL << "No select conversion in the loop due to small gradient gain. " + "GradientGain=" + << GradientGain.toString() << "%. "; + ORE->emit(ORmissL); + return false; + } + } + // If the gain decreases it is not profitable to convert. 
+ else if (Gain[1] < Gain[0]) { + ORmissL + << "No select conversion in the loop due to negative gradient gain. "; + ORE->emit(ORmissL); + return false; + } + + // Non-predicated version of the loop is more profitable than its + // predicated version. + return true; +} + +// Computes instruction and loop-critical-path costs for both the predicated +// and non-predicated version of the given loop. +// Returns false if unable to compute these costs due to invalid cost of loop +// instruction(s). +bool SelectOptimize::computeLoopCosts( + const Loop *L, const SelectGroups &SIGroups, + DenseMap &InstCostMap, CostInfo *LoopCost) { + const auto &SIset = getSIset(SIGroups); + // Compute instruction and loop-critical-path costs across two iterations for + // both predicated and non-predicated version. + const unsigned Iterations = 2; + for (unsigned Iter = 0; Iter < Iterations; ++Iter) { + // Cost of the loop's critical path. + CostInfo &MaxCost = LoopCost[Iter]; + for (BasicBlock *BB : L->getBlocks()) { + for (const Instruction &I : *BB) { + if (I.isDebugOrPseudoInst()) + continue; + // Compute the predicated and non-predicated cost of the instruction. + Scaled64 IPredCost = Scaled64::getZero(), + INonPredCost = Scaled64::getZero(); + + // Assume infinite resources that allow to fully exploit the available + // instruction-level parallelism. + // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost) + for (const Use &U : I.operands()) { + auto UI = dyn_cast(U.get()); + if (!UI) + continue; + if (InstCostMap.count(UI)) { + IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost); + INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost); + } + } + auto ILatency = computeInstCost(&I); + if (!ILatency.hasValue()) { + OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I); + ORmissL << "Invalid instruction cost preventing analysis and " + "optimization of the inner-most loop containing this " + "instruction. 
"; + ORE->emit(ORmissL); + return false; + } + IPredCost += Scaled64::get(ILatency.getValue()); + INonPredCost += Scaled64::get(ILatency.getValue()); + + // For a select that can be converted to branch, + // compute its cost as a branch (non-predicated cost). + // + // BranchCost = PredictedPathCost + MispredictCost + // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb + // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate + if (SIset.contains(&I)) { + auto SI = dyn_cast(&I); + + Scaled64 TrueOpCost = Scaled64::getZero(), + FalseOpCost = Scaled64::getZero(); + if (auto *TI = dyn_cast(SI->getTrueValue())) + if (InstCostMap.count(TI)) + TrueOpCost = InstCostMap[TI].NonPredCost; + if (auto *FI = dyn_cast(SI->getFalseValue())) + if (InstCostMap.count(FI)) + FalseOpCost = InstCostMap[FI].NonPredCost; + Scaled64 PredictedPathCost = + getPredictedPathCost(TrueOpCost, FalseOpCost, SI); + + Scaled64 CondCost = Scaled64::getZero(); + if (auto *CI = dyn_cast(SI->getCondition())) + if (InstCostMap.count(CI)) + CondCost = InstCostMap[CI].NonPredCost; + Scaled64 MispredictCost = getMispredictionCost(SI, CondCost); + + INonPredCost = PredictedPathCost + MispredictCost; + } + + InstCostMap[&I] = {IPredCost, INonPredCost}; + MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); + MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); + } + } + } + return true; +} + +SmallPtrSet +SelectOptimize::getSIset(const SelectGroups &SIGroups) { + SmallPtrSet SIset; + for (const SelectGroup &ASI : SIGroups) + for (const SelectInst *SI : ASI) + SIset.insert(SI); + return SIset; +} + +Optional SelectOptimize::computeInstCost(const Instruction *I) { + InstructionCost ICost = + TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); + if (auto OC = ICost.getValue()) + return Optional(OC.getValue()); + return Optional(None); +} + +ScaledNumber +SelectOptimize::getMispredictionCost(const SelectInst *SI, + const Scaled64 CondCost) { + 
uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; + + // Account for the default misprediction rate when using a branch + // (conservatively set to 25% by default). + uint64_t MispredictRate = MispredictDefaultRate; + // If the select condition is obviously predictable, then the misprediction + // rate is zero. + if (isSelectHighlyPredictable(SI)) + MispredictRate = 0; + + // CondCost is included to account for cases where the computation of the + // condition is part of a long dependence chain (potentially loop-carried) + // that would delay detection of a misprediction and increase its cost. + Scaled64 MispredictCost = + std::max(Scaled64::get(MispredictPenalty), CondCost) * + Scaled64::get(MispredictRate); + MispredictCost /= Scaled64::get(100); + + return MispredictCost; +} + +// Returns the cost of a branch when the prediction is correct. +// TrueCost * TrueProbability + FalseCost * FalseProbability. +ScaledNumber +SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, + const SelectInst *SI) { + Scaled64 PredPathCost; + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t SumWeight = TrueWeight + FalseWeight; + if (SumWeight != 0) { + PredPathCost = TrueCost * Scaled64::get(TrueWeight) + + FalseCost * Scaled64::get(FalseWeight); + PredPathCost /= Scaled64::get(SumWeight); + return PredPathCost; + } + } + // Without branch weight metadata, we assume 75% for the one path and 25% for + // the other, and pick the result with the biggest cost. 
+ PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost, + FalseCost * Scaled64::get(3) + TrueCost); + PredPathCost /= Scaled64::get(4); + return PredPathCost; +} + bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); if (VectorCond) diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index 99a194f033bb54..ee267c80e7a5ec 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -211,9 +211,301 @@ define i32 @expensive_val_operand4(i32 %a, i32* nocapture %b, i32 %x, i1 %cmp) { ret i32 %add } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Test loop heuristic: loop-level critical-path analysis +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Use of cmov in this test would put a load and a fsub on the critical path. +;; Loop-level analysis should decide to form a branch. 
+;; +;;double cmov_on_critical_path(int n, double x, double *a) { +;; for (int i = 0; i < n; i++) { +;; double r = a[i]; +;; if (x > r) +;; // 50% of iterations +;; x -= r; +;; } +;; return x; +;;} +define double @cmov_on_critical_path(i32 %n, double %x, double* nocapture %a) { +; CHECK-LABEL: @cmov_on_critical_path( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret double [[X:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[R:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]] +; CHECK-NEXT: [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]] +; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]], !prof [[PROF27:![0-9]+]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[FOR_BODY]] ], [ [[X1]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.exit: +; CHECK-NEXT: ret double [[X2]] +; +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup + +for.cond.cleanup: ; preds = 
%entry + ret double %x + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ] + %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv + %r = load double, double* %arrayidx, align 8 + %sub = fsub double %x1, %r + %cmp2 = fcmp ogt double %x1, %r + %x2 = select i1 %cmp2, double %sub, double %x1, !prof !18 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.exit, label %for.body + +for.exit: ; preds = %for.body + ret double %x2 +} + +;; The common path includes expensive operations (load and fsub) making +;; branch similarly expensive to cmov, and thus the gain is small. +;; Loop-level analysis should decide on not forming a branch. +;; +;;double small_gain(int n, double x, double *a) { +;; for (int i = 0; i < n; i++) { +;; double r = a[i]; +;; if (x > r) +;; // 99% of iterations +;; x -= r; +;; } +;; return x; +;;} +define double @small_gain(i32 %n, double %x, double* nocapture %a) { +; CHECK-LABEL: @small_gain( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret double [[X:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[FOR_BODY]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] +; 
CHECK-NEXT: [[R:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ole double [[X1]], [[R]] +; CHECK-NEXT: [[X2]] = select i1 [[CMP2]], double [[X1]], double [[SUB]], !prof [[PROF18]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.exit: +; CHECK-NEXT: ret double [[X2]] +; +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %entry + ret double %x + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ] + %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv + %r = load double, double* %arrayidx, align 8 + %sub = fsub double %x1, %r + %cmp2 = fcmp ole double %x1, %r + %x2 = select i1 %cmp2, double %x1, double %sub, !prof !17 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.exit, label %for.body + +for.exit: ; preds = %for.body + ret double %x2 +} + +;; Use of a branch in this test would avoid executing a load and several +;; floating-point operations for most cases (70% of the time). +;; Yet, the gain is not increasing much per iteration (small gradient gain). +;; Loop-level analysis should decide not to form a branch. 
+;; +;;double small_gradient(int n, double x, double *a) { +;; for (int i = 0; i < n; i++) { +;; double r = 2 * a[i] + i; +;; if (r > 0) +;; // 30% of iterations +;; x -= r; +;; } +;; return x; +;;} +define double @small_gradient(i32 %n, double %x, double* nocapture %a) { +; CHECK-LABEL: @small_gradient( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_ADDR_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: ret double [[X_ADDR_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X_ADDR_010:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fmuladd.f64(double [[TMP0]], double 2.000000e+00, double 1.000000e+00) +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[TMP1]], 0.000000e+00 +; CHECK-NEXT: [[SUB:%.*]] = select i1 [[CMP1]], double [[TMP1]], double 0.000000e+00, !prof [[PROF28:![0-9]+]] +; CHECK-NEXT: [[X_ADDR_1]] = fsub double [[X_ADDR_010]], [[SUB]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + 
%wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.body ] + ret double %x.addr.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %x.addr.010 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ] + %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv + %0 = load double, double* %arrayidx, align 8 + %1 = call double @llvm.fmuladd.f64(double %0, double 2.000000e+00, double 1.000000e+00) + %cmp1 = fcmp ogt double %1, 0.000000e+00 + %sub = select i1 %cmp1, double %1, double 0.000000e+00, !prof !28 + %x.addr.1 = fsub double %x.addr.010, %sub + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +;; One select on the critical path and one off the critical path. +;; Loop-level analysis should decide to form a branch only for +;; the select on the critical path. 
+;; +;;double loop_select_groups(int n, double x, double *a, int k) { +;; int c = 0; +;; for (int i = 0; i < n; i++) { +;; double r = a[i]; +;; if (x > r) +;; x -= r; +;; if (i == k) +;; c += n; +;; } +;; return x + c; +;;} +define double @loop_select_groups(i32 %n, double %x, double* nocapture %a, i32 %k) { +; CHECK-LABEL: @loop_select_groups( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[PHI_CAST:%.*]] = sitofp i32 [[C_1:%.*]] to double +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[C_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[PHI_CAST]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY]] ], [ [[X_ADDR_1:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[X_ADDR_0_LCSSA]], [[C_0_LCSSA]] +; CHECK-NEXT: ret double [[ADD5]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ] +; CHECK-NEXT: [[X_ADDR_022:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[SELECT_END]] ] +; CHECK-NEXT: [[C_020:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[C_1]], [[SELECT_END]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[X_ADDR_022]], [[TMP0]] +; CHECK-NEXT: [[SUB_FROZEN:%.*]] = freeze i1 [[CMP1]] +; CHECK-NEXT: br i1 [[SUB_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]] +; CHECK: select.false: +; CHECK-NEXT: br label 
[[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SUB:%.*]] = phi double [ [[TMP0]], [[FOR_BODY]] ], [ 0.000000e+00, [[SELECT_FALSE]] ] +; CHECK-NEXT: [[X_ADDR_1]] = fsub double [[X_ADDR_022]], [[SUB]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[K:%.*]], [[N]] +; CHECK-NEXT: [[ADD:%.*]] = select i1 [[CMP2]], i32 [[N]], i32 0 +; CHECK-NEXT: [[C_1]] = add nsw i32 [[ADD]], [[C_020]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] +; +entry: + %cmp19 = icmp sgt i32 %n, 0 + br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + %phi.cast = sitofp i32 %c.1 to double + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %c.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %phi.cast, %for.cond.cleanup.loopexit ] + %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.cond.cleanup.loopexit ] + %add5 = fadd double %x.addr.0.lcssa, %c.0.lcssa + ret double %add5 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %x.addr.022 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ] + %c.020 = phi i32 [ 0, %for.body.preheader ], [ %c.1, %for.body ] + %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv + %0 = load double, double* %arrayidx, align 8 + %cmp1 = fcmp ogt double %x.addr.022, %0 + %sub = select i1 %cmp1, double %0, double 0.000000e+00 + %x.addr.1 = fsub double %x.addr.022, %sub + %1 = trunc i64 %indvars.iv to i32 + %cmp2 = icmp eq i32 %k, %n + %add = select i1 %cmp2, i32 %n, 
i32 0 + %c.1 = add nsw i32 %add, %c.020 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + ; Function Attrs: nounwind readnone speculatable willreturn declare void @llvm.dbg.value(metadata, metadata, metadata) +; Function Attrs: mustprogress nofree nosync nounwind readnone speculatable willreturn +declare double @llvm.fmuladd.f64(double, double, double) + !llvm.module.flags = !{!0, !26, !27} !0 = !{i32 1, !"ProfileSummary", !1} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} @@ -243,3 +535,4 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !25 = !{} !26 = !{i32 2, !"Dwarf Version", i32 4} !27 = !{i32 1, !"Debug Info Version", i32 3} +!28 = !{!"branch_weights", i32 30, i32 70} From ae9489025f1a8021e57a61e79666558ca17afd68 Mon Sep 17 00:00:00 2001 From: Wolfgang Pieb Date: Mon, 23 May 2022 17:08:01 -0700 Subject: [PATCH 300/908] [NFC][Metadata] Define move constructor and move assignment operator for MDOperand. This is a preparatory patch for the MDNode resize functionality. 
Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D125994 --- llvm/include/llvm/IR/Metadata.h | 15 +++++++++++++-- llvm/unittests/IR/MetadataTest.cpp | 29 +++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index af3e5e967db665..0810a814da819d 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -775,10 +775,21 @@ class MDOperand { public: MDOperand() = default; - MDOperand(MDOperand &&) = delete; MDOperand(const MDOperand &) = delete; - MDOperand &operator=(MDOperand &&) = delete; + MDOperand(MDOperand &&Op) { + MD = Op.MD; + if (MD) + (void)MetadataTracking::retrack(Op.MD, MD); + Op.MD = nullptr; + } MDOperand &operator=(const MDOperand &) = delete; + MDOperand &operator=(MDOperand &&Op) { + MD = Op.MD; + if (MD) + (void)MetadataTracking::retrack(Op.MD, MD); + Op.MD = nullptr; + return *this; + } ~MDOperand() { untrack(); } Metadata *get() const { return MD; } diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index 4297e3283338e6..41ea3e9c2f5ca4 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -3602,4 +3602,33 @@ TEST_F(DebugVariableTest, DenseMap) { EXPECT_EQ(DebugVariableMap.find(DebugVariableFragB)->second, 12u); } +typedef MetadataTest MDTupleAllocationTest; +TEST_F(MDTupleAllocationTest, Tracking) { + // Make sure that the move constructor and move assignment op + // for MDOperand correctly adjust tracking information. + auto *Value1 = getConstantAsMetadata(); + MDTuple *A = MDTuple::getDistinct(Context, {Value1, Value1}); + EXPECT_EQ(A->getOperand(0), Value1); + EXPECT_EQ(A->getOperand(1), Value1); + + MDNode::op_range Ops = A->operands(); + + MDOperand NewOps1; + // Move assignment operator. + NewOps1 = std::move(*const_cast(Ops.begin())); + // Move constructor. 
+ MDOperand NewOps2(std::move(*const_cast(Ops.begin() + 1))); + + EXPECT_EQ(NewOps1.get(), static_cast(Value1)); + EXPECT_EQ(NewOps2.get(), static_cast(Value1)); + + auto *Value2 = getConstantAsMetadata(); + Value *V1 = Value1->getValue(); + Value *V2 = Value2->getValue(); + ValueAsMetadata::handleRAUW(V1, V2); + + EXPECT_EQ(NewOps1.get(), static_cast(Value2)); + EXPECT_EQ(NewOps2.get(), static_cast(Value2)); +} + } // end namespace From 602682225ad6c9135e84bbca3b91d5738712c64f Mon Sep 17 00:00:00 2001 From: usama hameed Date: Thu, 19 May 2022 16:51:34 -0700 Subject: [PATCH 301/908] bugfix in InfiniteLoopCheck to not print warnings for unevaluated loops Differential Revision: https://reviews.llvm.org/D126034 --- .../clang-tidy/bugprone/InfiniteLoopCheck.cpp | 19 ++++++++++ .../checkers/bugprone-infinite-loop.cpp | 35 +++++++++++++++++++ .../Analysis/Analyses/ExprMutationAnalyzer.h | 2 ++ clang/lib/Analysis/ExprMutationAnalyzer.cpp | 7 +++- 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp index 3359a7f8932ede..cb9bd7bb43f505 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp @@ -81,6 +81,22 @@ static bool isVarThatIsPossiblyChanged(const Decl *Func, const Stmt *LoopStmt, return false; } +bool isUnevaluated(const Decl *Func, const Stmt *LoopStmt, const Stmt *Cond, + ASTContext *Context) { + if (const auto *Exp = dyn_cast(Cond)) { + if (const auto *ForLoop = dyn_cast(LoopStmt)) { + return (ForLoop->getInc() && ExprMutationAnalyzer::isUnevaluated( + Exp, *ForLoop->getInc(), *Context)) || + (ForLoop->getBody() && ExprMutationAnalyzer::isUnevaluated( + Exp, *ForLoop->getBody(), *Context)) || + (ForLoop->getCond() && ExprMutationAnalyzer::isUnevaluated( + Exp, *ForLoop->getCond(), *Context)); + } + return ExprMutationAnalyzer::isUnevaluated(Exp, 
*LoopStmt, *Context); + } + return true; +} + /// Return whether at least one variable of `Cond` changed in `LoopStmt`. static bool isAtLeastOneCondVarChanged(const Decl *Func, const Stmt *LoopStmt, const Stmt *Cond, ASTContext *Context) { @@ -177,6 +193,9 @@ void InfiniteLoopCheck::check(const MatchFinder::MatchResult &Result) { } } + if (isUnevaluated(Func, LoopStmt, Cond, Result.Context)) + return; + if (isAtLeastOneCondVarChanged(Func, LoopStmt, Cond, Result.Context)) return; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp index 0074266ce4b87b..a22f1bf06a5346 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp @@ -650,3 +650,38 @@ void test_dependent_condition() { do { } while (1, (false) && val4 == 1); } + +void test_typeof() { + __typeof__({ + for (int i = 0; i < 10; ++i) { + } + 0; + }) x; +} + +void test_typeof_infinite() { + __typeof__({ + for (int i = 0; i < 10;) { + } + 0; + }) x; +} + +void test_typeof_while_infinite() { + __typeof__({ + int i = 0; + while (i < 10) { + } + 0; + }) x; +} + +void test_typeof_dowhile_infinite() { + __typeof__({ + int i = 0; + do { + + } while (i < 10); + 0; + }) x; +} diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h index 9397c5df78abef..1b2b7ffcfb67a5 100644 --- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h @@ -38,6 +38,8 @@ class ExprMutationAnalyzer { } const Stmt *findPointeeMutation(const Expr *Exp); const Stmt *findPointeeMutation(const Decl *Dec); + static bool isUnevaluated(const Expr *Exp, const Stmt &Stm, + ASTContext &Context); private: using MutationFinder = const Stmt *(ExprMutationAnalyzer::*)(const Expr *); diff --git 
a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index e9ff5e5e876589..4d7decc1137c2b 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -194,7 +194,8 @@ const Stmt *ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, return nullptr; } -bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { +bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp, const Stmt &Stm, + ASTContext &Context) { return selectFirst( NodeID::value, match( @@ -225,6 +226,10 @@ bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { Stm, Context)) != nullptr; } +bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { + return isUnevaluated(Exp, Stm, Context); +} + const Stmt * ExprMutationAnalyzer::findExprMutation(ArrayRef Matches) { return tryEachMatch(Matches, this, &ExprMutationAnalyzer::findMutation); From 63ecb7dcc80d17770461c8bf01bddeb2b795625b Mon Sep 17 00:00:00 2001 From: usama hameed Date: Mon, 23 May 2022 14:52:14 -0700 Subject: [PATCH 302/908] bugfix in InfiniteLoopCheck to not print warnings for unevaluated loops Added a separate check for unevaluated statements. 
Updated InfiniteLoopCheck to use new check Differential Revision: https://reviews.llvm.org/D126246 --- .../clang-tidy/bugprone/InfiniteLoopCheck.cpp | 18 +----- .../Analysis/Analyses/ExprMutationAnalyzer.h | 2 + clang/lib/Analysis/ExprMutationAnalyzer.cpp | 62 +++++++++++-------- 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp index cb9bd7bb43f505..8815de220882ed 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp @@ -81,22 +81,6 @@ static bool isVarThatIsPossiblyChanged(const Decl *Func, const Stmt *LoopStmt, return false; } -bool isUnevaluated(const Decl *Func, const Stmt *LoopStmt, const Stmt *Cond, - ASTContext *Context) { - if (const auto *Exp = dyn_cast(Cond)) { - if (const auto *ForLoop = dyn_cast(LoopStmt)) { - return (ForLoop->getInc() && ExprMutationAnalyzer::isUnevaluated( - Exp, *ForLoop->getInc(), *Context)) || - (ForLoop->getBody() && ExprMutationAnalyzer::isUnevaluated( - Exp, *ForLoop->getBody(), *Context)) || - (ForLoop->getCond() && ExprMutationAnalyzer::isUnevaluated( - Exp, *ForLoop->getCond(), *Context)); - } - return ExprMutationAnalyzer::isUnevaluated(Exp, *LoopStmt, *Context); - } - return true; -} - /// Return whether at least one variable of `Cond` changed in `LoopStmt`. 
static bool isAtLeastOneCondVarChanged(const Decl *Func, const Stmt *LoopStmt, const Stmt *Cond, ASTContext *Context) { @@ -193,7 +177,7 @@ void InfiniteLoopCheck::check(const MatchFinder::MatchResult &Result) { } } - if (isUnevaluated(Func, LoopStmt, Cond, Result.Context)) + if (ExprMutationAnalyzer::isUnevaluated(LoopStmt, *LoopStmt, *Result.Context)) return; if (isAtLeastOneCondVarChanged(Func, LoopStmt, Cond, Result.Context)) diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h index 1b2b7ffcfb67a5..a48b9735dfee5a 100644 --- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h @@ -38,6 +38,8 @@ class ExprMutationAnalyzer { } const Stmt *findPointeeMutation(const Expr *Exp); const Stmt *findPointeeMutation(const Decl *Dec); + static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, + ASTContext &Context); static bool isUnevaluated(const Expr *Exp, const Stmt &Stm, ASTContext &Context); diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index 4d7decc1137c2b..135327a3cfe54e 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -194,36 +194,44 @@ const Stmt *ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, return nullptr; } +auto isUnevaluatedMatcher(const Stmt *Exp) { + return anyOf( + // `Exp` is part of the underlying expression of + // decltype/typeof if it has an ancestor of + // typeLoc. + hasAncestor(typeLoc(unless(hasAncestor(unaryExprOrTypeTraitExpr())))), + hasAncestor(expr(anyOf( + // `UnaryExprOrTypeTraitExpr` is unevaluated + // unless it's sizeof on VLA. + unaryExprOrTypeTraitExpr( + unless(sizeOfExpr(hasArgumentOfType(variableArrayType())))), + // `CXXTypeidExpr` is unevaluated unless it's + // applied to an expression of glvalue of + // polymorphic class type. 
+ cxxTypeidExpr(unless(isPotentiallyEvaluated())), + // The controlling expression of + // `GenericSelectionExpr` is unevaluated. + genericSelectionExpr( + hasControllingExpr(hasDescendant(equalsNode(Exp)))), + cxxNoexceptExpr())))); +} + bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp, const Stmt &Stm, ASTContext &Context) { - return selectFirst( + return selectFirst(NodeID::value, + match(findAll(expr(canResolveToExpr(equalsNode(Exp)), + isUnevaluatedMatcher(Exp)) + .bind(NodeID::value)), + Stm, Context)) != nullptr; +} + +bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Exp, const Stmt &Stm, + ASTContext &Context) { + return selectFirst( NodeID::value, - match( - findAll( - expr(canResolveToExpr(equalsNode(Exp)), - anyOf( - // `Exp` is part of the underlying expression of - // decltype/typeof if it has an ancestor of - // typeLoc. - hasAncestor(typeLoc(unless( - hasAncestor(unaryExprOrTypeTraitExpr())))), - hasAncestor(expr(anyOf( - // `UnaryExprOrTypeTraitExpr` is unevaluated - // unless it's sizeof on VLA. - unaryExprOrTypeTraitExpr(unless(sizeOfExpr( - hasArgumentOfType(variableArrayType())))), - // `CXXTypeidExpr` is unevaluated unless it's - // applied to an expression of glvalue of - // polymorphic class type. - cxxTypeidExpr( - unless(isPotentiallyEvaluated())), - // The controlling expression of - // `GenericSelectionExpr` is unevaluated. - genericSelectionExpr(hasControllingExpr( - hasDescendant(equalsNode(Exp)))), - cxxNoexceptExpr()))))) - .bind(NodeID::value)), - Stm, Context)) != nullptr; + match(findAll(stmt(equalsNode(Exp), isUnevaluatedMatcher(Exp)) + .bind(NodeID::value)), + Stm, Context)) != nullptr; } bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { From ca81abcfd752e65c533825a5fadac19ce5a33578 Mon Sep 17 00:00:00 2001 From: usama hameed Date: Mon, 23 May 2022 16:05:15 -0700 Subject: [PATCH 303/908] updated canResolveToExpr to accept both statements and expressions. 
Removed unnecessary code --- .../Analysis/Analyses/ExprMutationAnalyzer.h | 2 - clang/lib/Analysis/ExprMutationAnalyzer.cpp | 69 +++++++++---------- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h index a48b9735dfee5a..1ceef944fbc34e 100644 --- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h @@ -40,8 +40,6 @@ class ExprMutationAnalyzer { const Stmt *findPointeeMutation(const Decl *Dec); static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, ASTContext &Context); - static bool isUnevaluated(const Expr *Exp, const Stmt &Stm, - ASTContext &Context); private: using MutationFinder = const Stmt *(ExprMutationAnalyzer::*)(const Expr *); diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index 135327a3cfe54e..e3bb902b1fe979 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -39,8 +39,13 @@ AST_MATCHER_P(Expr, maybeEvalCommaExpr, ast_matchers::internal::Matcher, return InnerMatcher.matches(*Result, Finder, Builder); } -AST_MATCHER_P(Expr, canResolveToExpr, ast_matchers::internal::Matcher, +AST_MATCHER_P(Stmt, canResolveToExpr, ast_matchers::internal::Matcher, InnerMatcher) { + auto *Exp = dyn_cast(&Node); + if (!Exp) { + return stmt().matches(Node, Finder, Builder); + } + auto DerivedToBase = [](const ast_matchers::internal::Matcher &Inner) { return implicitCastExpr(anyOf(hasCastKind(CK_DerivedToBase), hasCastKind(CK_UncheckedDerivedToBase)), @@ -71,7 +76,7 @@ AST_MATCHER_P(Expr, canResolveToExpr, ast_matchers::internal::Matcher, IgnoreDerivedToBase(ConditionalOperator), IgnoreDerivedToBase(ElvisOperator)))); - return ComplexMatcher.matches(Node, Finder, Builder); + return ComplexMatcher.matches(*Exp, Finder, Builder); } // Similar to 
'hasAnyArgument', but does not work because 'InitListExpr' does @@ -194,44 +199,36 @@ const Stmt *ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, return nullptr; } -auto isUnevaluatedMatcher(const Stmt *Exp) { - return anyOf( - // `Exp` is part of the underlying expression of - // decltype/typeof if it has an ancestor of - // typeLoc. - hasAncestor(typeLoc(unless(hasAncestor(unaryExprOrTypeTraitExpr())))), - hasAncestor(expr(anyOf( - // `UnaryExprOrTypeTraitExpr` is unevaluated - // unless it's sizeof on VLA. - unaryExprOrTypeTraitExpr( - unless(sizeOfExpr(hasArgumentOfType(variableArrayType())))), - // `CXXTypeidExpr` is unevaluated unless it's - // applied to an expression of glvalue of - // polymorphic class type. - cxxTypeidExpr(unless(isPotentiallyEvaluated())), - // The controlling expression of - // `GenericSelectionExpr` is unevaluated. - genericSelectionExpr( - hasControllingExpr(hasDescendant(equalsNode(Exp)))), - cxxNoexceptExpr())))); -} - -bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp, const Stmt &Stm, - ASTContext &Context) { - return selectFirst(NodeID::value, - match(findAll(expr(canResolveToExpr(equalsNode(Exp)), - isUnevaluatedMatcher(Exp)) - .bind(NodeID::value)), - Stm, Context)) != nullptr; -} - bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Exp, const Stmt &Stm, ASTContext &Context) { return selectFirst( NodeID::value, - match(findAll(stmt(equalsNode(Exp), isUnevaluatedMatcher(Exp)) - .bind(NodeID::value)), - Stm, Context)) != nullptr; + match( + findAll( + stmt(canResolveToExpr(equalsNode(Exp)), + anyOf( + // `Exp` is part of the underlying expression of + // decltype/typeof if it has an ancestor of + // typeLoc. + hasAncestor(typeLoc(unless( + hasAncestor(unaryExprOrTypeTraitExpr())))), + hasAncestor(expr(anyOf( + // `UnaryExprOrTypeTraitExpr` is unevaluated + // unless it's sizeof on VLA. 
+ unaryExprOrTypeTraitExpr(unless(sizeOfExpr( + hasArgumentOfType(variableArrayType())))), + // `CXXTypeidExpr` is unevaluated unless it's + // applied to an expression of glvalue of + // polymorphic class type. + cxxTypeidExpr( + unless(isPotentiallyEvaluated())), + // The controlling expression of + // `GenericSelectionExpr` is unevaluated. + genericSelectionExpr(hasControllingExpr( + hasDescendant(equalsNode(Exp)))), + cxxNoexceptExpr()))))) + .bind(NodeID::value)), + Stm, Context)) != nullptr; } bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { From 6c12ae8163c77c508aa846958360aa96d1c2c00d Mon Sep 17 00:00:00 2001 From: Hyoun Kyu Cho Date: Tue, 24 May 2022 03:04:40 +0000 Subject: [PATCH 304/908] Exposes interface to free up caching data structure in DWARFDebugLine and DWARFUnit for memory management This is minimum changes extracted from https://reviews.llvm.org/D78950. The old patch tried to add LRU eviction of caching data structure. Due to multiple layers of interfaces that users could be using, it was not clear where to put the functionality. While we work out on where to put that functionality, it'll be great to add this minimum interface change so that the user could implement their own memory management. More specifically: * Add a clearLineTable method for DWARFDebugLine which erases the given offset from the LineTableMap. * DWARFDebugContext adds the clearLineTableForUnit method that leverages clearLineTable to remove the object corresponding to a given compile unit, for memory management purposes. When it is referred to again, the line table object will be repopulated. 
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D90006 --- .../llvm/DebugInfo/DWARF/DWARFContext.h | 4 + .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 1 + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 16 ++++ llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 4 + llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 3 + .../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 76 +++++++++++++++++++ 6 files changed, 104 insertions(+) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 52ae538c55bb30..bf591ed554c6a6 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -333,6 +333,10 @@ class DWARFContext : public DIContext { getLineTableForUnit(DWARFUnit *U, function_ref RecoverableErrorHandler); + // Clear the line table object corresponding to a compile unit for memory + // management purpose. When it's referred to again, it'll be re-populated. + void clearLineTableForUnit(DWARFUnit *U); + DataExtractor getStringExtractor() const { return DataExtractor(DObj->getStrSection(), false, 0); } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index f3be160f7398a0..86f90135f8d4e2 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -304,6 +304,7 @@ class DWARFDebugLine { getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, const DWARFUnit *U, function_ref RecoverableErrorHandler); + void clearLineTable(uint64_t Offset); /// Helper to allow for parsing of an entire .debug_line section in sequence. 
class SectionParser { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 4422141ff3c978..40c9c420ef6b89 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1003,6 +1003,22 @@ Expected DWARFContext::getLineTableForUnit( RecoverableErrorHandler); } +void DWARFContext::clearLineTableForUnit(DWARFUnit *U) { + if (!Line) + return; + + auto UnitDIE = U->getUnitDIE(); + if (!UnitDIE) + return; + + auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list)); + if (!Offset) + return; + + uint64_t stmtOffset = *Offset + U->getLineTableOffset(); + Line->clearLineTable(stmtOffset); +} + void DWARFContext::parseNormalUnits() { if (!NormalUnits.empty()) return; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 81394e2b24b748..2e0780e249aa46 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -601,6 +601,10 @@ Expected DWARFDebugLine::getOrParseLineTable( return LT; } +void DWARFDebugLine::clearLineTable(uint64_t Offset) { + LineTableMap.erase(Offset); +} + static StringRef getOpcodeName(uint8_t Opcode, uint8_t OpcodeBase) { assert(Opcode != 0); if (Opcode < OpcodeBase) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index 3879cf1aabab1f..29c769834b06cb 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -380,6 +380,9 @@ void DWARFUnit::clear() { AddrOffsetSectionBase = None; SU = nullptr; clearDIEs(false); + AddrDieMap.clear(); + if (DWO) + DWO->clear(); DWO.reset(); } diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index 731aa4f0f3d0fa..9c5e1c18705ae8 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -330,6 
+330,82 @@ TEST_P(DebugLineParameterisedFixture, GetOrParseLineTableValidTable) { // correctly. } +#ifdef _AIX +TEST_P(DebugLineParameterisedFixture, DISABLED_ClearLineValidTable) { +#else +TEST_P(DebugLineParameterisedFixture, ClearLineValidTable) { +#endif + if (!setupGenerator(Version)) + GTEST_SKIP(); + + SCOPED_TRACE("Checking Version " + std::to_string(Version) + ", Format " + + (Format == DWARF64 ? "DWARF64" : "DWARF32")); + + LineTable &LT = Gen->addLineTable(Format); + LT.addExtendedOpcode(9, DW_LNE_set_address, {{0xadd4e55, LineTable::Quad}}); + LT.addStandardOpcode(DW_LNS_copy, {}); + LT.addByte(0xaa); + LT.addExtendedOpcode(1, DW_LNE_end_sequence, {}); + + LineTable &LT2 = Gen->addLineTable(Format); + LT2.addExtendedOpcode(9, DW_LNE_set_address, {{0x11223344, LineTable::Quad}}); + LT2.addStandardOpcode(DW_LNS_copy, {}); + LT2.addByte(0xbb); + LT2.addExtendedOpcode(1, DW_LNE_end_sequence, {}); + LT2.addExtendedOpcode(9, DW_LNE_set_address, {{0x55667788, LineTable::Quad}}); + LT2.addStandardOpcode(DW_LNS_copy, {}); + LT2.addByte(0xcc); + LT2.addExtendedOpcode(1, DW_LNE_end_sequence, {}); + + generate(); + + // Check that we have what we expect before calling clearLineTable(). 
+ auto ExpectedLineTable = Line.getOrParseLineTable(LineData, 0, *Context, + nullptr, RecordRecoverable); + ASSERT_TRUE((bool)ExpectedLineTable); + EXPECT_FALSE(Recoverable); + const DWARFDebugLine::LineTable *Expected = *ExpectedLineTable; + checkDefaultPrologue(Version, Format, Expected->Prologue, 16); + EXPECT_EQ(Expected->Sequences.size(), 1u); + + uint64_t SecondOffset = + Expected->Prologue.sizeofTotalLength() + Expected->Prologue.TotalLength; + Recoverable = Error::success(); + auto ExpectedLineTable2 = Line.getOrParseLineTable( + LineData, SecondOffset, *Context, nullptr, RecordRecoverable); + ASSERT_TRUE((bool)ExpectedLineTable2); + EXPECT_FALSE(Recoverable); + const DWARFDebugLine::LineTable *Expected2 = *ExpectedLineTable2; + checkDefaultPrologue(Version, Format, Expected2->Prologue, 32); + EXPECT_EQ(Expected2->Sequences.size(), 2u); + + // Check that we no longer get the line tables after clearLineTable(). + Line.clearLineTable(0); + Line.clearLineTable(SecondOffset); + EXPECT_EQ(Line.getLineTable(0), nullptr); + EXPECT_EQ(Line.getLineTable(SecondOffset), nullptr); + + // Check that if the same offset is requested, the contents match what we + // had before. 
+ Recoverable = Error::success(); + auto ExpectedLineTable3 = Line.getOrParseLineTable( + LineData, 0, *Context, nullptr, RecordRecoverable); + ASSERT_TRUE((bool)ExpectedLineTable3); + EXPECT_FALSE(Recoverable); + const DWARFDebugLine::LineTable *Expected3 = *ExpectedLineTable3; + checkDefaultPrologue(Version, Format, Expected3->Prologue, 16); + EXPECT_EQ(Expected3->Sequences.size(), 1u); + + Recoverable = Error::success(); + auto ExpectedLineTable4 = Line.getOrParseLineTable( + LineData, SecondOffset, *Context, nullptr, RecordRecoverable); + ASSERT_TRUE((bool)ExpectedLineTable4); + EXPECT_FALSE(Recoverable); + const DWARFDebugLine::LineTable *Expected4 = *ExpectedLineTable4; + checkDefaultPrologue(Version, Format, Expected4->Prologue, 32); + EXPECT_EQ(Expected4->Sequences.size(), 2u); +} + #ifdef _AIX TEST_F(DebugLineBasicFixture, DISABLED_ErrorForReservedLength) { #else From 244494a201f81996a401e49c7b13bdf54589f293 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Mon, 23 May 2022 23:26:14 -0400 Subject: [PATCH 305/908] [Hexagon] Fix test on OpenBSD The test specifies a CPU arch but not a particular OS. So if run on OpenBSD it acts as if it's an OpenBSD/hexagon system. OpenBSD uses __guard_local instead of __stack_chk_guard so the test will fail. Specifying an OS other than OpenBSD fixes the test. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D126265 --- llvm/test/CodeGen/Hexagon/sdata-stack-guard.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/Hexagon/sdata-stack-guard.ll b/llvm/test/CodeGen/Hexagon/sdata-stack-guard.ll index 2b618f09afc343..56a0c981245662 100644 --- a/llvm/test/CodeGen/Hexagon/sdata-stack-guard.ll +++ b/llvm/test/CodeGen/Hexagon/sdata-stack-guard.ll @@ -1,9 +1,9 @@ ; Check that the __stack_chk_guard was placed in small data. 
-; RUN: llc -march=hexagon -O2 -hexagon-small-data-threshold=4 < %s | FileCheck -check-prefix=GPREL %s +; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-gnu -O2 -hexagon-small-data-threshold=4 < %s | FileCheck -check-prefix=GPREL %s ; GPREL: memw(gp+#__stack_chk_guard) ; For threshold less than 4 (size of address), the variable is not placed in small-data -; RUN: llc -march=hexagon -O2 -hexagon-small-data-threshold=0 < %s | FileCheck -check-prefix=ABS %s +; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-gnu -O2 -hexagon-small-data-threshold=0 < %s | FileCheck -check-prefix=ABS %s ; ABS: memw(##__stack_chk_guard) @g0 = private unnamed_addr constant [37 x i8] c"This string is longer than 16 bytes\0A\00", align 1 From a111fb960108df910a864500f3b98d75d37f083c Mon Sep 17 00:00:00 2001 From: Sotiris Apostolakis Date: Mon, 23 May 2022 23:04:20 -0400 Subject: [PATCH 306/908] [SelectOpti][5/5] Optimize select-to-branch transformation This patch optimizes the transformation of selects to a branch when the heuristics deemed it profitable. It aggressively sinks eligible instructions to the newly created true/false blocks to prevent their execution on the common path and interleaves dependence slices to maximize ILP. Depends on D120232 Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D120233 --- llvm/lib/CodeGen/SelectOptimize.cpp | 153 +++++++++++++++++++---- llvm/test/CodeGen/X86/select-optimize.ll | 44 ++++--- 2 files changed, 154 insertions(+), 43 deletions(-) diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 2194da77e0b4c0..1cab0dc9a4811d 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -179,8 +179,8 @@ class SelectOptimize : public FunctionPass { // For a given source instruction, collect its backwards dependence slice // consisting of instructions exclusively computed for producing the operands // of the source instruction. 
- void getExclBackwardsSlice(Instruction *I, - SmallVector &Slice); + void getExclBackwardsSlice(Instruction *I, std::stack &Slice, + bool ForSinking = false); // Returns true if the condition of the select is highly predictable. bool isSelectHighlyPredictable(const SelectInst *SI); @@ -329,6 +329,10 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue, void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { for (SelectGroup &ASI : ProfSIGroups) { + // The code transformation here is a modified version of the sinking + // transformation in CodeGenPrepare::optimizeSelectInst with a more + // aggressive strategy of which instructions to sink. + // // TODO: eliminate the redundancy of logic transforming selects to branches // by removing CodeGenPrepare::optimizeSelectInst and optimizing here // selects for all cases (with and without profile information). @@ -342,13 +346,72 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // start: // %cmp = cmp uge i32 %a, %b // %cmp.frozen = freeze %cmp - // br i1 %cmp.frozen, label %select.end, label %select.false + // br i1 %cmp.frozen, label %select.true, label %select.false + // select.true: + // br label %select.end // select.false: // br label %select.end // select.end: - // %sel = phi i32 [ %c, %start ], [ %d, %select.false ] + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] // // %cmp should be frozen, otherwise it may introduce undefined behavior. + // In addition, we may sink instructions that produce %c or %d into the + // destination(s) of the new branch. + // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + + // Find all the instructions that can be soundly sunk to the true/false + // blocks. 
These are instructions that are computed solely for producing the + // operands of the select instructions in the group and can be sunk without + // breaking the semantics of the LLVM IR (e.g., cannot sink instructions + // with side effects). + SmallVector, 2> TrueSlices, FalseSlices; + unsigned long maxTrueSliceLen = 0, maxFalseSliceLen = 0; + for (SelectInst *SI : ASI) { + // For each select, compute the sinkable dependence chains of the true and + // false operands. + if (auto *TI = dyn_cast(SI->getTrueValue())) { + std::stack TrueSlice; + getExclBackwardsSlice(TI, TrueSlice, true); + maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); + TrueSlices.push_back(TrueSlice); + } + if (auto *FI = dyn_cast(SI->getFalseValue())) { + std::stack FalseSlice; + getExclBackwardsSlice(FI, FalseSlice, true); + maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); + FalseSlices.push_back(FalseSlice); + } + } + // In the case of multiple select instructions in the same group, the order + // of non-dependent instructions (instructions of different dependence + // slices) in the true/false blocks appears to affect performance. + // Interleaving the slices seems to experimentally be the optimal approach. + // This interleaving scheduling allows for more ILP (with a natural downside + // of increasing a bit register pressure) compared to a simple ordering of + // one whole chain after another. One would expect that this ordering would + // not matter since the scheduling in the backend of the compiler would + // take care of it, but apparently the scheduler fails to deliver optimal + // ILP with a naive ordering here. 
+ SmallVector TrueSlicesInterleaved, FalseSlicesInterleaved; + for (unsigned long IS = 0; IS < maxTrueSliceLen; ++IS) { + for (auto &S : TrueSlices) { + if (!S.empty()) { + TrueSlicesInterleaved.push_back(S.top()); + S.pop(); + } + } + } + for (unsigned long IS = 0; IS < maxFalseSliceLen; ++IS) { + for (auto &S : FalseSlices) { + if (!S.empty()) { + FalseSlicesInterleaved.push_back(S.top()); + S.pop(); + } + } + } // We split the block containing the select(s) into two blocks. SelectInst *SI = ASI.front(); @@ -374,24 +437,55 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { } // These are the new basic blocks for the conditional branch. - // For now, no instruction sinking to the true/false blocks. - // Thus both True and False blocks will be empty. + // At least one will become an actual new basic block. BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; - - // Use the 'false' side for a new input value to the PHI. - FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", - EndBlock->getParent(), EndBlock); - auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); - FalseBranch->setDebugLoc(SI->getDebugLoc()); - - // For the 'true' side the path originates from the start block from the - // point view of the new PHI. 
- TrueBlock = StartBlock; + BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; + if (!TrueSlicesInterleaved.empty()) { + TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + TrueBranch->setDebugLoc(LastSI->getDebugLoc()); + for (Instruction *TrueInst : TrueSlicesInterleaved) + TrueInst->moveBefore(TrueBranch); + } + if (!FalseSlicesInterleaved.empty()) { + FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(LastSI->getDebugLoc()); + for (Instruction *FalseInst : FalseSlicesInterleaved) + FalseInst->moveBefore(FalseBranch); + } + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); + } // Insert the real conditional branch based on the original condition. + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. 
BasicBlock *TT, *FT; - TT = EndBlock; - FT = FalseBlock; + if (TrueBlock == nullptr) { + TT = EndBlock; + FT = FalseBlock; + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + TT = TrueBlock; + FT = EndBlock; + FalseBlock = StartBlock; + } else { + TT = TrueBlock; + FT = FalseBlock; + } IRBuilder<> IB(SI); auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); @@ -586,12 +680,13 @@ bool SelectOptimize::hasExpensiveColdOperand( HotWeight = TrueWeight; } if (ColdI) { - SmallVector ColdSlice; + std::stack ColdSlice; getExclBackwardsSlice(ColdI, ColdSlice); InstructionCost SliceCost = 0; - for (auto *ColdII : ColdSlice) { - SliceCost += - TTI->getInstructionCost(ColdII, TargetTransformInfo::TCK_Latency); + while (!ColdSlice.empty()) { + SliceCost += TTI->getInstructionCost(ColdSlice.top(), + TargetTransformInfo::TCK_Latency); + ColdSlice.pop(); } // The colder the cold value operand of the select is the more expensive // the cmov becomes for computing the cold value operand every time. Thus, @@ -613,8 +708,9 @@ bool SelectOptimize::hasExpensiveColdOperand( // (sufficiently-accurate in practice), we populate this set with the // instructions of the backwards dependence slice that only have one-use and // form an one-use chain that leads to the source instruction. -void SelectOptimize::getExclBackwardsSlice( - Instruction *I, SmallVector &Slice) { +void SelectOptimize::getExclBackwardsSlice(Instruction *I, + std::stack &Slice, + bool ForSinking) { SmallPtrSet Visited; std::queue Worklist; Worklist.push(I); @@ -630,13 +726,20 @@ void SelectOptimize::getExclBackwardsSlice( if (!II->hasOneUse()) continue; + // Cannot soundly sink instructions with side-effects. + // Terminator or phi instructions cannot be sunk. + // Avoid sinking other select instructions (should be handled separetely). 
+ if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || + isa(II) || isa(II))) + continue; + // Avoid considering instructions with less frequency than the source // instruction (i.e., avoid colder code regions of the dependence slice). if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) continue; // Eligible one-use instruction added to the dependence slice. - Slice.push_back(II); + Slice.push(II); // Explore all the operands of the current instruction to expand the slice. for (unsigned k = 0; k < II->getNumOperands(); ++k) diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index ee267c80e7a5ec..f2dff8cf8c9f15 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -105,20 +105,28 @@ define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 { ; If select group predictable, turn it into a branch. define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { ; CHECK-LABEL: @weighted_select_group( +; CHECK-NEXT: [[A1:%.*]] = add i32 [[A:%.*]], 1 ; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] -; CHECK: select.false: +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[C1:%.*]] = add i32 [[C:%.*]], 1 +; CHECK-NEXT: br label [[SELECT_END:%.*]] +; CHECK: select.false.sink: +; CHECK-NEXT: [[B1:%.*]] = add i32 [[B:%.*]], 1 ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] -; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ] +; CHECK-NEXT: [[SEL2:%.*]] = 
phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ] ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[ADD]] ; - %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !15 + %a1 = add i32 %a, 1 + %b1 = add i32 %b, 1 + %c1 = add i32 %c, 1 + %sel1 = select i1 %cmp, i32 %a1, i32 %b1, !prof !15 call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23) - %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !15 + %sel2 = select i1 %cmp, i32 %c1, i32 %a1, !prof !15 %add = add i32 %sel1, %sel2 ret i32 %add } @@ -151,13 +159,13 @@ define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; sink load define i32 @expensive_val_operand1(i32* nocapture %a, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand1( -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] -; CHECK: select.false: +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; %load = load i32, i32* %a, align 8 @@ -181,14 +189,14 @@ define i32 @expensive_val_operand2(i32* nocapture %a, i32 %x, i1 %cmp) { ; into a branch with sinked dependence slice. 
define i32 @expensive_val_operand3(i32* nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand3( +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK: select.true.sink: ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] -; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; %load = load i32, i32* %a, align 8 @@ -242,14 +250,14 @@ define double @cmov_on_critical_path(i32 %n, double %x, double* nocapture %a) { ; CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[R:%.*]] = load double, double* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] ; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]] ; CHECK-NEXT: [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]] -; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]], !prof [[PROF27:![0-9]+]] -; CHECK: select.false: +; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[FOR_BODY]] ], [ [[X1]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[X2]] = phi 
double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] From 1786e70bd85df491a91d19b400855fedcc00ce9e Mon Sep 17 00:00:00 2001 From: Sotiris Apostolakis Date: Tue, 24 May 2022 00:02:00 -0400 Subject: [PATCH 307/908] Revert "[SelectOpti][5/5] Optimize select-to-branch transformation" This reverts commit a111fb960108df910a864500f3b98d75d37f083c. --- llvm/lib/CodeGen/SelectOptimize.cpp | 153 ++++------------------- llvm/test/CodeGen/X86/select-optimize.ll | 44 +++---- 2 files changed, 43 insertions(+), 154 deletions(-) diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 1cab0dc9a4811d..2194da77e0b4c0 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -179,8 +179,8 @@ class SelectOptimize : public FunctionPass { // For a given source instruction, collect its backwards dependence slice // consisting of instructions exclusively computed for producing the operands // of the source instruction. - void getExclBackwardsSlice(Instruction *I, std::stack &Slice, - bool ForSinking = false); + void getExclBackwardsSlice(Instruction *I, + SmallVector &Slice); // Returns true if the condition of the select is highly predictable. bool isSelectHighlyPredictable(const SelectInst *SI); @@ -329,10 +329,6 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue, void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { for (SelectGroup &ASI : ProfSIGroups) { - // The code transformation here is a modified version of the sinking - // transformation in CodeGenPrepare::optimizeSelectInst with a more - // aggressive strategy of which instructions to sink. 
- // // TODO: eliminate the redundancy of logic transforming selects to branches // by removing CodeGenPrepare::optimizeSelectInst and optimizing here // selects for all cases (with and without profile information). @@ -346,72 +342,13 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // start: // %cmp = cmp uge i32 %a, %b // %cmp.frozen = freeze %cmp - // br i1 %cmp.frozen, label %select.true, label %select.false - // select.true: - // br label %select.end + // br i1 %cmp.frozen, label %select.end, label %select.false // select.false: // br label %select.end // select.end: - // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] + // %sel = phi i32 [ %c, %start ], [ %d, %select.false ] // // %cmp should be frozen, otherwise it may introduce undefined behavior. - // In addition, we may sink instructions that produce %c or %d into the - // destination(s) of the new branch. - // If the true or false blocks do not contain a sunken instruction, that - // block and its branch may be optimized away. In that case, one side of the - // first branch will point directly to select.end, and the corresponding PHI - // predecessor block will be the start block. - - // Find all the instructions that can be soundly sunk to the true/false - // blocks. These are instructions that are computed solely for producing the - // operands of the select instructions in the group and can be sunk without - // breaking the semantics of the LLVM IR (e.g., cannot sink instructions - // with side effects). - SmallVector, 2> TrueSlices, FalseSlices; - unsigned long maxTrueSliceLen = 0, maxFalseSliceLen = 0; - for (SelectInst *SI : ASI) { - // For each select, compute the sinkable dependence chains of the true and - // false operands. 
- if (auto *TI = dyn_cast(SI->getTrueValue())) { - std::stack TrueSlice; - getExclBackwardsSlice(TI, TrueSlice, true); - maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); - TrueSlices.push_back(TrueSlice); - } - if (auto *FI = dyn_cast(SI->getFalseValue())) { - std::stack FalseSlice; - getExclBackwardsSlice(FI, FalseSlice, true); - maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); - FalseSlices.push_back(FalseSlice); - } - } - // In the case of multiple select instructions in the same group, the order - // of non-dependent instructions (instructions of different dependence - // slices) in the true/false blocks appears to affect performance. - // Interleaving the slices seems to experimentally be the optimal approach. - // This interleaving scheduling allows for more ILP (with a natural downside - // of increasing a bit register pressure) compared to a simple ordering of - // one whole chain after another. One would expect that this ordering would - // not matter since the scheduling in the backend of the compiler would - // take care of it, but apparently the scheduler fails to deliver optimal - // ILP with a naive ordering here. - SmallVector TrueSlicesInterleaved, FalseSlicesInterleaved; - for (unsigned long IS = 0; IS < maxTrueSliceLen; ++IS) { - for (auto &S : TrueSlices) { - if (!S.empty()) { - TrueSlicesInterleaved.push_back(S.top()); - S.pop(); - } - } - } - for (unsigned long IS = 0; IS < maxFalseSliceLen; ++IS) { - for (auto &S : FalseSlices) { - if (!S.empty()) { - FalseSlicesInterleaved.push_back(S.top()); - S.pop(); - } - } - } // We split the block containing the select(s) into two blocks. SelectInst *SI = ASI.front(); @@ -437,55 +374,24 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { } // These are the new basic blocks for the conditional branch. - // At least one will become an actual new basic block. + // For now, no instruction sinking to the true/false blocks. 
+ // Thus both True and False blocks will be empty. BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; - BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; - if (!TrueSlicesInterleaved.empty()) { - TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink", - EndBlock->getParent(), EndBlock); - TrueBranch = BranchInst::Create(EndBlock, TrueBlock); - TrueBranch->setDebugLoc(LastSI->getDebugLoc()); - for (Instruction *TrueInst : TrueSlicesInterleaved) - TrueInst->moveBefore(TrueBranch); - } - if (!FalseSlicesInterleaved.empty()) { - FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink", - EndBlock->getParent(), EndBlock); - FalseBranch = BranchInst::Create(EndBlock, FalseBlock); - FalseBranch->setDebugLoc(LastSI->getDebugLoc()); - for (Instruction *FalseInst : FalseSlicesInterleaved) - FalseInst->moveBefore(FalseBranch); - } - // If there was nothing to sink, then arbitrarily choose the 'false' side - // for a new input value to the PHI. - if (TrueBlock == FalseBlock) { - assert(TrueBlock == nullptr && - "Unexpected basic block transform while optimizing select"); - - FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", - EndBlock->getParent(), EndBlock); - auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); - FalseBranch->setDebugLoc(SI->getDebugLoc()); - } + + // Use the 'false' side for a new input value to the PHI. + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); + + // For the 'true' side the path originates from the start block from the + // point view of the new PHI. + TrueBlock = StartBlock; // Insert the real conditional branch based on the original condition. 
- // If we did not create a new block for one of the 'true' or 'false' paths - // of the condition, it means that side of the branch goes to the end block - // directly and the path originates from the start block from the point of - // view of the new PHI. BasicBlock *TT, *FT; - if (TrueBlock == nullptr) { - TT = EndBlock; - FT = FalseBlock; - TrueBlock = StartBlock; - } else if (FalseBlock == nullptr) { - TT = TrueBlock; - FT = EndBlock; - FalseBlock = StartBlock; - } else { - TT = TrueBlock; - FT = FalseBlock; - } + TT = EndBlock; + FT = FalseBlock; IRBuilder<> IB(SI); auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); @@ -680,13 +586,12 @@ bool SelectOptimize::hasExpensiveColdOperand( HotWeight = TrueWeight; } if (ColdI) { - std::stack ColdSlice; + SmallVector ColdSlice; getExclBackwardsSlice(ColdI, ColdSlice); InstructionCost SliceCost = 0; - while (!ColdSlice.empty()) { - SliceCost += TTI->getInstructionCost(ColdSlice.top(), - TargetTransformInfo::TCK_Latency); - ColdSlice.pop(); + for (auto *ColdII : ColdSlice) { + SliceCost += + TTI->getInstructionCost(ColdII, TargetTransformInfo::TCK_Latency); } // The colder the cold value operand of the select is the more expensive // the cmov becomes for computing the cold value operand every time. Thus, @@ -708,9 +613,8 @@ bool SelectOptimize::hasExpensiveColdOperand( // (sufficiently-accurate in practice), we populate this set with the // instructions of the backwards dependence slice that only have one-use and // form an one-use chain that leads to the source instruction. -void SelectOptimize::getExclBackwardsSlice(Instruction *I, - std::stack &Slice, - bool ForSinking) { +void SelectOptimize::getExclBackwardsSlice( + Instruction *I, SmallVector &Slice) { SmallPtrSet Visited; std::queue Worklist; Worklist.push(I); @@ -726,20 +630,13 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I, if (!II->hasOneUse()) continue; - // Cannot soundly sink instructions with side-effects. 
- // Terminator or phi instructions cannot be sunk. - // Avoid sinking other select instructions (should be handled separetely). - if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || - isa(II) || isa(II))) - continue; - // Avoid considering instructions with less frequency than the source // instruction (i.e., avoid colder code regions of the dependence slice). if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) continue; // Eligible one-use instruction added to the dependence slice. - Slice.push(II); + Slice.push_back(II); // Explore all the operands of the current instruction to expand the slice. for (unsigned k = 0; k < II->getNumOperands(); ++k) diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index f2dff8cf8c9f15..ee267c80e7a5ec 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -105,28 +105,20 @@ define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 { ; If select group predictable, turn it into a branch. 
define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { ; CHECK-LABEL: @weighted_select_group( -; CHECK-NEXT: [[A1:%.*]] = add i32 [[A:%.*]], 1 ; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]] -; CHECK: select.true.sink: -; CHECK-NEXT: [[C1:%.*]] = add i32 [[C:%.*]], 1 -; CHECK-NEXT: br label [[SELECT_END:%.*]] -; CHECK: select.false.sink: -; CHECK-NEXT: [[B1:%.*]] = add i32 [[B:%.*]], 1 +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] +; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ] -; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ] +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ] ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[ADD]] ; - %a1 = add i32 %a, 1 - %b1 = add i32 %b, 1 - %c1 = add i32 %c, 1 - %sel1 = select i1 %cmp, i32 %a1, i32 %b1, !prof !15 + %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !15 call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23) - %sel2 = select i1 %cmp, i32 %c1, i32 %a1, !prof !15 + %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !15 %add = add i32 %sel1, %sel2 ret i32 %add } @@ -159,13 +151,13 @@ define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; sink load define i32 @expensive_val_operand1(i32* nocapture %a, i32 %y, i1 %cmp) { ; CHECK-LABEL: 
@expensive_val_operand1( -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] -; CHECK: select.true.sink: ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] +; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ] +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; %load = load i32, i32* %a, align 8 @@ -189,14 +181,14 @@ define i32 @expensive_val_operand2(i32* nocapture %a, i32 %x, i1 %cmp) { ; into a branch with sinked dependence slice. define i32 @expensive_val_operand3(i32* nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand3( -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] -; CHECK: select.true.sink: ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] +; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ] +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; %load = load i32, i32* %a, align 8 @@ -250,14 +242,14 @@ define double @cmov_on_critical_path(i32 %n, double %x, double* nocapture %a) { ; 
CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[R:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] ; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]] ; CHECK-NEXT: [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]] -; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]] -; CHECK: select.true.sink: -; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] +; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]], !prof [[PROF27:![0-9]+]] +; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[FOR_BODY]] ], [ [[X1]], [[SELECT_FALSE]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] From 7f680b260ffe34c648cbd3fd16615d8f5cdab39f Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Mon, 16 May 2022 18:10:27 -0700 Subject: [PATCH 308/908] [flang] Allow more forward references to ENTRY names Forward references to ENTRY names to pass them as actual procedure arguments don't work in all cases, exposing some basic ordering problems in name resolution for these symbols. Refactor; create all the necessary procedure symbols, and either function result or host association symbols (for subroutines), at the time that the subprogram scope is created, so that the names exist in the scope as text "before" the ENTRY is processed in name resolution. 
Some processing remains in PostEntryStmt() so that we can check that an ENTRY with an explicit distinct RESULT doesn't also have declarations for the ENTRY name. Differential Revision: https://reviews.llvm.org/D126142 --- flang/include/flang/Semantics/symbol.h | 6 - flang/lib/Evaluate/tools.cpp | 39 +-- flang/lib/Semantics/check-declarations.cpp | 20 +- flang/lib/Semantics/resolve-names.cpp | 325 +++++++++++---------- flang/lib/Semantics/tools.cpp | 12 + flang/test/Semantics/entry01.f90 | 46 ++- flang/test/Semantics/symbol20.f90 | 47 +++ 7 files changed, 302 insertions(+), 193 deletions(-) create mode 100644 flang/test/Semantics/symbol20.f90 diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 874c9d89a23e49..e79f8ab6503e27 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -137,16 +137,10 @@ class SubprogramNameDetails { SubprogramNameDetails() = delete; SubprogramKind kind() const { return kind_; } ProgramTree &node() const { return *node_; } - bool isEntryStmt() const { return isEntryStmt_; } - SubprogramNameDetails &set_isEntryStmt(bool yes = true) { - isEntryStmt_ = yes; - return *this; - } private: SubprogramKind kind_; common::Reference node_; - bool isEntryStmt_{false}; }; // A name from an entity-decl -- could be object or function. 
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 394d1027a198e8..bc8b7087164ba5 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1230,20 +1230,21 @@ bool IsPureProcedure(const Scope &scope) { bool IsFunction(const Symbol &symbol) { const Symbol &ultimate{symbol.GetUltimate()}; return ultimate.test(Symbol::Flag::Function) || - common::visit( - common::visitors{ - [](const SubprogramDetails &x) { return x.isFunction(); }, - [](const ProcEntityDetails &x) { - const auto &ifc{x.interface()}; - return ifc.type() || - (ifc.symbol() && IsFunction(*ifc.symbol())); + (!ultimate.test(Symbol::Flag::Subroutine) && + common::visit( + common::visitors{ + [](const SubprogramDetails &x) { return x.isFunction(); }, + [](const ProcEntityDetails &x) { + const auto &ifc{x.interface()}; + return ifc.type() || + (ifc.symbol() && IsFunction(*ifc.symbol())); + }, + [](const ProcBindingDetails &x) { + return IsFunction(x.symbol()); + }, + [](const auto &) { return false; }, }, - [](const ProcBindingDetails &x) { - return IsFunction(x.symbol()); - }, - [](const auto &) { return false; }, - }, - ultimate.details()); + ultimate.details())); } bool IsFunction(const Scope &scope) { @@ -1399,10 +1400,14 @@ bool IsDeferredShape(const Symbol &symbol) { bool IsFunctionResult(const Symbol &original) { const Symbol &symbol{GetAssociationRoot(original)}; - return (symbol.has() && - symbol.get().isFuncResult()) || - (symbol.has() && - symbol.get().isFuncResult()); + return common::visit( + common::visitors{ + [](const EntityDetails &x) { return x.isFuncResult(); }, + [](const ObjectEntityDetails &x) { return x.isFuncResult(); }, + [](const ProcEntityDetails &x) { return x.isFuncResult(); }, + [](const auto &) { return false; }, + }, + symbol.details()); } bool IsKindTypeParameter(const Symbol &symbol) { diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 72816b6d33c1ee..99c6e27e714a40 
100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -897,14 +897,9 @@ void CheckHelper::CheckSubprogram( if (subprogram) { subprogramDetails = subprogram->detailsIf(); } - if (entryScope->kind() != Scope::Kind::Subprogram) { - error = "ENTRY may appear only in a subroutine or function"_err_en_US; - } else if (!(entryScope->parent().IsGlobal() || - entryScope->parent().IsModule() || - entryScope->parent().IsSubmodule())) { + if (!(entryScope->parent().IsGlobal() || entryScope->parent().IsModule() || + entryScope->parent().IsSubmodule())) { error = "ENTRY may not appear in an internal subprogram"_err_en_US; - } else if (FindSeparateModuleSubprogramInterface(subprogram)) { - error = "ENTRY may not appear in a separate module procedure"_err_en_US; } else if (subprogramDetails && details.isFunction() && subprogramDetails->isFunction() && !context_.HasError(details.result()) && @@ -1812,8 +1807,13 @@ void CheckHelper::CheckGenericOps(const Scope &scope) { auto addSpecifics{[&](const Symbol &generic) { const auto *details{generic.GetUltimate().detailsIf()}; if (!details) { - if (generic.test(Symbol::Flag::Function)) { - Characterize(generic); + // Not a generic; ensure characteristics are defined if a function. 
+ auto restorer{messages_.SetLocation(generic.name())}; + if (IsFunction(generic) && !context_.HasError(generic)) { + if (const Symbol * result{FindFunctionResult(generic)}; + result && !context_.HasError(*result)) { + Characterize(generic); + } } return; } @@ -1825,8 +1825,8 @@ void CheckHelper::CheckGenericOps(const Scope &scope) { const std::vector &bindingNames{details->bindingNames()}; for (std::size_t i{0}; i < specifics.size(); ++i) { const Symbol &specific{*specifics[i]}; + auto restorer{messages_.SetLocation(bindingNames[i])}; if (const Procedure * proc{Characterize(specific)}) { - auto restorer{messages_.SetLocation(bindingNames[i])}; if (kind.IsAssignment()) { if (!CheckDefinedAssignment(specific, *proc)) { continue; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index a31831301bd138..cbb657040783e6 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -464,6 +464,8 @@ class FuncResultStack { ~FuncResultStack(); struct FuncInfo { + explicit FuncInfo(const Scope &s) : scope{s} {} + const Scope &scope; // Parse tree of the type specification in the FUNCTION prefix const parser::DeclarationTypeSpec *parsedType{nullptr}; // Name of the function RESULT in the FUNCTION suffix, if any @@ -480,8 +482,8 @@ class FuncResultStack { void CompleteTypeIfFunctionResult(Symbol &); FuncInfo *Top() { return stack_.empty() ? 
nullptr : &stack_.back(); } - FuncInfo &Push() { return stack_.emplace_back(); } - void Pop() { stack_.pop_back(); } + FuncInfo &Push(const Scope &scope) { return stack_.emplace_back(scope); } + void Pop(); private: ScopeHandler &scopeHandler_; @@ -841,6 +843,7 @@ class SubprogramVisitor : public virtual ScopeHandler, public InterfaceVisitor { const parser::LanguageBindingSpec * = nullptr); Symbol *GetSpecificFromGeneric(const parser::Name &); SubprogramDetails &PostSubprogramStmt(const parser::Name &); + void CreateEntry(const parser::EntryStmt &stmt, Symbol &subprogram); void PostEntryStmt(const parser::EntryStmt &stmt); }; @@ -2024,17 +2027,17 @@ FuncResultStack::~FuncResultStack() { CHECK(stack_.empty()); } void FuncResultStack::CompleteFunctionResultType() { // If the function has a type in the prefix, process it now. - if (IsFunction(scopeHandler_.currScope())) { - FuncInfo &info{DEREF(Top())}; - if (info.parsedType) { - scopeHandler_.messageHandler().set_currStmtSource(info.source); + FuncInfo *info{Top()}; + if (info && &info->scope == &scopeHandler_.currScope()) { + if (info->parsedType) { + scopeHandler_.messageHandler().set_currStmtSource(info->source); if (const auto *type{ - scopeHandler_.ProcessTypeSpec(*info.parsedType, true)}) { - if (!scopeHandler_.context().HasError(info.resultSymbol)) { - info.resultSymbol->SetType(*type); + scopeHandler_.ProcessTypeSpec(*info->parsedType, true)}) { + if (!scopeHandler_.context().HasError(info->resultSymbol)) { + info->resultSymbol->SetType(*type); } } - info.parsedType = nullptr; + info->parsedType = nullptr; } } } @@ -2049,6 +2052,12 @@ void FuncResultStack::CompleteTypeIfFunctionResult(Symbol &symbol) { } } +void FuncResultStack::Pop() { + if (!stack_.empty() && &stack_.back().scope == &scopeHandler_.currScope()) { + stack_.pop_back(); + } +} + // ScopeHandler implementation void ScopeHandler::SayAlreadyDeclared(const parser::Name &name, Symbol &prev) { @@ -2203,6 +2212,7 @@ void ScopeHandler::PopScope() { for 
(auto &pair : currScope()) { ConvertToObjectEntity(*pair.second); } + funcResultStack_.Pop(); // If popping back into a global scope, pop back to the main global scope. SetScope(currScope_->parent().IsGlobal() ? context().globalScope() : currScope_->parent()); @@ -2440,6 +2450,12 @@ bool ScopeHandler::ConvertToProcEntity(Symbol &symbol) { } else if (symbol.has()) { symbol.set_details(ProcEntityDetails{}); } else if (auto *details{symbol.detailsIf()}) { + if (IsFunctionResult(symbol) && + !(IsPointer(symbol) && symbol.attrs().test(Attr::EXTERNAL))) { + // Don't turn function result into a procedure pointer unless both + // POUNTER and EXTERNAL + return false; + } funcResultStack_.CompleteTypeIfFunctionResult(symbol); symbol.set_details(ProcEntityDetails{std::move(*details)}); if (symbol.GetType() && !symbol.test(Symbol::Flag::Implicit)) { @@ -3265,26 +3281,45 @@ void SubprogramVisitor::Post(const parser::FunctionStmt &stmt) { FuncResultStack::FuncInfo &info{DEREF(funcResultStack().Top())}; CHECK(info.inFunctionStmt); info.inFunctionStmt = false; - if (info.resultName && info.resultName->source != name.source) { + bool distinctResultName{ + info.resultName && info.resultName->source != name.source}; + if (distinctResultName) { // Note that RESULT is ignored if it has the same name as the function. + // The symbol created by PushScope() is retained as a place-holder + // for error detection. 
funcResultName = info.resultName; } else { - EraseSymbol(name); // was added by PushSubprogramScope + EraseSymbol(name); // was added by PushScope() funcResultName = &name; } - // add function result to function scope if (details.isFunction()) { CHECK(context().HasError(currScope().symbol())); } else { - // add function result to function scope - EntityDetails funcResultDetails; - funcResultDetails.set_funcResult(true); - Symbol &result{MakeSymbol(*funcResultName, std::move(funcResultDetails))}; - info.resultSymbol = &result; - details.set_result(result); + // RESULT(x) can be the same explicitly-named RESULT(x) as an ENTRY + // statement. + Symbol *result{nullptr}; + if (distinctResultName) { + if (auto iter{currScope().find(funcResultName->source)}; + iter != currScope().end()) { + Symbol &entryResult{*iter->second}; + if (IsFunctionResult(entryResult)) { + result = &entryResult; + } + } + } + if (result) { + Resolve(*funcResultName, *result); + } else { + // add function result to function scope + EntityDetails funcResultDetails; + funcResultDetails.set_funcResult(true); + result = &MakeSymbol(*funcResultName, std::move(funcResultDetails)); + } + info.resultSymbol = result; + details.set_result(*result); } // C1560. 
- if (info.resultName && info.resultName->source == name.source) { + if (info.resultName && !distinctResultName) { Say(info.resultName->source, "The function name should not appear in RESULT, references to '%s' " "inside the function will be considered as references to the " @@ -3322,94 +3357,124 @@ void SubprogramVisitor::Post(const parser::EntryStmt &stmt) { EndAttrs(); } -void SubprogramVisitor::PostEntryStmt(const parser::EntryStmt &stmt) { - Scope &inclusiveScope{InclusiveScope()}; - const Symbol *subprogram{inclusiveScope.symbol()}; - if (!subprogram) { - CHECK(context().AnyFatalError()); - return; - } - const auto &name{std::get(stmt.t)}; - const parser::Name *resultName{nullptr}; - if (const auto &maybeSuffix{ - std::get>(stmt.t)}) { - resultName = common::GetPtrFromOptional(maybeSuffix->resultName); - } - bool inFunction{IsFunction(currScope())}; - if (resultName) { // RESULT(result) is present - if (!inFunction) { - // error was already emitted for the suffix - } else if (resultName->source == subprogram->name()) { // C1574 - Say2(resultName->source, +void SubprogramVisitor::CreateEntry( + const parser::EntryStmt &stmt, Symbol &subprogram) { + const auto &entryName{std::get(stmt.t)}; + Scope &outer{currScope().parent()}; + Symbol::Flag subpFlag{subprogram.test(Symbol::Flag::Function) + ? 
Symbol::Flag::Function + : Symbol::Flag::Subroutine}; + Attrs attrs; + if (Symbol * extant{FindSymbol(outer, entryName)}) { + if (!HandlePreviousCalls(entryName, *extant, subpFlag)) { + if (outer.IsTopLevel()) { + Say2(entryName, + "'%s' is already defined as a global identifier"_err_en_US, *extant, + "Previous definition of '%s'"_en_US); + } else { + SayAlreadyDeclared(entryName, *extant); + } + return; + } + attrs = extant->attrs(); + } + const auto &suffix{std::get>(stmt.t)}; + bool badResultName{false}; + std::optional distinctResultName; + if (suffix && suffix->resultName && + suffix->resultName->source != entryName.source) { + distinctResultName = suffix->resultName->source; + const parser::Name &resultName{*suffix->resultName}; + if (resultName.source == subprogram.name()) { // C1574 + Say2(resultName.source, "RESULT(%s) may not have the same name as the function"_err_en_US, - subprogram->name(), "Containing function"_en_US); - } else if (const Symbol * - symbol{FindSymbol(inclusiveScope.parent(), *resultName)}) { // C1574 - if (const auto *details{symbol->detailsIf()}) { - if (details->entryScope() == &inclusiveScope) { - Say2(resultName->source, + subprogram, "Containing function"_en_US); + badResultName = true; + } else if (const Symbol * extant{FindSymbol(outer, resultName)}) { // C1574 + if (const auto *details{extant->detailsIf()}) { + if (details->entryScope() == &currScope()) { + Say2(resultName.source, "RESULT(%s) may not have the same name as an ENTRY in the function"_err_en_US, - symbol->name(), "Conflicting ENTRY"_en_US); + extant->name(), "Conflicting ENTRY"_en_US); + badResultName = true; } } } - if (Symbol * symbol{FindSymbol(name)}) { // C1570 - // When RESULT() appears, ENTRY name can't have been already declared - if (inclusiveScope.Contains(symbol->owner())) { - Say2(name, - "ENTRY name '%s' may not be declared when RESULT() is present"_err_en_US, - *symbol, "Previous declaration of '%s'"_en_US); - } - } - if (resultName->source == 
name.source) { - // ignore RESULT() hereafter when it's the same name as the ENTRY - resultName = nullptr; - } } + if (outer.IsModule() && !attrs.test(Attr::PRIVATE)) { + attrs.set(Attr::PUBLIC); + } + Symbol &entrySymbol{MakeSymbol(outer, entryName.source, attrs)}; SubprogramDetails entryDetails; - entryDetails.set_entryScope(inclusiveScope); - if (inFunction) { - // Create the entity to hold the function result, if necessary. - auto &effectiveResultName{*(resultName ? resultName : &name)}; - Symbol *resultSymbol{FindInScope(currScope(), effectiveResultName)}; - if (resultSymbol) { // C1574 - common::visit( - common::visitors{[resultSymbol](UnknownDetails &) { - EntityDetails entity; - entity.set_funcResult(true); - resultSymbol->set_details(std::move(entity)); - }, - [](EntityDetails &x) { x.set_funcResult(true); }, - [](ObjectEntityDetails &x) { x.set_funcResult(true); }, - [](ProcEntityDetails &x) { x.set_funcResult(true); }, - [&](const auto &) { - Say2(effectiveResultName.source, - "'%s' was previously declared as an item that may not be used as a function result"_err_en_US, - resultSymbol->name(), "Previous declaration of '%s'"_en_US); - context().SetError(*resultSymbol); - }}, - resultSymbol->details()); - // The Function flag will have been set if the ENTRY's symbol was created - // as a placeholder in BeginSubprogram. This prevents misuse of the ENTRY - // as a subroutine. Clear it now because it's inappropriate for a - // function result. 
- resultSymbol->set(Symbol::Flag::Function, false); - } else if (!inSpecificationPart_) { - ObjectEntityDetails entity; - entity.set_funcResult(true); - resultSymbol = &MakeSymbol(effectiveResultName, std::move(entity)); - ApplyImplicitRules(*resultSymbol); + entryDetails.set_entryScope(currScope()); + entrySymbol.set(subpFlag); + if (subpFlag == Symbol::Flag::Function) { + Symbol *result{nullptr}; + EntityDetails resultDetails; + resultDetails.set_funcResult(true); + if (distinctResultName) { + if (!badResultName) { + // RESULT(x) can be the same explicitly-named RESULT(x) as + // the enclosing function or another ENTRY. + if (auto iter{currScope().find(suffix->resultName->source)}; + iter != currScope().end()) { + result = &*iter->second; + } + if (!result) { + result = &MakeSymbol( + *distinctResultName, Attrs{}, std::move(resultDetails)); + } + Resolve(*suffix->resultName, *result); + } } else { - EntityDetails entity; - entity.set_funcResult(true); - resultSymbol = &MakeSymbol(effectiveResultName, std::move(entity)); + result = &MakeSymbol(entryName.source, Attrs{}, std::move(resultDetails)); } - if (!resultName) { - name.symbol = nullptr; // symbol will be used for entry point below + if (result) { + entryDetails.set_result(*result); } - entryDetails.set_result(*resultSymbol); } + if (subpFlag == Symbol::Flag::Subroutine || + (distinctResultName && !badResultName)) { + Symbol &assoc{MakeSymbol(entryName.source)}; + assoc.set_details(HostAssocDetails{entrySymbol}); + assoc.set(Symbol::Flag::Subroutine); + } + Resolve(entryName, entrySymbol); + entrySymbol.set_details(std::move(entryDetails)); +} +void SubprogramVisitor::PostEntryStmt(const parser::EntryStmt &stmt) { + // The entry symbol should have already been created and resolved + // in CreateEntry(), called by BeginSubprogram(), with one exception (below). 
+ const auto &name{std::get(stmt.t)}; + Scope &inclusiveScope{InclusiveScope()}; + if (!name.symbol) { + if (inclusiveScope.kind() != Scope::Kind::Subprogram) { + Say(name.source, + "ENTRY '%s' may appear only in a subroutine or function"_err_en_US, + name.source); + } else if (FindSeparateModuleSubprogramInterface(inclusiveScope.symbol())) { + Say(name.source, + "ENTRY '%s' may not appear in a separate module procedure"_err_en_US, + name.source); + } else { + // C1571 - entry is nested, so was not put into the program tree; error + // is emitted from MiscChecker in semantics.cpp. + } + return; + } + Symbol &entrySymbol{*name.symbol}; + if (context().HasError(entrySymbol)) { + return; + } + if (!entrySymbol.has()) { + SayAlreadyDeclared(name, entrySymbol); + return; + } + SubprogramDetails &entryDetails{entrySymbol.get()}; + CHECK(entryDetails.entryScope() == &inclusiveScope); + entrySymbol.attrs() |= GetAttrs(); + SetBindNameOn(entrySymbol); for (const auto &dummyArg : std::get>(stmt.t)) { if (const auto *dummyName{std::get_if(&dummyArg.u)}) { Symbol *dummy{FindSymbol(*dummyName)}; @@ -3433,7 +3498,7 @@ void SubprogramVisitor::PostEntryStmt(const parser::EntryStmt &stmt) { } entryDetails.add_dummyArg(*dummy); } else { - if (inFunction) { // C1573 + if (entrySymbol.test(Symbol::Flag::Function)) { // C1573 Say(name, "ENTRY in a function may not have an alternate return dummy argument"_err_en_US); break; @@ -3441,34 +3506,6 @@ void SubprogramVisitor::PostEntryStmt(const parser::EntryStmt &stmt) { entryDetails.add_alternateReturn(); } } - - Symbol::Flag subpFlag{ - inFunction ? 
Symbol::Flag::Function : Symbol::Flag::Subroutine}; - Scope &outer{inclusiveScope.parent()}; // global or module scope - if (outer.IsModule() && attrs_ && !attrs_->test(Attr::PRIVATE)) { - attrs_->set(Attr::PUBLIC); - } - if (Symbol * extant{FindSymbol(outer, name)}) { - if (!HandlePreviousCalls(name, *extant, subpFlag)) { - if (outer.IsGlobal()) { - Say2(name, "'%s' is already defined as a global identifier"_err_en_US, - *extant, "Previous definition of '%s'"_en_US); - } else { - SayAlreadyDeclared(name, *extant); - } - return; - } - } - - Symbol *entrySymbol{&MakeSymbol(outer, name.source, GetAttrs())}; - if (auto *generic{entrySymbol->detailsIf()}) { - CHECK(generic->specific()); - entrySymbol = generic->specific(); - } - entrySymbol->set_details(std::move(entryDetails)); - SetBindNameOn(*entrySymbol); - entrySymbol->set(subpFlag); - Resolve(name, *entrySymbol); } // A subprogram declared with MODULE PROCEDURE @@ -3486,9 +3523,6 @@ bool SubprogramVisitor::BeginMpSubprogram(const parser::Name &name) { // Convert the module procedure's interface into a subprogram. SetScope(DEREF(symbol->scope())); symbol->get().set_isInterface(false); - if (IsFunction(*symbol)) { - funcResultStack().Push(); // just to be popped later - } } else { // Copy the interface into a new subprogram scope. 
Symbol &newSymbol{MakeSymbol(name, SubprogramDetails{})}; @@ -3506,7 +3540,6 @@ bool SubprogramVisitor::BeginMpSubprogram(const parser::Name &name) { if (details.isFunction()) { currScope().erase(symbol->name()); newDetails.set_result(*currScope().CopySymbol(details.result())); - funcResultStack().Push(); // just to be popped later } } return true; @@ -3551,33 +3584,15 @@ bool SubprogramVisitor::BeginSubprogram(const parser::Name &name, newSymbol.attrs().set(Attr::PUBLIC); } } - if (IsFunction(currScope())) { - funcResultStack().Push(); - if (entryStmts) { - // It's possible to refer to the function result variable of an ENTRY - // statement that lacks an explicit RESULT in code that appears before the - // ENTRY. Create a placeholder symbol now for that case so that the name - // doesn't resolve instead to the ENTRY's symbol in the scope around the - // function. - for (const auto &ref : *entryStmts) { - const auto &suffix{std::get>(ref->t)}; - if (!(suffix && suffix->resultName)) { - Symbol &symbol{MakeSymbol(std::get(ref->t).source, - Attrs{}, UnknownDetails{})}; - symbol.set(Symbol::Flag::Function); - } - } + if (entryStmts) { + for (const auto &ref : *entryStmts) { + CreateEntry(*ref, newSymbol); } } return true; } -void SubprogramVisitor::EndSubprogram() { - if (IsFunction(currScope())) { - funcResultStack().Pop(); - } - PopScope(); -} +void SubprogramVisitor::EndSubprogram() { PopScope(); } bool SubprogramVisitor::HandlePreviousCalls( const parser::Name &name, Symbol &symbol, Symbol::Flag subpFlag) { @@ -3644,6 +3659,9 @@ Symbol &SubprogramVisitor::PushSubprogramScope(const parser::Name &name, symbol->ReplaceName(name.source); symbol->set(subpFlag); PushScope(Scope::Kind::Subprogram, symbol); + if (subpFlag == Symbol::Flag::Function) { + funcResultStack().Push(currScope()); + } if (inInterfaceBlock()) { auto &details{symbol->get()}; details.set_isInterface(); @@ -6718,7 +6736,7 @@ void ResolveNamesVisitor::HandleProcedureName( } else if (CheckUseError(name)) 
{ // error was reported } else { - auto &nonUltimateSymbol = *symbol; + auto &nonUltimateSymbol{*symbol}; symbol = &Resolve(name, symbol)->GetUltimate(); bool convertedToProcEntity{ConvertToProcEntity(*symbol)}; if (convertedToProcEntity && !symbol->attrs().test(Attr::EXTERNAL) && @@ -7352,10 +7370,11 @@ void ResolveNamesVisitor::AddSubpNames(ProgramTree &node) { symbol.set(Symbol::Flag::Function); } else if (childKind == ProgramTree::Kind::Subroutine) { symbol.set(Symbol::Flag::Subroutine); + } else { + continue; // make ENTRY symbols only where valid } for (const auto &entryStmt : child.entryStmts()) { SubprogramNameDetails details{kind, child}; - details.set_isEntryStmt(); auto &symbol{ MakeSymbol(std::get(entryStmt->t), std::move(details))}; symbol.set(child.GetSubpFlag()); diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index cb485bc11cdb31..7e6d1d0aecb31d 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -1363,6 +1363,18 @@ const Symbol *IsFunctionResultWithSameNameAsFunction(const Symbol &symbol) { return function; } } + // Check ENTRY result symbols too + const Scope &outer{symbol.owner().parent()}; + auto iter{outer.find(symbol.name())}; + if (iter != outer.end()) { + const Symbol &outerSym{*iter->second}; + if (const auto *subp{outerSym.detailsIf()}) { + if (subp->entryScope() == &symbol.owner() && + symbol.name() == outerSym.name()) { + return &outerSym; + } + } + } } return nullptr; } diff --git a/flang/test/Semantics/entry01.f90 b/flang/test/Semantics/entry01.f90 index 44065e36100df1..696f65411018e0 100644 --- a/flang/test/Semantics/entry01.f90 +++ b/flang/test/Semantics/entry01.f90 @@ -2,7 +2,7 @@ ! 
Tests valid and invalid ENTRY statements module m1 - !ERROR: ENTRY may appear only in a subroutine or function + !ERROR: ENTRY 'badentryinmodule' may appear only in a subroutine or function entry badentryinmodule interface module subroutine separate @@ -30,18 +30,18 @@ subroutine internal submodule(m1) m1s1 contains module procedure separate - !ERROR: ENTRY may not appear in a separate module procedure + !ERROR: ENTRY 'badentryinsmp' may not appear in a separate module procedure entry badentryinsmp ! 1571 end procedure end submodule program main - !ERROR: ENTRY may appear only in a subroutine or function + !ERROR: ENTRY 'badentryinprogram' may appear only in a subroutine or function entry badentryinprogram ! C1571 end program block data bd1 - !ERROR: ENTRY may appear only in a subroutine or function + !ERROR: ENTRY 'badentryinbd' may appear only in a subroutine or function entry badentryinbd ! C1571 end block data @@ -80,9 +80,9 @@ function ifunc() integer, allocatable :: alloc integer, pointer :: ptr entry iok1() - !ERROR: ENTRY name 'ibad1' may not be declared when RESULT() is present + !ERROR: 'ibad1' is already declared in this scoping unit entry ibad1() result(ibad1res) ! C1570 - !ERROR: 'ibad2' was previously declared as an item that may not be used as a function result + !ERROR: 'ibad2' is already declared in this scoping unit entry ibad2() !ERROR: ENTRY in a function may not have an alternate return dummy argument entry ibadalt(*) ! C1573 @@ -92,6 +92,7 @@ function ifunc() !ERROR: RESULT(iok) may not have the same name as an ENTRY in the function entry isameres2() result(iok) ! C1574 entry isameres3() result(iok2) ! C1574 + !ERROR: 'iok2' is already declared in this scoping unit entry iok2() !These cases are all acceptably incompatible entry iok3() result(weird1) @@ -114,6 +115,8 @@ function ifunc() continue ! force transition to execution part entry implicit() implicit = 666 ! 
ok, just ensure that it works + !ERROR: Cannot call function 'implicit' like a subroutine + call implicit end function function chfunc() result(chr) @@ -133,8 +136,9 @@ subroutine externals !ERROR: 'iok1' is already defined as a global identifier entry iok1 integer :: ix + !ERROR: Cannot call subroutine 'iproc' like a function + !ERROR: Function result characteristics are not known ix = iproc() - !ERROR: 'iproc' was previously called as a function entry iproc end subroutine @@ -212,3 +216,31 @@ real function setBefore entry ent end function end module + +module m6 + contains + recursive subroutine passSubr + call foo(passSubr) + call foo(ent1) + entry ent1 + call foo(ent1) + end subroutine + recursive function passFunc1 + !ERROR: Actual argument associated with procedure dummy argument 'e=' is not a procedure + call foo(passFunc1) + !ERROR: Actual argument associated with procedure dummy argument 'e=' is not a procedure + call foo(ent2) + entry ent2 + !ERROR: Actual argument associated with procedure dummy argument 'e=' is not a procedure + call foo(ent2) + end function + recursive function passFunc2() result(res) + call foo(passFunc2) + call foo(ent3) + entry ent3() result(res) + call foo(ent3) + end function + subroutine foo(e) + external e + end subroutine +end module diff --git a/flang/test/Semantics/symbol20.f90 b/flang/test/Semantics/symbol20.f90 new file mode 100644 index 00000000000000..8c827769333211 --- /dev/null +++ b/flang/test/Semantics/symbol20.f90 @@ -0,0 +1,47 @@ +! RUN: %python %S/test_symbols.py %s %flang_fc1 +! Test handling of pernicious case in which it is conformant Fortran +! to use the name of a function in a CALL statement. Almost all +! other compilers produce bogus errors for this case and/or crash. 
+ +!DEF: /m Module +module m +contains + !DEF: /m/foo PUBLIC (Function) Subprogram + function foo() + !DEF: /m/bar PUBLIC (Subroutine) Subprogram + !DEF: /m/foo/foo EXTERNAL, POINTER (Subroutine) ProcEntity + procedure(bar), pointer :: foo + !REF: /m/bar + !DEF: /m/foo/baz EXTERNAL, POINTER (Subroutine) ProcEntity + procedure(bar), pointer :: baz + !REF: /m/foo/foo + !REF: /m/bar + foo => bar + !REF: /m/foo/foo + call foo + !DEF: /m/baz PUBLIC (Function) Subprogram + entry baz() + !REF: /m/foo/baz + !REF: /m/bar + baz => bar + !REF: /m/foo/baz + call baz + end function + !REF: /m/bar + subroutine bar + print *, "in bar" + end subroutine +end module +!DEF: /demo MainProgram +program demo + !REF: /m + use :: m + !DEF: /demo/bar (Subroutine) Use + !DEF: /demo/p EXTERNAL, POINTER (Subroutine) ProcEntity + procedure(bar), pointer :: p + !REF: /demo/p + !DEF: /demo/foo (Function) Use + p => foo() + !REF: /demo/p + call p +end program From 7604c59bd2336ebb34f28de3e6c883abbdd3f7c7 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Tue, 24 May 2022 09:53:33 +0530 Subject: [PATCH 309/908] [OpenMP][IRBuilder] `omp task` support This patch adds basic support for `omp task` to the OpenMPIRBuilder. The outlined function after code extraction is called from a wrapper function with appropriate arguments. This wrapper function is passed to the runtime calls for task allocation. This approach is different from the Clang approach - clang directly emits the runtime call to the outlined function. The outlining utility (OutlineInfo) simply outlines the code and generates a function call to the outlined function. After the function has been generated by the outlining utility, there is no easy way to alter the function arguments without meddling with the outlining itself. Hence the wrapper function approach is taken. 
Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D71989 --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 10 ++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 166 ++++++++++++++++++ .../Frontend/OpenMPIRBuilderTest.cpp | 165 +++++++++++++++++ 3 files changed, 341 insertions(+) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 7f2805c2e84ed7..93b1297a793013 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -618,6 +618,16 @@ class OpenMPIRBuilder { /// \param Loc The location where the taskyield directive was encountered. void createTaskyield(const LocationDescription &Loc); + /// Generator for `#omp task` + /// + /// \param Loc The location where the task construct was encountered. + /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. + /// \param Tied True if the task is tied, false if the task is untied. + InsertPointTy createTask(const LocationDescription &Loc, + InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, + bool Tied = true); + /// Functions used to generate reductions. Such functions take two Values /// representing LHS and RHS of the reduction, respectively, and a reference /// to the value that is updated to refer to the reduction result. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 8a7ce10103649c..dfa20d4cabd1ea 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1253,6 +1253,172 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { emitTaskyieldImpl(Loc); } +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createTask(const LocationDescription &Loc, + InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, + bool Tied) { + if (!updateToLocation(Loc)) + return InsertPointTy(); + + // The current basic block is split into four basic blocks. After outlining, + // they will be mapped as follows: + // ``` + // def current_fn() { + // current_basic_block: + // br label %task.exit + // task.exit: + // ; instructions after task + // } + // def outlined_fn() { + // task.alloca: + // br label %task.body + // task.body: + // ret void + // } + // ``` + BasicBlock *TaskExitBB = splitBB(Builder, /*CreateBranch=*/true, "task.exit"); + BasicBlock *TaskBodyBB = splitBB(Builder, /*CreateBranch=*/true, "task.body"); + BasicBlock *TaskAllocaBB = + splitBB(Builder, /*CreateBranch=*/true, "task.alloca"); + + OutlineInfo OI; + OI.EntryBB = TaskAllocaBB; + OI.OuterAllocaBB = AllocaIP.getBlock(); + OI.ExitBB = TaskExitBB; + OI.PostOutlineCB = [this, &Loc, Tied](Function &OutlinedFn) { + // The input IR here looks like the following- + // ``` + // func @current_fn() { + // outlined_fn(%args) + // } + // func @outlined_fn(%args) { ... } + // ``` + // + // This is changed to the following- + // + // ``` + // func @current_fn() { + // runtime_call(..., wrapper_fn, ...) + // } + // func @wrapper_fn(..., %args) { + // outlined_fn(%args) + // } + // func @outlined_fn(%args) { ... } + // ``` + + // The stale call instruction will be replaced with a new call instruction + // for runtime call with a wrapper function. 
+ assert(OutlinedFn.getNumUses() == 1 && + "there must be a single user for the outlined function"); + CallInst *StaleCI = cast(OutlinedFn.user_back()); + + // HasTaskData is true if any variables are captured in the outlined region, + // false otherwise. + bool HasTaskData = StaleCI->arg_size() > 0; + Builder.SetInsertPoint(StaleCI); + + // Gather the arguments for emitting the runtime call for + // @__kmpc_omp_task_alloc + Function *TaskAllocFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc); + + // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID) + // call. + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadID = getOrCreateThreadID(Ident); + + // Argument - `flags` + // If task is tied, then (Flags & 1) == 1. + // If task is untied, then (Flags & 1) == 0. + // TODO: Handle the other flags. + Value *Flags = Builder.getInt32(Tied); + + // Argument - `sizeof_kmp_task_t` (TaskSize) + // Tasksize refers to the size in bytes of kmp_task_t data structure + // including private vars accessed in task. + Value *TaskSize = Builder.getInt64(0); + if (HasTaskData) { + AllocaInst *ArgStructAlloca = + dyn_cast(StaleCI->getArgOperand(0)); + assert(ArgStructAlloca && + "Unable to find the alloca instruction corresponding to arguments " + "for extracted function"); + StructType *ArgStructType = + dyn_cast(ArgStructAlloca->getAllocatedType()); + assert(ArgStructType && "Unable to find struct type corresponding to " + "arguments for extracted function"); + TaskSize = + Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType)); + } + + // TODO: Argument - sizeof_shareds + + // Argument - task_entry (the wrapper function) + // If the outlined function has some captured variables (i.e. HasTaskData is + // true), then the wrapper function will have an additional argument (the + // struct containing captured variables). 
Otherwise, no such argument will + // be present. + SmallVector WrapperArgTys{Builder.getInt32Ty()}; + if (HasTaskData) + WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType()); + FunctionCallee WrapperFuncVal = M.getOrInsertFunction( + (Twine(OutlinedFn.getName()) + ".wrapper").str(), + FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false)); + Function *WrapperFunc = dyn_cast(WrapperFuncVal.getCallee()); + PointerType *WrapperFuncBitcastType = + FunctionType::get(Builder.getInt32Ty(), + {Builder.getInt32Ty(), Builder.getInt8PtrTy()}, false) + ->getPointerTo(); + Value *WrapperFuncBitcast = + ConstantExpr::getBitCast(WrapperFunc, WrapperFuncBitcastType); + + // Emit the @__kmpc_omp_task_alloc runtime call + // The runtime call returns a pointer to an area where the task captured + // variables must be copied before the task is run (NewTaskData) + CallInst *NewTaskData = Builder.CreateCall( + TaskAllocFn, + {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags, + /*sizeof_task=*/TaskSize, /*sizeof_shared=*/Builder.getInt64(0), + /*task_func=*/WrapperFuncBitcast}); + + // Copy the arguments for outlined function + if (HasTaskData) { + Value *TaskData = StaleCI->getArgOperand(0); + Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); + Builder.CreateMemCpy(NewTaskData, Alignment, TaskData, Alignment, + TaskSize); + } + + // Emit the @__kmpc_omp_task runtime call to spawn the task + Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task); + Builder.CreateCall(TaskFn, {Ident, ThreadID, NewTaskData}); + + StaleCI->eraseFromParent(); + + // Emit the body for wrapper function + BasicBlock *WrapperEntryBB = + BasicBlock::Create(M.getContext(), "", WrapperFunc); + Builder.SetInsertPoint(WrapperEntryBB); + if (HasTaskData) + Builder.CreateCall(&OutlinedFn, {WrapperFunc->getArg(1)}); + else + Builder.CreateCall(&OutlinedFn); + Builder.CreateRet(Builder.getInt32(0)); + }; + + addOutlineInfo(std::move(OI)); + + InsertPointTy 
TaskAllocaIP = + InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); + InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); + BodyGenCB(TaskAllocaIP, TaskBodyIP); + Builder.SetInsertPoint(TaskExitBB); + + return Builder.saveIP(); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 5f7e3228ae5704..aef5992c93ea92 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -4412,4 +4412,169 @@ TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy()); } +TEST_F(OpenMPIRBuilderTest, CreateTask) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); + AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); + Value *Val128 = + Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + Builder.restoreIP(AllocaIP); + AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, + "bodygen.alloca128"); + + Builder.restoreIP(CodeGenIP); + // Loading and storing captured pointer and values + Builder.CreateStore(Val128, Local128); + Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, + "bodygen.load32"); + + LoadInst *PrivLoad128 = Builder.CreateLoad( + Local128->getAllocatedType(), Local128, "bodygen.local.load128"); + Value *Cmp = Builder.CreateICmpNE( + Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); + Instruction *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(Cmp, 
CodeGenIP.getBlock()->getTerminator(), + &ThenTerm, &ElseTerm); + }; + + BasicBlock *AllocaBB = Builder.GetInsertBlock(); + BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + OpenMPIRBuilder::LocationDescription Loc( + InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); + Builder.restoreIP(OMPBuilder.createTask( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), + BodyGenCB)); + OMPBuilder.finalize(); + Builder.CreateRetVoid(); + + EXPECT_FALSE(verifyModule(*M, &errs())); + + CallInst *TaskAllocCall = dyn_cast( + OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) + ->user_back()); + + // Verify the Ident argument + GlobalVariable *Ident = cast(TaskAllocCall->getArgOperand(0)); + ASSERT_NE(Ident, nullptr); + EXPECT_TRUE(Ident->hasInitializer()); + Constant *Initializer = Ident->getInitializer(); + GlobalVariable *SrcStrGlob = + cast(Initializer->getOperand(4)->stripPointerCasts()); + ASSERT_NE(SrcStrGlob, nullptr); + ConstantDataArray *SrcSrc = + dyn_cast(SrcStrGlob->getInitializer()); + ASSERT_NE(SrcSrc, nullptr); + + // Verify the num_threads argument. + CallInst *GTID = dyn_cast(TaskAllocCall->getArgOperand(1)); + ASSERT_NE(GTID, nullptr); + EXPECT_EQ(GTID->arg_size(), 1U); + EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); + + // Verify the flags + // TODO: Check for others flags. Currently testing only for tiedness. 
+ ConstantInt *Flags = dyn_cast(TaskAllocCall->getArgOperand(2)); + ASSERT_NE(Flags, nullptr); + EXPECT_EQ(Flags->getSExtValue(), 1); + + // Verify the data size + ConstantInt *DataSize = + dyn_cast(TaskAllocCall->getArgOperand(3)); + ASSERT_NE(DataSize, nullptr); + EXPECT_EQ(DataSize->getSExtValue(), 24); // 64-bit pointer + 128-bit integer + + // TODO: Verify size of shared clause variables + + // Verify Wrapper function + Function *WrapperFunc = + dyn_cast(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); + ASSERT_NE(WrapperFunc, nullptr); + EXPECT_FALSE(WrapperFunc->isDeclaration()); + CallInst *OutlinedFnCall = dyn_cast(WrapperFunc->begin()->begin()); + ASSERT_NE(OutlinedFnCall, nullptr); + EXPECT_EQ(WrapperFunc->getArg(0)->getType(), Builder.getInt32Ty()); + EXPECT_EQ(OutlinedFnCall->getArgOperand(0), WrapperFunc->getArg(1)); + + // Verify the presence of `trunc` and `icmp` instructions in Outlined function + Function *OutlinedFn = OutlinedFnCall->getCalledFunction(); + ASSERT_NE(OutlinedFn, nullptr); + EXPECT_TRUE(any_of(instructions(OutlinedFn), + [](Instruction &inst) { return isa(&inst); })); + EXPECT_TRUE(any_of(instructions(OutlinedFn), + [](Instruction &inst) { return isa(&inst); })); + + // Verify the execution of the task + CallInst *TaskCall = dyn_cast( + OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) + ->user_back()); + ASSERT_NE(TaskCall, nullptr); + EXPECT_EQ(TaskCall->getArgOperand(0), Ident); + EXPECT_EQ(TaskCall->getArgOperand(1), GTID); + EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall); + + // Verify that the argument data has been copied + for (User *in : TaskAllocCall->users()) { + if (MemCpyInst *memCpyInst = dyn_cast(in)) + EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall); + } +} + +TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + auto BodyGenCB 
= [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + + BasicBlock *AllocaBB = Builder.GetInsertBlock(); + BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + OpenMPIRBuilder::LocationDescription Loc( + InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); + Builder.restoreIP(OMPBuilder.createTask( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), + BodyGenCB)); + OMPBuilder.finalize(); + Builder.CreateRetVoid(); + + EXPECT_FALSE(verifyModule(*M, &errs())); +} + +TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + BasicBlock *AllocaBB = Builder.GetInsertBlock(); + BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + OpenMPIRBuilder::LocationDescription Loc( + InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); + Builder.restoreIP(OMPBuilder.createTask( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB, + /*Tied=*/false)); + OMPBuilder.finalize(); + Builder.CreateRetVoid(); + + // Check for the `Tied` argument + CallInst *TaskAllocCall = dyn_cast( + OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) + ->user_back()); + ASSERT_NE(TaskAllocCall, nullptr); + ConstantInt *Flags = dyn_cast(TaskAllocCall->getArgOperand(2)); + ASSERT_NE(Flags, nullptr); + EXPECT_EQ(Flags->getZExtValue() & 1U, 0U); + + EXPECT_FALSE(verifyModule(*M, &errs())); +} + } // namespace From 62a9b36fcf728b104ea87e6eb84c0be69b779df7 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Tue, 19 Apr 2022 03:40:17 -0400 Subject: [PATCH 310/908] [MachineSink] replace MachineLoop with MachineCycle MachineCycle can handle irreducible loop. Natural loop analysis (MachineLoop) can not return correct loop depth if the loop is irreducible loop. 
And MachineSink is sensitive to the loop depth, see MachineSinking::isProfitableToSinkTo(). This patch tries to use MachineCycle so that we can handle irreducible loop better. Reviewed By: sameerds, MatzeB Differential Revision: https://reviews.llvm.org/D123995 --- llvm/include/llvm/ADT/GenericCycleImpl.h | 50 +++++ llvm/include/llvm/ADT/GenericCycleInfo.h | 15 ++ .../llvm/CodeGen/MachineCycleAnalysis.h | 35 ++++ llvm/include/llvm/CodeGen/MachineSSAContext.h | 2 + llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 100 +++++---- llvm/lib/CodeGen/MachineSink.cpp | 190 +++++++++--------- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + llvm/test/CodeGen/AArch64/loop-sink-limit.mir | 4 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 + llvm/test/CodeGen/ARM/O3-pipeline.ll | 1 + llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + llvm/test/CodeGen/X86/opt-pipeline.ll | 1 + llvm/test/CodeGen/X86/pr38795.ll | 93 ++++----- llvm/test/CodeGen/X86/switch-phi-const.ll | 4 +- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 36 ++-- 16 files changed, 340 insertions(+), 198 deletions(-) diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index 17f3a793c985b2..ea2847f8c8ee62 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -66,6 +66,44 @@ void GenericCycle::getExitBlocks( } } +template +auto GenericCycle::getCyclePreheader() const -> BlockT * { + BlockT *Predecessor = getCyclePredecessor(); + if (!Predecessor) + return nullptr; + + assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!"); + + if (succ_size(Predecessor) != 1) + return nullptr; + + // Make sure we are allowed to hoist instructions into the predecessor. 
+ if (!Predecessor->isLegalToHoistInto()) + return nullptr; + + return Predecessor; +} + +template +auto GenericCycle::getCyclePredecessor() const -> BlockT * { + if (!isReducible()) + return nullptr; + + BlockT *Out = nullptr; + + // Loop over the predecessors of the header node... + BlockT *Header = getHeader(); + for (const auto Pred : predecessors(Header)) { + if (!contains(Pred)) { + if (Out && Out != Pred) + return nullptr; + Out = Pred; + } + } + + return Out; +} + /// \brief Helper class for computing cycle information. template class GenericCycleInfoCompute { using BlockT = typename ContextT::BlockT; @@ -326,6 +364,18 @@ auto GenericCycleInfo::getCycle(const BlockT *Block) const return nullptr; } +/// \brief get the depth for the cycle which containing a given block. +/// +/// \returns the depth for the innermost cycle containing \p Block or 0 if it is +/// not contained in any cycle. +template +unsigned GenericCycleInfo::getCycleDepth(const BlockT *Block) const { + CycleT *Cycle = getCycle(Block); + if (!Cycle) + return 0; + return Cycle->getDepth(); +} + #ifndef NDEBUG /// \brief Validate the internal consistency of the cycle tree. /// diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index 5c2190411c1e04..970664b8571544 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -100,6 +100,10 @@ template class GenericCycle { BlockT *getHeader() const { return Entries[0]; } + const SmallVectorImpl & getEntries() const { + return Entries; + } + /// \brief Return whether \p Block is an entry block of the cycle. bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); } @@ -124,6 +128,16 @@ template class GenericCycle { /// branched to. void getExitBlocks(SmallVectorImpl &TmpStorage) const; + /// Return the preheader block for this cycle. 
Pre-header is well-defined for + /// reducible cycle in docs/LoopTerminology.rst as: the only one entering + /// block and its only edge is to the entry block. Return null for irreducible + /// cycles. + BlockT *getCyclePreheader() const; + + /// If the cycle has exactly one entry with exactly one predecessor, return + /// it, otherwise return nullptr. + BlockT *getCyclePredecessor() const; + /// Iteration over child cycles. //@{ using const_child_iterator_base = @@ -239,6 +253,7 @@ template class GenericCycleInfo { const ContextT &getSSAContext() const { return Context; } CycleT *getCycle(const BlockT *Block) const; + unsigned getCycleDepth(const BlockT *Block) const; CycleT *getTopLevelParentCycle(const BlockT *Block) const; /// Move \p Child to \p NewParent by manipulating Children vectors. diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h index 54d9860e54c9a5..bdcf64a2075337 100644 --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -16,6 +16,8 @@ #include "llvm/ADT/GenericCycleInfo.h" #include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" namespace llvm { @@ -25,6 +27,39 @@ extern template class GenericCycle; using MachineCycleInfo = GenericCycleInfo; using MachineCycle = MachineCycleInfo::CycleT; +/// Legacy analysis pass which computes a \ref MachineCycleInfo. 
+class MachineCycleInfoWrapperPass : public MachineFunctionPass { + MachineFunction *F = nullptr; + MachineCycleInfo CI; + +public: + static char ID; + + MachineCycleInfoWrapperPass(); + + MachineCycleInfo &getCycleInfo() { return CI; } + const MachineCycleInfo &getCycleInfo() const { return CI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; +}; + +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +// TODO: add this function to GenericCycle template after implementing IR +// version. +bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I); + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h index c945b8fefc80d3..f59d7cf8a52295 100644 --- a/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -28,6 +28,8 @@ template class DominatorTreeBase; inline auto successors(MachineBasicBlock *BB) { return BB->successors(); } inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); } +inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); } +inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); } template <> class GenericSSAContext { const MachineRegisterInfo *RegInfo = nullptr; diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index 42a5e2b7af01ba..be171c2aa409dc 100644 --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -8,50 +8,15 @@ #include 
"llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineSSAContext.h" -#include "llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; template class llvm::GenericCycleInfo; template class llvm::GenericCycle; -namespace { - -/// Legacy analysis pass which computes a \ref MachineCycleInfo. -class MachineCycleInfoWrapperPass : public MachineFunctionPass { - MachineFunction *F = nullptr; - MachineCycleInfo CI; - -public: - static char ID; - - MachineCycleInfoWrapperPass(); - - MachineCycleInfo &getCycleInfo() { return CI; } - const MachineCycleInfo &getCycleInfo() const { return CI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - -class MachineCycleInfoPrinterPass : public MachineFunctionPass { -public: - static char ID; - - MachineCycleInfoPrinterPass(); - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; - -} // namespace - char MachineCycleInfoWrapperPass::ID = 0; MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() @@ -111,3 +76,62 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) { CI.print(errs()); return false; } + +bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { + MachineFunction *MF = I.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetInstrInfo *TII = ST.getInstrInfo(); + + // The instruction is cycle invariant if all of its operands are. 
+ for (const MachineOperand &MO : I.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // An instruction that uses or defines a physical register can't e.g. be + // hoisted, so mark this as not invariant. + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && + !TII->isIgnorableUse(MO)) + return false; + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead can't be moved. + return false; + } else if (any_of(Cycle->getEntries(), + [&](const MachineBasicBlock *Block) { + return Block->isLiveIn(Reg); + })) { + // If the reg is live into any header of the cycle we can't hoist an + // instruction which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); + + // If the cycle contains the definition of an operand, then the instruction + // isn't cycle invariant. + if (Cycle->contains(MRI->getVRegDef(Reg)->getParent())) + return false; + } + + // If we got this far, the instruction is cycle invariant! 
+ return true; +} diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 966b012725d81f..797433e7a2809c 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,18 +96,18 @@ static cl::opt SinkLoadBlocksThreshold( cl::init(20), cl::Hidden); static cl::opt -SinkInstsIntoLoop("sink-insts-to-avoid-spills", - cl::desc("Sink instructions into loops to avoid " - "register spills"), - cl::init(false), cl::Hidden); - -static cl::opt SinkIntoLoopLimit( - "machine-sink-loop-limit", - cl::desc("The maximum number of instructions considered for loop sinking."), + SinkInstsIntoCycle("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into cycles to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt SinkIntoCycleLimit( + "machine-sink-cycle-limit", + cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); +STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -119,7 +120,7 @@ namespace { MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree - MachineLoopInfo *LI; + MachineCycleInfo *CI; MachineBlockFrequencyInfo *MBFI; const 
MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -180,8 +181,9 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -232,9 +234,9 @@ namespace { MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, - SmallVectorImpl &Candidates); - bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, + SmallVectorImpl &Candidates); + bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -261,7 +263,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) @@ -378,26 +380,27 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { return false; } -void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, +void MachineSinking::FindCycleSinkCandidates( + MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { - 
LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + if (!isCycleInvariant(Cycle, MI)) { + LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) @@ -409,7 +412,7 @@ void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *B if (!MRI->hasOneDef(MO.getReg())) continue; - LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } @@ -425,22 +428,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); - LI = &getAnalysis(); + CI = &getAnalysis().getCycleInfo(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); - // MachineSink currently uses MachineLoopInfo, which only recognizes natural - // loops. As such, we could sink instructions into irreducible cycles, which - // would be non-profitable. - // WARNING: The current implementation of hasStoreBetween() is incorrect for - // sinking into irreducible cycles (PR53990), this bailout is currently - // necessary for correctness, not just profitability. 
- ReversePostOrderTraversal RPOT(&*MF.begin()); - if (containsIrreducibleCFG(RPOT, *LI)) - return false; - bool EverMadeChange = false; while (true) { @@ -473,32 +466,33 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } - if (SinkInstsIntoLoop) { - SmallVector Loops(LI->begin(), LI->end()); - for (auto *L : Loops) { - MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (SinkInstsIntoCycle) { + SmallVector Cycles(CI->toplevel_begin(), + CI->toplevel_end()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); if (!Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); continue; } SmallVector Candidates; - FindLoopSinkCandidates(L, Preheader, Candidates); + FindCycleSinkCandidates(Cycle, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoLoopLimit) { - LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " "be analysed."); break; } - if (!SinkIntoLoop(L, *I)) + if (!SinkIntoCycle(Cycle, *I)) break; EverMadeChange = true; - ++NumLoopSunk; + ++NumCycleSunk; } } } @@ -520,12 +514,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an - // unreachable loop there may be nowhere to stop. + // unreachable cycle there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; - // Cache all successors, sorted by frequency info and loop depth. 
+ // Cache all successors, sorted by frequency info and cycle depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. @@ -644,13 +638,16 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; - // Avoid breaking back edge. From == To means backedge for single BB loop. + // Avoid breaking back edge. From == To means backedge for single BB cycle. if (!SplitEdges || FromBB == ToBB) return false; - // Check for backedges of more "complex" loops. - if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && - LI->isLoopHeader(ToBB)) + MachineCycle *FromCycle = CI->getCycle(FromBB); + MachineCycle *ToCycle = CI->getCycle(ToBB); + + // Check for backedges of more "complex" cycles. + if (FromCycle == ToCycle && FromCycle && + (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -753,9 +750,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (!PDT->dominates(SuccToSinkTo, MBB)) return true; - // It is profitable to sink an instruction from a deeper loop to a shallower - // loop, even if the latter post-dominates the former (PR21115). - if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + // It is profitable to sink an instruction from a deeper cycle to a shallower + // cycle, even if the latter post-dominates the former (PR21115). + if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. 
@@ -776,11 +773,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - MachineLoop *ML = LI->getLoopFor(MBB); + MachineCycle *MCycle = CI->getCycle(MBB); - // If the instruction is not inside a loop, it is not profitable to sink MI to + // If the instruction is not inside a cycle, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. - if (!ML) + if (!MCycle) return false; auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { @@ -798,7 +795,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, return false; }; - // If this instruction is inside a loop and sinking this instruction can make + // If this instruction is inside a Cycle and sinking this instruction can make // more registers live range shorten, it is still prifitable. for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. @@ -826,14 +823,15 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, return false; } else { MachineInstr *DefMI = MRI->getVRegDef(Reg); - // DefMI is defined outside of loop. There should be no live range - // impact for this operand. Defination outside of loop means: - // 1: defination is outside of loop. - // 2: defination is in this loop, but it is a PHI in the loop header. - if (LI->getLoopFor(DefMI->getParent()) != ML || - (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + MachineCycle *Cycle = CI->getCycle(DefMI->getParent()); + // DefMI is defined outside of cycle. There should be no live range + // impact for this operand. Defination outside of cycle means: + // 1: defination is outside of cycle. + // 2: defination is in this cycle, but it is a PHI in the cycle header. 
+ if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() && + Cycle->getHeader() == DefMI->getParent())) continue; - // The DefMI is defined inside the loop. + // The DefMI is defined inside the cycle. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { @@ -843,8 +841,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, } } - // If MI is in loop and all its operands are alive across the whole loop or if - // no operand sinking make register pressure set exceed limit, it is + // If MI is in cycle and all its operands are alive across the whole cycle or + // if no operand sinking make register pressure set exceed limit, it is // profitable to sink MI. return true; } @@ -876,14 +874,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccs.push_back(DTChild->getBlock()); } - // Sort Successors according to their loop depth or block frequency info. + // Sort Successors according to their cycle depth or block frequency info. llvm::stable_sort( AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? LHSFreq < RHSFreq - : LI->getLoopDepth(L) < LI->getLoopDepth(R); + : CI->getCycleDepth(L) < CI->getCycleDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); @@ -898,7 +896,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); - // Loop over all the operands of the specified instruction. If there is + // loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. 
// SuccToSinkTo - This is the successor to sink this instruction to, once we @@ -945,7 +943,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies - // higher priority, otherwise prioritize smaller loop depths. + // higher priority, otherwise prioritize smaller cycle depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; @@ -968,7 +966,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, } // It is not possible to sink an instruction into its own block. This can - // happen with loops. + // happen with cycles. if (MBB == SuccToSinkTo) return nullptr; @@ -1222,68 +1220,70 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. -bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = L->getLoopPreheader(); - assert(Preheader && "Loop sink needs a preheader block"); +/// Sink instructions into cycles if profitable. This especially tries to +/// prevent register spills caused by register pressure if there is little to no +/// overhead moving instructions into cycles. 
+bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); - if (!L->contains(&MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop + // the instruction (and thus possibly executing it on every cycle // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. 
if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); CanSink = false; break; } - LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); return false; } if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { - LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); return false; } - LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, I); @@ -1407,9 +1407,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, TryBreak = true; } - // Don't sink instructions into a loop. 
- if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { - LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + // Don't sink instructions into a cycle. + if (!TryBreak && CI->getCycle(SuccToSinkTo) && + (!CI->getCycle(SuccToSinkTo)->isReducible() || + CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { + LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n"); TryBreak = true; } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 0f532f4fe3240d..2f205f9ab37356 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -132,6 +132,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir index bdb99f296fcabe..0a886925ffab49 100644 --- a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir +++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir @@ -1,10 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK1 # # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK2 --- | diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index e23bea234be9fa..684774b18308e0 
100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -296,6 +296,7 @@ ; GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Common Subexpression Elimination ; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Machine Cycle Info Analysis ; GCN-O1-NEXT: Machine code sinking ; GCN-O1-NEXT: Peephole Optimizations ; GCN-O1-NEXT: Remove dead machine instructions @@ -574,6 +575,7 @@ ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Common Subexpression Elimination ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction +; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Machine code sinking ; GCN-O1-OPTS-NEXT: Peephole Optimizations ; GCN-O1-OPTS-NEXT: Remove dead machine instructions @@ -861,6 +863,7 @@ ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Common Subexpression Elimination ; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Machine Cycle Info Analysis ; GCN-O2-NEXT: Machine code sinking ; GCN-O2-NEXT: Peephole Optimizations ; GCN-O2-NEXT: Remove dead machine instructions @@ -1161,6 +1164,7 @@ ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Common Subexpression Elimination ; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Machine Cycle Info Analysis ; GCN-O3-NEXT: Machine code sinking ; GCN-O3-NEXT: Peephole Optimizations ; GCN-O3-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index d2507bf6365b6d..098422a4a770d5 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -91,6 +91,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: 
Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index d67fed77d35699..eff5184e9c84c8 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -111,6 +111,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 89b14bef9bce3e..21b8b8f7842b18 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -90,6 +90,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index e83266f72e488f..8552ebc348cbb9 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -107,6 +107,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index b526e4f471b1a2..d805dcad8b6e64 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,14 +32,13 @@ define 
dso_local void @fn() { ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_15: # %for.inc +; CHECK-NEXT: .LBB0_16: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_19 Depth 2 +; CHECK-NEXT: # Child Loop BB0_20 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -56,7 +55,7 @@ define dso_local void @fn() { ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_9 +; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -65,10 +64,12 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -77,82 +78,82 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; CHECK-NEXT: jmp .LBB0_6 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_9: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_10 -; 
CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: movl %edi, %esi -; CHECK-NEXT: movl $0, %edi -; CHECK-NEXT: movb %cl, %dl -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.7: # %af +; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: .LBB0_11: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_8 -; CHECK-NEXT: .LBB0_16: # %if.end39 +; CHECK-NEXT: jne .LBB0_12 +; CHECK-NEXT: .LBB0_17: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_18 -; CHECK-NEXT: # %bb.17: # %if.then41 +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.18: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_18: # %for.end46 +; CHECK-NEXT: .LBB0_19: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_20 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_19: # %for.cond47 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movb %dl, %dh +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: # %bb.20: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 +; 
CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: # %bb.21: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: .LBB0_10: # %ae +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: .LBB0_9: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: # %bb.12: # %if.end26 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_15 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: # %bb.14: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_15 -; CHECK-NEXT: # %bb.14: # %if.then31 +; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: # %bb.15: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_15 +; CHECK-NEXT: jmp .LBB0_16 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl +; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: jmp .LBB0_7 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll index e28169217c01e7..981a60d09545f7 100644 --- 
a/llvm/test/CodeGen/X86/switch-phi-const.ll +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -93,12 +93,12 @@ define void @switch_trunc_phi_const(i32 %x) { ; CHECK: # %bb.0: # %bb0 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: movzbl %dil, %ecx -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 ; CHECK-NEXT: decl %ecx ; CHECK-NEXT: cmpl $54, %ecx ; CHECK-NEXT: ja .LBB1_8 ; CHECK-NEXT: # %bb.1: # %bb0 +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 ; CHECK-NEXT: jmpq *.LJTI1_0(,%rcx,8) ; CHECK-NEXT: .LBB1_8: # %default ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index b44895293b4117..0f8bb837f82a51 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,8 +1377,6 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1388,20 +1386,24 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_4 -; ENABLE-NEXT: ## %bb.3: ## %for.body4.i +; ENABLE-NEXT: je LBB16_3 +; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something +; ENABLE-NEXT: jmp LBB16_5 +; ENABLE-NEXT: LBB16_3: +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_4: ## %for.inc +; ENABLE-NEXT: LBB16_5: ## %for.inc ; ENABLE-NEXT: 
## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_4 -; ENABLE-NEXT: ## %bb.5: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_5 +; ENABLE-NEXT: ## %bb.6: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1418,8 +1420,6 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,20 +1429,24 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_4 -; DISABLE-NEXT: ## %bb.3: ## %for.body4.i +; DISABLE-NEXT: je LBB16_3 +; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something +; DISABLE-NEXT: jmp LBB16_5 +; DISABLE-NEXT: LBB16_3: +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_4: ## %for.inc +; DISABLE-NEXT: LBB16_5: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_4 -; DISABLE-NEXT: ## %bb.5: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_5 +; DISABLE-NEXT: ## %bb.6: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx From 496156ac57da3abd9c8a6dc422852b7bdfaa448f Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Tue, 3 May 2022 18:57:25 +0800 Subject: [PATCH 311/908] [X86][AMX] Multiple configure for AMX register. 
The previous solution depends on the variable name to record the shape information. However, it is not reliable, because in a release build the compiler does not set the variable name. It can be accomplished with an additional option `fno-discard-value-names`, but that is not acceptable for users. This patch is to preconfigure the tile register with machine instructions. It follows the same way that single configure does. In the future we can fall back to multiple configure when single configure fails due to the shape dependency issue. The algorithm to configure the tile register is simple in the patch. We may improve it in the future. It configures tile registers based on basic block. The compiler would spill the tile register if it lives out of the basic block. After the configure there should be no spill across tile configure in the register allocation. Just like fast register allocation, the algorithm walks the instructions in reverse order. When the shape dependency isn't met, it inserts ldtilecfg after the last instruction that defines the shape. In post configuration the compiler also walks the basic block to collect the physical tile register number and generates instructions to fill the stack slot with the corresponding shape information. TODO: There is some follow-up work in D125602. The risk is that modifying the fast RA may cause regressions, as fast RA is used for different targets. We may create an independent RA for tile registers. 
Differential Revision: https://reviews.llvm.org/D125075 --- llvm/lib/Target/X86/CMakeLists.txt | 1 + llvm/lib/Target/X86/X86.h | 4 + llvm/lib/Target/X86/X86FastPreTileConfig.cpp | 697 ++++++++++++++++++ llvm/lib/Target/X86/X86FastTileConfig.cpp | 294 +++----- llvm/lib/Target/X86/X86InstrAMX.td | 15 +- llvm/lib/Target/X86/X86TargetMachine.cpp | 9 +- llvm/test/CodeGen/X86/AMX/amx-across-func.ll | 280 +++++++ llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll | 151 ++-- llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll | 140 ++-- .../CodeGen/X86/AMX/amx-fastconfig-phi.mir | 177 +++++ .../CodeGen/X86/AMX/amx-fastconfig-phi2.mir | 130 ++++ .../CodeGen/X86/AMX/amx-fastconfig-phi4.mir | 144 ++++ .../CodeGen/X86/AMX/amx-fastconfig-spill.mir | 154 ++++ llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir | 146 ++++ .../CodeGen/X86/AMX/amx-fastpreconfig.mir | 61 ++ llvm/test/CodeGen/X86/AMX/amx-zero-config.ll | 81 +- llvm/test/CodeGen/X86/O0-pipeline.ll | 2 +- 17 files changed, 2079 insertions(+), 407 deletions(-) create mode 100644 llvm/lib/Target/X86/X86FastPreTileConfig.cpp create mode 100644 llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir create mode 100644 llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir create mode 100644 llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir create mode 100644 llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir create mode 100644 llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir create mode 100644 llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 5a1d04e2d83568..fadd272049f41f 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -38,6 +38,7 @@ set(sources X86PreAMXConfig.cpp X86LowerAMXIntrinsics.cpp X86TileConfig.cpp + X86FastPreTileConfig.cpp X86FastTileConfig.cpp X86PreTileConfig.cpp X86ExpandPseudo.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 10e1c5d6ed38ed..7344900f2e312d 100644 --- 
a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -79,6 +79,9 @@ FunctionPass *createX86DynAllocaExpander(); /// Return a pass that config the tile registers. FunctionPass *createX86TileConfigPass(); +/// Return a pass that preconfig the tile registers before fast reg allocation. +FunctionPass *createX86FastPreTileConfigPass(); + /// Return a pass that config the tile registers after fast reg allocation. FunctionPass *createX86FastTileConfigPass(); @@ -175,6 +178,7 @@ void initializeX86PartialReductionPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &); void initializeX86PreTileConfigPass(PassRegistry &); +void initializeX86FastPreTileConfigPass(PassRegistry &); void initializeX86FastTileConfigPass(PassRegistry &); void initializeX86TileConfigPass(PassRegistry &); void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp new file mode 100644 index 00000000000000..08ccfeda5a4abf --- /dev/null +++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp @@ -0,0 +1,697 @@ +//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Pass to preconfig the shape of physical tile registers +/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm +/// walk each instruction of basic block in reverse order. All the tile +/// registers that live out the basic block would be spilled and reloaded +/// before its user. 
It also check the depenedency of the shape to ensure +/// the shape is defined before ldtilecfg. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86MachineFunctionInfo.h" +#include "X86RegisterInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "fastpretileconfig" + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads, "Number of loads added"); + +namespace { + +class X86FastPreTileConfig : public MachineFunctionPass { + MachineFunction *MF = nullptr; + const X86Subtarget *ST = nullptr; + const TargetInstrInfo *TII = nullptr; + MachineRegisterInfo *MRI = nullptr; + X86MachineFunctionInfo *X86FI = nullptr; + MachineFrameInfo *MFI = nullptr; + const TargetRegisterInfo *TRI = nullptr; + MachineBasicBlock *MBB = nullptr; + int CfgSS = -1; + struct PHIInfo { + Register Row; + Register Col; + Register StackAddr; + }; + DenseMap VisitedPHIs; + + /// Maps virtual regs to the frame index where these values are spilled. + IndexedMap StackSlotForVirtReg; + + /// Has a bit set for tile virtual register for which it was determined + /// that it is alive across blocks. 
+ BitVector MayLiveAcrossBlocks; + + int getStackSpaceFor(Register VirtReg); + void InitializeTileConfigStackSpace(); + bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI); + void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill); + void reload(MachineBasicBlock::iterator UseMI, Register VirtReg, + MachineOperand *RowMO, MachineOperand *ColMO); + void canonicalizePHIs(MachineBasicBlock &MBB); + void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI); + void convertPHIs(MachineBasicBlock &MBB); + bool configBasicBlock(MachineBasicBlock &MBB); + +public: + X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {} + + /// Return the pass name. + StringRef getPassName() const override { + return "Fast Tile Register Preconfigure"; + } + + /// Perform tile register configure. + bool runOnMachineFunction(MachineFunction &MFunc) override; + + static char ID; +}; + +} // end anonymous namespace + +char X86FastPreTileConfig::ID = 0; + +INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE, + "Fast Tile Register Preconfigure", false, false) +INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE, + "Fast Tile Register Preconfigure", false, false) + +static bool dominates(MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator A, + MachineBasicBlock::const_iterator B) { + auto MBBEnd = MBB.end(); + if (B == MBBEnd) + return true; + + MachineBasicBlock::const_iterator I = MBB.begin(); + for (; &*I != A && &*I != B; ++I) + ; + + return &*I == A; +} + +/// This allocates space for the specified virtual register to be held on the +/// stack. +int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) { + // Find the location Reg would belong... + int SS = StackSlotForVirtReg[VirtReg]; + // Already has space allocated? + if (SS != -1) + return SS; + + // Allocate a new stack object for this spill location... 
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + unsigned Size = TRI->getSpillSize(RC); + Align Alignment = TRI->getSpillAlign(RC); + int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment); + + // Assign the slot. + StackSlotForVirtReg[VirtReg] = FrameIdx; + return FrameIdx; +} + +/// Returns false if \p VirtReg is known to not live out of the current config. +/// If \p VirtReg live out of the current MBB, it must live out of the current +/// config +bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) { + if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) + return true; + + for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) { + if (UseInst.getParent() != MBB) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } + + // The use and def are in the same MBB. If the tile register is + // reconfigured, it is crobbered and we need to spill and reload + // tile register. + if (CfgMI) { + if (dominates(*MBB, *CfgMI, UseInst)) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } + } + } + + return false; +} + +void X86FastPreTileConfig::InitializeTileConfigStackSpace() { + MachineBasicBlock &MBB = MF->front(); + MachineInstr *MI = &*MBB.getFirstNonPHI(); + DebugLoc DL; + if (ST->hasAVX512()) { + Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass); + BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS) + .addReg(Zmm); + } else if (ST->hasAVX2()) { + Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass); + BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS) + .addReg(Ymm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS, + 32) + .addReg(Ymm); + } else { + assert(ST->hasSSE2() && "AMX should assume SSE2 enabled"); + unsigned StoreOpc = 
ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; + Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass); + BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS) + .addReg(Xmm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16) + .addReg(Xmm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32) + .addReg(Xmm); + addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48) + .addReg(Xmm); + } + // Fill in the palette first. + addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS) + .addImm(1); +} + +/// Insert spill instruction for \p AssignedReg before \p Before. +/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot. +void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before, + Register VirtReg, bool Kill) { + LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n"); + int FI = getStackSpaceFor(VirtReg); + LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n'); + + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + // Don't need shape information for tile store, becasue it is adjacent to + // the tile def instruction. + TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI); + ++NumStores; + + // TODO: update DBG_VALUEs +} + +/// Insert reload instruction for \p PhysReg before \p Before. 
+void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI, + Register OrigReg, MachineOperand *RowMO, + MachineOperand *ColMO) { + int FI = getStackSpaceFor(OrigReg); + const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg); + Register TileReg; + // Fold copy to tileload + // BB1: + // spill src to s + // + // BB2: + // t = copy src + // --> + // t = tileload (s) + if (UseMI->isCopy()) + TileReg = UseMI->getOperand(0).getReg(); + else + TileReg = MRI->createVirtualRegister(&RC); + // Can't use TII->loadRegFromStackSlot(), because we need the shape + // information for reload. + // tileloadd (%sp, %idx), %tmm + unsigned Opc = X86::PTILELOADDV; + Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + // FIXME: MBB is not the parent of UseMI. + MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), + TII->get(X86::MOV64ri), StrideReg) + .addImm(64); + NewMI = addFrameReference( + BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg) + .addReg(RowMO->getReg()) + .addReg(ColMO->getReg()), + FI); + MachineOperand &MO = NewMI->getOperand(5); + MO.setReg(StrideReg); + MO.setIsKill(true); + RowMO->setIsKill(false); + ColMO->setIsKill(false); + // Erase copy instruction after it is folded. + if (UseMI->isCopy()) { + UseMI->eraseFromParent(); + } else { + // Replace the register in the user MI. + for (auto &MO : UseMI->operands()) { + if (MO.isReg() && MO.getReg() == OrigReg) + MO.setReg(TileReg); + } + } + + ++NumLoads; + LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into " + << printReg(TileReg, TRI) << '\n'); +} + +static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { + // The instruction must have 3 operands: tile def, row, col. 
+ if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo()) + return false; + MachineOperand &MO = MI.getOperand(0); + + if (MO.isReg()) { + Register Reg = MO.getReg(); + // FIXME it may be used after Greedy RA and the physical + // register is not rewritten yet. + if (Reg.isVirtual() && + MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) + return true; + if (Reg >= X86::TMM0 && Reg <= X86::TMM7) + return true; + } + + return false; +} + +static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) { + MachineInstr *MI = MRI->getVRegDef(TileReg); + if (isTileDef(MRI, *MI)) { + MachineOperand *RowMO = &MI->getOperand(1); + MachineOperand *ColMO = &MI->getOperand(2); + return ShapeT(RowMO, ColMO, MRI); + } else if (MI->isCopy()) { + TileReg = MI->getOperand(1).getReg(); + return getShape(MRI, TileReg); + } + + // The def should not be PHI node, because we walk the MBB in reverse post + // order. + assert(MI->isPHI() && "Unexpected PHI when get shape."); + llvm_unreachable("Unexpected MI when get shape."); +} + +// BB0: +// spill t0 to s0 +// BB1: +// spill t1 to s1 +// +// BB2: +// t = phi [t0, bb0] [t1, bb1] +// --> +// row = phi [r0, bb0] [r1, bb1] +// col = phi [c0, bb0] [c1, bb1] +// s = phi [s0, bb0] [s1, bb1] +// t = tileload row, col, s +// The new instruction is inserted at the end of the phi node. The order +// of the original phi node is not ensured. +void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB, + MachineInstr &PHI) { + // 1. Create instruction to get stack slot address of each incoming block. + // 2. Create PHI node for the stack address. + // 3. Create PHI node for shape. If one of the incoming shape is immediate + // use the immediate and delete the PHI node. + // 4. Create tileload instruction from the stack address. 
+ Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(), + TII->get(X86::PHI), StackAddrReg); + Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass); + MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(), + TII->get(X86::PHI), RowReg); + Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass); + MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(), + TII->get(X86::PHI), ColReg); + // Record the mapping of phi node and its row/column information. + VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg}; + + for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) { + // Get the 2 incoming value of tile register and MBB. + Register InTileReg = PHI.getOperand(I).getReg(); + // Mark it as liveout, so that it will be spilled when visit + // the incoming MBB. Otherwise since phi will be deleted, it + // would miss spill when visit incoming MBB. + MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg)); + MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB(); + + MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg); + MachineBasicBlock::iterator InsertPos; + if (TileDefMI->isPHI()) { + InsertPos = TileDefMI->getParent()->getFirstNonPHI(); + if (VisitedPHIs.count(TileDefMI)) { // circular phi reference + // def t1 + // / \ + // def t2 t3 = phi(t1, t4) <-- + // \ / | + // t4 = phi(t2, t3)------------- + // + // For each (row, column and stack address) append phi incoming value. 
+ // Create r3 = phi(r1, r4) + // Create r4 = phi(r2, r3) + Register InRowReg = VisitedPHIs[TileDefMI].Row; + Register InColReg = VisitedPHIs[TileDefMI].Col; + Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr; + RowPHI.addReg(InRowReg).addMBB(InMBB); + ColPHI.addReg(InColReg).addMBB(InMBB); + AddrPHI.addReg(InStackAddrReg).addMBB(InMBB); + continue; + } else { + // Recursively convert PHI to tileload + convertPHI(TileDefMI->getParent(), *TileDefMI); + // The PHI node is coverted to tileload instruction. Get the stack + // address from tileload operands. + MachineInstr *TileLoad = MRI->getVRegDef(InTileReg); + assert(TileLoad->getOpcode() == X86::PTILELOADDV); + Register InRowReg = TileLoad->getOperand(1).getReg(); + Register InColReg = TileLoad->getOperand(2).getReg(); + Register InStackAddrReg = TileLoad->getOperand(3).getReg(); + RowPHI.addReg(InRowReg).addMBB(InMBB); + ColPHI.addReg(InColReg).addMBB(InMBB); + AddrPHI.addReg(InStackAddrReg).addMBB(InMBB); + } + } else { + InsertPos = TileDefMI->getIterator(); + + // Fill the incoming operand of row/column phi instruction. + ShapeT Shape = getShape(MRI, InTileReg); + Shape.getRow()->setIsKill(false); + Shape.getCol()->setIsKill(false); + RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB); + ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB); + + // The incoming tile register live out of its def BB, it would be spilled. 
+ // Create MI to get the spill stack slot address for the tile register + int FI = getStackSpaceFor(InTileReg); + Register InStackAddrReg = + MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(), + TII->get(X86::LEA64r), InStackAddrReg) + .addFrameIndex(FI), + 0); + AddrPHI.addReg(InStackAddrReg).addMBB(InMBB); + } + } + + MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); + Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg) + .addImm(64); + Register TileReg = PHI.getOperand(0).getReg(); + MachineInstr *NewMI = addDirectMem( + BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg) + .addReg(RowReg) + .addReg(ColReg), + StackAddrReg); + MachineOperand &MO = NewMI->getOperand(5); + MO.setReg(StrideReg); + MO.setIsKill(true); + PHI.eraseFromParent(); + VisitedPHIs.erase(&PHI); +} + +static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) { + MachineOperand &MO = MI.getOperand(0); + if (MO.isReg() && MO.getReg().isVirtual() && + MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) + return true; + return false; +} + +void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) { + SmallVector PHIs; + + for (MachineInstr &MI : MBB) { + if (!MI.isPHI()) + break; + if (!isTileRegDef(MRI, MI)) + continue; + PHIs.push_back(&MI); + } + // Canonicalize the phi node first. One tile phi may depeneds previous + // phi node. For below case, we need convert %t4. + // + // BB0: + // %t3 = phi (t1 BB1, t2 BB0) + // %t4 = phi (t5 BB1, t3 BB0) + // --> + // %t3 = phi (t1 BB1, t2 BB0) + // %t4 = phi (t5 BB1, t2 BB0) + // + while (!PHIs.empty()) { + MachineInstr *PHI = PHIs.pop_back_val(); + + // Find the operand that is incoming from the same MBB and the def + // is also phi node. 
+ MachineOperand *InMO = nullptr; + MachineInstr *DefMI = nullptr; + for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) { + Register InTileReg = PHI->getOperand(I).getReg(); + MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB(); + DefMI = MRI->getVRegDef(InTileReg); + if (InMBB != &MBB || !DefMI->isPHI()) + continue; + + InMO = &PHI->getOperand(I); + break; + } + // If can't find such operand, do nothing. + if (!InMO) + continue; + + // Current phi node depends on previous phi node. Break the + // dependency. + Register DefTileReg; + for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) { + MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB(); + if (InMBB != &MBB) + continue; + DefTileReg = DefMI->getOperand(I).getReg(); + InMO->setReg(DefTileReg); + break; + } + } +} + +void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) { + SmallVector PHIs; + for (MachineInstr &MI : MBB) { + if (!MI.isPHI()) + break; + if (!isTileRegDef(MRI, MI)) + continue; + PHIs.push_back(&MI); + } + while (!PHIs.empty()) { + MachineInstr *MI = PHIs.pop_back_val(); + VisitedPHIs.clear(); + convertPHI(&MBB, *MI); + } +} + +// PreTileConfig should configure the tile registers based on basic +// block. 
+bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) { + this->MBB = &MBB; + bool Change = false; + MachineInstr *LastShapeMI = nullptr; + MachineInstr *LastTileCfg = nullptr; + bool HasUnconfigTile = false; + + auto Config = [&](MachineInstr &Before) { + if (CfgSS == -1) + CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(), + ST->getTileConfigAlignment(), false); + LastTileCfg = addFrameReference( + BuildMI(MBB, Before, DebugLoc(), TII->get(X86::LDTILECFG)), CfgSS); + LastShapeMI = nullptr; + Change = true; + }; + auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (Reg.isVirtual() && + MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) + return true; + } + return false; + }; + for (MachineInstr &MI : reverse(MBB)) { + // We have transformed phi node before configuring BB. + if (MI.isPHI()) + break; + // Don't collect the shape of used tile, the tile should be defined + // before the tile use. Spill and reload would happen if there is only + // tile use after ldtilecfg, so the shape can be collected from reload. + // Take below code for example. %t would be reloaded before tilestore + // call + // .... + // tilestore %r, %c, %t + // --> + // call + // ldtilecfg + // %t = tileload %r, %c + // tilestore %r, %c, %t + if (HasTileOperand(MRI, MI)) + HasUnconfigTile = true; + // According to AMX ABI, all the tile registers including config register + // are volatile. Caller need to save/restore config register. + if (MI.isCall() && HasUnconfigTile) { + MachineBasicBlock::iterator I; + if (LastShapeMI && dominates(MBB, MI, LastShapeMI)) + I = ++LastShapeMI->getIterator(); + else + I = ++MI.getIterator(); + Config(*I); + HasUnconfigTile = false; + continue; + } + if (!isTileDef(MRI, MI)) + continue; + // + //--------------------------------------------------------------------- + // Don't handle COPY instruction. 
If the src and dst of the COPY can be + // in the same config in below case, we just check the shape of t0. + // def row0 + // def col0 + // ldtilecfg + // t0 = tielzero(row0, col0) + // t1 = copy t0 + // ... + // If the src and dst of the COPY can NOT be in the same config in below + // case. Reload would be generated befor the copy instruction. + // def row0 + // def col0 + // t0 = tielzero(row0, col0) + // spill t0 + // ... + // def row1 + // def col1 + // ldtilecfg + // t1 = tilezero(row1, col1) + // reload t0 + // t1 = copy t0 + //--------------------------------------------------------------------- + // + // If MI dominate the last shape def instruction, we need insert + // ldtilecfg after LastShapeMI now. The config doesn't include + // current MI. + // def row0 + // def col0 + // tilezero(row0, col0) <- MI + // def row1 + // def col1 + // ldtilecfg <- insert + // tilezero(row1, col1) + if (LastShapeMI && dominates(MBB, MI, LastShapeMI)) + Config(*(++LastShapeMI->getIterator())); + MachineOperand *RowMO = &MI.getOperand(1); + MachineOperand *ColMO = &MI.getOperand(2); + MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg()); + MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg()); + // If the shape is defined in current MBB, check the domination. + // FIXME how about loop? + if (RowMI->getParent() == &MBB) { + if (!LastShapeMI) + LastShapeMI = RowMI; + else if (dominates(MBB, LastShapeMI, RowMI)) + LastShapeMI = RowMI; + } + if (ColMI->getParent() == &MBB) { + if (!LastShapeMI) + LastShapeMI = ColMI; + else if (dominates(MBB, LastShapeMI, ColMI)) + LastShapeMI = ColMI; + } + // If there is user live out of the tilecfg, spill it and reload in + // before the user. 
+ Register TileReg = MI.getOperand(0).getReg(); + if (mayLiveOut(TileReg, LastTileCfg)) + spill(++MI.getIterator(), TileReg, false); + for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) { + if (UseMI.getParent() == &MBB) { + // check user should not across ldtilecfg + if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI)) + continue; + // reload befor UseMI + reload(UseMI.getIterator(), TileReg, RowMO, ColMO); + } else { + // Don't reload for phi instruction, we handle phi reload separately. + // TODO: merge the reload for the same user MBB. + if (!UseMI.isPHI()) + reload(UseMI.getIterator(), TileReg, RowMO, ColMO); + } + } + } + + // Configure tile registers at the head of the MBB + if (HasUnconfigTile) { + MachineInstr *Before; + if (LastShapeMI == nullptr || LastShapeMI->isPHI()) + Before = &*MBB.getFirstNonPHI(); + else + Before = &*(++LastShapeMI->getIterator()); + + Config(*Before); + } + + return Change; +} + +bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) { + MF = &MFunc; + MRI = &MFunc.getRegInfo(); + ST = &MFunc.getSubtarget(); + TII = ST->getInstrInfo(); + X86FI = MFunc.getInfo(); + MFI = &MFunc.getFrameInfo(); + TRI = ST->getRegisterInfo(); + CfgSS = -1; + + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + StackSlotForVirtReg.resize(NumVirtRegs); + MayLiveAcrossBlocks.clear(); + // We will create register during config. *3 is to make sure + // the virtual register number doesn't exceed the size of + // the bit vector. + MayLiveAcrossBlocks.resize(NumVirtRegs * 3); + bool Change = false; + assert(MRI->isSSA()); + + // Canonicalize the phi node first. + for (MachineBasicBlock &MBB : MFunc) + canonicalizePHIs(MBB); + + // Loop over all of the basic blocks in reverse post order and insert + // ldtilecfg for tile registers. The reserse post order is to facilitate + // PHI node convert. 
+ ReversePostOrderTraversal RPOT(MF); + for (MachineBasicBlock *MBB : RPOT) { + convertPHIs(*MBB); + Change |= configBasicBlock(*MBB); + } + + if (Change) + InitializeTileConfigStackSpace(); + + StackSlotForVirtReg.clear(); + return Change; +} + +FunctionPass *llvm::createX86FastPreTileConfigPass() { + return new X86FastPreTileConfig(); +} diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp index 5ff848243e433d..2949bd048ee00d 100644 --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -40,40 +40,25 @@ namespace { class X86FastTileConfig : public MachineFunctionPass { // context MachineFunction *MF = nullptr; - const X86Subtarget *ST = nullptr; - const TargetRegisterInfo *TRI = nullptr; const TargetInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; + const TargetRegisterInfo *TRI = nullptr; X86MachineFunctionInfo *X86FI = nullptr; - MachineInstr *getTileConfigPoint(); - void tileConfig(); + bool configBasicBlock(MachineBasicBlock &MBB); public: X86FastTileConfig() : MachineFunctionPass(ID) {} - bool fastTileConfig(); - bool isTileLoad(MachineInstr &MI); - bool isTileStore(MachineInstr &MI); - bool isAMXInstr(MachineInstr &MI); - - MachineInstr *getKeyAMXInstr(MachineInstr *MI); - void getTileShapesCfg(MachineInstr *MI, - SmallVector &ShapedTiles); - void getShapeCfgInstrs(MachineInstr *MI, - std::map &RowCfgs, - std::map &ColCfgs); - /// Return the pass name. StringRef getPassName() const override { return "Fast Tile Register Configure"; } - void materializeTileCfg(MachineInstr *MI); - - void rewriteTileCfg(SmallVector &ShapedTiles, - std::map &RowCfgs, - std::map &ColCfgs); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } /// Perform register allocation. 
bool runOnMachineFunction(MachineFunction &MFunc) override; @@ -95,210 +80,107 @@ INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, "Fast Tile Register Configure", false, false) -static bool isTilePhysReg(MachineOperand &Op) { - if (!Op.isReg()) +static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { + // There is no phi instruction after register allocation. + assert(MI.isPHI() == false); + // The instruction must have 3 operands: tile def, row, col. + // It should be an AMX pseudo instruction that has shape operands. + if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || + !MI.isPseudo()) return false; + MachineOperand &MO = MI.getOperand(0); + + if (MO.isReg()) { + Register Reg = MO.getReg(); + // FIXME it may be used after Greedy RA and the physical + // register is not rewritten yet. + if (Reg.isVirtual() && + MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) + return true; + if (Reg >= X86::TMM0 && Reg <= X86::TMM7) + return true; + } - Register Reg = Op.getReg(); - if (Reg >= X86::TMM0 && Reg <= X86::TMM7) - return true; return false; } -static unsigned getTilePhysRegIdx(MachineOperand *Op) { - assert(isTilePhysReg(*Op) && "Tile Operand is invalid"); - return Op->getReg() - X86::TMM0; -} - -static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) { - unsigned Offset = 48 + TIdx; - MI->getOperand(3).ChangeToImmediate(Offset); -} - -static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) { - unsigned Offset = 16 + TIdx * 2; - MI->getOperand(3).ChangeToImmediate(Offset); -} - -bool X86FastTileConfig::isTileLoad(MachineInstr &MI) { - return MI.getOpcode() == X86::PTILELOADDV || - MI.getOpcode() == X86::PTILELOADDT1V; -} -bool X86FastTileConfig::isTileStore(MachineInstr &MI) { - return MI.getOpcode() == X86::PTILESTOREDV; -} -bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) { - // TODO: May need to handle some special nontile amx instrucion.
- if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr()) - return false; - - return llvm::any_of(MI.operands(), isTilePhysReg); -} - -MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) { - auto Cfg = MachineBasicBlock::iterator(MI); - MachineBasicBlock *MBB = MI->getParent(); - MachineInstr *KeyMI = nullptr; - int KeyAMXNum = 0; - - for (auto II = Cfg; II != MBB->end(); II++) { - if (isTileLoad(*II)) { - KeyMI = &*II; +// PreTileConfig should configure the tile registers based on basic +// block. +bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { + bool Change = false; + SmallVector, 6> ShapeInfos; + for (MachineInstr &MI : reverse(MBB)) { + if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::LDTILECFG) continue; + // AMX instructions that define tile register. + if (MI.getOpcode() != X86::LDTILECFG) { + MachineOperand &Row = MI.getOperand(1); + MachineOperand &Col = MI.getOperand(2); + unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0; + ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)}); + } else { // LDTILECFG + // Rewrite the shape information to memory. Stack slot should have + // been initialized to zero in pre config. + int SS = MI.getOperand(0).getIndex(); // tile config stack slot. + for (auto &ShapeInfo : ShapeInfos) { + DebugLoc DL; + unsigned TMMIdx = ShapeInfo.first; + Register RowReg = ShapeInfo.second.getRow()->getReg(); + Register ColReg = ShapeInfo.second.getCol()->getReg(); + // Here is the data format for the tile config. + // 0 palette + // 1 start_row + // 2-15 reserved, must be zero + // 16-17 tile0.colsb Tile 0 bytes per row. + // 18-19 tile1.colsb Tile 1 bytes per row. + // 20-21 tile2.colsb Tile 2 bytes per row. + // ... (sequence continues) + // 30-31 tile7.colsb Tile 7 bytes per row. + // 32-47 reserved, must be zero + // 48 tile0.rows Tile 0 rows. + // 49 tile1.rows Tile 1 rows. + // 50 tile2.rows Tile 2 rows. + // ... (sequence continues) + // 55 tile7.rows Tile 7 rows. 
+ // 56-63 reserved, must be zero + int RowOffset = 48 + TMMIdx; + int ColOffset = 16 + TMMIdx * 2; + + Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit); + BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg); + MachineInstrBuilder StoreRow = + BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr)); + addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg); + + MachineInstrBuilder StoreCol = + BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr)); + addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg); + } + ShapeInfos.clear(); + Change = true; } - - if (isTileStore(*II)) { - assert(KeyMI && "Key AMX Should be found before!"); - break; - } - - if (isAMXInstr(*II)) { - assert((KeyAMXNum == 0) && "Too many Key AMX instruction!"); - (void) KeyAMXNum; - KeyAMXNum++; - KeyMI = &*II; - } - } - assert(KeyMI && "There must be an AMX instruction."); - return KeyMI; -} - -// Orderly get the tiles in key amx instruction, uses before defs. -void X86FastTileConfig::getTileShapesCfg( - MachineInstr *CfgMI, SmallVector &ShapedTiles) { - MachineInstr *KeyMI = getKeyAMXInstr(CfgMI); - - SmallVector DefTiles; - for (MachineOperand &MO : KeyMI->operands()) { - if (!isTilePhysReg(MO)) - continue; - if (MO.isDef()) - DefTiles.push_back(&MO); - else - ShapedTiles.push_back(&MO); - } - ShapedTiles.append(DefTiles); -} - -// We pre-config the shapes at position named with "amx.tmm.N.shape.row* and -// amx.shape.N.col*" at pass "Pre AMX Tile Config". -// The 'N' implies the order of tiles in key amx intrinsic. 
-void X86FastTileConfig::getShapeCfgInstrs( - MachineInstr *MI, std::map &RowCfgs, - std::map &ColCfgs) { - auto Cfg = MachineBasicBlock::iterator(MI); - MachineBasicBlock *MBB = MI->getParent(); - - for (auto II = Cfg; II != MBB->begin(); II--) { - if (isAMXInstr(*II) || II->isTerminator() || II->isCall()) - break; - if (!II->mayStore() || !II->hasOneMemOperand()) - continue; - const Value *MemPtr = II->memoperands()[0]->getValue(); - if (!MemPtr) - continue; - - StringRef Name = MemPtr->getName(); - if (!Name.startswith("amx.tmm.")) - continue; - - // Get the 'N'th tile shape config in key amx instruction. - auto N = Name.find(".shape"); - StringRef STileIdx = Name.slice(8, N); - unsigned Idx; - STileIdx.getAsInteger(10, Idx); - - // And related them with their store instructions. - if (Name.contains("row")) - RowCfgs[Idx] = &*II; - else if (Name.contains("col")) - ColCfgs[Idx] = &*II; - else - llvm_unreachable("Invalid tile shape info!"); } - assert((RowCfgs.size() == ColCfgs.size()) && - "The number of tile row and col must be equal!"); -} - -// Here is the data format for the tile config. -// 0 palette = 1 now. -// 1 start_row = 0 now. -// 2-15 reserved, must be zero -// 16-17 tile0.colsb Tile 0 bytes per row. -// 18-19 tile1.colsb Tile 1 bytes per row. -// 20-21 tile2.colsb Tile 2 bytes per row. -// ... (sequence continues) -// 30-31 tile7.colsb Tile 7 bytes per row. -// 32-47 reserved, must be zero -// 48 tile0.rows Tile 0 rows. -// 49 tile1.rows Tile 1 rows. -// 50 tile2.rows Tile 2 rows. -// ... (sequence continues) -// 55 tile7.rows Tile 7 rows. -// 56-63 reserved, must be zero -void X86FastTileConfig::rewriteTileCfg( - SmallVector &ShapedTiles, - std::map &RowCfgs, - std::map &ColCfgs) { - assert((RowCfgs.size() == ShapedTiles.size()) && - "The number of tile shapes not equal with the number of tiles!"); - // Orderly get the tiles and adjust the shape config. 
- for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) { - MachineOperand *MO = ShapedTiles[I]; - unsigned TmmIdx = getTilePhysRegIdx(MO); - if (I == TmmIdx) - continue; - adjustRowCfg(TmmIdx, RowCfgs[I]); - adjustColCfg(TmmIdx, ColCfgs[I]); - } -} - -// We have already preconfig the shapes before fast register allocation at -// X86PreAMXConfig::preWriteTileCfg(). Now, we have done fast register -// allocation, the shapes pre-written before may not rightly corresponding -// to the correct tmm registers, so we need adjust them. -void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) { - SmallVector ShapedTiles; - std::map RowCfgs; - std::map ColCfgs; - - // Orderly keep the tile uses and def in ShapedTiles; - getTileShapesCfg(CfgMI, ShapedTiles); - assert(ShapedTiles.size() && "Not find shapes config!"); - - getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs); - - rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs); -} - -bool X86FastTileConfig::fastTileConfig() { - bool Changed = false; - - for (MachineBasicBlock &MBB : *MF) { - SmallVector CFGs; - for (MachineInstr &MI : MBB) - if (MI.getOpcode() == X86::PLDTILECFGV) - CFGs.push_back(&MI); - for (auto *MI : CFGs) - materializeTileCfg(MI); - if (!CFGs.empty()) - Changed = true; - } - if (Changed) + if (Change) X86FI->setHasVirtualTileReg(true); - return Changed; + + return Change; } bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { MF = &MFunc; MRI = &MFunc.getRegInfo(); - ST = &MFunc.getSubtarget(); + const TargetSubtargetInfo *ST = &MFunc.getSubtarget(); TRI = ST->getRegisterInfo(); TII = MFunc.getSubtarget().getInstrInfo(); X86FI = MFunc.getInfo(); + bool Change = false; + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineBasicBlock &MBB : MFunc) + Change |= configBasicBlock(MBB); - return fastTileConfig(); + return Change; } FunctionPass *llvm::createX86FastTileConfigPass() { diff --git a/llvm/lib/Target/X86/X86InstrAMX.td 
b/llvm/lib/Target/X86/X86InstrAMX.td index 368b05ee8db4bc..df8c096204e052 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -48,22 +48,23 @@ let Predicates = [HasAMXTILE, In64BitMode] in { VEX, T8XD; // Pseduo instruction for RA. - let mayLoad = 1 in + let isPseudo = true, mayLoad = 1 in def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), [(int_x86_ldtilecfg_internal addr:$src)]>; - let mayLoad = 1 in + let isPseudo = true, mayLoad = 1 in def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, opaquemem:$src3), []>; - let mayLoad = 1 in + let isPseudo = true, mayLoad = 1 in def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, opaquemem:$src3), []>; - let mayStore = 1 in + let isPseudo = true, mayStore = 1 in def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1, GR16:$src2, opaquemem:$src3, TILE:$src4), []>; - let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in + let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1, + canFoldAsLoad = 1 in def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2), [(set TILE:$dst, (int_x86_tilezero_internal GR16:$src1, GR16:$src2))]>; @@ -106,7 +107,7 @@ let Predicates = [HasAMXINT8, In64BitMode] in { } // Pseduo instruction for RA. - let Constraints = "$src4 = $dst" in { + let isPseudo = true, Constraints = "$src4 = $dst" in { def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6), @@ -165,7 +166,7 @@ let Predicates = [HasAMXBF16, In64BitMode] in { []>, VEX_4V, T8XS; // Pseduo instruction for RA. 
- let Constraints = "$src4 = $dst" in + let isPseudo = true, Constraints = "$src4 = $dst" in def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1, GR16:$src2, GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6), diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 04778d31169d05..7834162e1d9c5e 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -78,6 +78,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeX86CallFrameOptimizationPass(PR); initializeX86CmovConverterPassPass(PR); initializeX86TileConfigPass(PR); + initializeX86FastPreTileConfigPass(PR); initializeX86FastTileConfigPass(PR); initializeX86LowerTileCopyPass(PR); initializeX86ExpandPseudoPass(PR); @@ -420,9 +421,6 @@ void X86PassConfig::addIRPasses() { addPass(createX86LowerAMXIntrinsicsPass()); addPass(createX86LowerAMXTypePass()); - if (TM->getOptLevel() == CodeGenOpt::None) - addPass(createX86PreAMXConfigPass()); - TargetPassConfig::addIRPasses(); if (TM->getOptLevel() != CodeGenOpt::None) { @@ -511,9 +509,10 @@ void X86PassConfig::addPreRegAlloc() { addPass(createX86FlagsCopyLoweringPass()); addPass(createX86DynAllocaExpander()); - if (getOptLevel() != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) addPass(createX86PreTileConfigPass()); - } + else + addPass(createX86FastPreTileConfigPass()); } void X86PassConfig::addMachineSSAOptimization() { diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll index 7c4024cb1a379e..df4102e3b61d89 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck -check-prefix=O0 %s @buf = dso_local global [3072 x i8] zeroinitializer, align 64 @@ -12,6 +13,10 @@ define internal void @foo() { ; IPRA-LABEL: foo: ; IPRA: # %bb.0: # %entry ; IPRA-NEXT: retq +; +; O0-LABEL: foo: +; O0: # %bb.0: # %entry +; O0-NEXT: retq entry: ret void } @@ -93,6 +98,112 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind { ; IPRA-NEXT: tilerelease ; IPRA-NEXT: vzeroupper ; IPRA-NEXT: retq +; +; O0-LABEL: test_api: +; O0: # %bb.0: +; O0-NEXT: pushq %rbp +; O0-NEXT: movq %rsp, %rbp +; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 +; O0-NEXT: subq $8192, %rsp # imm = 0x2000 +; O0-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) +; O0-NEXT: movb $1, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %si, %cx +; O0-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; O0-NEXT: movw %di, %ax +; O0-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; O0-NEXT: movl $buf, %r8d +; O0-NEXT: movl $32, %r9d +; O0-NEXT: movw $8, %si +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %si, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%r8,%r9), %tmm0 +; O0-NEXT: movl $64, %r8d +; O0-NEXT: movw $8, %si +; O0-NEXT: tilestored %tmm0, (%rdi,%r8) +; O0-NEXT: movl $32, %edi +; O0-NEXT: movl $buf+1024, %esi +; O0-NEXT: movw $8, %ax +; 
O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0 +; O0-NEXT: movl $64, %esi +; O0-NEXT: movw $8, %ax +; O0-NEXT: tilestored %tmm0, (%rdx,%rsi) +; O0-NEXT: vzeroupper +; O0-NEXT: callq foo +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload +; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: movl $32, %r10d +; O0-NEXT: movl $buf+2048, %edi +; O0-NEXT: tileloadd (%rdi,%r10), %tmm0 +; O0-NEXT: movl $64, %edi +; O0-NEXT: tilestored %tmm0, (%rsi,%rdi) +; O0-NEXT: movl $64, %r10d +; O0-NEXT: movw $8, %di +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %di, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%r8,%r10), %tmm0 +; O0-NEXT: movabsq $64, %r8 +; O0-NEXT: tilestored %tmm0, 1024(%rsp,%r8) # 1024-byte Folded Spill +; O0-NEXT: movl $64, %r10d +; O0-NEXT: movw $8, %r8w +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %di, {{[0-9]+}}(%rsp) +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; O0-NEXT: # implicit-def: $r8b +; O0-NEXT: movb %r8b, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg 
{{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%r9,%r10), %tmm2 +; O0-NEXT: movl $64, %r8d +; O0-NEXT: tileloadd (%rsi,%r8), %tmm0 +; O0-NEXT: movw $8, %si +; O0-NEXT: movabsq $64, %r8 +; O0-NEXT: tileloadd 1024(%rsp,%r8), %tmm1 # 1024-byte Folded Reload +; O0-NEXT: tdpbssd %tmm2, %tmm1, %tmm0 +; O0-NEXT: movl $64, %esi +; O0-NEXT: tilestored %tmm0, (%rdx,%rsi) +; O0-NEXT: movl $64, %esi +; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 +; O0-NEXT: movl $32, %esi +; O0-NEXT: movl $buf+2048, %edx +; O0-NEXT: tilestored %tmm0, (%rdx,%rsi) +; O0-NEXT: movq %rbp, %rsp +; O0-NEXT: popq %rbp +; O0-NEXT: tilerelease +; O0-NEXT: retq %3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 8, i8* getelementptr inbounds ([3072 x i8], [3072 x i8]* @buf, i64 0, i64 0), i64 32) %4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %1, i8* getelementptr inbounds ([3072 x i8], [3072 x i8]* @buf, i64 0, i64 1024), i64 32) call void @foo() @@ -226,6 +337,116 @@ define dso_local i32 @test_loop(i32 %0) nounwind { ; IPRA-NEXT: tilerelease ; IPRA-NEXT: vzeroupper ; IPRA-NEXT: retq +; +; O0-LABEL: test_loop: +; O0: # %bb.0: +; O0-NEXT: pushq %rbp +; O0-NEXT: movq %rsp, %rbp +; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 +; O0-NEXT: subq $4096, %rsp # imm = 0x1000 +; O0-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) +; O0-NEXT: movb $1, {{[0-9]+}}(%rsp) +; O0-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; O0-NEXT: vzeroupper +; O0-NEXT: callq foo +; O0-NEXT: # %bb.1: +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; O0-NEXT: xorl %eax, %eax +; O0-NEXT: cmpl $0, %ecx +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: jg .LBB2_4 +; O0-NEXT: jmp .LBB2_3 +; O0-NEXT: .LBB2_2: +; O0-NEXT: movl 
{{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: cmpl $3, %eax +; O0-NEXT: je .LBB2_5 +; O0-NEXT: jmp .LBB2_4 +; O0-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1 +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: movl $buf, %edx +; O0-NEXT: movl $32, %esi +; O0-NEXT: movw $8, %ax +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 +; O0-NEXT: movl $64, %edx +; O0-NEXT: movw $8, %ax +; O0-NEXT: tilestored %tmm0, (%rcx,%rdx) +; O0-NEXT: callq foo +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; O0-NEXT: movl $64, %edx +; O0-NEXT: movw $8, %ax +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%rcx,%rdx), %tmm0 +; O0-NEXT: movl $32, %edx +; O0-NEXT: movl $buf+2048, %ecx +; O0-NEXT: movw $8, %ax +; O0-NEXT: tilestored %tmm0, (%rcx,%rdx) +; O0-NEXT: callq foo +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: addl $1, %eax +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: cmpl $0, %eax +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: je .LBB2_2 +; O0-NEXT: jmp .LBB2_3 +; O0-NEXT: .LBB2_4: +; O0-NEXT: callq foo +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; O0-NEXT: movl $32, %esi +; O0-NEXT: movl $buf+1024, %edx +; O0-NEXT: movw $8, %ax +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 +; O0-NEXT: movl $64, %edx +; O0-NEXT: movw $8, %ax +; O0-NEXT: tilestored %tmm0, (%rcx,%rdx) +; O0-NEXT: movl $64, 
%edx +; O0-NEXT: movw $8, %ax +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%rcx,%rdx), %tmm0 +; O0-NEXT: movl $32, %edx +; O0-NEXT: movl $buf+1024, %ecx +; O0-NEXT: movw $8, %ax +; O0-NEXT: tilestored %tmm0, (%rcx,%rdx) +; O0-NEXT: jmp .LBB2_7 +; O0-NEXT: .LBB2_5: +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: cmpl $7, %eax +; O0-NEXT: jne .LBB2_7 +; O0-NEXT: # %bb.6: +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: addl $1, %eax +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: jmp .LBB2_8 +; O0-NEXT: .LBB2_7: +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: subl $1, %eax +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: .LBB2_8: +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: movq %rbp, %rsp +; O0-NEXT: popq %rbp +; O0-NEXT: tilerelease +; O0-NEXT: retq call void @foo() br label %2 2: @@ -338,6 +559,65 @@ define dso_local void @test_loop2(i32 %0) nounwind { ; IPRA-NEXT: tilerelease ; IPRA-NEXT: vzeroupper ; IPRA-NEXT: retq +; +; O0-LABEL: test_loop2: +; O0: # %bb.0: +; O0-NEXT: pushq %rbp +; O0-NEXT: movq %rsp, %rbp +; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 +; O0-NEXT: subq $3072, %rsp # imm = 0xC00 +; O0-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) +; O0-NEXT: movb $1, {{[0-9]+}}(%rsp) +; O0-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; O0-NEXT: xorl %eax, %eax +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: vzeroupper +; 
O0-NEXT: callq foo +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: cmpl $0, %eax +; O0-NEXT: jle .LBB3_3 +; O0-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; O0-NEXT: movl $buf, %edx +; O0-NEXT: movl $32, %esi +; O0-NEXT: movw $8, %ax +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 +; O0-NEXT: movl $64, %edx +; O0-NEXT: movw $8, %ax +; O0-NEXT: tilestored %tmm0, (%rcx,%rdx) +; O0-NEXT: callq foo +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; O0-NEXT: movl $64, %edx +; O0-NEXT: movw $8, %ax +; O0-NEXT: # implicit-def: $al +; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%rcx,%rdx), %tmm0 +; O0-NEXT: movl $32, %edx +; O0-NEXT: movl $buf+2048, %ecx +; O0-NEXT: movw $8, %ax +; O0-NEXT: tilestored %tmm0, (%rcx,%rdx) +; O0-NEXT: callq foo +; O0-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; O0-NEXT: addl $1, %eax +; O0-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; O0-NEXT: jmp .LBB3_1 +; O0-NEXT: .LBB3_3: +; O0-NEXT: movq %rbp, %rsp +; O0-NEXT: popq %rbp +; O0-NEXT: tilerelease +; O0-NEXT: retq br label %2 2: %3 = phi i32 [ 0, %1 ], [ %7, %5 ] diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll b/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll index 0bc849db31a8bc..8767c86270fc17 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll @@ -12,6 +12,9 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: movq %rsp, %rbp ; AVX512-NEXT: andq $-1024, %rsp # imm = 0xFC00 ; AVX512-NEXT: subq $25600, %rsp # imm = 0x6400 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovups %zmm0, 
{{[0-9]+}}(%rsp) +; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %dx, %ax ; AVX512-NEXT: movw %si, %cx ; AVX512-NEXT: movl %edi, {{[0-9]+}}(%rsp) @@ -20,6 +23,7 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdi ; AVX512-NEXT: xorl %esi, %esi ; AVX512-NEXT: movl $1088, %edx # imm = 0x440 +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: callq memset@PLT ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) @@ -44,10 +48,12 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: # %bb.1: # %if.then ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movq $buf, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movabsq $buf, %rax +; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $32, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw (%rax), %si +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw 2(%rax), %dx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax @@ -57,15 +63,12 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %dil -; AVX512-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-NEXT: movq 
{{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: addq $64, %rdx @@ -73,37 +76,40 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movq $buf, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movabsq $buf, %rax +; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $32, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX512-NEXT: movw (%rax), %di +; AVX512-NEXT: movw (%rax), %si +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw 2(%rax), %dx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %dx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %r8b -; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; AVX512-NEXT: tileloadd (%rdx,%rdi), %tmm0 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: addq $64, %rdx +; AVX512-NEXT: movl $64, %esi ; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movq $buf, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movabsq $buf, %rax +; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $32, 
{{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw (%rax), %si +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw 2(%rax), %dx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax @@ -113,15 +119,12 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdi -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %r8b -; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: ldtilecfg (%rdi) ; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: addq $64, %rdx @@ -131,10 +134,12 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: .LBB0_2: # %if.else ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movq $buf2, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movabsq $buf2, %rax +; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $32, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw (%rax), %si +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw 2(%rax), %dx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax @@ -144,15 +149,12 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx 
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %dil -; AVX512-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: addq $64, %rdx @@ -160,37 +162,40 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movq $buf2, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movabsq $buf2, %rax +; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $32, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX512-NEXT: movw (%rax), %di +; AVX512-NEXT: movw (%rax), %si +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw 2(%rax), %dx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %dx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %r8b -; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, 
{{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; AVX512-NEXT: tileloadd (%rdx,%rdi), %tmm0 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: addq $64, %rdx +; AVX512-NEXT: movl $64, %esi ; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movq $buf2, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movabsq $buf2, %rax +; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $32, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw (%rax), %si +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512-NEXT: movw 2(%rax), %dx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax @@ -200,15 +205,12 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdi -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %r8b -; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: ldtilecfg (%rdi) ; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: addq $64, %rdx @@ -219,7 +221,6 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; AVX512-NEXT: movl $1088, %edx # imm = 0x440 ; AVX512-NEXT: movq %rdx, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: callq memcpy@PLT ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdi @@ -516,23 +517,21 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: vmovdqa64 %zmm0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d -; AVX512-NEXT: movw %r10w, %di -; AVX512-NEXT: shrl $2, %r10d -; AVX512-NEXT: movw %r10w, %r9w -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovaps %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %r8b -; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r8d +; AVX512-NEXT: movw %r8w, %di +; AVX512-NEXT: shrl $2, %r8d +; AVX512-NEXT: movw %r8w, %r9w +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # kill: def $r10b killed $r10b killed $r10d -; AVX512-NEXT: movb %r10b, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $r9b +; AVX512-NEXT: movb %r9b, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX512-NEXT: movl $64, %r8d @@ -599,9 +598,9 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Reload ; AVX512-NEXT: movw 
{{[-0-9]+}}(%r{{[sb]}}p), %dx # 2-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload ; AVX512-NEXT: # kill: def $rdi killed $rax ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm0 ; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm1 ; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm2 ; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm3 @@ -617,12 +616,10 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm13 ; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm14 ; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm15 -; AVX512-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %zmm16 ; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %dx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovdqa64 %zmm16, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa64 %zmm15, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa64 %zmm14, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa64 %zmm13, {{[0-9]+}}(%rsp) @@ -638,18 +635,16 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) # ; AVX512-NEXT: vmovdqa64 %zmm3, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa64 %zmm2, {{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovdqa64 %zmm0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdi -; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8 -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: 
movb %al, %r9b -; AVX512-NEXT: movb %r9b, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: ldtilecfg (%r8) ; AVX512-NEXT: movl $64, %r8d ; AVX512-NEXT: tileloadd (%rdi,%r8), %tmm0 ; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi) diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll index 38c01f2f46ccea..4aedf0a9788e52 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll @@ -10,7 +10,10 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) n ; AVX512-NEXT: pushq %rbp ; AVX512-NEXT: movq %rsp, %rbp ; AVX512-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; AVX512-NEXT: subq $6144, %rsp # imm = 0x1800 +; AVX512-NEXT: subq $8192, %rsp # imm = 0x2000 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %dx, %ax ; AVX512-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512-NEXT: movw %si, %ax @@ -30,34 +33,32 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) n ; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %sil -; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw $8, {{[0-9]+}}(%rsp) -; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; AVX512-NEXT: movl $buf, %r9d ; AVX512-NEXT: movl $32, %r10d ; AVX512-NEXT: movw $8, %si +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %si, 
{{[0-9]+}}(%rsp) +; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX512-NEXT: tileloadd (%r9,%r10), %tmm0 -; AVX512-NEXT: movl $64, %r8d -; AVX512-NEXT: tilestored %tmm0, (%r11,%r8) -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $8, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movl $64, %r9d +; AVX512-NEXT: movw $8, %si +; AVX512-NEXT: tilestored %tmm0, (%r8,%r9) +; AVX512-NEXT: movl $buf, %r8d +; AVX512-NEXT: movl $32, %r9d +; AVX512-NEXT: movw $8, %si +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $sil +; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; AVX512-NEXT: tileloadd (%r9,%r10), %tmm0 +; AVX512-NEXT: tileloadd (%r8,%r9), %tmm0 +; AVX512-NEXT: movl $64, %r8d +; AVX512-NEXT: movw $8, %si ; AVX512-NEXT: tilestored %tmm0, (%rdi,%r8) -; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %dil -; AVX512-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: ldtilecfg (%rsi) ; AVX512-NEXT: movl $buf, %esi ; AVX512-NEXT: movl $32, %edi ; AVX512-NEXT: tileloadd (%rsi,%rdi), %tmm0 @@ -69,34 +70,32 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) n ; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %sil -; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw $8, {{[0-9]+}}(%rsp) 
-; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; AVX512-NEXT: movl $buf2, %r9d ; AVX512-NEXT: movl $32, %r10d ; AVX512-NEXT: movw $8, %si +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp) +; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX512-NEXT: tileloadd (%r9,%r10), %tmm0 -; AVX512-NEXT: movl $64, %r8d -; AVX512-NEXT: tilestored %tmm0, (%r11,%r8) -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $8, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movl $64, %r9d +; AVX512-NEXT: movw $8, %si +; AVX512-NEXT: tilestored %tmm0, (%r8,%r9) +; AVX512-NEXT: movl $buf2, %r8d +; AVX512-NEXT: movl $32, %r9d +; AVX512-NEXT: movw $8, %si +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $sil +; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; AVX512-NEXT: tileloadd (%r9,%r10), %tmm0 +; AVX512-NEXT: tileloadd (%r8,%r9), %tmm0 +; AVX512-NEXT: movl $64, %r8d +; AVX512-NEXT: movw $8, %si ; AVX512-NEXT: tilestored %tmm0, (%rdi,%r8) -; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %dil -; AVX512-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: ldtilecfg (%rsi) ; AVX512-NEXT: movl $buf2, %esi ; AVX512-NEXT: movl $32, %edi ; AVX512-NEXT: tileloadd (%rsi,%rdi), %tmm0 @@ -106,36 +105,45 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) n ; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload ; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; 
AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %sil -; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512-NEXT: movl $64, %r10d +; AVX512-NEXT: movw $8, %di +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) +; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; AVX512-NEXT: tileloadd (%r8,%r10), %tmm0 +; AVX512-NEXT: movabsq $64, %r8 +; AVX512-NEXT: tilestored %tmm0, 1024(%rsp,%r8) # 1024-byte Folded Spill +; AVX512-NEXT: movl $64, %r10d +; AVX512-NEXT: movw $8, %r8w +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw $8, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $8, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $al +; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-NEXT: # implicit-def: $r8b +; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; AVX512-NEXT: movl $64, %esi -; AVX512-NEXT: movw $8, %di -; AVX512-NEXT: tileloadd (%r10,%rsi), %tmm1 -; AVX512-NEXT: tileloadd (%r9,%rsi), %tmm2 -; AVX512-NEXT: tileloadd (%r8,%rsi), %tmm0 
+; AVX512-NEXT: tileloadd (%r9,%r10), %tmm2 +; AVX512-NEXT: movl $64, %r8d +; AVX512-NEXT: tileloadd (%rsi,%r8), %tmm0 +; AVX512-NEXT: movw $8, %si +; AVX512-NEXT: movabsq $64, %r8 +; AVX512-NEXT: tileloadd 1024(%rsp,%r8), %tmm1 # 1024-byte Folded Reload ; AVX512-NEXT: tdpbssd %tmm2, %tmm1, %tmm0 +; AVX512-NEXT: movl $64, %esi ; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi) -; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; AVX512-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movb %al, %dil -; AVX512-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: ldtilecfg (%rsi) ; AVX512-NEXT: movl $64, %esi ; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-NEXT: movl $buf, %edx diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir new file mode 100644 index 00000000000000..2a300199f61b43 --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir @@ -0,0 +1,177 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s +# +# This case tests a tile PHI that is accessed in a nested manner while its def block is +# not yet visited. 
+# +# BB.5 +# %6 = phi(%3, b%10) <----- +# | | | +# | | | +# BB.6 BB.7 | +# \ / | +# \ / | +# \ / | +# BB.8 ------------- +# %10 = phi(%8, %9) +# +# #define STRIDE 32 +# void foo(int cond, char *buf) { +# __tile1024i a = {16, 64}; +# __tile1024i b = {16, 64}; +# __tile1024i c = {16, 64}; +# +# if (cond) { +# __tile_zero(&c); +# } else { +# __tile_loadd(&c, buf, STRIDE); +# } +# __tile_zero(&a); +# __tile_zero(&b); +# for(int i = 0; i < 10; i++) { +# __tile_dpbssd(&c, a, b); +# if (cond) { +# __tile_zero(&c); +# } else { +# __tile_loadd(&c, buf, STRIDE); +# } +# } +# __tile_stored(buf, STRIDE, c); +# } +--- +name: foo +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr8 } + - { id: 1, class: tile } + - { id: 2, class: tile } + - { id: 3, class: tile } + - { id: 4, class: tile } + - { id: 5, class: tile } + - { id: 6, class: gr32 } + - { id: 7, class: tile } + - { id: 8, class: tile } + - { id: 9, class: tile } + - { id: 10, class: tile } + - { id: 11, class: gr32 } + - { id: 12, class: gr32 } + - { id: 13, class: gr32 } + - { id: 14, class: gr64 } + - { id: 15, class: gr64 } + - { id: 16, class: gr8 } + - { id: 17, class: gr16 } + - { id: 18, class: gr16 } + - { id: 19, class: gr64_nosp } + - { id: 20, class: gr16 } + - { id: 21, class: gr16 } + - { id: 22, class: gr32 } + - { id: 23, class: gr16 } + - { id: 24, class: gr16 } + - { id: 25, class: gr16 } + - { id: 26, class: gr16 } + - { id: 27, class: gr16 } + - { id: 28, class: gr16 } + - { id: 29, class: tile } + - { id: 30, class: gr16 } + - { id: 31, class: gr16 } + - { id: 32, class: gr64_nosp } + - { id: 33, class: gr16 } + - { id: 34, class: gr16 } + - { id: 35, class: gr32 } + - { id: 36, class: gr64_nosp } + - { id: 37, class: gr16 } + - { id: 38, class: gr16 } +liveins: + - { reg: '$edi', virtual-reg: '%12' } + - { reg: '$rsi', virtual-reg: '%14' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $edi, $rsi + + %14:gr64 = COPY $rsi + %12:gr32 = 
COPY $edi + %13:gr32 = COPY killed %12 + %15:gr64 = COPY killed %14 + CMP32ri8 %13, 0, implicit-def $eflags + %16:gr8 = SETCCr 4, implicit $eflags + TEST8ri %16, 1, implicit-def $eflags + JCC_1 %bb.2, 5, implicit $eflags + + bb.1: + %17:gr16 = MOV16ri 64 + %18:gr16 = MOV16ri 16 + %1:tile = PTILEZEROV killed %18, killed %17 + JMP_1 %bb.3 + + bb.2: + %19:gr64_nosp = MOV32ri64 32 + %20:gr16 = MOV16ri 64 + %21:gr16 = MOV16ri 16 + %2:tile = PTILELOADDV killed %21, killed %20, %15, 1, killed %19, 0, $noreg + + bb.3: + ; CHECK: %43:gr16 = PHI %17, %bb.1, %20, %bb.2 + ; CHECK-NEXT: %42:gr16 = PHI %18, %bb.1, %21, %bb.2 + ; CHECK-NEXT: %41:gr64_nosp = PHI %44, %bb.1, %45, %bb.2 + ; CHECK-NEXT: LDTILECFG + + %3:tile = PHI %1, %bb.1, %2, %bb.2 + %25:gr16 = MOV16ri 64 + %26:gr16 = MOV16ri 16 + %4:tile = PTILEZEROV killed %26, killed %25 + %23:gr16 = MOV16ri 64 + %24:gr16 = MOV16ri 16 + %5:tile = PTILEZEROV killed %24, killed %23 + %22:gr32 = MOV32r0 implicit-def $eflags + JMP_1 %bb.5 + + bb.4: + %36:gr64_nosp = MOV32ri64 32 + %37:gr16 = MOV16ri 64 + %38:gr16 = MOV16ri 16 + PTILESTOREDV killed %38, killed %37, %15, 1, killed %36, 0, $noreg, %10 + RET64 + + bb.5: + ; CHECK: %6:gr32 = PHI %22, %bb.3, %35, %bb.8 + ; CHECK-NEXT: %56:gr16 = PHI %43, %bb.3, %60, %bb.8 + ; CHECK-NEXT: %55:gr16 = PHI %42, %bb.3, %59, %bb.8 + ; CHECK-NEXT: %54:gr64_nosp = PHI %57, %bb.3, %58, %bb.8 + ; CHECK-NEXT: LDTILECFG + + %6:gr32 = PHI %22, %bb.3, %35, %bb.8 + %7:tile = PHI %3, %bb.3, %10, %bb.8 + %27:gr16 = MOV16ri 64 + %28:gr16 = MOV16ri 16 + %29:tile = PTDPBSSDV killed %28, %27, %27, %7, %4, %5 + TEST8ri %16, 1, implicit-def $eflags + JCC_1 %bb.7, 5, implicit $eflags + + bb.6: + %30:gr16 = MOV16ri 64 + %31:gr16 = MOV16ri 16 + %8:tile = PTILEZEROV killed %31, killed %30 + JMP_1 %bb.8 + + bb.7: + %32:gr64_nosp = MOV32ri64 32 + %33:gr16 = MOV16ri 64 + %34:gr16 = MOV16ri 16 + %9:tile = PTILELOADDV killed %34, killed %33, %15, 1, killed %32, 0, $noreg + + bb.8: + ; CHECK: %60:gr16 = PHI %30, %bb.6, 
%33, %bb.7 + ; CHECK-NEXT: %59:gr16 = PHI %31, %bb.6, %34, %bb.7 + ; CHECK-NEXT: %58:gr64_nosp = PHI %61, %bb.6, %62, %bb.7 + ; CHECK-NEXT: LDTILECFG + + %10:tile = PHI %8, %bb.6, %9, %bb.7 + %35:gr32 = ADD32ri8 %6, 1, implicit-def $eflags + CMP32ri8 %35, 10, implicit-def $eflags + JCC_1 %bb.4, 4, implicit $eflags + JMP_1 %bb.5 + +... diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir new file mode 100644 index 00000000000000..4cea456e7e9170 --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir @@ -0,0 +1,130 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s +# +# bb.0 +# def %0 +# / \ +# bb.1 bb.2 <--------- +# def %1 %2=phi(%0, %3) | +# \ / | +# bb.3 ------------------- +# %3=phi(%1, %2) +# +# This case tests tile PHIs that depend on each other while their def block is +# not yet visited. +--- +name: foo +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 1, class: tile } + - { id: 2, class: tile } + - { id: 3, class: tile } + - { id: 4, class: tile } + - { id: 12, class: gr32 } + - { id: 13, class: gr32 } + - { id: 16, class: gr8 } + - { id: 17, class: gr16 } + - { id: 18, class: gr16 } + - { id: 19, class: gr64_nosp } + - { id: 22, class: gr32 } + - { id: 23, class: gr16 } + - { id: 24, class: gr16 } +liveins: + - { reg: '$edi', virtual-reg: '%12' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_SET0_:%[0-9]+]]:vr128 = V_SET0 + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 0, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 16, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 16, align 4) + ; CHECK-NEXT: 
MOVUPSmr %stack.1, 1, $noreg, 32, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 32, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 48, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 48, align 4) + ; CHECK-NEXT: MOV8mi %stack.1, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.1, align 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]] + ; CHECK-NEXT: %r0:gr16 = MOV16ri 64 + ; CHECK-NEXT: %c0:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: %t0:tile = PTILEZEROV %r0, %c0 + ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.0, 1, killed [[MOV64ri]], 0, $noreg, %t0 :: (store (s8192) into %stack.0) + ; CHECK-NEXT: CMP32ri8 [[COPY1]], 0, implicit-def $eflags + ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags + ; CHECK-NEXT: TEST8ri [[SETCCr]], 1, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] + ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp 
= MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.2) + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %24, %bb.3 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %23, %bb.3 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %22, %bb.3 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.3) + ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[PHI]], %bb.2 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[PHI1]], %bb.2 + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[PHI2]], %bb.2 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri4]], 0, $noreg + ; CHECK-NEXT: 
[[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.4) + ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: JMP_1 %bb.2 + bb.0.entry: + liveins: $edi + + %12:gr32 = COPY $edi + %13:gr32 = COPY killed %12 + %r0:gr16 = MOV16ri 64 + %c0:gr16 = MOV16ri 16 + %t0:tile = PTILEZEROV killed %r0, killed %c0 + CMP32ri8 %13, 0, implicit-def $eflags + %16:gr8 = SETCCr 4, implicit $eflags + TEST8ri %16, 1, implicit-def $eflags + JCC_1 %bb.2, 5, implicit $eflags + + bb.1: + %17:gr16 = MOV16ri 64 + %18:gr16 = MOV16ri 16 + %1:tile = PTILEZEROV killed %18, killed %17 + JMP_1 %bb.3 + + bb.2: + %2:tile = PHI %t0, %bb.0, %3, %bb.3 + %19:gr64_nosp = MOV32ri64 32 + + bb.3: + %3:tile = PHI %1, %bb.1, %2, %bb.2 + %23:gr16 = MOV16ri 64 + %24:gr16 = MOV16ri 16 + JMP_1 %bb.2 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir new file mode 100644 index 00000000000000..14ff3b2996d622 --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir @@ -0,0 +1,144 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s +# +# bb.0 +# def %0 +# / \ +# bb.1 |-->bb.2 <----------- +# | %2=phi(%0, %3, %4) | +# def %1 -- %5=phi(%0, %3, %2) | +# \ / | +# bb.3 --------------------- +# def %3 +# +# This case tests tile PHIs that depend on each other while their def block is +# not yet visited. 
+--- +name: foo +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 1, class: tile } + - { id: 2, class: tile } + - { id: 3, class: tile } + - { id: 4, class: tile } + - { id: 12, class: gr32 } + - { id: 13, class: gr32 } + - { id: 16, class: gr8 } + - { id: 17, class: gr16 } + - { id: 18, class: gr16 } + - { id: 19, class: gr64_nosp } + - { id: 22, class: gr32 } + - { id: 23, class: gr16 } + - { id: 24, class: gr16 } +liveins: + - { reg: '$edi', virtual-reg: '%12' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_SET0_:%[0-9]+]]:vr128 = V_SET0 + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 0, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 16, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 16, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 32, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 32, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 48, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 48, align 4) + ; CHECK-NEXT: MOV8mi %stack.1, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.1, align 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]] + ; CHECK-NEXT: %r0:gr16 = MOV16ri 64 + ; CHECK-NEXT: %c0:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: %t0:tile = PTILEZEROV %r0, %c0 + ; 
CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.0, 1, killed [[MOV64ri]], 0, $noreg, %t0 :: (store (s8192) into %stack.0) + ; CHECK-NEXT: CMP32ri8 [[COPY1]], 0, implicit-def $eflags + ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags + ; CHECK-NEXT: TEST8ri [[SETCCr]], 1, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV killed [[MOV16ri1]], killed [[MOV16ri]] + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %11, %bb.3, %17, %bb.2 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %12, %bb.3, %18, %bb.2 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.0, %31, %bb.3, %32, %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %11, %bb.3, %17, %bb.2 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %12, %bb.3, %18, %bb.2 + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %24, %bb.3, %25, %bb.2 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: 
[[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri1]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri2]], 0, $noreg + ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri3]], [[MOV16ri2]] + ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.3) + ; CHECK-NEXT: TEST8ri [[SETCCr]], 1, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[LEA64r5:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]] + ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = 
MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.2) + ; CHECK-NEXT: JMP_1 %bb.2 + bb.0.entry: + liveins: $edi + + %12:gr32 = COPY $edi + %13:gr32 = COPY killed %12 + %r0:gr16 = MOV16ri 64 + %c0:gr16 = MOV16ri 16 + %t0:tile = PTILEZEROV %r0, %c0 + CMP32ri8 %13, 0, implicit-def $eflags + %16:gr8 = SETCCr 4, implicit $eflags + TEST8ri %16, 1, implicit-def $eflags + JCC_1 %bb.2, 5, implicit $eflags + + bb.1: + %17:gr16 = MOV16ri 64 + %18:gr16 = MOV16ri 16 + %1:tile = PTILEZEROV killed %18, killed %17 + JMP_1 %bb.3 + + bb.2: + %2:tile = PHI %t0, %bb.0, %3, %bb.3, %4, %bb.2 + %5:tile = PHI %t0, %bb.0, %3, %bb.3, %2, %bb.2 + %25:gr16 = MOV16ri 64 + %26:gr16 = MOV16ri 16 + %4:tile = PTILEZEROV killed %26, killed %25 + TEST8ri %16, 1, implicit-def $eflags + JCC_1 %bb.2, 5, implicit $eflags + + bb.3: + %23:gr16 = MOV16ri 64 + %24:gr16 = MOV16ri 16 + %3:tile = PTILEZEROV killed %24, killed %23 + JMP_1 %bb.2 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir new file mode 100644 index 00000000000000..c797ce764c2c9c --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir @@ -0,0 +1,154 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass=fastpretileconfig -o - %s | FileCheck %s + +# Test spill/reload across basic block. 
+ +--- +name: foo +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr16 } + - { id: 1, class: gr16 } + - { id: 2, class: tile } + - { id: 3, class: gr64_nosp } + - { id: 4, class: gr64 } + - { id: 5, class: tile } + - { id: 6, class: tile } + - { id: 7, class: tile } + - { id: 8, class: gr32 } + - { id: 9, class: vr512 } +frameInfo: + maxAlignment: 16 +stack: + - { id: 0, size: 1024, alignment: 16 } + - { id: 1, size: 64, alignment: 4 } +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 + ; CHECK-NEXT: VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.4, align 4) + ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) + ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 + ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 + ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] + ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3) + ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2) + ; 
CHECK-NEXT: %row:gr16 = MOV16ri 32 + ; CHECK-NEXT: %col:gr16 = MOV16ri 8 + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV %row, %col, [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2) + ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.3, 1, killed [[MOV64ri3]], 0, $noreg :: (load (s8192) from %stack.3) + ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[MOV16ri1]], [[MOV16ri]], [[MOV16ri]], killed [[PTILELOADDV1]], killed [[PTILELOADDV3]], killed [[PTILELOADDV2]] + ; CHECK-NEXT: PTILESTOREDV killed [[MOV16ri1]], killed [[MOV16ri]], killed [[LEA64r]], 1, killed [[MOV32ri64_]], 0, $noreg, killed [[PTDPBSSDV]] + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: $eax = COPY killed [[MOV32r0_]] + ; CHECK-NEXT: RET 0, killed $eax + bb.0.entry: + %0:gr16 = MOV16ri 32 + %1:gr16 = MOV16ri 8 + %2:tile = PTILEZEROV %1, %0 + %3:gr64_nosp = MOV32ri64 32 + %4:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + %5:tile = PTILELOADDV %1, %0, %4, 1, %3, 0, $noreg + %row:gr16 = MOV16ri 32 + %col:gr16 = MOV16ri 8 + JMP_1 %bb.1 + bb.1: + %6:tile = PTILELOADDV %row, %col, %4, 1, %3, 0, $noreg + %7:tile = PTDPBSSDV %1, %0, %0, killed %6, killed %2, killed %5 + PTILESTOREDV killed %1, killed %0, killed %4, 1, killed %3, 0, $noreg, killed %7 + %8:gr32 = MOV32r0 implicit-def dead $eflags + $eax = 
COPY killed %8 + RET 0, killed $eax + +... + +# Test tile copy fold +--- +name: copy +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr16 } + - { id: 1, class: gr16 } + - { id: 2, class: tile } + - { id: 3, class: gr64_nosp } + - { id: 4, class: gr64 } + - { id: 5, class: tile } + - { id: 6, class: tile } + - { id: 7, class: tile } + - { id: 8, class: gr32 } + - { id: 9, class: vr512 } +frameInfo: + maxAlignment: 16 +stack: + - { id: 0, size: 1024, alignment: 16 } + - { id: 1, size: 64, alignment: 4 } +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: copy + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 + ; CHECK-NEXT: VMOVUPSZmr %stack.4, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.4, align 4) + ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) + ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 + ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 + ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] + ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3) + ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri1]], 0, 
$noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2) + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: %t:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2) + ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.3, 1, killed [[MOV64ri3]], 0, $noreg :: (load (s8192) from %stack.3) + ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[MOV16ri1]], [[MOV16ri]], [[MOV16ri]], killed [[PTILELOADDV1]], killed [[PTILELOADDV2]], killed %t + ; CHECK-NEXT: PTILESTOREDV killed [[MOV16ri1]], killed [[MOV16ri]], killed [[LEA64r]], 1, killed [[MOV32ri64_]], 0, $noreg, killed [[PTDPBSSDV]] + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: $eax = COPY killed [[MOV32r0_]] + ; CHECK-NEXT: RET 0, killed $eax + bb.0.entry: + %0:gr16 = MOV16ri 32 + %1:gr16 = MOV16ri 8 + %2:tile = PTILEZEROV %1, %0 + %3:gr64_nosp = MOV32ri64 32 + %4:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + %5:tile = PTILELOADDV %1, %0, %4, 1, %3, 0, $noreg + JMP_1 %bb.1 + bb.1: + %6:tile = PTILELOADDV %1, %0, %4, 1, %3, 0, $noreg + %t:tile = COPY %5 + %7:tile = PTDPBSSDV %1, %0, %0, killed %6, killed %2, killed %t + PTILESTOREDV killed %1, killed %0, killed %4, 1, killed %3, 0, $noreg, killed %7 + %8:gr32 = MOV32r0 implicit-def dead $eflags + $eax = COPY killed %8 + RET 0, killed $eax + +... 
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir new file mode 100644 index 00000000000000..ff0fdbe3aaf71a --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir @@ -0,0 +1,146 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-unknown" + + @buf = dso_local global [1024 x i8] zeroinitializer, align 16 + @buf2 = dso_local global [1024 x i8] zeroinitializer, align 16 + + define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) local_unnamed_addr #0 { + entry: + %tobool.not = icmp eq i32 %cond, 0 + br i1 %tobool.not, label %if.else, label %if.then + + if.then: ; preds = %entry + %0 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32) + %1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32) + %2 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32) + br label %if.end + + if.else: ; preds = %entry + %3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32) + %4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32) + %5 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32) + br label %if.end + + if.end: ; preds = %if.else, %if.then + %a.sroa.1094.0.in = phi x86_amx [ %3, %if.else ], [ %0, %if.then ] + %b.sroa.1069.0.in = phi x86_amx [ 
%4, %if.else ], [ %1, %if.then ] + %c.sroa.1044.0.in = phi x86_amx [ %5, %if.else ], [ %2, %if.then ] + %6 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %c.sroa.1044.0.in, x86_amx %a.sroa.1094.0.in, x86_amx %b.sroa.1069.0.in) + tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx %6) + ret void + } + + declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64) #1 + declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #1 + declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) #1 + + attributes #0 = { "target-features"="+amx-int8,+avx512f" } + attributes #1 = { nounwind "target-features"="+amx-int8,+avx512f" } + +... +--- +name: test_api +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: tile } + - { id: 1, class: tile } + - { id: 2, class: tile } + - { id: 3, class: tile } + - { id: 4, class: tile } + - { id: 5, class: tile } + - { id: 6, class: tile } + - { id: 7, class: tile } + - { id: 8, class: tile } + - { id: 9, class: gr32 } + - { id: 10, class: gr32 } + - { id: 11, class: gr32 } + - { id: 12, class: gr16 } + - { id: 13, class: gr16 } + - { id: 14, class: gr64 } + - { id: 15, class: gr64_nosp } + - { id: 16, class: gr16 } + - { id: 17, class: gr64 } + - { id: 18, class: gr64_nosp } + - { id: 19, class: gr16 } + - { id: 20, class: gr16 } + - { id: 21, class: tile } + - { id: 22, class: gr64 } + - { id: 23, class: gr64_nosp } +liveins: + - { reg: '$edi', virtual-reg: '%9' } + - { reg: '$esi', virtual-reg: '%10' } + - { reg: '$edx', virtual-reg: '%11' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $edi, $esi, $edx + + ; CHECK: {{%.*}}:vr512 = AVX512_512_SET0 + ; CHECK-NEXT: VMOVUPSZmr %stack.3, 1, $noreg, 0, $noreg, {{%.*}} + + %11:gr32 = COPY killed 
$edx + %10:gr32 = COPY killed $esi + %9:gr32 = COPY killed $edi + %13:gr16 = COPY killed %11.sub_16bit + %12:gr16 = COPY killed %10.sub_16bit + TEST32rr killed %9, %9, implicit-def $eflags + JCC_1 %bb.2, 4, implicit killed $eflags + JMP_1 %bb.1 + + bb.1.if.then: + %14:gr64 = MOV32ri64 @buf + %15:gr64_nosp = MOV32ri64 32 + %16:gr16 = MOV16ri 8 + ; CHECK: LDTILECFG + %0:tile = PTILELOADDV %12, %16, %14, 1, %15, 0, $noreg + %1:tile = PTILELOADDV killed %16, %13, %14, 1, %15, 0, $noreg + %2:tile = PTILELOADDV %12, %13, killed %14, 1, killed %15, 0, $noreg + JMP_1 %bb.3 + + bb.2.if.else: + %17:gr64 = MOV32ri64 @buf2 + %18:gr64_nosp = MOV32ri64 32 + %19:gr16 = MOV16ri 8 + ; CHECK: LDTILECFG + %3:tile = PTILELOADDV %12, %19, %17, 1, %18, 0, $noreg + %4:tile = PTILELOADDV killed %19, %13, %17, 1, %18, 0, $noreg + %5:tile = PTILELOADDV %12, %13, killed %17, 1, killed %18, 0, $noreg + + bb.3.if.end: + + ; CHECK: bb.3.if.end + ; CHECK-NEXT: %44:gr16 = PHI %16, %bb.1, %19, %bb.2 + ; CHECK-NEXT: %43:gr16 = PHI %12, %bb.1, %12, %bb.2 + ; CHECK-NEXT: %42:gr64_nosp = PHI %45, %bb.1, %46, %bb.2 + ; CHECK-NEXT: %38:gr16 = PHI %13, %bb.1, %13, %bb.2 + ; CHECK-NEXT: %37:gr16 = PHI %16, %bb.1, %19, %bb.2 + ; CHECK-NEXT: %36:gr64_nosp = PHI %39, %bb.1, %40, %bb.2 + ; CHECK-NEXT: %32:gr16 = PHI %13, %bb.1, %13, %bb.2 + ; CHECK-NEXT: %31:gr16 = PHI %12, %bb.1, %12, %bb.2 + ; CHECK-NEXT: %30:gr64_nosp = PHI %33, %bb.1, %34, %bb.2 + ; CHECK-NEXT: LDTILECFG + ; CHECK-NEXT: %47:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: %6:tile = PTILELOADDV %43, %44, %42, 1, killed %47, 0, $noreg + ; CHECK-NEXT: %41:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: %7:tile = PTILELOADDV %37, %38, %36, 1, killed %41, 0, $noreg + ; CHECK-NEXT: %35:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: %8:tile = PTILELOADDV %31, %32, %30, 1, killed %35, 0, $noreg + + %6:tile = PHI %0, %bb.1, %3, %bb.2 + %7:tile = PHI %1, %bb.1, %4, %bb.2 + %8:tile = PHI %2, %bb.1, %5, %bb.2 + %20:gr16 = MOV16ri 8 + %21:tile = PTDPBSSDV %12, %13, killed %20, 
killed %8, killed %6, killed %7 + %22:gr64 = MOV32ri64 @buf + %23:gr64_nosp = MOV32ri64 32 + PTILESTOREDV killed %12, killed %13, killed %22, 1, killed %23, 0, $noreg, killed %21 + RET 0 + +... diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir new file mode 100644 index 00000000000000..53877b5da84a30 --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir @@ -0,0 +1,61 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass=fastpretileconfig -o - %s | FileCheck %s + +# Test the case which has TILELOADD being mixed in pseudo AMX instruction +... +--- +name: main +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64_nosp } + - { id: 1, class: gr64 } + - { id: 2, class: gr16 } + - { id: 3, class: gr16 } + - { id: 4, class: tile } + - { id: 5, class: tile } + - { id: 6, class: tile } + - { id: 7, class: tile } + - { id: 8, class: gr32 } + - { id: 9, class: vr512 } +frameInfo: + maxAlignment: 16 +stack: + - { id: 0, size: 1024, alignment: 16 } + - { id: 1, size: 64, alignment: 4 } +machineFunctionInfo: {} +body: | + bb.0.entry: + ; CHECK-LABEL: name: main + ; CHECK: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 + ; CHECK-NEXT: VMOVUPSZmr %stack.2, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.2, align 4) + ; CHECK-NEXT: MOV8mi %stack.2, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.2, align 4) + ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 + ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 + ; CHECK-NEXT: LDTILECFG %stack.2, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def
$tmm7 :: (load store (s512) on %stack.2, align 4) + ; CHECK-NEXT: $tmm0 = TILELOADD [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[MOV16ri1]], [[MOV16ri]], [[MOV16ri]], killed [[PTILELOADDV2]], killed [[PTILELOADDV]], killed [[PTILELOADDV1]] + ; CHECK-NEXT: PTILESTOREDV killed [[MOV16ri1]], killed [[MOV16ri]], killed [[LEA64r]], 1, killed [[MOV32ri64_]], 0, $noreg, killed [[PTDPBSSDV]] + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: $eax = COPY killed [[MOV32r0_]] + ; CHECK-NEXT: RET 0, killed $eax + %0:gr64_nosp = MOV32ri64 32 + %1:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + %2:gr16 = MOV16ri 32 + %3:gr16 = MOV16ri 8 + $tmm0 = TILELOADD %1, 1, %0, 0, $noreg + %4:tile = PTILELOADDV %3, %2, %1, 1, %0, 0, $noreg + %5:tile = PTILELOADDV %3, %2, %1, 1, %0, 0, $noreg + %6:tile = PTILELOADDV %3, %2, %1, 1, %0, 0, $noreg + %7:tile = PTDPBSSDV %3, %2, %2, killed %6, killed %4, killed %5 + PTILESTOREDV killed %3, killed %2, killed %1, 1, killed %0, 0, $noreg, killed %7 + %8:gr32 = MOV32r0 implicit-def dead $eflags + $eax = COPY killed %8 + RET 0, killed $eax + +... 
diff --git a/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll b/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll index a76a1add0676a2..7e0fd385239968 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll @@ -66,30 +66,29 @@ define void @foo(i8 *%buf) nounwind { ; AVX512-O0-NEXT: pushq %rbp ; AVX512-O0-NEXT: movq %rsp, %rbp ; AVX512-O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; AVX512-O0-NEXT: subq $2048, %rsp # imm = 0x800 -; AVX512-O0-NEXT: movq %rsp, %rdx +; AVX512-O0-NEXT: subq $3072, %rsp # imm = 0xC00 ; AVX512-O0-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-O0-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) +; AVX512-O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) ; AVX512-O0-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-O0-NEXT: movb $8, {{[0-9]+}}(%rsp) -; AVX512-O0-NEXT: movw $32, {{[0-9]+}}(%rsp) -; AVX512-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; AVX512-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; AVX512-O0-NEXT: movw $32, %cx ; AVX512-O0-NEXT: movw $8, %ax +; AVX512-O0-NEXT: # implicit-def: $al +; AVX512-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX512-O0-NEXT: tilezero %tmm0 ; AVX512-O0-NEXT: movl $64, %esi +; AVX512-O0-NEXT: movw $32, %cx +; AVX512-O0-NEXT: movw $8, %ax ; AVX512-O0-NEXT: tilestored %tmm0, (%rdx,%rsi) -; AVX512-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; AVX512-O0-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) -; AVX512-O0-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX512-O0-NEXT: movw $8, %cx -; AVX512-O0-NEXT: # kill: def $cl killed $cl killed $cx -; AVX512-O0-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; AVX512-O0-NEXT: movw $32, {{[0-9]+}}(%rsp) -; AVX512-O0-NEXT: ldtilecfg (%rax) ; AVX512-O0-NEXT: movl $64, %esi ; AVX512-O0-NEXT: movw $32, %cx ; AVX512-O0-NEXT: movw $8, %ax +; AVX512-O0-NEXT: # implicit-def: $al +; AVX512-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX512-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX512-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; 
AVX512-O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX512-O0-NEXT: movl $1024, %edx # imm = 0x400 ; AVX512-O0-NEXT: movw $32, %cx @@ -106,32 +105,30 @@ define void @foo(i8 *%buf) nounwind { ; AVX2-O0-NEXT: pushq %rbp ; AVX2-O0-NEXT: movq %rsp, %rbp ; AVX2-O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; AVX2-O0-NEXT: subq $2048, %rsp # imm = 0x800 -; AVX2-O0-NEXT: movq %rsp, %rdx +; AVX2-O0-NEXT: subq $3072, %rsp # imm = 0xC00 ; AVX2-O0-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX2-O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: movb $8, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: movw $32, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; AVX2-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; AVX2-O0-NEXT: movw $32, %cx ; AVX2-O0-NEXT: movw $8, %ax +; AVX2-O0-NEXT: # implicit-def: $al +; AVX2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: tilezero %tmm0 ; AVX2-O0-NEXT: movl $64, %esi +; AVX2-O0-NEXT: movw $32, %cx +; AVX2-O0-NEXT: movw $8, %ax ; AVX2-O0-NEXT: tilestored %tmm0, (%rdx,%rsi) -; AVX2-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; AVX2-O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: movb $1, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: movw $8, %cx -; AVX2-O0-NEXT: # kill: def $cl killed $cl killed $cx -; AVX2-O0-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: movw $32, {{[0-9]+}}(%rsp) -; AVX2-O0-NEXT: ldtilecfg (%rax) ; AVX2-O0-NEXT: movl $64, %esi ; AVX2-O0-NEXT: movw $32, %cx ; AVX2-O0-NEXT: movw $8, %ax +; AVX2-O0-NEXT: # implicit-def: $al +; AVX2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; AVX2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; AVX2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; AVX2-O0-NEXT: movl $1024, %edx # imm = 0x400 ; AVX2-O0-NEXT: movw $32, %cx @@ -148,36 +145,32 @@ define void @foo(i8 *%buf) 
nounwind { ; SSE2-O0-NEXT: pushq %rbp ; SSE2-O0-NEXT: movq %rsp, %rbp ; SSE2-O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; SSE2-O0-NEXT: subq $2048, %rsp # imm = 0x800 -; SSE2-O0-NEXT: movq %rsp, %rdx +; SSE2-O0-NEXT: subq $3072, %rsp # imm = 0xC00 ; SSE2-O0-NEXT: xorps %xmm0, %xmm0 ; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: movb $1, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movb $8, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movw $32, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; SSE2-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; SSE2-O0-NEXT: movw $32, %cx ; SSE2-O0-NEXT: movw $8, %ax +; SSE2-O0-NEXT: # implicit-def: $al +; SSE2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; SSE2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; SSE2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: tilezero %tmm0 ; SSE2-O0-NEXT: movl $64, %esi +; SSE2-O0-NEXT: movw $32, %cx +; SSE2-O0-NEXT: movw $8, %ax ; SSE2-O0-NEXT: tilestored %tmm0, (%rdx,%rsi) -; SSE2-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movb $1, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movw $8, %cx -; SSE2-O0-NEXT: # kill: def $cl killed $cl killed $cx -; SSE2-O0-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: movw $32, {{[0-9]+}}(%rsp) -; SSE2-O0-NEXT: ldtilecfg (%rax) ; SSE2-O0-NEXT: movl $64, %esi ; SSE2-O0-NEXT: movw $32, %cx ; SSE2-O0-NEXT: movw $8, %ax +; SSE2-O0-NEXT: # implicit-def: $al +; SSE2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; SSE2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; SSE2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; SSE2-O0-NEXT: movl $1024, %edx # imm = 0x400 ; SSE2-O0-NEXT: movw $32, %cx diff --git 
a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 54eecb113540f9..43d0c0839940e9 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -20,7 +20,6 @@ ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Lower AMX intrinsics ; CHECK-NEXT: Lower AMX type for load/store -; CHECK-NEXT: Pre AMX Tile Config ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering @@ -42,6 +41,7 @@ ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: X86 EFLAGS copy lowering ; CHECK-NEXT: X86 DynAlloca Expander +; CHECK-NEXT: Fast Tile Register Preconfigure ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Fast Register Allocator From 1868f3c17db971bde79f54530aa8686f04bff287 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 24 May 2022 05:44:48 +0000 Subject: [PATCH 312/908] [gn build] Port 496156ac57da --- llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn index 6604ae5deff72a..083dd1b1ecdd0a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn @@ -89,6 +89,7 @@ static_library("LLVMX86CodeGen") { "X86EvexToVex.cpp", "X86ExpandPseudo.cpp", "X86FastISel.cpp", + "X86FastPreTileConfig.cpp", "X86FastTileConfig.cpp", "X86FixupBWInsts.cpp", "X86FixupLEAs.cpp", From cb8681a2b3ad406f1215a17fd32989211c561e3e Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 16 May 2022 10:57:32 +0100 Subject: [PATCH 313/908] [RISCV] Fix RVV stack frame alignment bugs This patch addresses several alignment issues in the stack frame when RVV objects are taken into account. 
One bug is that the RVV stack was never guaranteed to keep the alignment of the stack *as a whole*. We must maintain a 16-byte aligned stack at all times, especially when calling other functions. With the standard V extension, this is conveniently happening since VLEN is at least 128 and always 16-byte aligned. However, we support Zvl64b which does not guarantee this. To fix this, the RVV stack size is rounded up to be aligned to 16 bytes. This in practice generally makes us allocate a stack sized at least 2*VLEN in size, and a multiple of 2. |------------------------------| -- <-- FP | 8-byte callee-save | | | |------------------------------| | | | one VLENB-sized RVV object | | | |------------------------------| | | | 8-byte local variable | | | |------------------------------| -- <-- SP (must be aligned to 16) In the example above, with Zvl64b we are decrementing SP by 12 bytes which does not leave SP correctly aligned. We therefore introduce an extra VLENB-sized amount used for alignment. This would therefore ensure the total stack size was 16 bytes (48 for Zvl128b, 80 for Zvl256b, etc): |------------------------------| -- <-- FP | 8-byte callee-save | | | |------------------------------| | | | one VLENB-sized padding obj | | | | one VLENB-sized RVV object | | | |------------------------------| | | | 8-byte local variable | | | |------------------------------| -- <-- SP A new RVV invariant has been introduced in this patch, which is that the base of the RVV stack itself is now always aligned to 16 bytes, not 8 as before. This keeps us more in line with the scalar stack and should be easier to reason about. The calculation of the RVV padding has thus changed to be the amount required to align the scalar local variable section to the RVV section's alignment. 
This amount is further rounded up when setting up the initial stack to keep everything aligned: |------------------------------| -- <-- FP | 8-byte callee-save | |------------------------------| | | | RVV objects | | (aligned to at least 16) | | | |------------------------------| | RVV padding of 8 bytes | |------------------------------| | 8-byte local variable | |------------------------------| -- <-- SP In the example above, it's clear that we need 8 bytes of padding to keep the RVV section aligned to 16 when using SP. But to keep SP *itself* aligned to 16 we can't decrement the initial stack pointer by 24 - we have to round up to 32. With the RVV section correctly aligned, the second bug fixed by this patch is that RVV objects themselves are now correctly aligned. We were previously only guaranteeing an alignment of 8 bytes, even if they required a higher alignment. This is relatively simple and in practice we see more rounding up of VLEN amounts to account for alignment in between objects: |------------------------------| | RVV object (aligned to 16) | |------------------------------| | no padding necessary | |------------------------------| | 2*VLENB RVV object (align 16)| |------------------------------| | VLENB alignment padding | |------------------------------| | RVV object (align 32) | |------------------------------| | 3*VLENB alignment padding | |------------------------------| | VLENB RVV object (align 32) | |------------------------------| -- <-- base of RVV section Note that a lot of the regressions in codegen owing to the new alignment rules are correct but actually only strictly necessary for Zvl64b (and Zvl32b but that's not really supported). I plan a follow-up patch to take the known VLEN into account when padding for alignment. 
Reviewed By: StephenFan Differential Revision: https://reviews.llvm.org/D125787 --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 155 ++++--- llvm/lib/Target/RISCV/RISCVFrameLowering.h | 5 +- .../Target/RISCV/RISCVMachineFunctionInfo.h | 5 + .../RISCV/rvv/access-fixed-objects-by-rvv.ll | 2 + .../RISCV/rvv/addi-scalable-offset.mir | 3 + .../CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll | 417 +++++++----------- .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 136 +++--- llvm/test/CodeGen/RISCV/rvv/calling-conv.ll | 32 +- .../test/CodeGen/RISCV/rvv/emergency-slot.mir | 2 +- .../rvv/fixed-vectors-insert-subvector.ll | 12 +- .../RISCV/rvv/fixed-vectors-vpscatter.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/localvar.ll | 72 +-- llvm/test/CodeGen/RISCV/rvv/memory-args.ll | 28 +- .../CodeGen/RISCV/rvv/no-reserved-frame.ll | 23 +- .../RISCV/rvv/rv32-spill-vector-csr.ll | 20 +- .../CodeGen/RISCV/rvv/rv32-spill-vector.ll | 8 + .../CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 4 + .../RISCV/rvv/rv64-spill-vector-csr.ll | 16 +- .../CodeGen/RISCV/rvv/rv64-spill-vector.ll | 4 + .../CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 4 + .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 18 +- .../test/CodeGen/RISCV/rvv/rvv-framelayout.ll | 42 +- .../CodeGen/RISCV/rvv/rvv-stack-align.mir | 64 ++- .../CodeGen/RISCV/rvv/scalar-stack-align.ll | 20 +- .../rvv/wrong-stack-offset-for-rvv-object.mir | 37 +- .../RISCV/rvv/wrong-stack-slot-rv32.mir | 2 +- .../RISCV/rvv/wrong-stack-slot-rv64.mir | 10 +- 27 files changed, 567 insertions(+), 582 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 122ab22aff8fd6..5b789a97e75f25 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -250,6 +250,7 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { // Determines the size of the frame and maximum call frame size. 
void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo(); // Get the number of bytes to allocate from the FrameInfo. uint64_t FrameSize = MFI.getStackSize(); @@ -262,6 +263,28 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { // Update frame info. MFI.setStackSize(FrameSize); + + // When using SP or BP to access stack objects, we may require extra padding + // to ensure the bottom of the RVV stack is correctly aligned within the main + // stack. We calculate this as the amount required to align the scalar local + // variable section up to the RVV alignment. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) { + int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() - + RVFI->getVarArgsSaveSize(); + if (auto RVVPadding = + offsetToAlignment(ScalarLocalVarSize, RVFI->getRVVStackAlign())) + RVFI->setRVVPadding(RVVPadding); + } +} + +// Returns the stack size including RVV padding (when required), rounded back +// up to the required stack alignment. +uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding( + const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo(); + return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign()); } void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, @@ -401,7 +424,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // FIXME (note copied from Lanai): This appears to be overallocating. Needs // investigation. Get the number of bytes to allocate from the FrameInfo. 
- uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -482,7 +505,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // Emit the second SP adjustment after saving callee saved registers. if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + uint64_t SecondSPAdjustAmount = + getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, @@ -492,8 +516,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // don't emit an sp-based .cfi_def_cfa_offset if (!hasFP(MF)) { // Emit ".cfi_def_cfa_offset StackSize" - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( + nullptr, getStackSizeWithRVVPadding(MF))); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); @@ -583,7 +607,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, if (!CSI.empty()) LastFrameDestroy = std::prev(MBBI, CSI.size()); - uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -603,7 +627,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + uint64_t SecondSPAdjustAmount = + 
getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); @@ -661,8 +686,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, if (FirstSPAdjustAmount) Offset += StackOffset::getFixed(FirstSPAdjustAmount); else - Offset += - StackOffset::getFixed(MFI.getStackSize() + RVFI->getRVVPadding()); + Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF)); } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { // If the stack was realigned, the frame pointer is set in order to allow // SP to be restored, so we need another base register to record the stack @@ -678,18 +702,21 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // | realignment (the size of | | | // | this area is not counted | | | // | in MFI.getStackSize()) | | | - // |--------------------------| -- | - // | Padding after RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | // |--------------------------| -- |-- MFI.getStackSize() + // | RVV alignment padding | | | + // | (not counted in | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | + // |--------------------------| -- | // | RVV objects | | | // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding before RVV | | | + // | padding before RVV | | | // | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() or in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- | // | scalar local variables | | <----' // |--------------------------| -- <-- BP @@ -707,32 +734,38 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // | this area is not counted | | | // | in MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding after RVV | | | + // | RVV alignment padding | | | 
// | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- |-- MFI.getStackSize() // | RVV objects | | | // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding before RVV | | | + // | padding before RVV | | | // | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() or in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- | // | scalar local variables | | <----' // |--------------------------| -- <-- SP } // The total amount of padding surrounding RVV objects is described by // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to 8 bytes. + // objects to the required alignment. if (MFI.getStackID(FI) == TargetStackID::Default) { Offset += StackOffset::getFixed(MFI.getStackSize()); if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - Offset += StackOffset::get( - alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), - RVFI->getRVVStackSize()); + // Ensure the base of the RVV stack is correctly aligned: add on the + // alignment padding. 
+ int ScalarLocalVarSize = MFI.getStackSize() - + RVFI->getCalleeSavedStackSize() + + RVFI->getRVVPadding(); + Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); } } else { FrameReg = RI->getFrameRegister(MF); @@ -755,8 +788,14 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // |--------------------------| // | VarSize objects | // |--------------------------| <-- SP - if (MFI.getStackID(FI) == TargetStackID::ScalableVector) + if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + // We don't expect any extra RVV alignment padding, as the stack size + // and RVV object sections should be correctly aligned in their own + // right. + assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) && + "Inconsistent stack layout"); Offset -= StackOffset::getFixed(MFI.getStackSize()); + } } else { // When using SP to access frame objects, we need to add RVV stack size. // @@ -766,15 +805,17 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // |--------------------------| | | // | callee-saved registers | | | // |--------------------------| -- | - // | Padding after RVV | | | + // | RVV alignment padding | | | // | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- | // | RVV objects | | |-- MFI.getStackSize() // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding before RVV | | | + // | padding before RVV | | | // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | @@ -783,23 +824,22 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // // The total amount of padding surrounding RVV objects is described by // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to 8 bytes. 
+ // objects to the required alignment. if (MFI.getStackID(FI) == TargetStackID::Default) { if (MFI.isFixedObjectIndex(FI)) { - Offset += - StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding() + - RVFI->getLibCallStackSize(), - RVFI->getRVVStackSize()); + Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) + + RVFI->getLibCallStackSize(), + RVFI->getRVVStackSize()); } else { Offset += StackOffset::getFixed(MFI.getStackSize()); } } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - int ScalarLocalVarSize = MFI.getStackSize() - - RVFI->getCalleeSavedStackSize() - - RVFI->getVarArgsSaveSize(); - Offset += StackOffset::get( - alignTo(ScalarLocalVarSize, 8), - RVFI->getRVVStackSize()); + // Ensure the base of the RVV stack is correctly aligned: add on the + // alignment padding. + int ScalarLocalVarSize = + MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - + RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding(); + Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); } } } @@ -852,7 +892,7 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, } } -int64_t +std::pair RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const { // Create a buffer of RVV objects to allocate. SmallVector ObjectsToAllocate; @@ -868,19 +908,33 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const { // Allocate all RVV locals and spills int64_t Offset = 0; + // The minimum alignment is 16 bytes. + Align RVVStackAlign(16); for (int FI : ObjectsToAllocate) { // ObjectSize in bytes. int64_t ObjectSize = MFI.getObjectSize(FI); + auto ObjectAlign = std::max(Align(8), MFI.getObjectAlign(FI)); // If the data type is the fractional vector type, reserve one vector // register for it. if (ObjectSize < 8) ObjectSize = 8; - // Currently, all scalable vector types are aligned to 8 bytes. 
- Offset = alignTo(Offset + ObjectSize, 8); + Offset = alignTo(Offset + ObjectSize, ObjectAlign); MFI.setObjectOffset(FI, -Offset); + // Update the maximum alignment of the RVV stack section + RVVStackAlign = std::max(RVVStackAlign, ObjectAlign); } - return Offset; + // Ensure the alignment of the RVV stack. Since we want the most-aligned + // object right at the bottom (i.e., any padding at the top of the frame), + // readjust all RVV objects down by the alignment padding. + uint64_t StackSize = Offset; + if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) { + StackSize += AlignmentPadding; + for (int FI : ObjectsToAllocate) + MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding); + } + + return std::make_pair(StackSize, RVVStackAlign); } static bool hasRVVSpillWithFIs(MachineFunction &MF, const RISCVInstrInfo &TII) { @@ -901,8 +955,13 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( const TargetRegisterClass *RC = &RISCV::GPRRegClass; auto *RVFI = MF.getInfo(); - int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI); + int64_t RVVStackSize; + Align RVVStackAlign; + std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MFI); + RVFI->setRVVStackSize(RVVStackSize); + RVFI->setRVVStackAlign(RVVStackAlign); + const RISCVInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // estimateStackSize has been observed to under-estimate the final stack @@ -941,16 +1000,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( Size += MFI.getObjectSize(FrameIdx); } RVFI->setCalleeSavedStackSize(Size); - - // Padding required to keep the RVV stack aligned to 8 bytes within the main - // stack. We only need this when using SP or BP to access stack objects. - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - if (RVVStackSize && (!hasFP(MF) || TRI->hasStackRealignment(MF)) && - Size % 8 != 0) { - // Because we add the padding to the size of the stack, adding - // getStackAlign() will keep it aligned. 
- RVFI->setRVVPadding(getStackAlign().value()); - } } static bool hasRVVFrameObject(const MachineFunction &MF) { @@ -1025,7 +1074,7 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { const auto *RVFI = MF.getInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); - uint64_t StackSize = MFI.getStackSize(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); // Disable SplitSPAdjust if save-restore libcall is used. The callee-saved // registers will be pushed by the save-restore libcalls, so we don't have to diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index bc3ace786272d2..c905cf9f1df2bc 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -30,6 +30,8 @@ class RISCVFrameLowering : public TargetFrameLowering { void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + uint64_t getStackSizeWithRVVPadding(const MachineFunction &MF) const; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; @@ -79,7 +81,8 @@ class RISCVFrameLowering : public TargetFrameLowering { void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount, MachineInstr::MIFlag Flag) const; - int64_t assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const; + std::pair + assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const; }; } #endif diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index a549ec211a9408..abb7407a8c5705 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -57,6 +57,8 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { 
unsigned LibCallStackSize = 0; /// Size of RVV stack. uint64_t RVVStackSize = 0; + /// Alignment of RVV stack. + Align RVVStackAlign; /// Padding required to keep RVV stack aligned within the main stack. uint64_t RVVPadding = 0; /// Size of stack frame to save callee saved registers @@ -92,6 +94,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { uint64_t getRVVStackSize() const { return RVVStackSize; } void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; } + Align getRVVStackAlign() const { return RVVStackAlign; } + void setRVVStackAlign(Align StackAlign) { RVVStackAlign = StackAlign; } + uint64_t getRVVPadding() const { return RVVPadding; } void setRVVPadding(uint64_t Padding) { RVVPadding = Padding; } diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll index 3e370c4e9a0bd1..d9e6ec6d361bcb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll @@ -34,6 +34,7 @@ define @access_fixed_and_vector_objects(i64 *%val) { ; RV64IV-NEXT: addi sp, sp, -544 ; RV64IV-NEXT: .cfi_def_cfa_offset 544 ; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: sub sp, sp, a0 ; RV64IV-NEXT: addi a0, sp, 24 ; RV64IV-NEXT: vl1re64.v v8, (a0) @@ -43,6 +44,7 @@ define @access_fixed_and_vector_objects(i64 *%val) { ; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; RV64IV-NEXT: vadd.vv v8, v8, v9 ; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add sp, sp, a0 ; RV64IV-NEXT: addi sp, sp, 544 ; RV64IV-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir index 9b7c1eaf502de4..63b3ead09b65aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -38,16 +38,19 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa 
$x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240 ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB + ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) ; CHECK-NEXT: $x11 = PseudoReadVLENB + ; CHECK-NEXT: $x11 = SLLI killed $x11, 1 ; CHECK-NEXT: $x10 = LUI 1048575 ; CHECK-NEXT: $x10 = ADDIW killed $x10, 1824 ; CHECK-NEXT: $x10 = ADD $x8, killed $x10 ; CHECK-NEXT: $x10 = SUB killed $x10, killed $x11 ; CHECK-NEXT: VS1R_V killed renamable $v8, killed renamable $x10 ; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB + ; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 1 ; CHECK-NEXT: $x2 = frame-destroy ADD $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 240 ; CHECK-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3) diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll index 327f28781a9e43..49c46942c26329 100644 --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -1,15 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA +; RUN: | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA +; RUN: | FileCheck %s define void @lmul1() nounwind { ; CHECK-LABEL: lmul1: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, 
a0 ; CHECK-NEXT: ret %v = alloca @@ -33,18 +35,18 @@ define void @lmul2() nounwind { define void @lmul4() nounwind { ; CHECK-LABEL: lmul4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v = alloca ret void @@ -53,45 +55,33 @@ define void @lmul4() nounwind { define void @lmul8() nounwind { ; CHECK-LABEL: lmul8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v = alloca ret void } define void @lmul1_and_2() nounwind { -; NOZBA-LABEL: lmul1_and_2: -; 
NOZBA: # %bb.0: -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul1_and_2: -; ZBA: # %bb.0: -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: add sp, sp, a0 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -100,19 +90,18 @@ define void @lmul1_and_2() nounwind { define void @lmul2_and_4() nounwind { ; CHECK-LABEL: lmul2_and_4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 6 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -120,103 +109,57 @@ define void @lmul2_and_4() nounwind { } define void @lmul1_and_4() nounwind { -; NOZBA-LABEL: lmul1_and_4: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -32 
-; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; NOZBA-NEXT: addi s0, sp, 32 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 2 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: andi sp, sp, -32 -; NOZBA-NEXT: addi sp, s0, -32 -; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; NOZBA-NEXT: addi sp, sp, 32 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul1_and_4: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -32 -; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; ZBA-NEXT: addi s0, sp, 32 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh2add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: andi sp, sp, -32 -; ZBA-NEXT: addi sp, s0, -32 -; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; ZBA-NEXT: addi sp, sp, 32 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul2_and_1() nounwind { -; NOZBA-LABEL: lmul2_and_1: -; NOZBA: # %bb.0: -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul2_and_1: -; ZBA: # %bb.0: -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: sub sp, 
sp, a0 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: add sp, sp, a0 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul2_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul4_and_1() nounwind { -; NOZBA-LABEL: lmul4_and_1: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -32 -; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; NOZBA-NEXT: addi s0, sp, 32 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 2 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: andi sp, sp, -32 -; NOZBA-NEXT: addi sp, s0, -32 -; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; NOZBA-NEXT: addi sp, sp, 32 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul4_and_1: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -32 -; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; ZBA-NEXT: addi s0, sp, 32 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh2add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: andi sp, sp, -32 -; ZBA-NEXT: addi sp, s0, -32 -; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; ZBA-NEXT: addi sp, sp, 32 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul4_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: 
ret %v1 = alloca %v2 = alloca ret void @@ -225,19 +168,18 @@ define void @lmul4_and_1() nounwind { define void @lmul4_and_2() nounwind { ; CHECK-LABEL: lmul4_and_2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 6 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -247,19 +189,18 @@ define void @lmul4_and_2() nounwind { define void @lmul4_and_2_x2_0() nounwind { ; CHECK-LABEL: lmul4_and_2_x2_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 12 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 
32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -271,19 +212,19 @@ define void @lmul4_and_2_x2_0() nounwind { define void @lmul4_and_2_x2_1() nounwind { ; CHECK-LABEL: lmul4_and_2_x2_1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 12 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v3 = alloca @@ -294,35 +235,19 @@ define void @lmul4_and_2_x2_1() nounwind { define void @gpr_and_lmul1_and_2() nounwind { -; NOZBA-LABEL: gpr_and_lmul1_and_2: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -16 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: li a0, 3 -; NOZBA-NEXT: sd a0, 8(sp) -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: addi sp, sp, 16 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: gpr_and_lmul1_and_2: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -16 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: li a0, 3 -; ZBA-NEXT: sd a0, 8(sp) -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: add sp, sp, a0 -; ZBA-NEXT: addi sp, sp, 16 -; 
ZBA-NEXT: ret +; CHECK-LABEL: gpr_and_lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: sd a0, 8(sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -331,42 +256,23 @@ define void @gpr_and_lmul1_and_2() nounwind { } define void @gpr_and_lmul1_and_4() nounwind { -; NOZBA-LABEL: gpr_and_lmul1_and_4: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -32 -; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; NOZBA-NEXT: addi s0, sp, 32 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 2 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: andi sp, sp, -32 -; NOZBA-NEXT: li a0, 3 -; NOZBA-NEXT: sd a0, 8(sp) -; NOZBA-NEXT: addi sp, s0, -32 -; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; NOZBA-NEXT: addi sp, sp, 32 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: gpr_and_lmul1_and_4: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -32 -; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; ZBA-NEXT: addi s0, sp, 32 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh2add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: andi sp, sp, -32 -; ZBA-NEXT: li a0, 3 -; ZBA-NEXT: sd a0, 8(sp) -; ZBA-NEXT: addi sp, s0, -32 -; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; ZBA-NEXT: addi sp, sp, 32 -; ZBA-NEXT: ret +; CHECK-LABEL: gpr_and_lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; 
CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: sd a0, 8(sp) +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -377,19 +283,18 @@ define void @gpr_and_lmul1_and_4() nounwind { define void @lmul_1_2_4_8() nounwind { ; CHECK-LABEL: lmul_1_2_4_8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -401,19 +306,18 @@ define void @lmul_1_2_4_8() nounwind { define void @lmul_1_2_4_8_x2_0() nounwind { ; CHECK-LABEL: lmul_1_2_4_8_x2_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 30 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: sub 
sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -429,19 +333,18 @@ define void @lmul_1_2_4_8_x2_0() nounwind { define void @lmul_1_2_4_8_x2_1() nounwind { ; CHECK-LABEL: lmul_1_2_4_8_x2_1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 30 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v8 = alloca %v7 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index 6bae5356728671..458e5b93bac7b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -286,9 +286,9 @@ declare @ext3(, , @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( %x, %y, i32 %w) { ; RV32-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 
28(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 @@ -298,12 +298,12 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ext2( %y, %x, i32 %w, i32 2) ret %t @@ -352,9 +352,9 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32( %x, %y, %z, i32 %w) { ; RV32-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 48 @@ -367,66 +367,66 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 128 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: add a3, a0, a1 ; RV32-NEXT: vl8re32.v v24, (a3) ; RV32-NEXT: vl8re32.v v0, (a2) -; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: addi a2, sp, 128 ; RV32-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vl8re32.v v0, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: vs8r.v v16, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: vs8r.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; 
RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a2, a1, 16 +; RV32-NEXT: addi a2, a1, 128 ; RV32-NEXT: li a5, 42 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: vs8r.v v0, (a1) -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: call ext3@plt ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 144 ; RV32-NEXT: ret ; ; RV64-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: .cfi_def_cfa_offset 144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 48 @@ -439,59 +439,59 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 24 +; RV64-NEXT: addi a3, a3, 128 ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV64-NEXT: add a3, a0, a1 ; 
RV64-NEXT: vl8re32.v v24, (a3) ; RV64-NEXT: vl8re32.v v0, (a2) -; RV64-NEXT: addi a2, sp, 24 +; RV64-NEXT: addi a2, sp, 128 ; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vl8re32.v v0, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vs8r.v v16, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 5 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vs8r.v v24, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 5 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a2, a1, 24 +; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: li a5, 42 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 24 +; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: vs8r.v v0, (a1) -; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: addi a1, sp, 128 ; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 24 +; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: call ext3@plt ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 48 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 ; RV64-NEXT: ret %t = call fastcc @ext3( %z, %y, %x, i32 %w, i32 42) ret %t @@ -520,16 +520,16 
@@ define fastcc @vector_arg_indirect_stack(i32 %0, i32 %1, i32 define fastcc @pass_vector_arg_indirect_stack( %x, %y, %z) { ; RV32-LABEL: pass_vector_arg_indirect_stack: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: add a1, a1, a0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu ; RV32-NEXT: vmv.v.i v8, 0 @@ -537,10 +537,10 @@ define fastcc @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, zeroinitializer, zeroinitializer, zeroinitializer, i32 8) ret %s diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll index 2d0436d3b8dcbf..2875e060401838 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -23,58 +23,58 @@ define @callee_scalable_vector_split_indirect( @caller_scalable_vector_split_indirect( %x) { ; RV32-LABEL: caller_scalable_vector_split_indirect: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: 
addi a1, sp, 32 +; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: vs8r.v v16, (a0) -; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: call callee_scalable_vector_split_indirect@plt ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 144 ; RV32-NEXT: ret ; ; RV64-LABEL: caller_scalable_vector_split_indirect: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: .cfi_def_cfa_offset 144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: sub sp, sp, a0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: addi a1, sp, 128 ; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: vs8r.v v16, (a0) -; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: call callee_scalable_vector_split_indirect@plt ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 ; RV64-NEXT: ret %c = alloca i64 %a = call @callee_scalable_vector_split_indirect( zeroinitializer, %x) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index 53e4a95794ae0f..dab67d6c5e075c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -83,7 +83,7 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -272 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 51 + ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 52 ; CHECK-NEXT: $x10 = frame-setup MUL killed $x10, killed $x11 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-setup ANDI $x2, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index d584b47e70d5f2..90761f2b0cffaa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -559,21 +559,21 @@ define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, * %ou define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, * %out) { ; CHECK-LABEL: insert_v2i64_nxv16i64_hi: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 80 +; CHECK-NEXT: addi a0, sp, 128 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: addi a2, sp, 64 ; CHECK-NEXT: add a2, a2, a0 ; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: addi a2, sp, 64 ; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: vs8r.v v8, (a0) @@ -581,7 +581,7 @@ define void 
@insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, * %out ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv %v = call @llvm.experimental.vector.insert.v2i64.nxv16i64( undef, <2 x i64> %sv, i64 8) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index 53e34f3735c2a6..082b3739809d60 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1833,8 +1833,8 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, double* %base, ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a4, a3, 3 -; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu @@ -1882,8 +1882,8 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, double* %base, ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll index 0f7fa0b9a44efb..329ba354d08707 100644 --- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -85,13 +85,13 @@ define void @local_var_m2() { define void @local_var_m4() { ; RV64IV-LABEL: local_var_m4: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -32 -; RV64IV-NEXT: .cfi_def_cfa_offset 32 -; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: 
addi sp, sp, -48 +; RV64IV-NEXT: .cfi_def_cfa_offset 48 +; RV64IV-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: addi s0, sp, 48 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 3 @@ -100,14 +100,14 @@ define void @local_var_m4() { ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 2 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 16 +; RV64IV-NEXT: addi a0, a0, 32 ; RV64IV-NEXT: vl4r.v v8, (a0) -; RV64IV-NEXT: addi a0, sp, 16 +; RV64IV-NEXT: addi a0, sp, 32 ; RV64IV-NEXT: vl4r.v v8, (a0) -; RV64IV-NEXT: addi sp, s0, -32 -; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: addi sp, s0, -48 +; RV64IV-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 48 ; RV64IV-NEXT: ret %local0 = alloca %local1 = alloca @@ -119,13 +119,13 @@ define void @local_var_m4() { define void @local_var_m8() { ; RV64IV-LABEL: local_var_m8: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -64 -; RV64IV-NEXT: .cfi_def_cfa_offset 64 -; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64IV-NEXT: addi sp, sp, -80 +; RV64IV-NEXT: .cfi_def_cfa_offset 80 +; RV64IV-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 64 +; RV64IV-NEXT: addi s0, sp, 80 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 4 @@ -134,14 +134,14 @@ define void @local_var_m8() { ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 3 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 
+; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v8, (a0) -; RV64IV-NEXT: addi a0, sp, 48 +; RV64IV-NEXT: addi a0, sp, 64 ; RV64IV-NEXT: vl8r.v v8, (a0) -; RV64IV-NEXT: addi sp, s0, -64 -; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 64 +; RV64IV-NEXT: addi sp, s0, -80 +; RV64IV-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 80 ; RV64IV-NEXT: ret %local0 = alloca %local1 = alloca @@ -233,15 +233,15 @@ define void @local_var_m2_with_varsize_object(i64 %n) { define void @local_var_m2_with_bp(i64 %n) { ; RV64IV-LABEL: local_var_m2_with_bp: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -256 -; RV64IV-NEXT: .cfi_def_cfa_offset 256 -; RV64IV-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s1, 232(sp) # 8-byte Folded Spill +; RV64IV-NEXT: addi sp, sp, -272 +; RV64IV-NEXT: .cfi_def_cfa_offset 272 +; RV64IV-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s1, 248(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 ; RV64IV-NEXT: .cfi_offset s1, -24 -; RV64IV-NEXT: addi s0, sp, 256 +; RV64IV-NEXT: addi s0, sp, 272 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a1, vlenb ; RV64IV-NEXT: slli a1, a1, 2 @@ -256,22 +256,22 @@ define void @local_var_m2_with_bp(i64 %n) { ; RV64IV-NEXT: csrr a2, vlenb ; RV64IV-NEXT: slli a2, a2, 1 ; RV64IV-NEXT: add a2, s1, a2 -; RV64IV-NEXT: addi a2, a2, 232 +; RV64IV-NEXT: addi a2, a2, 240 ; RV64IV-NEXT: call notdead2@plt ; RV64IV-NEXT: lw a0, 124(s1) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add a0, s1, a0 -; RV64IV-NEXT: addi a0, a0, 232 +; RV64IV-NEXT: addi a0, a0, 240 ; RV64IV-NEXT: vl2r.v v8, (a0) -; RV64IV-NEXT: addi a0, s1, 232 +; RV64IV-NEXT: addi a0, s1, 240 
; RV64IV-NEXT: vl2r.v v8, (a0) ; RV64IV-NEXT: lw a0, 120(s1) -; RV64IV-NEXT: addi sp, s0, -256 -; RV64IV-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s1, 232(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 256 +; RV64IV-NEXT: addi sp, s0, -272 +; RV64IV-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s1, 248(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 272 ; RV64IV-NEXT: ret %1 = alloca i8, i64 %n %2 = alloca i32, align 128 diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll index 1e0950eeae6de8..22397c28ca8cd2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll @@ -27,13 +27,13 @@ define @callee( %arg0, % define @caller() { ; RV64IV-LABEL: caller: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -64 -; RV64IV-NEXT: .cfi_def_cfa_offset 64 -; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64IV-NEXT: addi sp, sp, -80 +; RV64IV-NEXT: .cfi_def_cfa_offset 80 +; RV64IV-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 64 +; RV64IV-NEXT: addi s0, sp, 80 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 5 @@ -43,26 +43,26 @@ define @caller() { ; RV64IV-NEXT: li a1, 24 ; RV64IV-NEXT: mul a0, a0, a1 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v8, (a0) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 4 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v16, (a0) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 3 ; RV64IV-NEXT: 
add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v24, (a0) -; RV64IV-NEXT: addi a0, sp, 48 -; RV64IV-NEXT: addi a1, sp, 48 +; RV64IV-NEXT: addi a0, sp, 64 +; RV64IV-NEXT: addi a1, sp, 64 ; RV64IV-NEXT: vs8r.v v24, (a1) ; RV64IV-NEXT: call callee@plt -; RV64IV-NEXT: addi sp, s0, -64 -; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 64 +; RV64IV-NEXT: addi sp, s0, -80 +; RV64IV-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 80 ; RV64IV-NEXT: ret %local0 = alloca %local1 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll index de2bc6353c820a..3c2fda4a7f8045 100644 --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -5,17 +5,18 @@ define signext i32 @foo(i32 signext %aa) #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -80 -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -96 +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: addi s0, sp, 80 +; CHECK-NEXT: addi s0, sp, 96 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: andi sp, sp, -8 ; CHECK-NEXT: mv s1, sp @@ -40,11 +41,11 @@ define signext i32 @foo(i32 signext %aa) #0 { ; CHECK-NEXT: call gfunc@plt ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: 
li a0, 0 -; CHECK-NEXT: addi sp, s0, -80 -; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 80 +; CHECK-NEXT: addi sp, s0, -96 +; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret entry: %aa.addr = alloca i32, align 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll index 1fe6141f006ed2..251e4bea5c3b71 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -45,20 +45,20 @@ define @foo( %a, @foo( %a, @llvm.riscv.vfadd.nxv1f64.nxv1f64( undef, %a, %b, i32 %gvl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll index cb21513f702c5e..f332cd0c7beeb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll @@ -9,6 +9,7 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -17,6 +18,7 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -25,6 +27,7 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; 
SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -33,6 +36,7 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -48,6 +52,7 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -56,6 +61,7 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -64,6 +70,7 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -72,6 +79,7 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll index 166f96b8f5a8e7..9ef58a3b92d6ee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -9,6 +9,7 @@ define @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind { ; SPILL-O0: # %bb.0: # 
%entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -20,6 +21,7 @@ define @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -63,6 +65,7 @@ define @spill_zvlsseg_nxv2i32(i32* %base, i32 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -74,6 +77,7 @@ define @spill_zvlsseg_nxv2i32(i32* %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll index 453aabd3466c27..c19fd63b8aad95 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -9,28 +9,28 @@ define @foo( %a, %b, %c, i64 %gvl) nounwind ; SPILL-O0-LABEL: foo: ; SPILL-O0: # %bb.0: -; SPILL-O0-NEXT: addi sp, sp, -32 -; SPILL-O0-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -48 +; SPILL-O0-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: slli a1, a1, 1 ; SPILL-O0-NEXT: sub sp, sp, a1 ; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; 
SPILL-O0-NEXT: addi a1, a1, 24 +; SPILL-O0-NEXT: addi a1, a1, 32 ; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; SPILL-O0-NEXT: vfadd.vv v8, v8, v9 -; SPILL-O0-NEXT: addi a0, sp, 24 +; SPILL-O0-NEXT: addi a0, sp, 32 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: addi a1, sp, 24 +; SPILL-O0-NEXT: addi a1, sp, 32 ; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: addi a1, a1, 24 +; SPILL-O0-NEXT: addi a1, a1, 32 ; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload @@ -39,8 +39,8 @@ define @foo( %a, @spill_lmul_1( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -17,6 +18,7 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -25,6 +27,7 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -33,6 +36,7 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; 
SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll index 21b5833c627b87..99e0dd97d8dcac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -9,6 +9,7 @@ define @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -20,6 +21,7 @@ define @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -63,6 +65,7 @@ define @spill_zvlsseg_nxv2i32(i32* %base, i64 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -74,6 +77,7 @@ define @spill_zvlsseg_nxv2i32(i32* %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll index 35f7411a921ae0..d7be0ea154e915 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll @@ -22,34 +22,34 @@ define @bar(i32 %0, 
i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, define @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 4 ; CHECK-NEXT: sub sp, sp, t0 -; CHECK-NEXT: addi t0, sp, 40 +; CHECK-NEXT: addi t0, sp, 64 ; CHECK-NEXT: sd t0, 8(sp) ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 3 ; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 40 +; CHECK-NEXT: addi t0, t0, 64 ; CHECK-NEXT: sd t0, 0(sp) -; CHECK-NEXT: addi t0, sp, 40 +; CHECK-NEXT: addi t0, sp, 64 ; CHECK-NEXT: vs8r.v v8, (t0) ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 3 ; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 40 +; CHECK-NEXT: addi t0, t0, 64 ; CHECK-NEXT: vs8r.v v8, (t0) ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: call bar@plt ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %ret = call @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x, %x, %x, %x) ret %ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll index 858ce54610cf7b..87b4e60aaca2cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll @@ -9,8 +9,7 @@ define void @rvv_vla(i64 %n, i64 %i) nounwind { ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 32 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 1 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: 
slli a2, a2, 2 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, 15 @@ -22,8 +21,7 @@ define void @rvv_vla(i64 %n, i64 %i) nounwind { ; CHECK-NEXT: addi a2, a2, -32 ; CHECK-NEXT: vl1re64.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 1 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: sub a2, s0, a2 ; CHECK-NEXT: addi a2, a2, -32 ; CHECK-NEXT: vl2re64.v v8, (a2) @@ -56,12 +54,12 @@ define void @rvv_overaligned() nounwind { ; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 128 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 1 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 112 ; CHECK-NEXT: vl1re64.v v8, (a0) @@ -88,14 +86,13 @@ define void @rvv_overaligned() nounwind { define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind { ; CHECK-LABEL: rvv_vla_and_overaligned: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -128 -; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 128 +; CHECK-NEXT: addi sp, sp, -144 +; CHECK-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 144 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 1 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: mv s1, sp @@ -105,21 +102,22 @@ define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind { ; CHECK-NEXT: sub a0, sp, a0 ; CHECK-NEXT: mv sp, a0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli 
a2, a2, 1 +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: add a2, s1, a2 -; CHECK-NEXT: addi a2, a2, 104 +; CHECK-NEXT: addi a2, a2, 112 ; CHECK-NEXT: vl1re64.v v8, (a2) -; CHECK-NEXT: addi a2, s1, 104 +; CHECK-NEXT: addi a2, s1, 112 ; CHECK-NEXT: vl2re64.v v8, (a2) ; CHECK-NEXT: lw a2, 64(s1) ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: lw a0, 0(a0) -; CHECK-NEXT: addi sp, s0, -128 -; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 128 +; CHECK-NEXT: addi sp, s0, -144 +; CHECK-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 144 ; CHECK-NEXT: ret %overaligned = alloca i32, align 64 %vla.addr = alloca i32, i64 %n diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir index f89638e4b58621..2d77024872004e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir @@ -34,20 +34,20 @@ ; ; RV64-LABEL: rvv_stack_align8: ; RV64: # %bb.0: - ; RV64-NEXT: addi sp, sp, -32 - ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill + ; RV64-NEXT: addi sp, sp, -48 + ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: sub sp, sp, a0 - ; RV64-NEXT: addi a0, sp, 24 + ; RV64-NEXT: addi a0, sp, 32 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: addi a2, sp, 8 ; RV64-NEXT: call extern@plt ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 - ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload - ; RV64-NEXT: addi sp, sp, 32 + ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-NEXT: addi sp, sp, 48 ; RV64-NEXT: ret %a = alloca , align 8 %b = alloca i64 @@ -56,8 +56,6 @@ ret 
void } - ; FIXME: The alloca is not correctly aligned to 16 bytes. - define void @rvv_stack_align16() #0 { ; RV32-LABEL: rvv_stack_align16: ; RV32: # %bb.0: @@ -79,20 +77,20 @@ ; ; RV64-LABEL: rvv_stack_align16: ; RV64: # %bb.0: - ; RV64-NEXT: addi sp, sp, -32 - ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill + ; RV64-NEXT: addi sp, sp, -48 + ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: sub sp, sp, a0 - ; RV64-NEXT: addi a0, sp, 24 + ; RV64-NEXT: addi a0, sp, 32 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: addi a2, sp, 8 ; RV64-NEXT: call extern@plt ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 - ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload - ; RV64-NEXT: addi sp, sp, 32 + ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-NEXT: addi sp, sp, 48 ; RV64-NEXT: ret %a = alloca , align 16 %b = alloca i64 @@ -101,47 +99,45 @@ ret void } - ; FIXME: The alloca is not correctly aligned to 32 bytes. 
- define void @rvv_stack_align32() #0 { ; RV32-LABEL: rvv_stack_align32: ; RV32: # %bb.0: - ; RV32-NEXT: addi sp, sp, -32 - ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill - ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill - ; RV32-NEXT: addi s0, sp, 32 + ; RV32-NEXT: addi sp, sp, -48 + ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill + ; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill + ; RV32-NEXT: addi s0, sp, 48 ; RV32-NEXT: csrr a0, vlenb - ; RV32-NEXT: slli a0, a0, 1 + ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: andi sp, sp, -32 - ; RV32-NEXT: addi a0, sp, 24 + ; RV32-NEXT: addi a0, sp, 32 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: addi a2, sp, 8 ; RV32-NEXT: call extern@plt - ; RV32-NEXT: addi sp, s0, -32 - ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload - ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload - ; RV32-NEXT: addi sp, sp, 32 + ; RV32-NEXT: addi sp, s0, -48 + ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload + ; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload + ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: rvv_stack_align32: ; RV64: # %bb.0: - ; RV64-NEXT: addi sp, sp, -32 - ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill - ; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill - ; RV64-NEXT: addi s0, sp, 32 + ; RV64-NEXT: addi sp, sp, -48 + ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill + ; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill + ; RV64-NEXT: addi s0, sp, 48 ; RV64-NEXT: csrr a0, vlenb - ; RV64-NEXT: slli a0, a0, 1 + ; RV64-NEXT: slli a0, a0, 2 ; RV64-NEXT: sub sp, sp, a0 ; RV64-NEXT: andi sp, sp, -32 - ; RV64-NEXT: addi a0, sp, 16 + ; RV64-NEXT: addi a0, sp, 32 ; RV64-NEXT: addi a1, sp, 8 ; RV64-NEXT: mv a2, sp ; RV64-NEXT: call extern@plt - ; RV64-NEXT: addi sp, s0, -32 - ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload - ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload - ; RV64-NEXT: addi sp, sp, 32 + ; RV64-NEXT: addi sp, s0, -48 + ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; 
RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload + ; RV64-NEXT: addi sp, sp, 48 ; RV64-NEXT: ret %a = alloca , align 32 %b = alloca i64 diff --git a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll index d51a18df799d85..44ad494eed0c7e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll +++ b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll @@ -8,11 +8,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV64 -; FIXME: The stack is assumed and required to be aligned to 16 bytes, but we -; only ensure an 8-byte alignment for the size of the section containing RVV -; objects. After establishing sp, on zve64x the stack is only 8-byte aligned. -; This is wrong in and of itself, but we can see that this also has the effect -; that the 16-byte-aligned object at the bottom of the stack is misaligned. +; FIXME: We are over-aligning the stack on V, wasting stack space. define i64* @scalar_stack_align16() nounwind { ; RV32-LABEL: scalar_stack_align16: @@ -20,11 +16,13 @@ define i64* @scalar_stack_align16() nounwind { ; RV32-NEXT: addi sp, sp, -32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: call extern@plt ; RV32-NEXT: mv a0, sp ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: add sp, sp, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -32,17 +30,19 @@ define i64* @scalar_stack_align16() nounwind { ; ; RV64-LABEL: scalar_stack_align16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: call 
extern@plt ; RV64-NEXT: mv a0, sp ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add sp, sp, a1 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 32 ; RV64-NEXT: ret %a = alloca %c = alloca i64, align 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir index 4342cb71a859f9..bab91ed3e7e5f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir @@ -4,17 +4,19 @@ # Stack layout of this program # |--------------------------| -- <-- Incoming SP # | a7 (Vaarg) | -# | ------------------------ | -- <-- New SP + vlenb + 56 +# | ------------------------ | -- <-- New SP + vlenb + 72 # | a6 (Vaarg) | -# | ------------------------ | -- <-- New SP + vlenb + 48 +# | ------------------------ | -- <-- New SP + vlenb + 64 # | ra (Callee-saved reg) | -# | ------------------------ | -- <-- New SP + vlenb + 40 +# | ------------------------ | -- <-- New SP + vlenb + 56 # | s0 (Callee-saved reg) | -# | ------------------------ | -- <-- New SP + vlenb + 32 +# | ------------------------ | -- <-- New SP + vlenb + 48 # | s1 (Callee-saved reg) | -# | ------------------------ | -- <-- New SP + vlenb + 24 +# | ------------------------ | -- <-- New SP + vlenb + 40 +# | 8 bytes of padding | +# | ------------------------ | -- <-- New SP + vlenb # | v8 (RVV objects) | -# | ------------------------ | -- <-- New SP + 24 +# | ------------------------ | -- <-- New SP + 32 # | buf1 | # |--------------------------| -- <-- New SP + 16 # | Stack ID 5 | @@ -122,7 +124,7 @@ body: | ; CHECK-NEXT: - { id: 0, name: buf1, type: default, offset: -48, size: 1, alignment: 8, ; CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, ; CHECK-NEXT: debug-info-variable: 
'', debug-info-expression: '', debug-info-location: '' } - ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true, ; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } ; CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8, @@ -144,27 +146,30 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $x11, $x14, $x16, $x17, $x1, $x8, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -64 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 64 - ; CHECK-NEXT: SD killed $x1, $x2, 40 :: (store (s64) into %stack.2) - ; CHECK-NEXT: SD killed $x8, $x2, 32 :: (store (s64) into %stack.3) - ; CHECK-NEXT: SD killed $x9, $x2, 24 :: (store (s64) into %stack.4) + ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -80 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 80 + ; CHECK-NEXT: SD killed $x1, $x2, 56 :: (store (s64) into %stack.2) + ; CHECK-NEXT: SD killed $x8, $x2, 48 :: (store (s64) into %stack.3) + ; CHECK-NEXT: SD killed $x9, $x2, 40 :: (store (s64) into %stack.4) ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -24 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x8, -32 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x9, -40 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB + ; CHECK-NEXT: $x10 = frame-setup SLLI killed $x10, 1 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: renamable $x8 = COPY $x14 ; CHECK-NEXT: renamable $x9 = COPY $x11 ; CHECK-NEXT: $x10 = PseudoReadVLENB + ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10 - ; CHECK-NEXT: SD killed renamable $x17, killed $x10, 56 :: (store (s64)) + ; CHECK-NEXT: SD killed renamable $x17, killed $x10, 72 :: (store (s64)) ; 
CHECK-NEXT: $x10 = PseudoReadVLENB + ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10 - ; CHECK-NEXT: SD killed renamable $x16, killed $x10, 48 :: (store (s64) into %fixed-stack.1, align 16) + ; CHECK-NEXT: SD killed renamable $x16, killed $x10, 64 :: (store (s64) into %fixed-stack.1, align 16) ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $v8 = PseudoVMV_V_I_MF8 0, 2, 3 /* e8 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: $x10 = ADDI $x2, 24 + ; CHECK-NEXT: $x10 = ADDI $x2, 32 ; CHECK-NEXT: PseudoVSPILL_M1 killed renamable $v8, killed $x10 :: (store unknown-size into %stack.1, align 8) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.while.cond: @@ -179,7 +184,7 @@ body: | ; CHECK-NEXT: liveins: $x8, $x9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: $x10 = ADDI $x2, 24 + ; CHECK-NEXT: $x10 = ADDI $x2, 32 ; CHECK-NEXT: renamable $v8 = PseudoVRELOAD_M1 killed $x10 :: (load unknown-size from %stack.1, align 8) ; CHECK-NEXT: PseudoVSE8_V_MF8 killed renamable $v8, renamable $x8, 2, 3 /* e8 */, implicit $vl, implicit $vtype :: (store (s16) into %ir.0, align 1) ; CHECK-NEXT: $x10 = COPY renamable $x9 diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir index 202612ca147f19..3a5f7f7a863d36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir @@ -40,7 +40,7 @@ ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: andi sp, sp, -32 ; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill - ; CHECK-NEXT: addi a0, sp, 56 + ; CHECK-NEXT: addi a0, sp, 64 ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi sp, s0, -80 ; CHECK-NEXT: lw ra, 76(sp) # 4-byte Folded Reload diff --git 
a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir index 994857ba81cb33..16441e223e0996 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir @@ -9,19 +9,19 @@ define void @foo() #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: addi sp, sp, -32 - ; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill + ; CHECK-NEXT: addi sp, sp, -48 + ; CHECK-NEXT: sd s9, 40(sp) # 8-byte Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: sd a0, 16(sp) # 8-byte Folded Spill - ; CHECK-NEXT: addi a0, sp, 24 + ; CHECK-NEXT: addi a0, sp, 32 ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 - ; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload - ; CHECK-NEXT: addi sp, sp, 32 + ; CHECK-NEXT: ld s9, 40(sp) # 8-byte Folded Reload + ; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret entry: ret void From 08c9fb8447108fd436bd342a573181c624485608 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 19 May 2022 14:47:40 +0100 Subject: [PATCH 314/908] [RISCV] Ensure the entire stack is aligned to the RVV stack alignment This patch fixes another bug in the RVV frame lowering. While some frame objects with non-default stack IDs (such as scalable-vector alloca instructions) are considered in the target-independent max alignment calculations, others (for example, during calling-convention lowering) are not. This means we'd occasionally align the base of the stack to only 16 bytes, with no way to ensure that the RVV section contained within that is aligned to anything higher. 
Reviewed By: StephenFan Differential Revision: https://reviews.llvm.org/D125973 --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 5 +++ .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 44 ++++++++++--------- llvm/test/CodeGen/RISCV/rvv/calling-conv.ll | 14 +++--- .../rvv/fixed-vectors-emergency-slot.mir | 4 +- .../rvv/fixed-vectors-insert-subvector.ll | 7 +-- .../CodeGen/RISCV/rvv/no-reserved-frame.ll | 2 +- .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 16 ++++--- 7 files changed, 53 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 5b789a97e75f25..f77443386eb9d8 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -962,6 +962,11 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( RVFI->setRVVStackSize(RVVStackSize); RVFI->setRVVStackAlign(RVVStackAlign); + // Ensure the entire stack is aligned to at least the RVV requirement: some + // scalable-vector object alignments are not considered by the + // target-independent code. 
+ MFI.ensureMaxAlignment(RVVStackAlign); + const RISCVInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // estimateStackSize has been observed to under-estimate the final stack diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index 458e5b93bac7b2..c0cdad9a59b079 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -290,9 +290,12 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV32-NEXT: .cfi_def_cfa_offset 144 ; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: addi s0, sp, 144 +; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: andi sp, sp, -128 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a3, a2, a1 @@ -414,10 +419,7 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: call ext3@plt -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, s0, -144 ; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 144 ; RV32-NEXT: ret @@ -428,10 +430,13 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV64-NEXT: .cfi_def_cfa_offset 144 ; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: addi s0, sp, 144 +; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 48 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: 
slli a1, a1, 3 ; RV64-NEXT: add a3, a2, a1 @@ -486,10 +491,7 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: call ext3@plt -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 48 -; RV64-NEXT: mul a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, s0, -144 ; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 144 ; RV64-NEXT: ret @@ -524,9 +526,12 @@ define fastcc @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @caller_scalable_vector_split_indirect( @caller_scalable_vector_split_indirect( @caller_scalable_vector_split_indirect( @caller_scalable_vector_split_indirect(* %psv, * %out ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -64 ; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, sp, 128 @@ -578,9 +581,7 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, * %out ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: vs8r.v v16, (a1) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -64 ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll index 3c2fda4a7f8045..bedcfd6667f0c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -18,7 +18,7 @@ define signext i32 @foo(i32 signext %aa) #0 { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, 
a1 -; CHECK-NEXT: andi sp, sp, -8 +; CHECK-NEXT: andi sp, sp, -16 ; CHECK-NEXT: mv s1, sp ; CHECK-NEXT: lw t0, 44(s1) ; CHECK-NEXT: lw a2, 40(s1) diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll index d7be0ea154e915..1c517580bad9e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll @@ -26,28 +26,30 @@ define @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: addi s0, sp, 80 +; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 4 ; CHECK-NEXT: sub sp, sp, t0 -; CHECK-NEXT: addi t0, sp, 64 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: mv s1, sp +; CHECK-NEXT: addi t0, s1, 64 ; CHECK-NEXT: sd t0, 8(sp) ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: add t0, s1, t0 ; CHECK-NEXT: addi t0, t0, 64 ; CHECK-NEXT: sd t0, 0(sp) -; CHECK-NEXT: addi t0, sp, 64 +; CHECK-NEXT: addi t0, s1, 64 ; CHECK-NEXT: vs8r.v v8, (t0) ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: add t0, s1, t0 ; CHECK-NEXT: addi t0, t0, 64 ; CHECK-NEXT: vs8r.v v8, (t0) ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: call bar@plt -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -80 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret From be84f91f87a787ebfa8a346e936a5c88bc87b047 Mon Sep 17 00:00:00 2001 From: Lian Wang Date: Tue, 24 May 2022 07:12:31 +0000 Subject: [PATCH 315/908] [LegalizeTypes][VP] Fix OpNo in WidenVecOp_VP_SCATTER Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D126276 --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6deba8135b2e47..2a280a86aea1e4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5945,7 +5945,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) { Mask = GetWidenedMask(Mask, WideEC); WideMemVT = EVT::getVectorVT(*DAG.getContext(), VPSC->getMemoryVT().getScalarType(), WideEC); - } else if (OpNo == 4) { + } else if (OpNo == 3) { // Just widen the index. It's allowed to have extra elements. Index = GetWidenedVector(Index); } else From 81c648a3d909cf0c1c1c635309f35b93d07307d1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 18 May 2022 17:09:36 +0200 Subject: [PATCH 316/908] [LoopUnroll] Freeze tripcount rather than condition This is a followup to D125754. We introduce two branches, one before the unrolled loop and one before the epilogue (and similar for the prologue case). The previous patch only froze the condition on the first branch. Rather than independently freezing the second condition, this patch instead freezes TripCount and bases BECount on it. These are the two quantities involved in the conditions, and this ensures that both work on a consistent, non-poisonous trip count. 
Differential Revision: https://reviews.llvm.org/D125896 --- .../Transforms/Utils/LoopUnrollRuntime.cpp | 28 +- .../runtime-loop-at-most-two-exits.ll | 10 +- .../runtime-loop-multiexit-dom-verify.ll | 38 +- .../LoopUnroll/runtime-loop-multiple-exits.ll | 702 +++++++++--------- .../LoopUnroll/runtime-multiexit-heuristic.ll | 86 +-- 5 files changed, 442 insertions(+), 422 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 71c15f99b76576..37a013c8d2180e 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -738,11 +738,28 @@ bool llvm::UnrollRuntimeLoopRemainder( // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast(PreHeader->getTerminator()); + IRBuilder<> B(PreHeaderBR); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); - Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), - PreHeaderBR); - IRBuilder<> B(PreHeaderBR); + Value *BECount; + // If there are other exits before the latch, that may cause the latch exit + // branch to never be executed, and the latch exit count may be poison. + // In this case, freeze the TripCount and base BECount on the frozen + // TripCount. We will introduce two branches using these values, and it's + // important that they see a consistent value (which would not be guaranteed + // if they were frozen independently.) + if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) && + !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) { + TripCount = B.CreateFreeze(TripCount); + BECount = + B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1)); + } else { + // If we don't need to freeze, use SCEVExpander for BECount as well, to + // allow slightly better value reuse. 
+ BECount = + Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); + } + Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count); Value *BranchVal = @@ -752,11 +769,6 @@ bool llvm::UnrollRuntimeLoopRemainder( B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; - // Freeze the condition if there are other exits before the latch that may - // cause the latch exit branch to never be executed. - if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) && - !isGuaranteedNotToBeUndefOrPoison(BranchVal, AC, PreHeaderBR, DT)) - BranchVal = B.CreateFreeze(BranchVal); // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll index 8bf36c8120762b..2a30167222073c 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll @@ -5,13 +5,13 @@ define i32 @test(i32* nocapture %a, i64 %n) { ; ENABLED-LABEL: @test( ; ENABLED-NEXT: entry: -; ENABLED-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 -; ENABLED-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; ENABLED-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; ENABLED-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]] +; ENABLED-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1 +; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; ENABLED-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7 ; ENABLED-NEXT: br i1 [[TMP2]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; ENABLED: entry.new: -; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = 
sub i64 [[TMP0]], [[XTRAITER]] ; ENABLED-NEXT: br label [[HEADER:%.*]] ; ENABLED: header: ; ENABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ] diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll index ecd01a56056c78..99e2c15e1a66ae 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -252,11 +252,11 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: preheader: ; CHECK-NEXT: [[C1:%.*]] = zext i32 undef to i64 ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[C1]], i64 1) -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[UMAX]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[UMAX]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 3 ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = freeze i1 [[LCMP_MOD]] -; CHECK-NEXT: br i1 [[TMP1]], label [[HEADER_PROL_PREHEADER:%.*]], label [[HEADER_PROL_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_PREHEADER:%.*]], label [[HEADER_PROL_LOOPEXIT:%.*]] ; CHECK: header.prol.preheader: ; CHECK-NEXT: br label [[HEADER_PROL:%.*]] ; CHECK: header.prol: @@ -279,7 +279,7 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]] ; CHECK: header.prol.loopexit: ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[PREHEADER:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 3 ; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT:%.*]], label [[PREHEADER_NEW:%.*]] ; CHECK: preheader.new: ; CHECK-NEXT: br label [[HEADER:%.*]] @@ -375,7 +375,11 @@ define void @test5() { 
; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i32 undef, 79 ; CHECK-NEXT: br i1 [[TMP]], label [[OUTERLATCHEXIT:%.*]], label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: br i1 false, label [[OUTERH_PROL_PREHEADER:%.*]], label [[OUTERH_PROL_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 undef +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[OUTERH_PROL_PREHEADER:%.*]], label [[OUTERH_PROL_LOOPEXIT:%.*]] ; CHECK: outerH.prol.preheader: ; CHECK-NEXT: br label [[OUTERH_PROL:%.*]] ; CHECK: outerH.prol: @@ -418,14 +422,15 @@ define void @test5() { ; CHECK-NEXT: [[TMP6_PROL]] = add i32 [[TMP4_PROL]], 1 ; CHECK-NEXT: [[TMP7_PROL:%.*]] = icmp sgt i32 [[TMP6_PROL]], 79 ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i32 [[PROL_ITER]], 1 -; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_NEXT]], 0 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_NEXT]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: outerH.prol.loopexit.unr-lcssa: ; CHECK-NEXT: [[TMP4_UNR_PH:%.*]] = phi i32 [ [[TMP6_PROL]], [[OUTERLATCH_PROL]] ] ; CHECK-NEXT: br label [[OUTERH_PROL_LOOPEXIT]] ; CHECK: outerH.prol.loopexit: ; CHECK-NEXT: [[TMP4_UNR:%.*]] = phi i32 [ undef, [[BB1]] ], [ [[TMP4_UNR_PH]], [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: br i1 false, label [[OUTERLATCHEXIT_LOOPEXIT:%.*]], label [[BB1_NEW:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[OUTERLATCHEXIT_LOOPEXIT:%.*]], label [[BB1_NEW:%.*]] ; CHECK: bb1.new: ; CHECK-NEXT: br label [[OUTERH:%.*]] ; CHECK: outerH: @@ -659,10 +664,11 @@ define void @test6(i64 %start) { ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[START]] ; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = 
add nuw i64 [[TMP3]], 1 -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = freeze i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP5]], 3 ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[LCMP_MOD]] -; CHECK-NEXT: br i1 [[TMP5]], label [[HEADER_PROL_PREHEADER:%.*]], label [[HEADER_PROL_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_PREHEADER:%.*]], label [[HEADER_PROL_LOOPEXIT:%.*]] ; CHECK: header.prol.preheader: ; CHECK-NEXT: br label [[HEADER_PROL:%.*]] ; CHECK: header.prol: @@ -673,7 +679,7 @@ define void @test6(i64 %start) { ; CHECK-NEXT: br i1 [[C1_PROL]], label [[LATCH_PROL]], label [[OTHEREXIT_LOOPEXIT1:%.*]] ; CHECK: latch.prol: ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616 +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616 ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP9:![0-9]+]] @@ -682,8 +688,8 @@ define void @test6(i64 %start) { ; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]] ; CHECK: header.prol.loopexit: ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP3]], 3 -; CHECK-NEXT: br i1 [[TMP7]], label [[LATCHEXIT:%.*]], label [[ENTRY_NEW:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP6]], 3 +; CHECK-NEXT: br i1 [[TMP8]], label [[LATCHEXIT:%.*]], label [[ENTRY_NEW:%.*]] ; CHECK: entry.new: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: @@ -708,8 +714,8 @@ define void @test6(i64 %start) { ; 
CHECK-NEXT: br i1 [[C1_3]], label [[LATCH_3]], label [[OTHEREXIT_LOOPEXIT]] ; CHECK: latch.3: ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV_NEXT_2]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616 -; CHECK-NEXT: br i1 [[TMP8]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616 +; CHECK-NEXT: br i1 [[TMP9]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: latchexit.unr-lcssa: ; CHECK-NEXT: br label [[LATCHEXIT]] ; CHECK: latchexit: diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll index c16cf14ce8d73a..0750b3c435f3f8 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -11,13 +11,13 @@ define void @test1(i64 %trip, i1 %cond) { ; EPILOG-LABEL: @test1( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] @@ -130,13 +130,13 @@ define void @test1(i64 %trip, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @test1( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: 
%2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -187,11 +187,11 @@ define void @test1(i64 %trip, i1 %cond) { ; ; PROLOG-LABEL: @test1( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label %loop_header.prol ; PROLOG: loop_header.prol: @@ -213,7 +213,7 @@ define void @test1(i64 %trip, i1 %cond) { ; PROLOG-NEXT: br label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.loopexit: ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %exit2.loopexit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -296,11 +296,11 @@ define void @test1(i64 %trip, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @test1( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze 
i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -313,7 +313,7 @@ define void @test1(i64 %trip, i1 %cond) { ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit2.loopexit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %loop_header @@ -386,13 +386,13 @@ exit2.loopexit: define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-LABEL: @test2( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %n, -1 -; EPILOG-NEXT: %xtraiter = and i64 %n, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %n +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %for.end.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %for.body.7 ] @@ -528,13 +528,13 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; ; EPILOG-BLOCK-LABEL: @test2( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 
%n, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %n +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %for.end.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %for.body.1 ] @@ -597,11 +597,11 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; ; PROLOG-LABEL: @test2( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %n, -1 -; PROLOG-NEXT: %xtraiter = and i64 %n, 7 +; PROLOG-NEXT: %0 = freeze i64 %n +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -630,7 +630,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %sum.0.lcssa.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %3 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %3 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %3, label %for.end, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %header @@ -729,11 +729,11 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; ; 
PROLOG-BLOCK-LABEL: @test2( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %n +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -748,7 +748,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %2, %for.body.prol ] ; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %for.body.prol ] ; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %2, %for.body.prol ] -; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %for.end, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -824,13 +824,13 @@ for.exit2: define void @test3(i64 %trip, i64 %add) { ; EPILOG-LABEL: @test3( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 
] @@ -965,13 +965,13 @@ define void @test3(i64 %trip, i64 %add) { ; ; EPILOG-BLOCK-LABEL: @test3( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -1030,11 +1030,11 @@ define void @test3(i64 %trip, i64 %add) { ; ; PROLOG-LABEL: @test3( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label %loop_header.prol ; PROLOG: loop_header.prol: @@ -1061,7 +1061,7 @@ define void @test3(i64 %trip, i64 %add) { ; PROLOG: loop_header.prol.loopexit: ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label 
%exit2.loopexit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -1161,11 +1161,11 @@ define void @test3(i64 %trip, i64 %add) { ; ; PROLOG-BLOCK-LABEL: @test3( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -1180,7 +1180,7 @@ define void @test3(i64 %trip, i64 %add) { ; PROLOG-BLOCK: loop_header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit2.loopexit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %loop_header @@ -1256,13 +1256,13 @@ exit2.loopexit: define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-LABEL: @hdr_latch_same_exit( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %n, -1 -; EPILOG-NEXT: %xtraiter = and i64 %n, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %n +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %n, 
%xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ] @@ -1401,13 +1401,13 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @hdr_latch_same_exit( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %n +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ] @@ -1474,11 +1474,11 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-LABEL: @hdr_latch_same_exit( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %n, -1 -; PROLOG-NEXT: %xtraiter = and i64 %n, 7 +; PROLOG-NEXT: %0 = freeze i64 %n +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -1507,7 +1507,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %result.unr.ph, 
%header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %3 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %3 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %header @@ -1609,11 +1609,11 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @hdr_latch_same_exit( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %n +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -1628,7 +1628,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %2, %latch.prol ] ; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %2, %latch.prol ] -; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -1704,13 +1704,13 @@ for.exit2: define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-LABEL: @otherblock_latch_same_exit( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add 
i64 %n, -1 -; EPILOG-NEXT: %xtraiter = and i64 %n, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %n +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ] @@ -1849,13 +1849,13 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @otherblock_latch_same_exit( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %n +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ] @@ -1922,11 +1922,11 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-LABEL: @otherblock_latch_same_exit( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %n, -1 -; PROLOG-NEXT: %xtraiter = and i64 %n, 7 +; PROLOG-NEXT: %0 = freeze i64 %n +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; 
PROLOG-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -1955,7 +1955,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %result.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %3 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %3 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %header @@ -2057,11 +2057,11 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @otherblock_latch_same_exit( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %n +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -2076,7 +2076,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %2, %latch.prol ] ; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, 
%entry ], [ %2, %latch.prol ] -; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -2153,13 +2153,13 @@ for.exit2: define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-LABEL: @otherblock_latch_same_exit2( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %n, -1 -; EPILOG-NEXT: %xtraiter = and i64 %n, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %n +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ] @@ -2298,13 +2298,13 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @otherblock_latch_same_exit2( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %n +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ] @@ 
-2371,11 +2371,11 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-LABEL: @otherblock_latch_same_exit2( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %n, -1 -; PROLOG-NEXT: %xtraiter = and i64 %n, 7 +; PROLOG-NEXT: %0 = freeze i64 %n +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -2404,7 +2404,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %result.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %3 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %3 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %header @@ -2506,11 +2506,11 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @otherblock_latch_same_exit2( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %n +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label 
%header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -2525,7 +2525,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %2, %latch.prol ] ; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %2, %latch.prol ] -; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -2603,13 +2603,13 @@ for.exit2: define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-LABEL: @otherblock_latch_same_exit3( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %n, -1 -; EPILOG-NEXT: %xtraiter = and i64 %n, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %n +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ] @@ -2748,13 +2748,13 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @otherblock_latch_same_exit3( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %n +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; 
EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ] @@ -2821,11 +2821,11 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-LABEL: @otherblock_latch_same_exit3( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %n, -1 -; PROLOG-NEXT: %xtraiter = and i64 %n, 7 +; PROLOG-NEXT: %0 = freeze i64 %n +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -2854,7 +2854,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %result.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %3 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %3 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %header @@ -2956,11 +2956,11 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @otherblock_latch_same_exit3( ; PROLOG-BLOCK-NEXT: entry: -; 
PROLOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %n +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -2975,7 +2975,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %2, %latch.prol ] ; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %2, %latch.prol ] -; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -3097,13 +3097,13 @@ define void @unique_exit(i32 %N, i32 %M) { ; EPILOG-NEXT: preheader: ; EPILOG-NEXT: %M.shifted = shl i32 %M, 3 ; EPILOG-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) -; EPILOG-NEXT: %0 = add i32 %umax, -1 -; EPILOG-NEXT: %xtraiter = and i32 %umax, 7 -; EPILOG-NEXT: %1 = icmp ult i32 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i32 %umax +; EPILOG-NEXT: %1 = add i32 %0, -1 +; EPILOG-NEXT: %xtraiter = and i32 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i32 %1, 7 ; EPILOG-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %preheader.new ; EPILOG: preheader.new: -; EPILOG-NEXT: %unroll_iter = sub i32 %umax, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i32 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %i4 = phi i32 [ 0, 
%preheader.new ], [ %inc.7, %latch.7 ] @@ -3189,13 +3189,13 @@ define void @unique_exit(i32 %N, i32 %M) { ; EPILOG-BLOCK-NEXT: preheader: ; EPILOG-BLOCK-NEXT: %M.shifted = shl i32 %M, 3 ; EPILOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) -; EPILOG-BLOCK-NEXT: %0 = add i32 %umax, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %umax, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i32 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i32 %umax +; EPILOG-BLOCK-NEXT: %1 = add i32 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %preheader.new ; EPILOG-BLOCK: preheader.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %umax, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %i4 = phi i32 [ 0, %preheader.new ], [ %inc.1, %latch.1 ] @@ -3243,11 +3243,11 @@ define void @unique_exit(i32 %N, i32 %M) { ; PROLOG-NEXT: preheader: ; PROLOG-NEXT: %M.shifted = shl i32 %M, 3 ; PROLOG-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) -; PROLOG-NEXT: %0 = add i32 %umax, -1 -; PROLOG-NEXT: %xtraiter = and i32 %umax, 7 +; PROLOG-NEXT: %0 = freeze i32 %umax +; PROLOG-NEXT: %1 = add i32 %0, -1 +; PROLOG-NEXT: %xtraiter = and i32 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -3268,7 +3268,7 @@ define void @unique_exit(i32 %N, i32 %M) { ; PROLOG: header.prol.loopexit: ; PROLOG-NEXT: %i4.unr = phi i32 [ 0, %preheader ], [ %i4.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %i2.ph.unr = phi i32 [ 
undef, %preheader ], [ %i2.ph.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i32 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i32 %1, 7 ; PROLOG-NEXT: br i1 %2, label %latchExit, label %preheader.new ; PROLOG: preheader.new: ; PROLOG-NEXT: br label %header @@ -3325,11 +3325,11 @@ define void @unique_exit(i32 %N, i32 %M) { ; PROLOG-BLOCK-NEXT: preheader: ; PROLOG-BLOCK-NEXT: %M.shifted = shl i32 %M, 3 ; PROLOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) -; PROLOG-BLOCK-NEXT: %0 = add i32 %umax, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i32 %umax, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i32 %umax +; PROLOG-BLOCK-NEXT: %1 = add i32 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i32 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -3340,7 +3340,7 @@ define void @unique_exit(i32 %N, i32 %M) { ; PROLOG-BLOCK: header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %i4.unr = phi i32 [ 0, %preheader ], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %i2.ph.unr = phi i32 [ undef, %preheader ], [ -1, %latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i32 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %preheader.new ; PROLOG-BLOCK: preheader.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -3392,13 +3392,13 @@ latchExit: ; preds = %header, %latch define i64 @test5(i64 %trip, i64 %add, i1 %cond) { ; EPILOG-LABEL: @test5( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = 
add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %latchexit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] @@ -3543,13 +3543,13 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @test5( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %latchexit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -3615,11 +3615,11 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) { ; ; PROLOG-LABEL: @test5( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label 
%loop_header.prol ; PROLOG: loop_header.prol: @@ -3649,7 +3649,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) { ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %sum.next.lcssa.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %latchexit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -3756,11 +3756,11 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @test5( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -3776,7 +3776,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) { ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %add, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latchexit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label 
%loop_header @@ -3852,13 +3852,13 @@ latchexit: define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-LABEL: @test6( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %n, -1 -; EPILOG-NEXT: %xtraiter = and i64 %n, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %n +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %latch_exit.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ] @@ -3999,13 +3999,13 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; ; EPILOG-BLOCK-LABEL: @test6( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %n +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %latch_exit.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %n, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ] @@ -4073,11 +4073,11 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; ; PROLOG-LABEL: @test6( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %n, -1 -; PROLOG-NEXT: %xtraiter = and i64 %n, 7 +; PROLOG-NEXT: %0 = freeze i64 %n +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and 
i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -4106,7 +4106,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %sum.0.lcssa.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %latch_exit, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %header @@ -4210,11 +4210,11 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; ; PROLOG-BLOCK-LABEL: @test6( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %n, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %n, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %n +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -4229,7 +4229,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %load.prol, %latch.prol ] ; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry 
], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %load.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latch_exit, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -4321,13 +4321,13 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG-NEXT: br i1 %i, label %loopexit2, label %preheader ; EPILOG: preheader: ; EPILOG-NEXT: %0 = add nsw i64 %sext, -1 -; EPILOG-NEXT: %1 = add nsw i64 %sext, -2 -; EPILOG-NEXT: %xtraiter = and i64 %0, 7 -; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 -; EPILOG-NEXT: %3 = freeze i1 %2 +; EPILOG-NEXT: %1 = freeze i64 %0 +; EPILOG-NEXT: %2 = add i64 %1, -1 +; EPILOG-NEXT: %xtraiter = and i64 %1, 7 +; EPILOG-NEXT: %3 = icmp ult i64 %2, 7 ; EPILOG-NEXT: br i1 %3, label %latchexit.unr-lcssa, label %preheader.new ; EPILOG: preheader.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %i6 = phi i64 [ 1, %preheader.new ], [ %add.7, %latch.7 ] @@ -4409,13 +4409,13 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG-BLOCK-NEXT: br i1 %i, label %loopexit2, label %preheader ; EPILOG-BLOCK: preheader: ; EPILOG-BLOCK-NEXT: %0 = add nsw i64 %sext, -1 -; EPILOG-BLOCK-NEXT: %1 = add nsw i64 %sext, -2 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 -; EPILOG-BLOCK-NEXT: %3 = freeze i1 %2 +; EPILOG-BLOCK-NEXT: %1 = freeze i64 %0 +; EPILOG-BLOCK-NEXT: %2 = add i64 %1, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %1, 1 +; EPILOG-BLOCK-NEXT: %3 = icmp ult i64 %2, 1 ; EPILOG-BLOCK-NEXT: br i1 %3, label %latchexit.unr-lcssa, label %preheader.new ; EPILOG-BLOCK: preheader.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %1, %xtraiter ; EPILOG-BLOCK-NEXT: 
br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %i6 = phi i64 [ 1, %preheader.new ], [ %add.1, %latch.1 ] @@ -4460,11 +4460,11 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-NEXT: br i1 %i, label %loopexit2, label %preheader ; PROLOG: preheader: ; PROLOG-NEXT: %0 = add nsw i64 %sext, -1 -; PROLOG-NEXT: %1 = add nsw i64 %sext, -2 -; PROLOG-NEXT: %xtraiter = and i64 %0, 7 +; PROLOG-NEXT: %1 = freeze i64 %0 +; PROLOG-NEXT: %2 = add i64 %1, -1 +; PROLOG-NEXT: %xtraiter = and i64 %1, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %2 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %2, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -4482,7 +4482,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-NEXT: br label %header.prol.loopexit ; PROLOG: header.prol.loopexit: ; PROLOG-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ %i6.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %3 = icmp ult i64 %1, 7 +; PROLOG-NEXT: %3 = icmp ult i64 %2, 7 ; PROLOG-NEXT: br i1 %3, label %latchexit, label %preheader.new ; PROLOG: preheader.new: ; PROLOG-NEXT: br label %header @@ -4538,11 +4538,11 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-BLOCK-NEXT: br i1 %i, label %loopexit2, label %preheader ; PROLOG-BLOCK: preheader: ; PROLOG-BLOCK-NEXT: %0 = add nsw i64 %sext, -1 -; PROLOG-BLOCK-NEXT: %1 = add nsw i64 %sext, -2 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; PROLOG-BLOCK-NEXT: %1 = freeze i64 %0 +; PROLOG-BLOCK-NEXT: %2 = add i64 %1, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %1, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %2 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %2, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 
%lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -4551,7 +4551,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ 2, %latch.prol ] -; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %1, 1 +; PROLOG-BLOCK-NEXT: %3 = icmp ult i64 %2, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %latchexit, label %preheader.new ; PROLOG-BLOCK: preheader.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -4940,7 +4940,6 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-LABEL: @test9( ; EPILOG-NEXT: bb: ; EPILOG-NEXT: %0 = add i32 %n, -1 -; EPILOG-NEXT: %1 = add i32 %n, -2 ; EPILOG-NEXT: br label %outerloopHdr ; EPILOG: outerloopHdr: ; EPILOG-NEXT: %trip = add i32 %n, -1 @@ -4948,12 +4947,13 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-NEXT: br i1 %outercnd, label %preheader, label %outerLatch ; EPILOG: preheader: ; EPILOG-NEXT: %i4 = zext i32 0 to i64 -; EPILOG-NEXT: %xtraiter = and i32 %0, 7 -; EPILOG-NEXT: %2 = icmp ult i32 %1, 7 -; EPILOG-NEXT: %3 = freeze i1 %2 +; EPILOG-NEXT: %1 = freeze i32 %0 +; EPILOG-NEXT: %2 = add i32 %1, -1 +; EPILOG-NEXT: %xtraiter = and i32 %1, 7 +; EPILOG-NEXT: %3 = icmp ult i32 %2, 7 ; EPILOG-NEXT: br i1 %3, label %outerLatch.loopexit.unr-lcssa, label %preheader.new ; EPILOG: preheader.new: -; EPILOG-NEXT: %unroll_iter = sub i32 %0, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i32 %1, %xtraiter ; EPILOG-NEXT: br label %header ; EPILOG: header: ; EPILOG-NEXT: %phi = phi i64 [ %i4, %preheader.new ], [ %iv.next.7, %latch.7 ] @@ -5033,19 +5033,19 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK-LABEL: @test9( ; EPILOG-BLOCK-NEXT: bb: ; EPILOG-BLOCK-NEXT: %0 = add i32 %n, -1 -; 
EPILOG-BLOCK-NEXT: %1 = add i32 %n, -2 ; EPILOG-BLOCK-NEXT: br label %outerloopHdr ; EPILOG-BLOCK: outerloopHdr: ; EPILOG-BLOCK-NEXT: %trip = add i32 %n, -1 ; EPILOG-BLOCK-NEXT: %outercnd = icmp slt i32 0, %trip ; EPILOG-BLOCK-NEXT: br i1 %outercnd, label %preheader, label %outerLatch ; EPILOG-BLOCK: preheader: -; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 -; EPILOG-BLOCK-NEXT: %3 = freeze i1 %2 +; EPILOG-BLOCK-NEXT: %1 = freeze i32 %0 +; EPILOG-BLOCK-NEXT: %2 = add i32 %1, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %1, 1 +; EPILOG-BLOCK-NEXT: %3 = icmp ult i32 %2, 1 ; EPILOG-BLOCK-NEXT: br i1 %3, label %outerLatch.loopexit.unr-lcssa, label %preheader.new ; EPILOG-BLOCK: preheader.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %0, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %1, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header ; EPILOG-BLOCK: header: ; EPILOG-BLOCK-NEXT: %phi = phi i64 [ 0, %preheader.new ], [ %iv.next.1, %latch.1 ] @@ -5094,12 +5094,13 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK-NEXT: %outercnd.1 = icmp slt i32 0, %trip.1 ; EPILOG-BLOCK-NEXT: br i1 %outercnd.1, label %preheader.1, label %outerLatch.1 ; EPILOG-BLOCK: preheader.1: -; EPILOG-BLOCK-NEXT: %xtraiter.1 = and i32 %0, 1 -; EPILOG-BLOCK-NEXT: %4 = icmp ult i32 %1, 1 -; EPILOG-BLOCK-NEXT: %5 = freeze i1 %4 -; EPILOG-BLOCK-NEXT: br i1 %5, label %outerLatch.loopexit.unr-lcssa.1, label %preheader.new.1 +; EPILOG-BLOCK-NEXT: %4 = freeze i32 %0 +; EPILOG-BLOCK-NEXT: %5 = add i32 %4, -1 +; EPILOG-BLOCK-NEXT: %xtraiter.1 = and i32 %4, 1 +; EPILOG-BLOCK-NEXT: %6 = icmp ult i32 %5, 1 +; EPILOG-BLOCK-NEXT: br i1 %6, label %outerLatch.loopexit.unr-lcssa.1, label %preheader.new.1 ; EPILOG-BLOCK: preheader.new.1: -; EPILOG-BLOCK-NEXT: %unroll_iter.1 = sub i32 %0, %xtraiter.1 +; EPILOG-BLOCK-NEXT: %unroll_iter.1 = sub i32 %4, %xtraiter.1 ; EPILOG-BLOCK-NEXT: br label %header.1 ; EPILOG-BLOCK: 
header.1: ; EPILOG-BLOCK-NEXT: %phi.1 = phi i64 [ 0, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ] @@ -5133,7 +5134,6 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-LABEL: @test9( ; PROLOG-NEXT: bb: ; PROLOG-NEXT: %0 = add i32 %n, -1 -; PROLOG-NEXT: %1 = add i32 %n, -2 ; PROLOG-NEXT: br label %outerloopHdr ; PROLOG: outerloopHdr: ; PROLOG-NEXT: %trip = add i32 %n, -1 @@ -5141,10 +5141,11 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-NEXT: br i1 %outercnd, label %preheader, label %outerLatch ; PROLOG: preheader: ; PROLOG-NEXT: %i4 = zext i32 0 to i64 -; PROLOG-NEXT: %xtraiter = and i32 %0, 7 +; PROLOG-NEXT: %1 = freeze i32 %0 +; PROLOG-NEXT: %2 = add i32 %1, -1 +; PROLOG-NEXT: %xtraiter = and i32 %1, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; PROLOG-NEXT: %2 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %2, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG: header.prol.preheader: ; PROLOG-NEXT: br label %header.prol ; PROLOG: header.prol: @@ -5164,7 +5165,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-NEXT: br label %header.prol.loopexit ; PROLOG: header.prol.loopexit: ; PROLOG-NEXT: %phi.unr = phi i64 [ %i4, %preheader ], [ %phi.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %3 = icmp ult i32 %1, 7 +; PROLOG-NEXT: %3 = icmp ult i32 %2, 7 ; PROLOG-NEXT: br i1 %3, label %outerLatch.loopexit, label %preheader.new ; PROLOG: preheader.new: ; PROLOG-NEXT: br label %header @@ -5218,17 +5219,17 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK-LABEL: @test9( ; PROLOG-BLOCK-NEXT: bb: ; PROLOG-BLOCK-NEXT: %0 = add i32 %n, -1 -; PROLOG-BLOCK-NEXT: %1 = add i32 %n, -2 ; PROLOG-BLOCK-NEXT: br label %outerloopHdr ; PROLOG-BLOCK: outerloopHdr: ; PROLOG-BLOCK-NEXT: %trip = add i32 %n, -1 ; 
PROLOG-BLOCK-NEXT: %outercnd = icmp slt i32 0, %trip ; PROLOG-BLOCK-NEXT: br i1 %outercnd, label %preheader, label %outerLatch ; PROLOG-BLOCK: preheader: -; PROLOG-BLOCK-NEXT: %xtraiter = and i32 %0, 1 +; PROLOG-BLOCK-NEXT: %1 = freeze i32 %0 +; PROLOG-BLOCK-NEXT: %2 = add i32 %1, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i32 %1, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %2 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %2, label %header.prol.preheader, label %header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: @@ -5237,7 +5238,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %phi.unr = phi i64 [ 0, %preheader ], [ 1, %latch.prol ] -; PROLOG-BLOCK-NEXT: %3 = icmp ult i32 %1, 1 +; PROLOG-BLOCK-NEXT: %3 = icmp ult i32 %2, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %outerLatch.loopexit, label %preheader.new ; PROLOG-BLOCK: preheader.new: ; PROLOG-BLOCK-NEXT: br label %header @@ -5278,10 +5279,11 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK-NEXT: %outercnd.1 = icmp slt i32 0, %trip.1 ; PROLOG-BLOCK-NEXT: br i1 %outercnd.1, label %preheader.1, label %outerLatch.1 ; PROLOG-BLOCK: preheader.1: -; PROLOG-BLOCK-NEXT: %xtraiter.1 = and i32 %0, 1 +; PROLOG-BLOCK-NEXT: %4 = freeze i32 %0 +; PROLOG-BLOCK-NEXT: %5 = add i32 %4, -1 +; PROLOG-BLOCK-NEXT: %xtraiter.1 = and i32 %4, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0 -; PROLOG-BLOCK-NEXT: %4 = freeze i1 %lcmp.mod.1 -; PROLOG-BLOCK-NEXT: br i1 %4, label %header.prol.preheader.1, label %header.prol.loopexit.1 +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod.1, label %header.prol.preheader.1, label %header.prol.loopexit.1 ; 
PROLOG-BLOCK: header.prol.preheader.1: ; PROLOG-BLOCK-NEXT: br label %header.prol.1 ; PROLOG-BLOCK: header.prol.1: @@ -5290,8 +5292,8 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit.1 ; PROLOG-BLOCK: header.prol.loopexit.1: ; PROLOG-BLOCK-NEXT: %phi.unr.1 = phi i64 [ 0, %preheader.1 ], [ 1, %latch.prol.1 ] -; PROLOG-BLOCK-NEXT: %5 = icmp ult i32 %1, 1 -; PROLOG-BLOCK-NEXT: br i1 %5, label %outerLatch.loopexit.1, label %preheader.new.1 +; PROLOG-BLOCK-NEXT: %6 = icmp ult i32 %5, 1 +; PROLOG-BLOCK-NEXT: br i1 %6, label %outerLatch.loopexit.1, label %preheader.new.1 ; PROLOG-BLOCK: preheader.new.1: ; PROLOG-BLOCK-NEXT: br label %header.1 ; PROLOG-BLOCK: header.1: @@ -5352,13 +5354,13 @@ declare void @bar() define void @test10(i64 %trip, i64 %trip2) { ; EPILOG-LABEL: @test10( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %exit2.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] @@ -5447,13 +5449,13 @@ define void @test10(i64 %trip, i64 %trip2) { ; ; EPILOG-BLOCK-LABEL: @test10( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: 
%2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %exit2.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -5496,11 +5498,11 @@ define void @test10(i64 %trip, i64 %trip2) { ; ; PROLOG-LABEL: @test10( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label %loop_header.prol ; PROLOG: loop_header.prol: @@ -5520,7 +5522,7 @@ define void @test10(i64 %trip, i64 %trip2) { ; PROLOG-NEXT: br label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.loopexit: ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %exit2, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -5581,11 +5583,11 @@ define void @test10(i64 %trip, i64 %trip2) { ; ; PROLOG-BLOCK-LABEL: @test10( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; 
PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -5596,7 +5598,7 @@ define void @test10(i64 %trip, i64 %trip2) { ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit2, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %loop_header @@ -5649,13 +5651,13 @@ exit2: define void @test11(i64 %trip, i1 %cond) { ; EPILOG-LABEL: @test11( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %exit2.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] @@ -5735,13 +5737,13 @@ define void @test11(i64 %trip, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @test11( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add 
i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %exit2.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -5779,11 +5781,11 @@ define void @test11(i64 %trip, i1 %cond) { ; ; PROLOG-LABEL: @test11( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label %loop_header.prol ; PROLOG: loop_header.prol: @@ -5802,7 +5804,7 @@ define void @test11(i64 %trip, i1 %cond) { ; PROLOG-NEXT: br label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.loopexit: ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %exit2, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -5855,11 +5857,11 @@ define void @test11(i64 %trip, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @test11( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 
; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -5869,7 +5871,7 @@ define void @test11(i64 %trip, i1 %cond) { ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit2, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %loop_header @@ -5919,13 +5921,13 @@ exit2: define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-LABEL: @test12( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] @@ -6030,13 +6032,13 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; ; EPILOG-BLOCK-LABEL: @test12( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = 
freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -6085,11 +6087,11 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; ; PROLOG-LABEL: @test12( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label %loop_header.prol ; PROLOG: loop_header.prol: @@ -6111,7 +6113,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; PROLOG-NEXT: br label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.loopexit: ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %exit1, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -6186,11 +6188,11 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @test12( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; 
PROLOG-BLOCK-NEXT: %0 = freeze i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -6203,7 +6205,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit1, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %loop_header @@ -6259,13 +6261,13 @@ declare i1 @unknown_cond() define void @test13(i64 %trip, i64 %trip2) { ; EPILOG-LABEL: @test13( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] @@ -6379,13 +6381,13 @@ define void @test13(i64 %trip, i64 %trip2) { ; ; EPILOG-BLOCK-LABEL: @test13( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add 
i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -6437,11 +6439,11 @@ define void @test13(i64 %trip, i64 %trip2) { ; ; PROLOG-LABEL: @test13( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label %loop_header.prol ; PROLOG: loop_header.prol: @@ -6464,7 +6466,7 @@ define void @test13(i64 %trip, i64 %trip2) { ; PROLOG-NEXT: br label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.loopexit: ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %exit1, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -6547,11 +6549,11 @@ define void @test13(i64 %trip, i64 %trip2) { ; ; PROLOG-BLOCK-LABEL: @test13( ; PROLOG-BLOCK-NEXT: 
entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -6565,7 +6567,7 @@ define void @test13(i64 %trip, i64 %trip2) { ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit1, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %loop_header @@ -6622,13 +6624,13 @@ exit1: define void @test14(i64 %trip, i1 %cond) { ; EPILOG-LABEL: @test14( ; EPILOG-NEXT: entry: -; EPILOG-NEXT: %0 = add i64 %trip, -1 -; EPILOG-NEXT: %xtraiter = and i64 %trip, 7 -; EPILOG-NEXT: %1 = icmp ult i64 %0, 7 -; EPILOG-NEXT: %2 = freeze i1 %1 +; EPILOG-NEXT: %0 = freeze i64 %trip +; EPILOG-NEXT: %1 = add i64 %0, -1 +; EPILOG-NEXT: %xtraiter = and i64 %0, 7 +; EPILOG-NEXT: %2 = icmp ult i64 %1, 7 ; EPILOG-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG: entry.new: -; EPILOG-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-NEXT: br label %loop_header ; EPILOG: loop_header: ; EPILOG-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ] @@ -6733,13 +6735,13 @@ define void @test14(i64 %trip, i1 %cond) { ; ; 
EPILOG-BLOCK-LABEL: @test14( ; EPILOG-BLOCK-NEXT: entry: -; EPILOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 -; EPILOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = freeze i1 %1 +; EPILOG-BLOCK-NEXT: %0 = freeze i64 %trip +; EPILOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 +; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; EPILOG-BLOCK-NEXT: br i1 %2, label %exit1.unr-lcssa, label %entry.new ; EPILOG-BLOCK: entry.new: -; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %trip, %xtraiter +; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %loop_header ; EPILOG-BLOCK: loop_header: ; EPILOG-BLOCK-NEXT: %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ] @@ -6786,11 +6788,11 @@ define void @test14(i64 %trip, i1 %cond) { ; ; PROLOG-LABEL: @test14( ; PROLOG-NEXT: entry: -; PROLOG-NEXT: %0 = add i64 %trip, -1 -; PROLOG-NEXT: %xtraiter = and i64 %trip, 7 +; PROLOG-NEXT: %0 = freeze i64 %trip +; PROLOG-NEXT: %1 = add i64 %0, -1 +; PROLOG-NEXT: %xtraiter = and i64 %0, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.preheader: ; PROLOG-NEXT: br label %loop_header.prol ; PROLOG: loop_header.prol: @@ -6812,7 +6814,7 @@ define void @test14(i64 %trip, i1 %cond) { ; PROLOG-NEXT: br label %loop_header.prol.loopexit ; PROLOG: loop_header.prol.loopexit: ; PROLOG-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %loop_header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i64 %0, 7 +; PROLOG-NEXT: %2 = icmp ult i64 %1, 7 ; PROLOG-NEXT: br i1 %2, label %exit1, label %entry.new ; PROLOG: entry.new: ; PROLOG-NEXT: br label %loop_header @@ -6887,11 +6889,11 @@ define void 
@test14(i64 %trip, i1 %cond) { ; ; PROLOG-BLOCK-LABEL: @test14( ; PROLOG-BLOCK-NEXT: entry: -; PROLOG-BLOCK-NEXT: %0 = add i64 %trip, -1 -; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %trip, 1 +; PROLOG-BLOCK-NEXT: %0 = freeze i64 %trip +; PROLOG-BLOCK-NEXT: %1 = add i64 %0, -1 +; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: %1 = freeze i1 %lcmp.mod -; PROLOG-BLOCK-NEXT: br i1 %1, label %loop_header.prol.preheader, label %loop_header.prol.loopexit +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.prol.preheader, label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %loop_header.prol ; PROLOG-BLOCK: loop_header.prol: @@ -6904,7 +6906,7 @@ define void @test14(i64 %trip, i1 %cond) { ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 +; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit1, label %entry.new ; PROLOG-BLOCK: entry.new: ; PROLOG-BLOCK-NEXT: br label %loop_header diff --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll index fc6673e8b3fe55..8c3ff19435e962 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll @@ -15,13 +15,13 @@ define i32 @test1(i32* nocapture %a, i64 %n) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[N_FR:%.*]] = freeze i64 [[N:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N_FR]], -1 -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N_FR]], 7 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; CHECK-NEXT: br i1 [[TMP1]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze 
i64 [[N:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7 +; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; CHECK: entry.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[N_FR]], -8 +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], -8 ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] @@ -29,75 +29,75 @@ define i32 @test1(i32* nocapture %a, i64 %n) { ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ] ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]] ; CHECK: for.exiting_block: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]] ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]] ; CHECK: for.exiting_block.1: -; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]] ; CHECK: latch.1: ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, 
i32* [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]] ; CHECK: for.exiting_block.2: -; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]] ; CHECK: latch.2: ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]] ; CHECK: for.exiting_block.3: -; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]] ; CHECK: latch.3: ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]] ; CHECK: for.exiting_block.4: -; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]] ; CHECK: latch.4: ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 
[[INDVARS_IV_NEXT_3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5 ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]] ; CHECK: for.exiting_block.5: -; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]] ; CHECK: latch.5: ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 -; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP7]], [[ADD_4]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]] ; CHECK: for.exiting_block.6: -; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]] ; CHECK: latch.6: ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7 ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]] ; CHECK: for.exiting_block.7: -; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[TMP0]], 42 ; 
CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]] ; CHECK: latch.7: ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 -; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 ; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 ; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] @@ -118,12 +118,12 @@ define i32 @test1(i32* nocapture %a, i64 %n) { ; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ], [ 0, [[HEADER_EPIL_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]] ; CHECK: for.exiting_block.epil: -; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N_FR]], 42 +; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[TMP0]], 42 ; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]] ; CHECK: latch.epil: ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 -; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 +; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1 ; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 ; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] @@ -170,13 +170,13 @@ define i32 @test1(i32* nocapture %a, i64 %n) { ; ; ENABLED-LABEL: @test1( ; ENABLED-NEXT: entry: -; ENABLED-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 -; 
ENABLED-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; ENABLED-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; ENABLED-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]] +; ENABLED-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1 +; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; ENABLED-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7 ; ENABLED-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; ENABLED: entry.new: -; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]] ; ENABLED-NEXT: br label [[HEADER:%.*]] ; ENABLED: header: ; ENABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] @@ -391,13 +391,13 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; ; ENABLED-LABEL: @test2( ; ENABLED-NEXT: entry: -; ENABLED-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 -; ENABLED-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; ENABLED-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; ENABLED-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]] +; ENABLED-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1 +; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; ENABLED-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7 ; ENABLED-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; ENABLED: entry.new: -; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]] ; ENABLED-NEXT: br label [[HEADER:%.*]] ; ENABLED: header: ; ENABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] From bb8e02325ffc96d4a3a21aa9eefabb8bd205b214 Mon Sep 17 00:00:00 2001 From: Markus Lavin Date: Tue, 24 May 2022 09:42:07 +0200 Subject: [PATCH 317/908] llvm-reduce: improve basic-blocks removal pass When the single branch target of a block has been removed try updating it to 
target a block that is kept (by scanning forward in the sequence) instead of replacing the branch with a return instruction. Doing so reduces the risk of breaking loop structures meaning that when the loop is 'interesting' these reductions should have more blocks eliminated. Differential Revision: https://reviews.llvm.org/D125766 --- .../tools/llvm-reduce/remove-bbs-sequence.ll | 30 +++++++++++++++++++ .../tools/llvm-reduce/remove-bbs-sequence.py | 15 ++++++++++ .../llvm-reduce/deltas/ReduceBasicBlocks.cpp | 13 ++++++++ 3 files changed, 58 insertions(+) create mode 100644 llvm/test/tools/llvm-reduce/remove-bbs-sequence.ll create mode 100755 llvm/test/tools/llvm-reduce/remove-bbs-sequence.py diff --git a/llvm/test/tools/llvm-reduce/remove-bbs-sequence.ll b/llvm/test/tools/llvm-reduce/remove-bbs-sequence.ll new file mode 100644 index 00000000000000..0b2fdb5bd8bded --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-bbs-sequence.ll @@ -0,0 +1,30 @@ +; RUN: llvm-reduce --delta-passes=basic-blocks --test %python --test-arg %p/remove-bbs-sequence.py %s -o %t +; RUN: cat %t | FileCheck %s + +; The interestingness test is that the CFG contains a loop. Verify that the +; unnecessary bb2 and bb3 are removed while still maintaining a loop. 
+ +define void @main() { + bb0: + br label %bb1 + bb1: + br label %bb2 + bb2: + br label %bb3 + bb3: + %phi = phi i32 [ undef, %bb2 ] + br label %bb4 + bb4: + br label %bb1 +} + +; CHECK:define void @main() { +; CHECK-NEXT: bb0: +; CHECK-NEXT: br label %bb1 +; CHECK-EMPTY: +; CHECK-NEXT: bb1: +; CHECK-NEXT: br label %bb4 +; CHECK-EMPTY: +; CHECK-NEXT: bb4: +; CHECK-NEXT: br label %bb1 +; CHECK-NEXT:} diff --git a/llvm/test/tools/llvm-reduce/remove-bbs-sequence.py b/llvm/test/tools/llvm-reduce/remove-bbs-sequence.py new file mode 100755 index 00000000000000..f38d501d137003 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-bbs-sequence.py @@ -0,0 +1,15 @@ +import subprocess +import sys + +opt = subprocess.run( [ 'opt', '-passes=print','-disable-output', sys.argv[1]], stdout=subprocess.PIPE, stderr=subprocess.PIPE ) + +stdout = opt.stdout.decode() + +pattern = 'Loop at depth 1 containing' + +if (pattern in opt.stderr.decode()): + print('This is interesting!') + sys.exit(0) +else: + print('This is NOT interesting!') + sys.exit(1) diff --git a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp index a9b9db2d09ee7e..2b319efb3868aa 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp @@ -47,6 +47,19 @@ static void replaceBranchTerminator(BasicBlock &BB, Term->eraseFromParent(); if (ChunkSuccessors.empty()) { + // Scan forward in BB list to try find a block that is kept. + Function &F = *BB.getParent(); + Function::iterator FI = BB.getIterator(); + FI++; + while (FI != F.end()) { + auto &FIB = *FI; + if (BBsToKeep.count(&FIB) && !isa(FIB.begin())) { + BranchInst::Create(&FIB, &BB); + return; + } + FI++; + } + // If that fails then resort to replacing with a ret. auto *FnRetTy = BB.getParent()->getReturnType(); ReturnInst::Create(BB.getContext(), FnRetTy->isVoidTy() ? 
nullptr : UndefValue::get(FnRetTy), From 973c7e0654b21c6ae7093b4ea0362c6096945c38 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 10:04:24 +0200 Subject: [PATCH 318/908] [InstCombine] Use different icmp pattern in test (NFC) Use an and/or of icmp pattern that produces different code depending on whether it is part of a logical or bitwise and/or. --- .../Transforms/InstCombine/and-or-icmps.ll | 552 ++++++++++++------ 1 file changed, 376 insertions(+), 176 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 7f0792da52f63e..698816e1d87a2e 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -1277,517 +1277,717 @@ define i1 @is_ascii_alphabetic_inverted(i32 %char) { ret i1 %logical } -define i1 @bitwise_and_bitwise_and_icmps(i8 %x, i8 %y) { +define i1 @bitwise_and_bitwise_and_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_bitwise_and_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[C1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c1, %c2 %and2 = and i1 %and1, %c3 ret i1 %and2 } -define i1 @bitwise_and_bitwise_and_icmps_comm1(i8 %x, i8 %y) { +define i1 @bitwise_and_bitwise_and_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm1( ; 
CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[C1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c1, %c2 %and2 = and i1 %c3, %and1 ret i1 %and2 } -define i1 @bitwise_and_bitwise_and_icmps_comm2(i8 %x, i8 %y) { +define i1 @bitwise_and_bitwise_and_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[TMP3]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c2, %c1 %and2 = and i1 %and1, %c3 ret i1 %and2 } -define i1 @bitwise_and_bitwise_and_icmps_comm3(i8 %x, i8 %y) { +define i1 @bitwise_and_bitwise_and_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: 
[[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[TMP3]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c2, %c1 %and2 = and i1 %c3, %and1 ret i1 %and2 } -define i1 @bitwise_and_logical_and_icmps(i8 %x, i8 %y) { +define i1 @bitwise_and_logical_and_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_logical_and_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false ; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c1, i1 %c2, i1 false %and2 = and i1 %and1, %c3 ret i1 %and2 } -define i1 @bitwise_and_logical_and_icmps_comm1(i8 %x, i8 %y) { +define i1 @bitwise_and_logical_and_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_logical_and_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: 
[[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false ; CHECK-NEXT: [[AND2:%.*]] = and i1 [[C3]], [[AND1]] ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c1, i1 %c2, i1 false %and2 = and i1 %c3, %and1 ret i1 %and2 } -define i1 @bitwise_and_logical_and_icmps_comm2(i8 %x, i8 %y) { +define i1 @bitwise_and_logical_and_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_logical_and_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false ; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c2, i1 %c1, i1 false %and2 = and i1 %and1, %c3 ret i1 %and2 } -define i1 @bitwise_and_logical_and_icmps_comm3(i8 %x, i8 %y) { +define i1 @bitwise_and_logical_and_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: 
@bitwise_and_logical_and_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false ; CHECK-NEXT: [[AND2:%.*]] = and i1 [[C3]], [[AND1]] ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c2, i1 %c1, i1 false %and2 = and i1 %c3, %and1 ret i1 %and2 } -define i1 @logical_and_bitwise_and_icmps(i8 %x, i8 %y) { +define i1 @logical_and_bitwise_and_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_bitwise_and_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[C1]], [[TMP1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 +; CHECK-NEXT: [[AND1:%.*]] = and i1 [[C1]], [[C2]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[C3]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c1, %c2 %and2 = select i1 %and1, i1 %c3, i1 false ret i1 %and2 } -define i1 
@logical_and_bitwise_and_icmps_comm1(i8 %x, i8 %y) { +define i1 @logical_and_bitwise_and_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_bitwise_and_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[C1]], [[C2]] ; CHECK-NEXT: [[AND2:%.*]] = select i1 [[C3]], i1 [[AND1]], i1 false ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c1, %c2 %and2 = select i1 %c3, i1 %and1, i1 false ret i1 %and2 } -define i1 @logical_and_bitwise_and_icmps_comm2(i8 %x, i8 %y) { +define i1 @logical_and_bitwise_and_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_bitwise_and_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 +; CHECK-NEXT: [[AND1:%.*]] = and i1 [[C2]], [[C1]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[C3]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = 
icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c2, %c1 %and2 = select i1 %and1, i1 %c3, i1 false ret i1 %and2 } -define i1 @logical_and_bitwise_and_icmps_comm3(i8 %x, i8 %y) { +define i1 @logical_and_bitwise_and_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_bitwise_and_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[C2]], [[C1]] ; CHECK-NEXT: [[AND2:%.*]] = select i1 [[C3]], i1 [[AND1]], i1 false ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = and i1 %c2, %c1 %and2 = select i1 %c3, i1 %and1, i1 false ret i1 %and2 } -define i1 @logical_and_logical_and_icmps(i8 %x, i8 %y) { +define i1 @logical_and_logical_and_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_logical_and_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false ; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[C3]], i1 false ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt 
i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c1, i1 %c2, i1 false %and2 = select i1 %and1, i1 %c3, i1 false ret i1 %and2 } -define i1 @logical_and_logical_and_icmps_comm1(i8 %x, i8 %y) { +define i1 @logical_and_logical_and_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_logical_and_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C3]], i1 [[C1]], i1 false -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[TMP1]], [[C2]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[TMP1]], i1 [[C2]], i1 false ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c1, i1 %c2, i1 false %and2 = select i1 %c3, i1 %and1, i1 false ret i1 %and2 } -define i1 @logical_and_logical_and_icmps_comm2(i8 %x, i8 %y) { +define i1 @logical_and_logical_and_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_logical_and_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 ; CHECK-NEXT: 
[[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[C3]], i1 false ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c2, i1 %c1, i1 false %and2 = select i1 %and1, i1 %c3, i1 false ret i1 %and2 } -define i1 @logical_and_logical_and_icmps_comm3(i8 %x, i8 %y) { +define i1 @logical_and_logical_and_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_and_logical_and_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: [[AND2:%.*]] = select i1 [[TMP1]], i1 [[C1]], i1 false +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[TMP3]], i1 [[C1]], i1 false ; CHECK-NEXT: ret i1 [[AND2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp slt i8 %x, 1 - %c3 = icmp sgt i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp ne i8 %x.m1, 0 + %c3 = icmp ne i8 %x.m2, 0 %and1 = select i1 %c2, i1 %c1, i1 false %and2 = select i1 %c3, i1 %and1, i1 false ret i1 %and2 } -define i1 @bitwise_or_bitwise_or_icmps(i8 %x, i8 %y) { +define i1 @bitwise_or_bitwise_or_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_bitwise_or_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] 
= and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[C1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c1, %c2 %or2 = or i1 %or1, %c3 ret i1 %or2 } -define i1 @bitwise_or_bitwise_or_icmps_comm1(i8 %x, i8 %y) { +define i1 @bitwise_or_bitwise_or_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_bitwise_or_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[C1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c1, %c2 %or2 = or i1 %c3, %or1 ret i1 %or2 } -define i1 @bitwise_or_bitwise_or_icmps_comm2(i8 %x, i8 %y) { +define i1 @bitwise_or_bitwise_or_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_bitwise_or_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; 
CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c2, %c1 %or2 = or i1 %or1, %c3 ret i1 %or2 } -define i1 @bitwise_or_bitwise_or_icmps_comm3(i8 %x, i8 %y) { +define i1 @bitwise_or_bitwise_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_bitwise_or_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[C1]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c2, %c1 %or2 = or i1 %c3, %or1 ret i1 %or2 } -define i1 @bitwise_or_logical_or_icmps(i8 %x, i8 %y) { +define i1 @bitwise_or_logical_or_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] ; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] ; 
CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c1, i1 true, i1 %c2 %or2 = or i1 %or1, %c3 ret i1 %or2 } -define i1 @bitwise_or_logical_or_icmps_comm1(i8 %x, i8 %y) { +define i1 @bitwise_or_logical_or_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] ; CHECK-NEXT: [[OR2:%.*]] = or i1 [[C3]], [[OR1]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c1, i1 true, i1 %c2 %or2 = or i1 %c3, %or1 ret i1 %or2 } -define i1 @bitwise_or_logical_or_icmps_comm2(i8 %x, i8 %y) { +define i1 @bitwise_or_logical_or_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, 
i1 [[C1]] ; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c2, i1 true, i1 %c1 %or2 = or i1 %or1, %c3 ret i1 %or2 } -define i1 @bitwise_or_logical_or_icmps_comm3(i8 %x, i8 %y) { +define i1 @bitwise_or_logical_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, i1 [[C1]] ; CHECK-NEXT: [[OR2:%.*]] = or i1 [[C3]], [[OR1]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c2, i1 true, i1 %c1 %or2 = or i1 %c3, %or1 ret i1 %or2 } -define i1 @logical_or_bitwise_or_icmps(i8 %x, i8 %y) { +define i1 @logical_or_bitwise_or_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_bitwise_or_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[C1]], [[TMP1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = 
icmp eq i8 [[X_M2]], 0 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[C1]], [[C2]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[OR1]], i1 true, i1 [[C3]] +; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c1, %c2 %or2 = select i1 %or1, i1 true, i1 %c3 ret i1 %or2 } -define i1 @logical_or_bitwise_or_icmps_comm1(i8 %x, i8 %y) { +define i1 @logical_or_bitwise_or_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_bitwise_or_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = or i1 [[C1]], [[C2]] ; CHECK-NEXT: [[OR2:%.*]] = select i1 [[C3]], i1 true, i1 [[OR1]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c1, %c2 %or2 = select i1 %c3, i1 true, i1 %or1 ret i1 %or2 } -define i1 @logical_or_bitwise_or_icmps_comm2(i8 %x, i8 %y) { +define i1 @logical_or_bitwise_or_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_bitwise_or_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[C1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and 
i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[C2]], [[C1]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[OR1]], i1 true, i1 [[C3]] +; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c2, %c1 %or2 = select i1 %or1, i1 true, i1 %c3 ret i1 %or2 } -define i1 @logical_or_bitwise_or_icmps_comm3(i8 %x, i8 %y) { +define i1 @logical_or_bitwise_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_bitwise_or_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = or i1 [[C2]], [[C1]] ; CHECK-NEXT: [[OR2:%.*]] = select i1 [[C3]], i1 true, i1 [[OR1]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = or i1 %c2, %c1 %or2 = select i1 %c3, i1 true, i1 %or1 ret i1 %or2 } -define i1 @logical_or_logical_or_icmps(i8 %x, i8 %y) { +define i1 @logical_or_logical_or_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_logical_or_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 
1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] ; CHECK-NEXT: [[OR2:%.*]] = select i1 [[OR1]], i1 true, i1 [[C3]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c1, i1 true, i1 %c2 %or2 = select i1 %or1, i1 true, i1 %c3 ret i1 %or2 } -define i1 @logical_or_logical_or_icmps_comm1(i8 %x, i8 %y) { +define i1 @logical_or_logical_or_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_logical_or_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C3]], i1 true, i1 [[C1]] -; CHECK-NEXT: [[OR2:%.*]] = or i1 [[TMP1]], [[C2]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[C2]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c1, i1 true, i1 %c2 %or2 = select i1 %c3, i1 true, i1 %or1 ret i1 %or2 } -define i1 @logical_or_logical_or_icmps_comm2(i8 %x, i8 %y) { +define i1 @logical_or_logical_or_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_logical_or_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; 
CHECK-NEXT: [[C2:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 +; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 ; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, i1 [[C1]] -; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[OR1]], i1 true, i1 [[C3]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c2, i1 true, i1 %c1 %or2 = select i1 %or1, i1 true, i1 %c3 ret i1 %or2 } -define i1 @logical_or_logical_or_icmps_comm3(i8 %x, i8 %y) { +define i1 @logical_or_logical_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @logical_or_logical_or_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: [[OR2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[C1]] +; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[TMP3]], i1 true, i1 [[C1]] ; CHECK-NEXT: ret i1 [[OR2]] ; %c1 = icmp eq i8 %y, 42 - %c2 = icmp sge i8 %x, 1 - %c3 = icmp sle i8 %x, -1 + %x.m1 = and i8 %x, 1 + %z.shift = shl i8 1, %z + %x.m2 = and i8 %x, %z.shift + %c2 = icmp eq i8 %x.m1, 0 + %c3 = icmp eq i8 %x.m2, 0 %or1 = select i1 %c2, i1 true, i1 %c1 %or2 = select i1 %c3, i1 true, i1 %or1 ret i1 %or2 From c0e06c7448a2dd16796e210a66de68f269363e31 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 10:07:00 +0200 Subject: [PATCH 
319/908] [InstCombine] Handle logical and/or in recursive and/or of icmps fold The and/or of icmps fold is also applied in reassociated form. However, this currently only happens for bitwise and of bitwise and, but not for bitwise and of logical and (or other combinations, but this is the one being addressed here). We can do this for bitwise+logical combinations as well, but need to be a bit careful about which of the resulting ands are logical: https://alive2.llvm.org/ce/z/WYSjGh https://alive2.llvm.org/ce/z/guxYnz https://alive2.llvm.org/ce/z/S5SYxY https://alive2.llvm.org/ce/z/2rAWeW --- .../InstCombine/InstCombineAndOrXor.cpp | 76 ++++++++++---- .../Transforms/InstCombine/and-or-icmps.ll | 98 ++++++++----------- llvm/test/Transforms/InstCombine/and2.ll | 8 +- 3 files changed, 101 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 107a57f409a92f..0ddbaa4b065990 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1989,21 +1989,39 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { // TODO: Make this recursive; it's a little tricky because an arbitrary // number of 'and' instructions might have to be created. - if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { + if (LHS && match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa<SelectInst>(Op1); + // LHS & (X && Y) --> (LHS && X) && Y if (auto *Cmp = dyn_cast<ICmpInst>(X)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); + if (Value *Res = + foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical + ? 
Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // LHS & (X && Y) --> X && (LHS & Y) if (auto *Cmp = dyn_cast<ICmpInst>(Y)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true)) - return replaceInstUsesWith(I, Builder.CreateAnd(X, Res)); + if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); } - if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { + if (RHS && match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa<SelectInst>(Op0); + // (X && Y) & RHS --> (X && RHS) && Y if (auto *Cmp = dyn_cast<ICmpInst>(X)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true)) - return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); + if (Value *Res = + foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // (X && Y) & RHS --> X && (Y & RHS) if (auto *Cmp = dyn_cast<ICmpInst>(Y)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true)) - return replaceInstUsesWith(I, Builder.CreateAnd(X, Res)); + if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); } } @@ -2788,21 +2806,39 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { // TODO: Make this recursive; it's a little tricky because an arbitrary // number of 'or' instructions might have to be created. 
Value *X, *Y; - if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { + if (LHS && match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op1); + // LHS | (X || Y) --> (LHS || X) || Y if (auto *Cmp = dyn_cast(X)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false)) - return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); + if (Value *Res = + foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, IsLogical)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalOr(Res, Y) + : Builder.CreateOr(Res, Y)); + // LHS | (X || Y) --> X || (LHS | Y) if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false)) - return replaceInstUsesWith(I, Builder.CreateOr(X, Res)); + if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalOr(X, Res) + : Builder.CreateOr(X, Res)); } - if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { + if (RHS && match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op0); + // (X || Y) | RHS --> (X || RHS) || Y if (auto *Cmp = dyn_cast(X)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false)) - return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); + if (Value *Res = + foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, IsLogical)) + return replaceInstUsesWith(I, IsLogical + ? Builder.CreateLogicalOr(Res, Y) + : Builder.CreateOr(Res, Y)); + // (X || Y) | RHS --> X || (Y | RHS) if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false)) - return replaceInstUsesWith(I, Builder.CreateOr(X, Res)); + if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical + ? 
Builder.CreateLogicalOr(X, Res) + : Builder.CreateOr(X, Res)); } } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 698816e1d87a2e..90f995faee327b 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -1364,14 +1364,12 @@ define i1 @bitwise_and_bitwise_and_icmps_comm3(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_and_logical_and_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_logical_and_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[C1]], i1 [[TMP3]], i1 false +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 @@ -1387,14 +1385,12 @@ define i1 @bitwise_and_logical_and_icmps(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_and_logical_and_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_logical_and_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[C3]], [[AND1]] -; CHECK-NEXT: ret i1 [[AND2]] +; 
CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[C1]], i1 [[TMP3]], i1 false +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 @@ -1410,14 +1406,13 @@ define i1 @bitwise_and_logical_and_icmps_comm1(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_and_logical_and_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_logical_and_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C3]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i8 [[Z_SHIFT]] +; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], [[X:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i1 [[C1]], i1 false +; CHECK-NEXT: ret i1 [[TMP5]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 @@ -1433,14 +1428,12 @@ define i1 @bitwise_and_logical_and_icmps_comm2(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_and_logical_and_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_and_logical_and_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp ne i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp ne i8 [[X_M2]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C2]], i1 [[C1]], i1 false -; CHECK-NEXT: [[AND2:%.*]] = and i1 
[[C3]], [[AND1]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i1 [[C1]], i1 false +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 @@ -1722,14 +1715,12 @@ define i1 @bitwise_or_bitwise_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_or_logical_or_icmps(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 -; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] -; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] -; CHECK-NEXT: ret i1 [[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[C1]], i1 true, i1 [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 @@ -1745,14 +1736,12 @@ define i1 @bitwise_or_logical_or_icmps(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_or_logical_or_icmps_comm1(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm1( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 -; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C1]], i1 true, i1 [[C2]] -; CHECK-NEXT: [[OR2:%.*]] = or i1 [[C3]], [[OR1]] -; CHECK-NEXT: ret i1 
[[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[C1]], i1 true, i1 [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 @@ -1768,14 +1757,13 @@ define i1 @bitwise_or_logical_or_icmps_comm1(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_or_logical_or_icmps_comm2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm2( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 -; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, i1 [[C1]] -; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[C3]] -; CHECK-NEXT: ret i1 [[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i8 [[Z_SHIFT]] +; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], [[X:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i1 true, i1 [[C1]] +; CHECK-NEXT: ret i1 [[TMP5]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 @@ -1791,14 +1779,12 @@ define i1 @bitwise_or_logical_or_icmps_comm2(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_or_logical_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @bitwise_or_logical_or_icmps_comm3( ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42 -; CHECK-NEXT: [[X_M1:%.*]] = and i8 [[X:%.*]], 1 ; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl i8 1, [[Z:%.*]] -; CHECK-NEXT: [[X_M2:%.*]] = and i8 [[Z_SHIFT]], [[X]] -; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[X_M1]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[X_M2]], 0 -; CHECK-NEXT: [[OR1:%.*]] = select i1 [[C2]], i1 true, i1 [[C1]] -; CHECK-NEXT: [[OR2:%.*]] = or i1 [[C3]], [[OR1]] -; 
CHECK-NEXT: ret i1 [[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i1 true, i1 [[C1]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %c1 = icmp eq i8 %y, 42 %x.m1 = and i8 %x, 1 diff --git a/llvm/test/Transforms/InstCombine/and2.ll b/llvm/test/Transforms/InstCombine/and2.ll index d6488761bf13c8..9bb028185db5cb 100644 --- a/llvm/test/Transforms/InstCombine/and2.ll +++ b/llvm/test/Transforms/InstCombine/and2.ll @@ -46,11 +46,9 @@ define i1 @test7(i32 %i, i1 %b) { define i1 @test7_logical(i32 %i, i1 %b) { ; CHECK-LABEL: @test7_logical( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I:%.*]], 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[I]], -1 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[CMP1]], i1 [[B:%.*]], i1 false -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[CMP2]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[I:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i1 [[B:%.*]], i1 false +; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp slt i32 %i, 1 %cmp2 = icmp sgt i32 %i, -1 From c3a24882903df5b25c011628162e82e47401f71f Mon Sep 17 00:00:00 2001 From: Aaron Jacobs Date: Tue, 24 May 2022 10:21:05 +0200 Subject: [PATCH 320/908] [libc++] type_traits: use __is_core_convertible in __invokable_r. This fixes incorrect handling of non-moveable types, adding tests for this case. See [issue 55346](https://github.com/llvm/llvm-project/issues/55346). 
The current implementation is based on is_convertible, which is [defined](https://timsong-cpp.github.io/cppwp/n4659/meta.rel#5) in terms of validity of the following function: ``` To test() { return declval<From>(); } ``` But this doesn't work if To and From are both some non-moveable type, which the [definition](https://timsong-cpp.github.io/cppwp/n4659/conv#3) of implicit conversions says should work due to guaranteed copy elision: ``` To to = E; // E has type From ``` It is this latter definition that is used in the [definition](https://timsong-cpp.github.io/cppwp/n4659/function.objects#func.require-2) of INVOKE. Make __invokable_r use __is_core_convertible, which captures the ability to use guaranteed copy elision, making the definition correct for non-moveable types. Fixes llvm/llvm-project#55346. Reviewed By: #libc, philnik, EricWF Spies: EricWF, jloser, ldionne, philnik, libcxx-commits Differential Revision: https://reviews.llvm.org/D125300 --- libcxx/include/type_traits | 12 +- .../meta.rel/is_invocable_r.compile.pass.cpp | 103 ++++++++++++++++++ .../is_invocable_r_v.compile.pass.cpp | 103 ++++++++++++++++++ .../meta.rel/is_nothrow_invocable.pass.cpp | 18 +++ 4 files changed, 227 insertions(+), 9 deletions(-) create mode 100644 libcxx/test/std/utilities/meta/meta.rel/is_invocable_r.compile.pass.cpp create mode 100644 libcxx/test/std/utilities/meta/meta.rel/is_invocable_r_v.compile.pass.cpp diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index b56f5395dd2611..5908850cba5edd 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -2992,16 +2992,10 @@ struct __invokable_r // or incomplete array types as required by the standard. 
using _Result = decltype(__try_call<_Fp, _Args...>(0)); - using type = - typename conditional< + using type = typename conditional< _IsNotSame<_Result, __nat>::value, - typename conditional< - is_void<_Ret>::value, - true_type, - is_convertible<_Result, _Ret> - >::type, - false_type - >::type; + typename conditional< is_void<_Ret>::value, true_type, __is_core_convertible<_Result, _Ret> >::type, + false_type >::type; static const bool value = type::value; }; template diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_invocable_r.compile.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_invocable_r.compile.pass.cpp new file mode 100644 index 00000000000000..778b24d99e91e9 --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.rel/is_invocable_r.compile.pass.cpp @@ -0,0 +1,103 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// is_invocable_r + +#include + +// Non-invocable types + +static_assert(!std::is_invocable_r::value); +static_assert(!std::is_invocable_r::value); +static_assert(!std::is_invocable_r::value); +static_assert(!std::is_invocable_r::value); +static_assert(!std::is_invocable_r::value); + +// Result type matches + +template +T Return(); + +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); + +// void result type + +// Any actual return type should be useable with a result type of void. 
+static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); + +// const- and volatile-qualified void should work too. +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); + +// Conversion of result type + +// It should be possible to use a result type to which the actual return type +// can be converted. +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); +static_assert(std::is_invocable_r)>::value); + +// But not a result type where the conversion doesn't work. +static_assert(!std::is_invocable_r)>::value); +static_assert(!std::is_invocable_r)>::value); + +// Non-moveable result type + +// Define a type that can't be move-constructed. +struct CantMove { + CantMove() = default; + CantMove(CantMove&&) = delete; +}; + +static_assert(!std::is_move_constructible_v); +static_assert(!std::is_copy_constructible_v); + +// Define functions that return that type. +CantMove MakeCantMove() { return {}; } +CantMove MakeCantMoveWithArg(int) { return {}; } + +// Assumption check: it should be possible to call one of those functions and +// use it to initialize a CantMove object. +CantMove cant_move = MakeCantMove(); + +// Therefore std::is_invocable_r should agree that they can be invoked to yield +// a CantMove. 
+static_assert(std::is_invocable_r::value); +static_assert(std::is_invocable_r::value); + +// Of course it still shouldn't be possible to call one of the functions and get +// back some other type. +static_assert(!std::is_invocable_r::value); + +// And the argument types should still be important. +static_assert(!std::is_invocable_r::value); +static_assert(!std::is_invocable_r::value); + +// is_invocable_r + +// The struct form should be available too, not just the _v variant. +static_assert(std::is_invocable_r)>::value); +static_assert(!std::is_invocable_r)>::value); diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_invocable_r_v.compile.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_invocable_r_v.compile.pass.cpp new file mode 100644 index 00000000000000..32530f2c208652 --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.rel/is_invocable_r_v.compile.pass.cpp @@ -0,0 +1,103 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// is_invocable_r + +#include + +// Non-invocable types + +static_assert(!std::is_invocable_r_v); +static_assert(!std::is_invocable_r_v); +static_assert(!std::is_invocable_r_v); +static_assert(!std::is_invocable_r_v); +static_assert(!std::is_invocable_r_v); + +// Result type matches + +template +T Return(); + +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); + +// void result type + +// Any actual return type should be useable with a result type of void. 
+static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); + +// const- and volatile-qualified void should work too. +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); + +// Conversion of result type + +// It should be possible to use a result type to which the actual return type +// can be converted. +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); +static_assert(std::is_invocable_r_v)>); + +// But not a result type where the conversion doesn't work. +static_assert(!std::is_invocable_r_v)>); +static_assert(!std::is_invocable_r_v)>); + +// Non-moveable result type + +// Define a type that can't be move-constructed. +struct CantMove { + CantMove() = default; + CantMove(CantMove&&) = delete; +}; + +static_assert(!std::is_move_constructible_v); +static_assert(!std::is_copy_constructible_v); + +// Define functions that return that type. +CantMove MakeCantMove() { return {}; } +CantMove MakeCantMoveWithArg(int) { return {}; } + +// Assumption check: it should be possible to call one of those functions and +// use it to initialize a CantMove object. +CantMove cant_move = MakeCantMove(); + +// Therefore std::is_invocable_r should agree that they can be invoked to yield +// a CantMove. +static_assert(std::is_invocable_r_v); +static_assert(std::is_invocable_r_v); + +// Of course it still shouldn't be possible to call one of the functions and get +// back some other type. 
+static_assert(!std::is_invocable_r_v); + +// And the argument types should still be important. +static_assert(!std::is_invocable_r_v); +static_assert(!std::is_invocable_r_v); + +// is_invocable_r + +// The struct form should be available too, not just the _v variant. +static_assert(std::is_invocable_r)>::value); +static_assert(!std::is_invocable_r)>::value); diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_invocable.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_invocable.pass.cpp index 63ecc202ca83d0..fa6048e869e182 100644 --- a/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_invocable.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_invocable.pass.cpp @@ -185,6 +185,24 @@ int main(int, char**) { static_assert(std::is_nothrow_invocable_r::value, ""); static_assert(throws_invocable_r(), ""); } + { + // Check that it's fine if the result type is non-moveable. + struct CantMove { + CantMove() = default; + CantMove(CantMove&&) = delete; + }; + + static_assert(!std::is_move_constructible_v); + static_assert(!std::is_copy_constructible_v); + + using Fn = CantMove() noexcept; + + static_assert(std::is_nothrow_invocable_r::value); + static_assert(!std::is_nothrow_invocable_r::value); + + static_assert(std::is_nothrow_invocable_r_v); + static_assert(!std::is_nothrow_invocable_r_v); + } { // Check for is_nothrow_invocable_v using Fn = CallObject; From 7f7ef0ed617ec4a51f10b82390cafb08433d03b5 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 24 May 2022 09:16:18 +0100 Subject: [PATCH 321/908] [LegalizeTypes][NFC] Fix node name in assertion message This was probably copy/pasted from the MSCATTER widening. 
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 2a280a86aea1e4..2a1f313b8493d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5949,7 +5949,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) { // Just widen the index. It's allowed to have extra elements. Index = GetWidenedVector(Index); } else - llvm_unreachable("Can't widen this operand of mscatter"); + llvm_unreachable("Can't widen this operand of VP_SCATTER"); SDValue Ops[] = { VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask, From b07880454ba32e48a2e7f7be35516e0b76f60077 Mon Sep 17 00:00:00 2001 From: Laramie Leavitt Date: Tue, 24 May 2022 10:27:37 +0200 Subject: [PATCH 322/908] [libc++] Replace modulus operations in std::seed_seq::generate with conditional checks. Abseil benchmarks suggest that the conditional checks result in faster code (4-5x) as they are compiled into conditional move instructions (cmov on x86). Reviewed By: #libc, philnik, Mordante Spies: pengfei, Mordante, philnik, libcxx-commits Differential Revision: https://reviews.llvm.org/D125329 --- libcxx/benchmarks/random.bench.cpp | 33 +++++++++++++ libcxx/include/__random/seed_seq.h | 74 ++++++++++++++++++++---------- 2 files changed, 82 insertions(+), 25 deletions(-) create mode 100644 libcxx/benchmarks/random.bench.cpp diff --git a/libcxx/benchmarks/random.bench.cpp b/libcxx/benchmarks/random.bench.cpp new file mode 100644 index 00000000000000..e437849321da06 --- /dev/null +++ b/libcxx/benchmarks/random.bench.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +constexpr std::size_t MAX_BUFFER_LEN = 256; +constexpr std::size_t MAX_SEED_LEN = 16; + +static void BM_SeedSeq_Generate(benchmark::State& state) { + std::array buffer; + std::array seeds; + { + std::random_device rd; + std::generate(std::begin(seeds), std::begin(seeds) + state.range(0), [&]() { return rd(); }); + } + std::seed_seq seed(std::begin(seeds), std::begin(seeds) + state.range(0)); + for (auto _ : state) { + seed.generate(std::begin(buffer), std::begin(buffer) + state.range(1)); + } +} +BENCHMARK(BM_SeedSeq_Generate)->Ranges({{1, MAX_SEED_LEN}, {1, MAX_BUFFER_LEN}}); + +BENCHMARK_MAIN(); diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h index 8640cd1b4e43a4..330537fa002342 100644 --- a/libcxx/include/__random/seed_seq.h +++ b/libcxx/include/__random/seed_seq.h @@ -109,39 +109,63 @@ seed_seq::generate(_RandomAccessIterator __first, _RandomAccessIterator __last) __first[__q] += __r; __first[0] = __r; } + // Initialize indexing terms used with if statements as an optimization to + // avoid calculating modulo n on every loop iteration for each term. 
+ size_t __kmodn = 0; // __k % __n + size_t __k1modn = __n - 1; // (__k - 1) % __n + size_t __kpmodn = __p % __n; // (__k + __p) % __n + size_t __kqmodn = __q % __n; // (__k + __q) % __n + for (size_t __k = 1; __k <= __s; ++__k) { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] - ^ __first[(__k - 1) % __n]); - __first[__kpmodn] += __r; - __r += __kmodn + __v_[__k-1]; - __first[(__k + __q) % __n] += __r; - __first[__kmodn] = __r; + if (++__kmodn == __n) + __kmodn = 0; + if (++__k1modn == __n) + __k1modn = 0; + if (++__kpmodn == __n) + __kpmodn = 0; + if (++__kqmodn == __n) + __kqmodn = 0; + + result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] ^ __first[__k1modn]); + __first[__kpmodn] += __r; + __r += __kmodn + __v_[__k - 1]; + __first[__kqmodn] += __r; + __first[__kmodn] = __r; } for (size_t __k = __s + 1; __k < __m; ++__k) { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] - ^ __first[(__k - 1) % __n]); - __first[__kpmodn] += __r; - __r += __kmodn; - __first[(__k + __q) % __n] += __r; - __first[__kmodn] = __r; + if (++__kmodn == __n) + __kmodn = 0; + if (++__k1modn == __n) + __k1modn = 0; + if (++__kpmodn == __n) + __kpmodn = 0; + if (++__kqmodn == __n) + __kqmodn = 0; + + result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] ^ __first[__k1modn]); + __first[__kpmodn] += __r; + __r += __kmodn; + __first[__kqmodn] += __r; + __first[__kmodn] = __r; } for (size_t __k = __m; __k < __m + __n; ++__k) { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1566083941 * _Tp(__first[__kmodn] + - __first[__kpmodn] + - __first[(__k - 1) % __n]); - __first[__kpmodn] ^= __r; - __r -= __kmodn; - __first[(__k + __q) % __n] ^= __r; - __first[__kmodn] = __r; + if (++__kmodn == __n) + __kmodn = 0; + if 
(++__k1modn == __n) + __k1modn = 0; + if (++__kpmodn == __n) + __kpmodn = 0; + if (++__kqmodn == __n) + __kqmodn = 0; + + result_type __r = 1566083941 * _Tp(__first[__kmodn] + __first[__kpmodn] + __first[__k1modn]); + __first[__kpmodn] ^= __r; + __r -= __kmodn; + __first[__kqmodn] ^= __r; + __first[__kmodn] = __r; } } } From 1d1a191edcfa87bf77331ffcc8fa29562b17f517 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 24 May 2022 10:32:50 +0200 Subject: [PATCH 323/908] [libc++] Implement ranges::reverse Reviewed By: var-const, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D125752 --- libcxx/docs/Status/RangesAlgorithms.csv | 2 +- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__algorithm/ranges_reverse.h | 83 ++++++++++++ libcxx/include/algorithm | 10 ++ libcxx/include/module.modulemap | 1 + libcxx/test/libcxx/private_headers.verify.cpp | 1 + .../alg.reverse/ranges.reverse.pass.cpp | 120 ++++++++++++++++++ .../niebloid.compile.pass.cpp | 2 +- libcxx/test/support/almost_satisfies_types.h | 81 ++++++++++++ 9 files changed, 299 insertions(+), 2 deletions(-) create mode 100644 libcxx/include/__algorithm/ranges_reverse.h create mode 100644 libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp diff --git a/libcxx/docs/Status/RangesAlgorithms.csv b/libcxx/docs/Status/RangesAlgorithms.csv index 3664106093c41e..4395c91aa24b75 100644 --- a/libcxx/docs/Status/RangesAlgorithms.csv +++ b/libcxx/docs/Status/RangesAlgorithms.csv @@ -67,7 +67,7 @@ Merge,set_symmetric_difference,Not assigned,n/a,Not started Merge,set_union,Not assigned,n/a,Not started Permutation,remove,Not assigned,n/a,Not started Permutation,remove_if,Not assigned,n/a,Not started -Permutation,reverse,Not assigned,n/a,Not started +Permutation,reverse,Nikolas Klauser,`D125752 `_,✅ Permutation,rotate,Not assigned,n/a,Not started Permutation,shuffle,Not assigned,n/a,Not started Permutation,unique,Not assigned,n/a,Not started diff --git 
a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 74336ed19703e6..b6e4f6d6cf5264 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -85,6 +85,7 @@ set(files __algorithm/ranges_minmax.h __algorithm/ranges_minmax_element.h __algorithm/ranges_mismatch.h + __algorithm/ranges_reverse.h __algorithm/ranges_swap_ranges.h __algorithm/ranges_transform.h __algorithm/remove.h diff --git a/libcxx/include/__algorithm/ranges_reverse.h b/libcxx/include/__algorithm/ranges_reverse.h new file mode 100644 index 00000000000000..e7555d0f9acd93 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_reverse.h @@ -0,0 +1,83 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_REVERSE_H +#define _LIBCPP___ALGORITHM_RANGES_REVERSE_H + +#include <__config> +#include <__iterator/concepts.h> +#include <__iterator/iter_swap.h> +#include <__iterator/next.h> +#include <__iterator/permutable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __reverse { +struct __fn { + + template _Sent> + requires permutable<_Iter> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last) const { + if constexpr (random_access_iterator<_Iter>) { + if (__first == __last) + return __first; + + auto __end = ranges::next(__first, __last); + auto __ret = __end; + + while (__first < --__end) { + ranges::iter_swap(__first, 
__end); + ++__first; + } + return __ret; + } else { + auto __end = ranges::next(__first, __last); + auto __ret = __end; + + while (__first != __end) { + if (__first == --__end) + break; + + ranges::iter_swap(__first, __end); + ++__first; + } + return __ret; + } + } + + template + requires permutable> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __range) const { + return (*this)(ranges::begin(__range), ranges::end(__range)); + } + +}; +} // namespace __reverse + +inline namespace __cpo { + inline constexpr auto reverse = __reverse::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_REVERSE_H diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index a95fa769f024a5..224c9e1ab84656 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -274,6 +274,15 @@ namespace ranges { indirect_unary_predicate, Proj>> Pred> constexpr bool ranges::is_partitioned(R&& r, Pred pred, Proj proj = {}); // since C++20 + + template S> + requires permutable + constexpr I ranges::reverse(I first, S last); // since C++20 + + template + requires permutable> + constexpr borrowed_iterator_t ranges::reverse(R&& r); // since C++20 + } constexpr bool // constexpr in C++20 @@ -1009,6 +1018,7 @@ template #include <__algorithm/ranges_minmax.h> #include <__algorithm/ranges_minmax_element.h> #include <__algorithm/ranges_mismatch.h> +#include <__algorithm/ranges_reverse.h> #include <__algorithm/ranges_swap_ranges.h> #include <__algorithm/ranges_transform.h> #include <__algorithm/remove.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index d62e6ae4df47f4..132793f1172c39 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -318,6 +318,7 @@ module std [system] { module ranges_minmax { private header 
"__algorithm/ranges_minmax.h" } module ranges_minmax_element { private header "__algorithm/ranges_minmax_element.h" } module ranges_mismatch { private header "__algorithm/ranges_mismatch.h" } + module ranges_reverse { private header "__algorithm/ranges_reverse.h" } module ranges_swap_ranges { private header "__algorithm/ranges_swap_ranges.h" } module ranges_transform { private header "__algorithm/ranges_transform.h" } module remove { private header "__algorithm/remove.h" } diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index d85a7e99fa81f9..fe0f19baf6af9e 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -122,6 +122,7 @@ END-SCRIPT #include <__algorithm/ranges_minmax.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_minmax.h'}} #include <__algorithm/ranges_minmax_element.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_minmax_element.h'}} #include <__algorithm/ranges_mismatch.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_mismatch.h'}} +#include <__algorithm/ranges_reverse.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_reverse.h'}} #include <__algorithm/ranges_swap_ranges.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_swap_ranges.h'}} #include <__algorithm/ranges_transform.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_transform.h'}} #include <__algorithm/remove.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/remove.h'}} diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp new file mode 100644 
index 00000000000000..14474706a15e71 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp @@ -0,0 +1,120 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// + +// template S> +// requires permutable +// constexpr I ranges::reverse(I first, S last); +// template +// requires permutable> +// constexpr borrowed_iterator_t ranges::reverse(R&& r); + +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +template > +concept HasReverseIt = requires (Iter first, Sent last) { std::ranges::reverse(first, last); }; + +static_assert(HasReverseIt); +static_assert(!HasReverseIt); +static_assert(!HasReverseIt); +static_assert(!HasReverseIt); +static_assert(!HasReverseIt); + + +template +concept HasReverseR = requires (Range range) { std::ranges::reverse(range); }; + +static_assert(HasReverseR>); +static_assert(!HasReverseR); +static_assert(!HasReverseR); +static_assert(!HasReverseR); +static_assert(!HasReverseR); + +template +constexpr void test(std::array value, std::array expected) { + { + auto val = value; + std::same_as decltype(auto) ret = std::ranges::reverse(Iter(val.data()), Sent(Iter(val.data() + val.size()))); + assert(val == expected); + assert(base(ret) == val.data() + val.size()); + } + { + auto val = value; + auto range = std::ranges::subrange(Iter(val.data()), Sent(Iter(val.data() + val.size()))); + std::same_as decltype(auto) ret = std::ranges::reverse(range); + assert(val == expected); + assert(base(ret) == val.data() + val.size()); + 
} +} + +template +constexpr void test_iterators() { + // simple test + test({1, 2, 3, 4}, {4, 3, 2, 1}); + // check that an odd number of elements works + test({1, 2, 3, 4, 5, 6, 7}, {7, 6, 5, 4, 3, 2, 1}); + // check that an empty range works + test({}, {}); + // check that a single element works + test({5}, {5}); +} + +struct SwapCounter { + int* counter; + constexpr SwapCounter(int* counter_) : counter(counter_) {} + friend constexpr void swap(SwapCounter& lhs, SwapCounter&) { ++*lhs.counter; } +}; + +constexpr bool test() { + test_iterators>(); + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators, sentinel_wrapper>>(); + test_iterators(); + + // check that std::ranges::dangling is returned + { + [[maybe_unused]] std::same_as auto ret = std::ranges::reverse(std::array {1, 2, 3, 4}); + } + + { + { + int counter = 0; + SwapCounter a[] = {&counter, &counter, &counter, &counter}; + std::ranges::reverse(a); + assert(counter == 2); + } + { + int counter = 0; + SwapCounter a[] = {&counter, &counter, &counter, &counter}; + std::ranges::reverse(a, a + 4); + assert(counter == 2); + } + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp index 812b58595b9a4d..03c0563597b985 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp @@ -125,7 +125,7 @@ static_assert(test(std::ranges::mismatch, a, a)); //static_assert(test(std::ranges::replace_copy, a, a, 42, 43)); //static_assert(test(std::ranges::replace_copy_if, a, a, odd, 43)); 
//static_assert(test(std::ranges::replace_if, a, odd, 43)); -//static_assert(test(std::ranges::reverse, a)); +static_assert(test(std::ranges::reverse, a)); //static_assert(test(std::ranges::reverse_copy, a, a)); //static_assert(test(std::ranges::rotate, a, a+5)); //static_assert(test(std::ranges::rotate_copy, a, a+5, a)); diff --git a/libcxx/test/support/almost_satisfies_types.h b/libcxx/test/support/almost_satisfies_types.h index 95d9b4736c3b72..49984a42c88e43 100644 --- a/libcxx/test/support/almost_satisfies_types.h +++ b/libcxx/test/support/almost_satisfies_types.h @@ -139,4 +139,85 @@ class WeaklyIncrementableNotMovable { static_assert(!std::movable); static_assert(!std::weakly_incrementable); +class BidirectionalIteratorNotDerivedFrom { +public: + using difference_type = long; + using value_type = int; + using iterator_category = std::forward_iterator_tag; + + BidirectionalIteratorNotDerivedFrom& operator++(); + BidirectionalIteratorNotDerivedFrom operator++(int); + BidirectionalIteratorNotDerivedFrom& operator--(); + BidirectionalIteratorNotDerivedFrom operator--(int); + int& operator*() const; + + bool operator==(const BidirectionalIteratorNotDerivedFrom&) const = default; +}; + +using BidirectionalRangeNotDerivedFrom = UncheckedRange; + +static_assert(std::forward_iterator); +static_assert(!std::bidirectional_iterator); +static_assert(!std::ranges::bidirectional_range); + +class BidirectionalIteratorNotDecrementable { +public: + using difference_type = long; + using value_type = int; + using iterator_category = std::bidirectional_iterator_tag; + + BidirectionalIteratorNotDecrementable& operator++(); + BidirectionalIteratorNotDecrementable operator++(int); + int& operator*() const; + + bool operator==(const BidirectionalIteratorNotDecrementable&) const = default; +}; + +using BidirectionalRangeNotDecrementable = UncheckedRange; + +static_assert(std::forward_iterator); +static_assert(!std::bidirectional_iterator); 
+static_assert(!std::ranges::bidirectional_range); + +class PermutableNotForwardIterator { +public: + using difference_type = long; + using value_type = int; + using iterator_category = std::input_iterator_tag; + + PermutableNotForwardIterator& operator++(); + void operator++(int); + int& operator*() const; +}; + +using PermutableRangeNotForwardIterator = UncheckedRange; + +static_assert(std::input_iterator); +static_assert(!std::forward_iterator); +static_assert(!std::permutable); + +class PermutableNotSwappable { +public: + class NotSwappable { + NotSwappable(NotSwappable&&) = delete; + }; + + using difference_type = long; + using value_type = NotSwappable; + using iterator_category = std::contiguous_iterator_tag; + + PermutableNotSwappable& operator++(); + PermutableNotSwappable operator++(int); + NotSwappable& operator*() const; + + bool operator==(const PermutableNotSwappable&) const = default; +}; + +using PermutableRangeNotSwappable = UncheckedRange; + +static_assert(std::input_iterator); +static_assert(std::forward_iterator); +static_assert(!std::permutable); +static_assert(!std::indirectly_swappable); + #endif // ALMOST_SATISFIES_TYPES_H From 3245e2edd50c87fe9ceb98e8742566dd5f67dc44 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 24 May 2022 08:37:12 +0000 Subject: [PATCH 324/908] [gn build] Port 1d1a191edcfa --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 7c07e1cb44c17d..335397c3f974af 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -141,6 +141,7 @@ if (current_toolchain == default_toolchain) { "__algorithm/ranges_minmax.h", "__algorithm/ranges_minmax_element.h", "__algorithm/ranges_mismatch.h", + "__algorithm/ranges_reverse.h", "__algorithm/ranges_swap_ranges.h", "__algorithm/ranges_transform.h", "__algorithm/remove.h", 
From e1fcf998dc95f832da440ce9227c83fa6af96011 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 10:49:08 +0200 Subject: [PATCH 325/908] [InstCombine] Add splat vector test for asymmetric masked icmp fold (NFC) --- .../test/Transforms/InstCombine/icmp-logical.ll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-logical.ll b/llvm/test/Transforms/InstCombine/icmp-logical.ll index 4bdecea2dc56f0..cd15f5a088d125 100644 --- a/llvm/test/Transforms/InstCombine/icmp-logical.ll +++ b/llvm/test/Transforms/InstCombine/icmp-logical.ll @@ -457,6 +457,23 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { ret i1 %t5 } +define <2 x i1> @masked_icmps_mask_notallzeros_bmask_mixed_1_vector(<2 x i32> %x) { +; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1_vector( +; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T2:%.*]] = icmp ne <2 x i32> [[T1]], zeroinitializer +; CHECK-NEXT: [[T3:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[T4:%.*]] = icmp eq <2 x i32> [[T3]], +; CHECK-NEXT: [[T5:%.*]] = and <2 x i1> [[T2]], [[T4]] +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t1 = and <2 x i32> %x, + %t2 = icmp ne <2 x i32> %t1, zeroinitializer + %t3 = and <2 x i32> %x, + %t4 = icmp eq <2 x i32> %t3, + %t5 = and <2 x i1> %t2, %t4 + ret <2 x i1> %t5 +} + define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1_logical(i32 %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 From 5abaabed22d749360d2c1d77907d927207de87b4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 10:56:16 +0200 Subject: [PATCH 326/908] [InstCombine] Use m_APInt() in asymmetric masked icmp fold This is mostly intended as code cleanup, but it does also add support for splat vectors to this fold. 
--- .../InstCombine/InstCombineAndOrXor.cpp | 38 +++++++++---------- .../Transforms/InstCombine/icmp-logical.ll | 9 ++--- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 0ddbaa4b065990..41967ee21a065e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -379,9 +379,9 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // // We currently handle the case of B, C, D, E are constant. // - ConstantInt *BCst, *CCst, *DCst, *ECst; - if (!match(B, m_ConstantInt(BCst)) || !match(C, m_ConstantInt(CCst)) || - !match(D, m_ConstantInt(DCst)) || !match(E, m_ConstantInt(ECst))) + const APInt *BCst, *CCst, *DCst, *OrigECst; + if (!match(B, m_APInt(BCst)) || !match(C, m_APInt(CCst)) || + !match(D, m_APInt(DCst)) || !match(E, m_APInt(OrigECst))) return nullptr; ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; @@ -390,19 +390,20 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // canonicalized as, // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or // (icmp ne (A & D), D) -> (icmp eq (A & D), 0). + APInt ECst = *OrigECst; if (PredR != NewCC) - ECst = cast(ConstantExpr::getXor(DCst, ECst)); + ECst ^= *DCst; // If B or D is zero, skip because if LHS or RHS can be trivially folded by // other folding rules and this pattern won't apply any more. - if (BCst->getValue() == 0 || DCst->getValue() == 0) + if (*BCst == 0 || *DCst == 0) return nullptr; // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't // deduce anything from it. // For example, // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding. 
- if ((BCst->getValue() & DCst->getValue()) == 0) + if ((*BCst & *DCst) == 0) return nullptr; // If the following two conditions are met: @@ -421,22 +422,21 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // For example, // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9) // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8) - if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) && - (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) { - APInt BorD = BCst->getValue() | DCst->getValue(); - APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) | - ECst->getValue(); - Value *NewMask = ConstantInt::get(BCst->getType(), BorD); - Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE); + if ((((*BCst & *DCst) & ECst) == 0) && + (*BCst & (*BCst ^ *DCst)).isPowerOf2()) { + APInt BorD = *BCst | *DCst; + APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst; + Value *NewMask = ConstantInt::get(A->getType(), BorD); + Value *NewMaskedValue = ConstantInt::get(A->getType(), BandBxorDorE); Value *NewAnd = Builder.CreateAnd(A, NewMask); return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue); } - auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C1->getValue(); + auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) { + return (*C1 & *C2) == *C1; }; - auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C2->getValue(); + auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) { + return (*C1 & *C2) == *C2; }; // In the following, we consider only the cases where B is a superset of D, B @@ -456,7 +456,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // For example, // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false. // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding. 
- if (ECst->isZero()) { + if (ECst.isZero()) { if (IsSubSetOrEqual(BCst, DCst)) return ConstantInt::get(LHS->getType(), !IsAnd); return nullptr; @@ -474,7 +474,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( // ie. (B & E) != 0, then LHS is subsumed by RHS. For example. // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code"); - if ((BCst->getValue() & ECst->getValue()) != 0) + if ((*BCst & ECst) != 0) return RHS; // Otherwise, LHS and RHS contradict and the whole expression becomes false // (or true if negated.) For example, diff --git a/llvm/test/Transforms/InstCombine/icmp-logical.ll b/llvm/test/Transforms/InstCombine/icmp-logical.ll index cd15f5a088d125..a96f66de821c2d 100644 --- a/llvm/test/Transforms/InstCombine/icmp-logical.ll +++ b/llvm/test/Transforms/InstCombine/icmp-logical.ll @@ -459,12 +459,9 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { define <2 x i1> @masked_icmps_mask_notallzeros_bmask_mixed_1_vector(<2 x i32> %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1_vector( -; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T2:%.*]] = icmp ne <2 x i32> [[T1]], zeroinitializer -; CHECK-NEXT: [[T3:%.*]] = and <2 x i32> [[X]], -; CHECK-NEXT: [[T4:%.*]] = icmp eq <2 x i32> [[T3]], -; CHECK-NEXT: [[T5:%.*]] = and <2 x i1> [[T2]], [[T4]] -; CHECK-NEXT: ret <2 x i1> [[T5]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %t1 = and <2 x i32> %x, %t2 = icmp ne <2 x i32> %t1, zeroinitializer From 4aa32e1b17da4d29fe2db0fe1cfb0171243b4fed Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 11:10:36 +0200 Subject: [PATCH 327/908] [InstCombine] Add tests for masked icmps with bitwise+logical and (NFC) --- .../Transforms/InstCombine/and-or-icmps.ll | 132 ++++++++++++++++++ 1 file changed, 
132 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 90f995faee327b..6a79862d9f1e22 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -1979,3 +1979,135 @@ define i1 @logical_or_logical_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { ret i1 %or2 } +define i1 @bitwise_and_logical_and_masked_icmp_asymmetric(i1 %c, i32 %x) { +; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_asymmetric( +; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 255 +; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[X_M1]], 0 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 11 +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 11 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %x.m1 = and i32 %x, 255 + %c1 = icmp ne i32 %x.m1, 0 + %and1 = select i1 %c1, i1 %c, i1 false + %x.m2 = and i32 %x, 11 + %c2 = icmp eq i32 %x.m2, 11 + %and2 = and i1 %and1, %c2 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_masked_icmp_allzeros(i1 %c, i32 %x) { +; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allzeros( +; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 8 +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X_M1]], 0 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %x.m1 = and i32 %x, 8 + %c1 = icmp eq i32 %x.m1, 0 + %and1 = select i1 %c1, i1 %c, i1 false + %x.m2 = and i32 %x, 7 + %c2 = icmp eq i32 %x.m2, 0 + %and2 = and i1 %and1, %c2 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_masked_icmp_allzeros_poison1(i1 %c, i32 %x, i32 %y) { +; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allzeros_poison1( +; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 
[[Y:%.*]] +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X_M1]], 0 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %x.m1 = and i32 %x, %y + %c1 = icmp eq i32 %x.m1, 0 + %and1 = select i1 %c1, i1 %c, i1 false + %x.m2 = and i32 %x, 7 + %c2 = icmp eq i32 %x.m2, 0 + %and2 = and i1 %and1, %c2 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_masked_icmp_allzeros_poison2(i1 %c, i32 %x, i32 %y) { +; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allzeros_poison2( +; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 8 +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X_M1]], 0 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 0 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %x.m1 = and i32 %x, 8 + %c1 = icmp eq i32 %x.m1, 0 + %and1 = select i1 %c1, i1 %c, i1 false + %x.m2 = and i32 %x, %y + %c2 = icmp eq i32 %x.m2, 0 + %and2 = and i1 %and1, %c2 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_masked_icmp_allones(i1 %c, i32 %x) { +; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allones( +; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 8 +; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[X_M1]], 0 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 7 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %x.m1 = and i32 %x, 8 + %c1 = icmp eq i32 %x.m1, 8 + %and1 = select i1 %c1, i1 %c, i1 false + %x.m2 = and i32 %x, 7 + %c2 = icmp eq i32 %x.m2, 7 + %and2 = and i1 %and1, %c2 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_masked_icmp_allones_poison1(i1 
%c, i32 %x, i32 %y) { +; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allones_poison1( +; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X_M1]], [[Y]] +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 7 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %x.m1 = and i32 %x, %y + %c1 = icmp eq i32 %x.m1, %y + %and1 = select i1 %c1, i1 %c, i1 false + %x.m2 = and i32 %x, 7 + %c2 = icmp eq i32 %x.m2, 7 + %and2 = and i1 %and1, %c2 + ret i1 %and2 +} + +define i1 @bitwise_and_logical_and_masked_icmp_allones_poison2(i1 %c, i32 %x, i32 %y) { +; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allones_poison2( +; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 8 +; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[X_M1]], 0 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], [[Y]] +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %x.m1 = and i32 %x, 8 + %c1 = icmp eq i32 %x.m1, 8 + %and1 = select i1 %c1, i1 %c, i1 false + %x.m2 = and i32 %x, %y + %c2 = icmp eq i32 %x.m2, %y + %and2 = and i1 %and1, %c2 + ret i1 %and2 +} From a7c079aaa227d55ad9cb6b916500d99b4fdf26d2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 10:45:29 +0200 Subject: [PATCH 328/908] [InstCombine] Support logical and in masked icmp fold Most of the folds implemented in this function work fine with logical operations. We only need to be careful for the cases that work on non-constant masks, where the RHS operand shouldn't be poison. This is a conservative implementation that bails out of illegal transforms, but we could also change these to insert freeze instead. 
--- .../InstCombine/InstCombineAndOrXor.cpp | 23 +++++--- .../Transforms/InstCombine/and-or-icmps.ll | 55 +++++++------------ 2 files changed, 35 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 41967ee21a065e..ebe2a3f74d5dbb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -365,6 +365,7 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, /// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros /// and the right hand side is of type BMask_Mixed. For example, /// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8). +/// Also used for logical and/or, must be poison safe. static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, @@ -486,6 +487,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( /// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single /// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side /// aren't of the common mask pattern type. +/// Also used for logical and/or, must be poison safe. static Value *foldLogOpOfMaskedICmpsAsymmetric( ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, @@ -520,6 +522,7 @@ static Value *foldLogOpOfMaskedICmpsAsymmetric( /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). 
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, + bool IsLogical, InstCombiner::BuilderTy &Builder) { Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); @@ -564,6 +567,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & Mask_AllZeros) { // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) // -> (icmp eq (A & (B|D)), 0) + if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) + return nullptr; // TODO: Use freeze? Value *NewOr = Builder.CreateOr(B, D); Value *NewAnd = Builder.CreateAnd(A, NewOr); // We can't use C as zero because we might actually handle @@ -575,6 +580,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & BMask_AllOnes) { // (icmp eq (A & B), B) & (icmp eq (A & D), D) // -> (icmp eq (A & (B|D)), (B|D)) + if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) + return nullptr; // TODO: Use freeze? Value *NewOr = Builder.CreateOr(B, D); Value *NewAnd = Builder.CreateAnd(A, NewOr); return Builder.CreateICmp(NewCC, NewAnd, NewOr); @@ -582,6 +589,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & AMask_AllOnes) { // (icmp eq (A & B), A) & (icmp eq (A & D), A) // -> (icmp eq (A & (B&D)), A) + if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) + return nullptr; // TODO: Use freeze? Value *NewAnd1 = Builder.CreateAnd(B, D); Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1); return Builder.CreateICmp(NewCC, NewAnd2, A); @@ -2442,15 +2451,11 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, } } - // TODO: Some (but not all) of the patterns handled by this function are - // safe with logical and/or. 
- if (!IsLogical) { - // handle (roughly): - // (icmp ne (A & B), C) | (icmp ne (A & D), E) - // (icmp eq (A & B), C) & (icmp eq (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, Builder)) - return V; - } + // handle (roughly): + // (icmp ne (A & B), C) | (icmp ne (A & D), E) + // (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder)) + return V; // TODO: One of these directions is fine with logical and/or, the other could // be supported by inserting freeze. diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 6a79862d9f1e22..d8629ed7dc867b 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -1981,13 +1981,10 @@ define i1 @logical_or_logical_or_icmps_comm3(i8 %x, i8 %y, i8 %z) { define i1 @bitwise_and_logical_and_masked_icmp_asymmetric(i1 %c, i32 %x) { ; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_asymmetric( -; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 255 -; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[X_M1]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false -; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 11 +; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X:%.*]], 11 ; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 11 -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C2]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: ret i1 [[TMP1]] ; %x.m1 = and i32 %x, 255 %c1 = icmp ne i32 %x.m1, 0 @@ -2000,13 +1997,10 @@ define i1 @bitwise_and_logical_and_masked_icmp_asymmetric(i1 %c, i32 %x) { define i1 @bitwise_and_logical_and_masked_icmp_allzeros(i1 %c, i32 %x) { ; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allzeros( -; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 8 -; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X_M1]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 
[[C1]], i1 [[C:%.*]], i1 false -; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 -; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 0 -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: ret i1 [[TMP3]] ; %x.m1 = and i32 %x, 8 %c1 = icmp eq i32 %x.m1, 0 @@ -2019,13 +2013,11 @@ define i1 @bitwise_and_logical_and_masked_icmp_allzeros(i1 %c, i32 %x) { define i1 @bitwise_and_logical_and_masked_icmp_allzeros_poison1(i1 %c, i32 %x, i32 %y) { ; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allzeros_poison1( -; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X_M1]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false -; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 -; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 0 -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: ret i1 [[TMP4]] ; %x.m1 = and i32 %x, %y %c1 = icmp eq i32 %x.m1, 0 @@ -2057,13 +2049,10 @@ define i1 @bitwise_and_logical_and_masked_icmp_allzeros_poison2(i1 %c, i32 %x, i define i1 @bitwise_and_logical_and_masked_icmp_allones(i1 %c, i32 %x) { ; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allones( -; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], 8 -; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[X_M1]], 0 -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false -; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 -; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 7 -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] -; CHECK-NEXT: ret 
i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 15 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: ret i1 [[TMP3]] ; %x.m1 = and i32 %x, 8 %c1 = icmp eq i32 %x.m1, 8 @@ -2076,13 +2065,11 @@ define i1 @bitwise_and_logical_and_masked_icmp_allones(i1 %c, i32 %x) { define i1 @bitwise_and_logical_and_masked_icmp_allones_poison1(i1 %c, i32 %x, i32 %y) { ; CHECK-LABEL: @bitwise_and_logical_and_masked_icmp_allones_poison1( -; CHECK-NEXT: [[X_M1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X_M1]], [[Y]] -; CHECK-NEXT: [[AND1:%.*]] = select i1 [[C1]], i1 [[C:%.*]], i1 false -; CHECK-NEXT: [[X_M2:%.*]] = and i32 [[X]], 7 -; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[X_M2]], 7 -; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[C2]] -; CHECK-NEXT: ret i1 [[AND2]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i1 [[C:%.*]], i1 false +; CHECK-NEXT: ret i1 [[TMP4]] ; %x.m1 = and i32 %x, %y %c1 = icmp eq i32 %x.m1, %y From b166aa833e44a5af6d6f39c34b79fe21b443e424 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Fri, 20 May 2022 16:24:37 +0800 Subject: [PATCH 329/908] [RISCV][NFC] Change interface of RVVIntrinsic::getSuffixStr This NFC patch is split out from D111617. Use llvm::ArrayRef rather than llvm::SmallVector: ArrayRef is a more generic interface that can accept both llvm::ArrayRef and llvm::SmallVector.
Reviewed By: reames Differential Revision: https://reviews.llvm.org/D125893 --- clang/include/clang/Support/RISCVVIntrinsicUtils.h | 6 +++--- clang/lib/Support/RISCVVIntrinsicUtils.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h index 2de7f855559f0e..0a7186d42c8561 100644 --- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h +++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h @@ -340,9 +340,9 @@ class RVVIntrinsic { // Return the type string for a BUILTIN() macro in Builtins.def. std::string getBuiltinTypeStr() const; - static std::string getSuffixStr( - BasicType Type, int Log2LMUL, - const llvm::SmallVector &PrototypeDescriptors); + static std::string + getSuffixStr(BasicType Type, int Log2LMUL, + llvm::ArrayRef PrototypeDescriptors); }; } // end namespace RISCV diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp index 43189f6cf9f50a..92d29043f2cc0c 100644 --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -922,7 +922,7 @@ std::string RVVIntrinsic::getBuiltinTypeStr() const { std::string RVVIntrinsic::getSuffixStr( BasicType Type, int Log2LMUL, - const llvm::SmallVector &PrototypeDescriptors) { + llvm::ArrayRef PrototypeDescriptors) { SmallVector SuffixStrs; for (auto PD : PrototypeDescriptors) { auto T = RVVType::computeType(Type, Log2LMUL, PD); From f50be3d21808ae113c40a68a9ac6581f203d92d2 Mon Sep 17 00:00:00 2001 From: Sheng Date: Tue, 24 May 2022 10:53:48 +0800 Subject: [PATCH 330/908] [TableGen] Remove code beads Code beads is useless since the only user, M68k, has moved on to a new encoding/decoding infrastructure. 
--- llvm/lib/Target/M68k/CMakeLists.txt | 1 - llvm/utils/TableGen/CMakeLists.txt | 1 - llvm/utils/TableGen/CodeBeadsGen.cpp | 135 ------------------ llvm/utils/TableGen/TableGen.cpp | 6 - llvm/utils/TableGen/TableGenBackends.h | 1 - .../gn/secondary/llvm/utils/TableGen/BUILD.gn | 1 - 6 files changed, 145 deletions(-) delete mode 100644 llvm/utils/TableGen/CodeBeadsGen.cpp diff --git a/llvm/lib/Target/M68k/CMakeLists.txt b/llvm/lib/Target/M68k/CMakeLists.txt index 5e398217c181ca..86f785f301aee4 100644 --- a/llvm/lib/Target/M68k/CMakeLists.txt +++ b/llvm/lib/Target/M68k/CMakeLists.txt @@ -7,7 +7,6 @@ tablegen(LLVM M68kGenRegisterInfo.inc -gen-register-info) tablegen(LLVM M68kGenRegisterBank.inc -gen-register-bank) tablegen(LLVM M68kGenInstrInfo.inc -gen-instr-info) tablegen(LLVM M68kGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM M68kGenMCCodeBeads.inc -gen-code-beads) tablegen(LLVM M68kGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM M68kGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM M68kGenDAGISel.inc -gen-dag-isel) diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt index af77744de4ce56..0974ee8eb244b3 100644 --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -8,7 +8,6 @@ add_tablegen(llvm-tblgen LLVM AsmWriterInst.cpp Attributes.cpp CallingConvEmitter.cpp - CodeBeadsGen.cpp CodeEmitterGen.cpp CodeGenDAGPatterns.cpp CodeGenHwModes.cpp diff --git a/llvm/utils/TableGen/CodeBeadsGen.cpp b/llvm/utils/TableGen/CodeBeadsGen.cpp deleted file mode 100644 index 512f3541fa1ff4..00000000000000 --- a/llvm/utils/TableGen/CodeBeadsGen.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//===---------- CodeBeadsGen.cpp - Code Beads Generator -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// CodeBeads are data fields carrying auxiliary information for instructions. -// -// Under the hood it's simply implemented by a `bits` field (with arbitrary -// length) in each TG instruction description, where this TG backend will -// generate a helper function to access it. -// -// This is especially useful for expressing variable length encoding -// instructions and complex addressing modes. Since in those cases each -// instruction is usually associated with large amount of information like -// addressing mode details used on a specific operand. Instead of retreating to -// ad-hoc methods to figure out these information when encoding an instruction, -// CodeBeads provide a clean table for the instruction encoder to lookup. -//===----------------------------------------------------------------------===// - -#include "CodeGenInstruction.h" -#include "CodeGenTarget.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" -#include -using namespace llvm; - -namespace { - -class CodeBeadsGen { - RecordKeeper &Records; - -public: - CodeBeadsGen(RecordKeeper &R) : Records(R) {} - void run(raw_ostream &OS); -}; - -void CodeBeadsGen::run(raw_ostream &OS) { - CodeGenTarget Target(Records); - std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); - - // For little-endian instruction bit encodings, reverse the bit order - Target.reverseBitsForLittleEndianEncoding(); - - ArrayRef NumberedInstructions = - Target.getInstructionsByEnumValue(); - - // Emit function declaration - OS << "const uint8_t *llvm::" << Target.getInstNamespace(); - OS << "::getMCInstrBeads(unsigned Opcode) {\n"; - - // First, get the maximum bit length among all beads. 
And do some - // simple validation - unsigned MaxBitLength = 0; - - for (const CodeGenInstruction *CGI : NumberedInstructions) { - Record *R = CGI->TheDef; - if (!R->getValue("Beads")) - continue; - - BitsInit *BI = R->getValueAsBitsInit("Beads"); - if (!BI->isComplete()) { - PrintFatalError(R->getLoc(), "Record `" + R->getName() + - "', bit field 'Beads' is not complete"); - } - - MaxBitLength = std::max(MaxBitLength, BI->getNumBits()); - } - - // Number of bytes - unsigned Parts = MaxBitLength / 8; - - // Emit instruction base values - OS << " static const uint8_t InstBits[][" << Parts << "] = {\n"; - for (const CodeGenInstruction *CGI : NumberedInstructions) { - Record *R = CGI->TheDef; - - if (R->getValueAsString("Namespace") == "TargetOpcode" || - !R->getValue("Beads")) { - OS << "\t{ 0x0 },\t// "; - if (R->getValueAsBit("isPseudo")) - OS << "(Pseudo) "; - OS << R->getName() << "\n"; - continue; - } - - BitsInit *BI = R->getValueAsBitsInit("Beads"); - - // Convert to byte array: - // [dcba] -> [a][b][c][d] - OS << "\t{"; - for (unsigned p = 0; p < Parts; ++p) { - unsigned Right = 8 * p; - unsigned Left = Right + 8; - - uint8_t Value = 0; - for (unsigned i = Right; i != Left; ++i) { - unsigned Shift = i % 8; - if (auto *B = dyn_cast(BI->getBit(i))) { - Value |= (static_cast(B->getValue()) << Shift); - } else { - PrintFatalError(R->getLoc(), "Record `" + R->getName() + - "', bit 'Beads[" + Twine(i) + - "]' is not defined"); - } - } - - if (p) - OS << ','; - OS << " 0x"; - OS.write_hex(Value); - OS << ""; - } - OS << " }," << '\t' << "// " << R->getName() << "\n"; - } - OS << "\t{ 0x0 }\n };\n"; - - // Emit initial function code - OS << " return InstBits[Opcode];\n" - << "}\n\n"; -} - -} // End anonymous namespace - -namespace llvm { - -void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Machine Code Beads", OS); - CodeBeadsGen(RK).run(OS); -} - -} // namespace llvm diff --git a/llvm/utils/TableGen/TableGen.cpp 
b/llvm/utils/TableGen/TableGen.cpp index cde49919f54fe5..e9279a64267005 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -25,7 +25,6 @@ enum ActionType { NullBackend, DumpJSON, GenEmitter, - GenCodeBeads, GenRegisterInfo, GenInstrInfo, GenInstrDocs, @@ -82,8 +81,6 @@ cl::opt Action( clEnumValN(DumpJSON, "dump-json", "Dump all records as machine-readable JSON"), clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), - clEnumValN(GenCodeBeads, "gen-code-beads", - "Generate machine code beads"), clEnumValN(GenRegisterInfo, "gen-register-info", "Generate registers and register classes info"), clEnumValN(GenInstrInfo, "gen-instr-info", @@ -164,9 +161,6 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenEmitter: EmitCodeEmitter(Records, OS); break; - case GenCodeBeads: - EmitCodeBeads(Records, OS); - break; case GenRegisterInfo: EmitRegisterInfo(Records, OS); break; diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index 224efa98bae16c..c53b71cad5996c 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -67,7 +67,6 @@ void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS); void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS); void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS); void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS); -void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS); void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS); void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS); void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS); diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index 412557df6015fb..3ab4abda31971a 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -12,7 +12,6 @@ executable("llvm-tblgen") { "Attributes.cpp", 
"CTagsEmitter.cpp", "CallingConvEmitter.cpp", - "CodeBeadsGen.cpp", "CodeEmitterGen.cpp", "CodeGenDAGPatterns.cpp", "CodeGenHwModes.cpp", From 64186e9b351a000123f253557f4f607c33405413 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 May 2022 10:45:12 +0100 Subject: [PATCH 331/908] [X86] Add test showing failure to expand <2 x float> fpow without widening to <4 x float> Similar to D125988 (and I have a pending follow up patch to handle fpow). --- llvm/test/CodeGen/X86/pow-libcall.ll | 53 ++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pow-libcall.ll diff --git a/llvm/test/CodeGen/X86/pow-libcall.ll b/llvm/test/CodeGen/X86/pow-libcall.ll new file mode 100644 index 00000000000000..e90155e5978cf3 --- /dev/null +++ b/llvm/test/CodeGen/X86/pow-libcall.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s + +; FIXME: Ensure vectorized FPOWs are not widened/unrolled such that they get lowered +; into libcalls on undef elements. 
+ +define float @test_fpow_v2f32_multiuse(<2 x float> %a0, <2 x float> %a1, <2 x float> *%p3) nounwind { +; CHECK-LABEL: test_fpow_v2f32_multiuse: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $64, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: callq powf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq powf@PLT +; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq powf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; CHECK-NEXT: callq powf@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: movlps %xmm1, (%rbx) +; CHECK-NEXT: addq $64, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %fp = call <2 x float> @llvm.pow.v2f32(<2 x float> %a0, <2 x float> %a1) 
+ %fp0 = extractelement <2 x float> %fp, i32 0 + %fp1 = extractelement <2 x float> %fp, i32 1 + %res = fadd float %fp0, %fp1 + store <2 x float> %fp, <2 x float> *%p3 + ret float %res +} +declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) + From 3b1de7ab6097adc92d5057bc9172c48806704e8c Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Tue, 24 May 2022 16:46:27 +0800 Subject: [PATCH 332/908] [X86][AMX] Reduce the compiling time for non-amx code. Differential Revision: https://reviews.llvm.org/D126280 --- llvm/lib/Target/X86/X86FastPreTileConfig.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp index 08ccfeda5a4abf..da10964d51ecbd 100644 --- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp @@ -663,6 +663,18 @@ bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) { CfgSS = -1; unsigned NumVirtRegs = MRI->getNumVirtRegs(); + // Abandon early if there is no tile register to config. + bool HasVirtTileReg = false; + for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) { + Register VirtReg = Register::index2VirtReg(I); + if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) { + HasVirtTileReg = true; + break; + } + } + if (!HasVirtTileReg) + return false; + StackSlotForVirtReg.resize(NumVirtRegs); MayLiveAcrossBlocks.clear(); // We will create register during config. 
*3 is to make sure From 1968f765c359304860b063ebe51cee2544433728 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 24 May 2022 17:17:05 +0700 Subject: [PATCH 333/908] [Test] Add LICM test for PR55672 showing problem with freeze instruction --- llvm/test/Transforms/LICM/pr55672.ll | 105 +++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 llvm/test/Transforms/LICM/pr55672.ll diff --git a/llvm/test/Transforms/LICM/pr55672.ll b/llvm/test/Transforms/LICM/pr55672.ll new file mode 100644 index 00000000000000..479bd39f35e714 --- /dev/null +++ b/llvm/test/Transforms/LICM/pr55672.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes='licm' < %s | FileCheck %s + +define void @test_01(i8 addrspace(1)* addrspace(1)* %arg, i32 %arg2) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP103:%.*]] = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[ARG:%.*]] unordered, align 8, !dereferenceable_or_null !0, !align !1 +; CHECK-NEXT: [[TMP117:%.*]] = icmp eq i8 addrspace(1)* [[TMP103]], null +; CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP103]], i64 8 +; CHECK-NEXT: [[TMP119:%.*]] = bitcast i8 addrspace(1)* [[TMP118]] to i32 addrspace(1)* +; CHECK-NEXT: br i1 [[TMP117]], label [[FAIL:%.*]], label [[PREHEADER:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret void +; CHECK: preheader: +; CHECK-NEXT: [[TMP157:%.*]] = load atomic i32, i32 addrspace(1)* [[TMP119]] unordered, align 8 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[TMP151:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[TMP163:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP152:%.*]] = icmp ult i32 [[TMP151]], [[ARG2:%.*]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP152]]) [ "deopt"() ] +; CHECK-NEXT: [[TMP158:%.*]] = icmp ult i32 [[TMP151]], [[TMP157]] +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP158]]) [ "deopt"() ] +; CHECK-NEXT: [[TMP163]] = add i32 [[TMP151]], 1 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + %tmp103 = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %arg unordered, align 8, !dereferenceable_or_null !0, !align !1 + %tmp117 = icmp eq i8 addrspace(1)* %tmp103, null + %tmp118 = getelementptr inbounds i8, i8 addrspace(1)* %tmp103, i64 8 + %tmp119 = bitcast i8 addrspace(1)* %tmp118 to i32 addrspace(1)* + br i1 %tmp117, label %fail, label %preheader + +fail: ; preds = %entry + ret void + +preheader: ; preds = %bb + br label %loop + +loop: ; preds = %loop, %preheader + %tmp151 = phi i32 [ 0, %preheader ], [ %tmp163, %loop ] + %tmp152 = icmp ult i32 %tmp151, %arg2 + call void (i1, ...) @llvm.experimental.guard(i1 %tmp152) [ "deopt"() ] + %tmp157 = load atomic i32, i32 addrspace(1)* %tmp119 unordered, align 8 + %tmp158 = icmp ult i32 %tmp151, %tmp157 + call void (i1, ...) @llvm.experimental.guard(i1 %tmp158) [ "deopt"() ] + %tmp163 = add i32 %tmp151, 1 + br label %loop +} + +; FIXME: should be able to hoist load just as test_01 +define void @test_02(i8 addrspace(1)* addrspace(1)* %arg, i32 %arg2) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP103:%.*]] = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[ARG:%.*]] unordered, align 8, !dereferenceable_or_null !0, !align !1 +; CHECK-NEXT: [[TMP117:%.*]] = icmp eq i8 addrspace(1)* [[TMP103]], null +; CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP103]], i64 8 +; CHECK-NEXT: [[TMP119:%.*]] = bitcast i8 addrspace(1)* [[TMP118]] to i32 addrspace(1)* +; CHECK-NEXT: [[FREEZE:%.*]] = freeze i1 [[TMP117]] +; CHECK-NEXT: br i1 [[FREEZE]], label [[FAIL:%.*]], label [[PREHEADER:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret void +; CHECK: preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[TMP151:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[TMP163:%.*]], [[LOOP]] ] +; CHECK-NEXT: 
[[TMP152:%.*]] = icmp ult i32 [[TMP151]], [[ARG2:%.*]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP152]]) [ "deopt"() ] +; CHECK-NEXT: [[TMP157:%.*]] = load atomic i32, i32 addrspace(1)* [[TMP119]] unordered, align 8 +; CHECK-NEXT: [[TMP158:%.*]] = icmp ult i32 [[TMP151]], [[TMP157]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP158]]) [ "deopt"() ] +; CHECK-NEXT: [[TMP163]] = add i32 [[TMP151]], 1 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + %tmp103 = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %arg unordered, align 8, !dereferenceable_or_null !0, !align !1 + %tmp117 = icmp eq i8 addrspace(1)* %tmp103, null + %tmp118 = getelementptr inbounds i8, i8 addrspace(1)* %tmp103, i64 8 + %tmp119 = bitcast i8 addrspace(1)* %tmp118 to i32 addrspace(1)* + %freeze = freeze i1 %tmp117 + br i1 %freeze, label %fail, label %preheader + +fail: ; preds = %entry + ret void + +preheader: ; preds = %entry + br label %loop + +loop: ; preds = %loop, %preheader + %tmp151 = phi i32 [ 0, %preheader ], [ %tmp163, %loop ] + %tmp152 = icmp ult i32 %tmp151, %arg2 + call void (i1, ...) @llvm.experimental.guard(i1 %tmp152) [ "deopt"() ] + %tmp157 = load atomic i32, i32 addrspace(1)* %tmp119 unordered, align 8 + %tmp158 = icmp ult i32 %tmp151, %tmp157 + call void (i1, ...) @llvm.experimental.guard(i1 %tmp158) [ "deopt"() ] + %tmp163 = add i32 %tmp151, 1 + br label %loop +} + +; Function Attrs: nocallback nofree nosync willreturn +declare void @llvm.experimental.guard(i1, ...) #0 + +attributes #0 = { nocallback nofree nosync willreturn } + +!0 = !{i64 16} +!1 = !{i64 8} From 143ca151061d7266dd97ef6bda7b9db58fb5e4dc Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 24 May 2022 17:22:16 +0700 Subject: [PATCH 334/908] Fix comment in test. 
NFC --- llvm/test/Transforms/LICM/pr55672.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/LICM/pr55672.ll b/llvm/test/Transforms/LICM/pr55672.ll index 479bd39f35e714..b78d9073ad4ae7 100644 --- a/llvm/test/Transforms/LICM/pr55672.ll +++ b/llvm/test/Transforms/LICM/pr55672.ll @@ -33,7 +33,7 @@ entry: fail: ; preds = %entry ret void -preheader: ; preds = %bb +preheader: ; preds = %entry br label %loop loop: ; preds = %loop, %preheader From d61ded1034bb9563c43d048ba0be4eff662e6144 Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Tue, 24 May 2022 11:26:43 +0100 Subject: [PATCH 335/908] [OpenCL] Make -cl-ext a driver option. For generic targets such as SPIR-V clang sets all OpenCL extensions/features as supported by default. However, concrete targets are unlikely to support all extensions/features, which creates a problem when such a generic SPIR-V binary is compiled for a specific target later on. To allow compile-time diagnostics for unsupported features, this flag is now exposed in the clang driver. Differential Revision: https://reviews.llvm.org/D125243 --- clang/docs/OpenCLSupport.rst | 25 ----------------- clang/docs/UsersManual.rst | 39 ++++++++++++++++++++++++--- clang/include/clang/Driver/Options.td | 15 +++++------ clang/lib/Driver/ToolChains/Clang.cpp | 3 +++ clang/test/Driver/opencl.cl | 3 +++ 5 files changed, 48 insertions(+), 37 deletions(-) diff --git a/clang/docs/OpenCLSupport.rst b/clang/docs/OpenCLSupport.rst index 0a4f81cf5ca11a..6b05b5df1d0b41 100644 --- a/clang/docs/OpenCLSupport.rst +++ b/clang/docs/OpenCLSupport.rst @@ -68,31 +68,6 @@ All the options in this section are frontend-only and therefore if used with regular clang driver they require frontend forwarding, e.g. ``-cc1`` or ``-Xclang``. -.. _opencl_cl_ext: - -.. option:: -cl-ext - -Disables support of OpenCL extensions. All OpenCL targets provide a list -of extensions that they support.
Clang allows to amend this using the ``-cl-ext`` -flag with a comma-separated list of extensions prefixed with ``'+'`` or ``'-'``. -The syntax: ``-cl-ext=<(['-'|'+'][,])+>``, where extensions -can be either one of `the OpenCL published extensions -`_ -or any vendor extension. Alternatively, ``'all'`` can be used to enable -or disable all known extensions. - -Example disabling double support for the 64-bit SPIR target: - - .. code-block:: console - - $ clang -cc1 -triple spir64-unknown-unknown -cl-ext=-cl_khr_fp64 test.cl - -Enabling all extensions except double support in R600 AMD GPU can be done using: - - .. code-block:: console - - $ clang -cc1 -triple r600-unknown-unknown -cl-ext=-all,+cl_khr_fp16 test.cl - .. _opencl_finclude_default_header: .. option:: -finclude-default-header diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 8eb676650ec61d..aaba6dffd2e5bf 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3120,6 +3120,34 @@ compile. More information about the standard types and functions is provided in :ref:`the section on the OpenCL Header `. +.. _opencl_cl_ext: + +.. option:: -cl-ext + +Enables/Disables support of OpenCL extensions and optional features. All OpenCL +targets set a list of extensions that they support. Clang allows amending this using +the ``-cl-ext`` flag with a comma-separated list of extensions prefixed with +``'+'`` or ``'-'``. The syntax: ``-cl-ext=<(['-'|'+'][,])+>``, where +extensions can be either one of `the OpenCL published extensions +`_ +or any vendor extension. Alternatively, ``'all'`` can be used to enable +or disable all known extensions. + +Example disabling double support for the 64-bit SPIR-V target: + + .. code-block:: console + + $ clang -target spirv64 -cl-ext=-cl_khr_fp64 test.cl + +Disabling all extensions except half-precision support (``cl_khr_fp16``) for the R600 AMD GPU target can be done using: + + .. 
code-block:: console + + $ clang -target r600 -cl-ext=-all,+cl_khr_fp16 test.cl + +Note that some generic targets e.g. SPIR/SPIR-V enable all extensions/features in +clang by default. + OpenCL Targets -------------- @@ -3167,9 +3195,8 @@ Generic Targets $ clang -target spir test.cl -emit-llvm -c $ clang -target spir64 test.cl -emit-llvm -c - All known OpenCL extensions are supported in the SPIR targets. Clang will - generate SPIR v1.2 compatible IR for OpenCL versions up to 2.0 and SPIR v2.0 - for OpenCL v2.0 or C++ for OpenCL. + Clang will generate SPIR v1.2 compatible IR for OpenCL versions up to 2.0 and + SPIR v2.0 for OpenCL v2.0 or C++ for OpenCL. - x86 is used by some implementations that are x86 compatible and currently remains for backwards compatibility (with older implementations prior to @@ -3183,6 +3210,12 @@ Generic Targets address space map can be added using the :ref:`-ffake-address-space-map ` flag. + All known OpenCL extensions and features are set to supported in the generic targets, + however :option:`-cl-ext` flag can be used to toggle individual extensions and + features. + + + .. _opencl_header: OpenCL Header diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f19734172793ac..9888144b7e9445 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -844,6 +844,7 @@ def bundle : Flag<["-"], "bundle">; def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput, RenderAsInput]>, HelpText<"Pass -b to the linker on AIX (only).">, MetaVarName<"">, Group; +// OpenCL-only Options def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group, Flags<[CC1Option]>, HelpText<"OpenCL only. This option disables all optimizations. 
By default optimizations are enabled.">; def cl_strict_aliasing : Flag<["-"], "cl-strict-aliasing">, Group, Flags<[CC1Option]>, @@ -883,6 +884,11 @@ def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Grou MarshallingInfoFlag>; def cl_no_stdinc : Flag<["-"], "cl-no-stdinc">, Group, HelpText<"OpenCL only. Disables all standard includes containing non-native compiler types and functions.">; +def cl_ext_EQ : CommaJoined<["-"], "cl-ext=">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. Enable or disable OpenCL extensions/optional features. The argument is a comma-separated " + "sequence of one or more extension names, each prefixed by '+' or '-'.">, + MarshallingInfoStringVector>; + def client__name : JoinedOrSeparate<["-"], "client_name">; def combine : Flag<["-", "--"], "combine">, Flags<[NoXarchOption, Unsupported]>; def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">; @@ -6130,15 +6136,6 @@ def disable_pragma_debug_crash : Flag<["-"], "disable-pragma-debug-crash">, } // let Flags = [CC1Option, NoDriverOption] -//===----------------------------------------------------------------------===// -// OpenCL Options -//===----------------------------------------------------------------------===// - -def cl_ext_EQ : CommaJoined<["-"], "cl-ext=">, - HelpText<"OpenCL only. Enable or disable OpenCL extensions. 
The argument is a comma-separated sequence of one or more extension names, each prefixed by '+' or '-'.">, - Flags<[CC1Option, NoDriverOption]>, - MarshallingInfoStringVector>; - //===----------------------------------------------------------------------===// // CUDA Options //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index efaedb6b893d67..bc43bceb8ffbbf 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3458,6 +3458,9 @@ static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs, if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { std::string CLStdStr = std::string("-cl-std=") + A->getValue(); CmdArgs.push_back(Args.MakeArgString(CLStdStr)); + } else if (Arg *A = Args.getLastArg(options::OPT_cl_ext_EQ)) { + std::string CLExtStr = std::string("-cl-ext=") + A->getValue(); + CmdArgs.push_back(Args.MakeArgString(CLExtStr)); } for (const auto &Arg : ForwardedArguments) diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl index b6a9b8a324486d..ba1ace5081503b 100644 --- a/clang/test/Driver/opencl.cl +++ b/clang/test/Driver/opencl.cl @@ -21,6 +21,7 @@ // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s // RUN: %clang -S -### -target spir-unknown-unknown %s 2>&1 | FileCheck --check-prefix=CHECK-W-SPIR-COMPAT %s // RUN: %clang -S -### -target amdgcn-amd-amdhsa-opencl %s 2>&1 | FileCheck --check-prefix=CHECK-NO-W-SPIR-COMPAT %s +// RUN: %clang -S -### -cl-ext="+test_ext" %s 2>&1 | FileCheck --check-prefix=CHECK-EXT %s // CHECK-CL: "-cc1" {{.*}} "-cl-std=CL" // CHECK-CL10: "-cc1" {{.*}} "-cl-std=CL1.0" @@ -50,4 +51,6 @@ // CHECK-W-SPIR-COMPAT: "-Wspir-compat" // CHECK-NO-W-SPIR-COMPAT-NOT: "-Wspir-compat" +// CHECK-EXT: "-cc1" {{.*}} "-cl-ext=+test_ext" + kernel void func(void); From 
96323c9f4c10bef5cb5d527970cabc73eab8aa21 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 May 2022 11:55:36 +0100 Subject: [PATCH 336/908] [X86] scalar_widen_div.ll - remove non-generated CHECKs These were left over from when we converted to the update_llc_test_checks script and were just matching some asm/cfi directives - we have CHECK-LABEL to do this properly now. --- llvm/test/CodeGen/X86/scalar_widen_div.ll | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll index 7905babe2ef929..2e99f7838e63d5 100644 --- a/llvm/test/CodeGen/X86/scalar_widen_div.ll +++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll @@ -4,7 +4,6 @@ ; Verify when widening a divide/remainder operation, we only generate a ; divide/rem per element since divide/remainder can trap. -; CHECK: vectorDiv define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind { ; CHECK-LABEL: vectorDiv: ; CHECK: # %bb.0: # %entry @@ -54,7 +53,6 @@ entry: ret void } -; CHECK: test_char_div define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { ; CHECK-LABEL: test_char_div: ; CHECK: # %bb.0: @@ -74,7 +72,6 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { ret <3 x i8> %div.r } -; CHECK: test_uchar_div define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { ; CHECK-LABEL: test_uchar_div: ; CHECK: # %bb.0: @@ -94,7 +91,6 @@ define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { ret <3 x i8> %div.r } -; CHECK: test_short_div define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { ; CHECK-LABEL: test_short_div: ; CHECK: # %bb.0: @@ -138,7 +134,6 @@ define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { ret <5 x i16> %div.r } -; CHECK: test_ushort_div define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) { ; CHECK-LABEL: test_ushort_div: ; CHECK: # %bb.0: @@ -176,7 +171,6 
@@ define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) { ret <4 x i16> %div.r } -; CHECK: test_uint_div define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { ; CHECK-LABEL: test_uint_div: ; CHECK: # %bb.0: @@ -202,7 +196,6 @@ define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { ret <3 x i32> %div.r } -; CHECK: test_long_div define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { ; CHECK-LABEL: test_long_div: ; CHECK: # %bb.0: @@ -227,7 +220,6 @@ define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { ret <3 x i64> %div.r } -; CHECK: test_ulong_div define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { ; CHECK-LABEL: test_ulong_div: ; CHECK: # %bb.0: @@ -252,7 +244,6 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { ret <3 x i64> %div.r } -; CHECK: test_char_rem define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { ; CHECK-LABEL: test_char_rem: ; CHECK: # %bb.0: @@ -286,7 +277,6 @@ define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { ret <4 x i8> %rem.r } -; CHECK: test_short_rem define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { ; CHECK-LABEL: test_short_rem: ; CHECK: # %bb.0: @@ -330,7 +320,6 @@ define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { ret <5 x i16> %rem.r } -; CHECK: test_uint_rem define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { ; CHECK-LABEL: test_uint_rem: ; CHECK: # %bb.0: @@ -362,7 +351,6 @@ define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { } -; CHECK: test_ulong_rem define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { ; CHECK-LABEL: test_ulong_rem: ; CHECK: # %bb.0: @@ -396,7 +384,6 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { ret <5 x i64> %rem.r } -; CHECK: test_int_div define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { ; CHECK-LABEL: test_int_div: ; CHECK: # %bb.0: # %entry From 
8d2918750684fb54ba32efe410553f5e7711f503 Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Tue, 24 May 2022 13:46:08 +0200 Subject: [PATCH 337/908] Minutes for security group sync-ups have moved to Discourse. --- llvm/docs/GettingInvolved.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 05253154c8f349..4441d858894210 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -172,7 +172,7 @@ writing, the following sync-ups are organized: - Monthly, every 3rd Tuesday - `ics `__ `gcal `__ - - `Minutes/docs `__ + - `Minutes/docs `__ * - `CIRCT `__ - Weekly, on Wednesday - From be4eaf10eef76869eb404f68703a1a09b344a7e5 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Tue, 24 May 2022 12:32:48 +0100 Subject: [PATCH 338/908] [Clang][CodeGen] Fix the cmse-clear-return.c test. Caught with D125604. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D126191 --- clang/test/CodeGen/cmse-clear-return.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clang/test/CodeGen/cmse-clear-return.c b/clang/test/CodeGen/cmse-clear-return.c index 791f485859f32e..158b545159427b 100644 --- a/clang/test/CodeGen/cmse-clear-return.c +++ b/clang/test/CodeGen/cmse-clear-return.c @@ -228,11 +228,12 @@ typedef struct __attribute__((packed)) T14 { T14 t14; __attribute__((cmse_nonsecure_entry)) T14 f14(void) { return t14; } -// CHECK: define {{.*}} @f14() -// CHECK: [[R:%.*]] = load -// CHECK-LE-NOPT-NEXT: [[AND:%.+]] = and i32 [[R]], -1 -// CHECK-BE-NOPT-NEXT: [[AND:%.+]] = and i32 [[R]], -1 -// CHECK_NEXT: ret i32 [[AND]] +// CHECK: define {{.*}} @f14() +// CHECK: [[R:%.*]] = load +// CHECK-LE-OPT: ret i32 [[R]] +// CHECK-LE-NOPT: [[AND:%.+]] = and i32 [[R]], -1 +// CHECK-LE-NOPT: ret i32 [[AND]] +// CHECK-BE-OPT: ret i32 [[R]] // LE: 1111..11 1111..11 11111111 11111111 0xfffff3f3/-3085 // BE: 11..1111 11..1111 11111111 11111111 
0xcfcfffff/-808452097 From 1e2b746390e3c869d34a8651c35e68e26214e410 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 24 May 2022 14:17:49 +0200 Subject: [PATCH 339/908] Revert "[TableGen] Remove code beads" It is breaking the build with: /build/llvm-toolchain-snapshot-15~++20220524114008+96323c9f4c10/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp:478:10: fatal error: 'M68kGenMCCodeBeads.inc' file not found ^~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Remove the #include causes: error: undefined reference to 'llvm::M68k::getMCInstrBeads(unsigned int)' This reverts commit f50be3d21808ae113c40a68a9ac6581f203d92d2. --- llvm/lib/Target/M68k/CMakeLists.txt | 1 + llvm/utils/TableGen/CMakeLists.txt | 1 + llvm/utils/TableGen/CodeBeadsGen.cpp | 135 ++++++++++++++++++ llvm/utils/TableGen/TableGen.cpp | 6 + llvm/utils/TableGen/TableGenBackends.h | 1 + .../gn/secondary/llvm/utils/TableGen/BUILD.gn | 1 + 6 files changed, 145 insertions(+) create mode 100644 llvm/utils/TableGen/CodeBeadsGen.cpp diff --git a/llvm/lib/Target/M68k/CMakeLists.txt b/llvm/lib/Target/M68k/CMakeLists.txt index 86f785f301aee4..5e398217c181ca 100644 --- a/llvm/lib/Target/M68k/CMakeLists.txt +++ b/llvm/lib/Target/M68k/CMakeLists.txt @@ -7,6 +7,7 @@ tablegen(LLVM M68kGenRegisterInfo.inc -gen-register-info) tablegen(LLVM M68kGenRegisterBank.inc -gen-register-bank) tablegen(LLVM M68kGenInstrInfo.inc -gen-instr-info) tablegen(LLVM M68kGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM M68kGenMCCodeBeads.inc -gen-code-beads) tablegen(LLVM M68kGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM M68kGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM M68kGenDAGISel.inc -gen-dag-isel) diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt index 0974ee8eb244b3..af77744de4ce56 100644 --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -8,6 +8,7 @@ add_tablegen(llvm-tblgen LLVM AsmWriterInst.cpp Attributes.cpp CallingConvEmitter.cpp 
+ CodeBeadsGen.cpp CodeEmitterGen.cpp CodeGenDAGPatterns.cpp CodeGenHwModes.cpp diff --git a/llvm/utils/TableGen/CodeBeadsGen.cpp b/llvm/utils/TableGen/CodeBeadsGen.cpp new file mode 100644 index 00000000000000..512f3541fa1ff4 --- /dev/null +++ b/llvm/utils/TableGen/CodeBeadsGen.cpp @@ -0,0 +1,135 @@ +//===---------- CodeBeadsGen.cpp - Code Beads Generator -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// CodeBeads are data fields carrying auxiliary information for instructions. +// +// Under the hood it's simply implemented by a `bits` field (with arbitrary +// length) in each TG instruction description, where this TG backend will +// generate a helper function to access it. +// +// This is especially useful for expressing variable length encoding +// instructions and complex addressing modes. Since in those cases each +// instruction is usually associated with large amount of information like +// addressing mode details used on a specific operand. Instead of retreating to +// ad-hoc methods to figure out these information when encoding an instruction, +// CodeBeads provide a clean table for the instruction encoder to lookup. 
+//===----------------------------------------------------------------------===// + +#include "CodeGenInstruction.h" +#include "CodeGenTarget.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include +using namespace llvm; + +namespace { + +class CodeBeadsGen { + RecordKeeper &Records; + +public: + CodeBeadsGen(RecordKeeper &R) : Records(R) {} + void run(raw_ostream &OS); +}; + +void CodeBeadsGen::run(raw_ostream &OS) { + CodeGenTarget Target(Records); + std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); + + // For little-endian instruction bit encodings, reverse the bit order + Target.reverseBitsForLittleEndianEncoding(); + + ArrayRef NumberedInstructions = + Target.getInstructionsByEnumValue(); + + // Emit function declaration + OS << "const uint8_t *llvm::" << Target.getInstNamespace(); + OS << "::getMCInstrBeads(unsigned Opcode) {\n"; + + // First, get the maximum bit length among all beads. 
And do some + // simple validation + unsigned MaxBitLength = 0; + + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + if (!R->getValue("Beads")) + continue; + + BitsInit *BI = R->getValueAsBitsInit("Beads"); + if (!BI->isComplete()) { + PrintFatalError(R->getLoc(), "Record `" + R->getName() + + "', bit field 'Beads' is not complete"); + } + + MaxBitLength = std::max(MaxBitLength, BI->getNumBits()); + } + + // Number of bytes + unsigned Parts = MaxBitLength / 8; + + // Emit instruction base values + OS << " static const uint8_t InstBits[][" << Parts << "] = {\n"; + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + + if (R->getValueAsString("Namespace") == "TargetOpcode" || + !R->getValue("Beads")) { + OS << "\t{ 0x0 },\t// "; + if (R->getValueAsBit("isPseudo")) + OS << "(Pseudo) "; + OS << R->getName() << "\n"; + continue; + } + + BitsInit *BI = R->getValueAsBitsInit("Beads"); + + // Convert to byte array: + // [dcba] -> [a][b][c][d] + OS << "\t{"; + for (unsigned p = 0; p < Parts; ++p) { + unsigned Right = 8 * p; + unsigned Left = Right + 8; + + uint8_t Value = 0; + for (unsigned i = Right; i != Left; ++i) { + unsigned Shift = i % 8; + if (auto *B = dyn_cast(BI->getBit(i))) { + Value |= (static_cast(B->getValue()) << Shift); + } else { + PrintFatalError(R->getLoc(), "Record `" + R->getName() + + "', bit 'Beads[" + Twine(i) + + "]' is not defined"); + } + } + + if (p) + OS << ','; + OS << " 0x"; + OS.write_hex(Value); + OS << ""; + } + OS << " }," << '\t' << "// " << R->getName() << "\n"; + } + OS << "\t{ 0x0 }\n };\n"; + + // Emit initial function code + OS << " return InstBits[Opcode];\n" + << "}\n\n"; +} + +} // End anonymous namespace + +namespace llvm { + +void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS) { + emitSourceFileHeader("Machine Code Beads", OS); + CodeBeadsGen(RK).run(OS); +} + +} // namespace llvm diff --git a/llvm/utils/TableGen/TableGen.cpp 
b/llvm/utils/TableGen/TableGen.cpp index e9279a64267005..cde49919f54fe5 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -25,6 +25,7 @@ enum ActionType { NullBackend, DumpJSON, GenEmitter, + GenCodeBeads, GenRegisterInfo, GenInstrInfo, GenInstrDocs, @@ -81,6 +82,8 @@ cl::opt Action( clEnumValN(DumpJSON, "dump-json", "Dump all records as machine-readable JSON"), clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), + clEnumValN(GenCodeBeads, "gen-code-beads", + "Generate machine code beads"), clEnumValN(GenRegisterInfo, "gen-register-info", "Generate registers and register classes info"), clEnumValN(GenInstrInfo, "gen-instr-info", @@ -161,6 +164,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenEmitter: EmitCodeEmitter(Records, OS); break; + case GenCodeBeads: + EmitCodeBeads(Records, OS); + break; case GenRegisterInfo: EmitRegisterInfo(Records, OS); break; diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index c53b71cad5996c..224efa98bae16c 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -67,6 +67,7 @@ void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS); void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS); void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS); void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS); +void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS); void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS); void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS); void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS); diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index 3ab4abda31971a..412557df6015fb 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -12,6 +12,7 @@ executable("llvm-tblgen") { "Attributes.cpp", 
"CTagsEmitter.cpp", "CallingConvEmitter.cpp", + "CodeBeadsGen.cpp", "CodeEmitterGen.cpp", "CodeGenDAGPatterns.cpp", "CodeGenHwModes.cpp", From 8b5d9cbbfedc7a91e3b462030e534c5982de674d Mon Sep 17 00:00:00 2001 From: Nabeel Omer Date: Tue, 24 May 2022 13:30:47 +0100 Subject: [PATCH 340/908] [x86][DAG] Unroll vectorized FREMs that will become libcalls Currently, two element vectors produced as the result of a binary op are widened to four element vectors on x86 by DAGTypeLegalizer::WidenVecRes_BinaryCanTrap. If the op still isn't legal after widening it is unrolled into 4 scalar ops in SelectionDAG before being converted into a libcall. This way we end up with 4 libcalls (two of them on known undef elements) instead of the original two libcalls. This patch modifies DAGTypeLegalizer::WidenVectorResult to ensure that if it is known that a binary op will be turned into a libcall, it is unrolled instead of being widened. This prevents the creation of the extra scalar instructions on known undef elements and (eventually) libcalls with known undef parameters which would otherwise be created when the op gets expanded post widening. Differential Revision: https://reviews.llvm.org/D125988 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 47 ++++++++++++------- llvm/test/CodeGen/X86/frem-libcall.ll | 24 ++-------- 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 2a1f313b8493d2..835d5644cfb4f5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3580,6 +3580,22 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { return; SDValue Res = SDValue(); + + auto unrollExpandedOp = [&]() { + // We're going to widen this vector op to a legal type by padding with undef + // elements. 
If the wide vector op is eventually going to be expanded to + // scalar libcalls, then unroll into scalar ops now to avoid unnecessary + // libcalls on the undef elements. + EVT VT = N->getValueType(0); + EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { + Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + return true; + } + return false; + }; + switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -3678,12 +3694,19 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FREM: + if (unrollExpandedOp()) + break; + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those + // like any other binary ops. + LLVM_FALLTHROUGH; + case ISD::FADD: case ISD::FMUL: case ISD::FPOW: case ISD::FSUB: case ISD::FDIV: - case ISD::FREM: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -3766,23 +3789,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: - case ISD::FTRUNC: { - // We're going to widen this vector op to a legal type by padding with undef - // elements. If the wide vector op is eventually going to be expanded to - // scalar libcalls, then unroll into scalar ops now to avoid unnecessary - // libcalls on the undef elements. 
- EVT VT = N->getValueType(0); - EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && - TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { - Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + case ISD::FTRUNC: + if (unrollExpandedOp()) break; - } - } - // If the target has custom/legal support for the scalar FP intrinsic ops - // (they are probably not destined to become libcalls), then widen those like - // any other unary ops. - LLVM_FALLTHROUGH; + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those + // like any other unary ops. + LLVM_FALLTHROUGH; case ISD::ABS: case ISD::BITREVERSE: diff --git a/llvm/test/CodeGen/X86/frem-libcall.ll b/llvm/test/CodeGen/X86/frem-libcall.ll index b48139215d6ff3..1809656301c06e 100644 --- a/llvm/test/CodeGen/X86/frem-libcall.ll +++ b/llvm/test/CodeGen/X86/frem-libcall.ll @@ -1,49 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s -; FIXME: Ensure vectorized FREMs are not widened/unrolled such that they get lowered +; Ensure vectorized FREMs are not widened/unrolled such that they get lowered ; into libcalls on undef elements. 
define float @frem(<2 x float> %a0, <2 x float> %a1, <2 x float> %a2, <2 x float> *%p3) nounwind { ; CHECK-LABEL: frem: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $80, %rsp +; CHECK-NEXT: subq $64, %rsp ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] -; CHECK-NEXT: callq fmodf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] -; CHECK-NEXT: callq fmodf@PLT -; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: callq fmodf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] ; CHECK-NEXT: callq fmodf@PLT ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] ; CHECK-NEXT: divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; CHECK-NEXT: movaps %xmm1, 
%xmm0 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1] ; CHECK-NEXT: addss %xmm1, %xmm0 ; CHECK-NEXT: movlps %xmm1, (%rbx) -; CHECK-NEXT: addq $80, %rsp +; CHECK-NEXT: addq $64, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq %frem = frem <2 x float> %a0, %a1 From f9c806ae5c53c990a935c46ba351cdcfb1271c58 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 24 May 2022 06:01:17 -0700 Subject: [PATCH 341/908] [SLP][NFC]Make isFirstInsertElement a strict weak ordering comparator. To be used correctly in a sort-like function, isFirstInsertElement function must follow the strict weak ordering rule, i.e. isFirstInsertElement(IE1, IE1) should return false. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index beb54dc7822caa..a8de0bffe8d527 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6659,6 +6659,8 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU, /// buildvector sequence. static bool isFirstInsertElement(const InsertElementInst *IE1, const InsertElementInst *IE2) { + if (IE1 == IE2) + return false; const auto *I1 = IE1; const auto *I2 = IE2; const InsertElementInst *PrevI1; From f37101983fc9fabbbde4b10f613ed248a424d71a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 23 May 2022 17:51:30 -0400 Subject: [PATCH 342/908] [OpenMP] Add `-Xoffload-linker` to forward input to the device linker We use the clang-linker-wrapper to perform device linking of embedded offloading object files. This is done by generating those jobs inside of the linker-wrapper itself. This patch adds an argument in Clang and the linker-wrapper that allows users to forward input to the device linking phase. This can either be done for every device linker, or for a specific target triple. 
We use the `-Xoffload-linker ` and the `-Xoffload-linker- ` syntax to accomplish this. Reviewed By: markdewing, tra Differential Revision: https://reviews.llvm.org/D126226 --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/Clang.cpp | 14 +++++++++++++ clang/test/Driver/linker-wrapper.c | 12 +++++++++++ clang/test/Driver/openmp-offload-gpu-new.c | 6 ++++++ .../ClangLinkerWrapper.cpp | 20 +++++++++++++++++++ 5 files changed, 55 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9888144b7e9445..55f4f0999c403a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -823,6 +823,9 @@ def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>, def Xlinker : Separate<["-"], "Xlinker">, Flags<[LinkerInput, RenderAsInput]>, HelpText<"Pass to the linker">, MetaVarName<"">, Group; +def Xoffload_linker : JoinedAndSeparate<["-"], "Xoffload-linker">, + HelpText<"Pass to the offload linkers or the ones idenfied by -">, + MetaVarName<" ">, Group; def Xpreprocessor : Separate<["-"], "Xpreprocessor">, Group, HelpText<"Pass to the preprocessor">, MetaVarName<"">; def X_Flag : Flag<["-"], "X">, Group; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bc43bceb8ffbbf..367001ddd2d393 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8393,6 +8393,20 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); const auto &LinkCommand = C.getJobs().getJobs().back(); + // Forward -Xoffload-linker<-triple> arguments to the device link job. 
+ for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { + StringRef Val = Arg->getValue(0); + if (Val.empty()) + CmdArgs.push_back( + Args.MakeArgString(Twine("-device-linker=") + Arg->getValue(1))); + else + CmdArgs.push_back(Args.MakeArgString( + "-device-linker=" + + ToolChain::getOpenMPTriple(Val.drop_front()).getTriple() + "=" + + Arg->getValue(1))); + } + Args.ClaimAllArgs(options::OPT_Xoffload_linker); + // Add the linker arguments to be forwarded by the wrapper. CmdArgs.push_back("-linker-path"); CmdArgs.push_back(LinkCommand->getExecutable()); diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 62bbd62fd6c29e..51b5f732236391 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -80,3 +80,15 @@ // CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_52 {{.*}}.o // CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o // CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_52,file={{.*}}.out --image=profile=sm_70,file={{.*}}.out + +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ +// RUN: -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --dry-run --host-triple x86_64-unknown-linux-gnu -linker-path \ +// RUN: /usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ +// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LINKER_ARGS + +// LINKER_ARGS: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o a +// LINKER_ARGS: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o a b diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c index 5ea73ccadc071c..d1bd497f65ff46 100644 --- 
a/clang/test/Driver/openmp-offload-gpu-new.c +++ b/clang/test/Driver/openmp-offload-gpu-new.c @@ -104,3 +104,9 @@ // RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBRARY %s // CHECK-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \ +// RUN: -Xoffload-linker a -Xoffload-linker-nvptx64-nvidia-cuda b -Xoffload-linker-nvptx64 c \ +// RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-XLINKER %s + +// CHECK-XLINKER: -device-linker=a{{.*}}-device-linker=nvptx64-nvidia-cuda=b{{.*}}-device-linker=nvptx64-nvidia-cuda=c{{.*}}-- diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 7b1dd15676649b..a06409fdab610e 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -108,6 +108,12 @@ static cl::list cl::desc("Argument to pass to the ptxas invocation"), cl::cat(ClangLinkerWrapperCategory)); +static cl::list + LinkerArgs("device-linker", cl::ZeroOrMore, + cl::desc("Arguments to pass to the device linker invocation"), + cl::value_desc(" or ="), + cl::cat(ClangLinkerWrapperCategory)); + static cl::opt Verbose("v", cl::ZeroOrMore, cl::desc("Verbose output from tools"), cl::init(false), @@ -226,6 +232,17 @@ void printCommands(ArrayRef CmdArgs) { llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n"); } +// Forward user requested arguments to the device linking job. 
+void renderXLinkerArgs(SmallVectorImpl &Args, StringRef Triple) { + for (StringRef Arg : LinkerArgs) { + auto TripleAndValue = Arg.split('='); + if (TripleAndValue.second.empty()) + Args.push_back(TripleAndValue.first); + else if (TripleAndValue.first == Triple) + Args.push_back(TripleAndValue.second); + } +} + std::string getMainExecutable(const char *Name) { void *Ptr = (void *)(intptr_t)&getMainExecutable; auto COWPath = sys::fs::getMainExecutable(Name, Ptr); @@ -531,6 +548,7 @@ Expected link(ArrayRef InputFiles, Triple TheTriple, for (StringRef Input : InputFiles) CmdArgs.push_back(Input); + renderXLinkerArgs(CmdArgs, TheTriple.getTriple()); if (Error Err = executeCommands(*NvlinkPath, CmdArgs)) return std::move(Err); @@ -599,6 +617,7 @@ Expected link(ArrayRef InputFiles, Triple TheTriple, for (StringRef Input : InputFiles) CmdArgs.push_back(Input); + renderXLinkerArgs(CmdArgs, TheTriple.getTriple()); if (Error Err = executeCommands(*LLDPath, CmdArgs)) return std::move(Err); @@ -676,6 +695,7 @@ Expected link(ArrayRef InputFiles, Triple TheTriple, for (StringRef Input : InputFiles) CmdArgs.push_back(Input); + renderXLinkerArgs(CmdArgs, TheTriple.getTriple()); if (Error Err = executeCommands(LinkerUserPath, CmdArgs)) return std::move(Err); From 0360b9f1599b0b13f164d8170a619b19f9cb8bb4 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 18 May 2022 19:24:07 +0200 Subject: [PATCH 343/908] [pseudo] (trivial) bracket-matching Error-tolerant bracket matching enables our error-tolerant parsing strategies. The implementation here is *not* yet error tolerant: this patch sets up the APIs and plumbing, and describes the planned approach. 
Differential Revision: https://reviews.llvm.org/D125911 --- .../pseudo/benchmarks/Benchmark.cpp | 15 +- .../pseudo/include/clang-pseudo/Bracket.h | 41 +++++ .../pseudo/include/clang-pseudo/Token.h | 11 +- clang-tools-extra/pseudo/lib/Bracket.cpp | 155 ++++++++++++++++++ clang-tools-extra/pseudo/lib/CMakeLists.txt | 1 + clang-tools-extra/pseudo/tool/ClangPseudo.cpp | 2 + .../pseudo/unittests/BracketTest.cpp | 117 +++++++++++++ .../pseudo/unittests/CMakeLists.txt | 2 + 8 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 clang-tools-extra/pseudo/include/clang-pseudo/Bracket.h create mode 100644 clang-tools-extra/pseudo/lib/Bracket.cpp create mode 100644 clang-tools-extra/pseudo/unittests/BracketTest.cpp diff --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp index fb028be0c7abd9..b10ff3a175bd20 100644 --- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp +++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp @@ -20,6 +20,7 @@ //===----------------------------------------------------------------------===// #include "benchmark/benchmark.h" +#include "clang-pseudo/Bracket.h" #include "clang-pseudo/DirectiveTree.h" #include "clang-pseudo/Forest.h" #include "clang-pseudo/GLR.h" @@ -89,7 +90,9 @@ TokenStream lexAndPreprocess() { chooseConditionalBranches(DirectiveStructure, RawStream); TokenStream Cook = cook(DirectiveStructure.stripDirectives(RawStream), LangOpts); - return stripComments(Cook); + auto Stream = stripComments(Cook); + pairBrackets(Stream); + return Stream; } static void lex(benchmark::State &State) { @@ -101,6 +104,16 @@ static void lex(benchmark::State &State) { } BENCHMARK(lex); +static void pairBrackets(benchmark::State &State) { + clang::LangOptions LangOpts = genericLangOpts(); + auto Stream = clang::pseudo::lex(*SourceText, LangOpts); + for (auto _ : State) + pairBrackets(Stream); + State.SetBytesProcessed(static_cast(State.iterations()) * + SourceText->size()); +} 
+BENCHMARK(pairBrackets); + static void preprocess(benchmark::State &State) { clang::LangOptions LangOpts = genericLangOpts(); TokenStream RawStream = clang::pseudo::lex(*SourceText, LangOpts); diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Bracket.h b/clang-tools-extra/pseudo/include/clang-pseudo/Bracket.h new file mode 100644 index 00000000000000..268cfff1ab07ab --- /dev/null +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Bracket.h @@ -0,0 +1,41 @@ +//===--- Bracket.h - Analyze bracket structure --------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Bracket structure (particularly braces) is key to isolating broken regions +// of code and preventing parsing from going "off the rails". +// +// For correct C++ code, brackets are well-nested and identifying pairs and +// therefore blocks is simple. In broken code, brackets are not properly nested. +// We cannot match them all and must choose which pairs to form. +// +// Rather than have the grammar-based parser make these choices, we pair +// brackets up-front based on textual features like indentation. +// This mirrors the way humans read code, and so is likely to produce the +// "correct" interpretation of broken code. +// +// This interpretation then guides the parse: a rule containing a bracket pair +// must match against paired bracket tokens. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_PSEUDO_BRACKET_H +#define CLANG_PSEUDO_BRACKET_H + +#include "clang-pseudo/Token.h" + +namespace clang { +namespace pseudo { + +/// Identifies bracket token in the stream which should be paired. +/// Sets Token::Pair accordingly. 
+void pairBrackets(TokenStream &); + +} // namespace pseudo +} // namespace clang + +#endif diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h index 1750f547abd154..b558891f0a8628 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h @@ -88,11 +88,15 @@ struct Token { while (T->Kind == tok::comment); return *T; } + /// Returns the bracket paired with this one, if any. + const Token *pair() const { return Pair == 0 ? nullptr : this + Pair; } /// The type of token as determined by clang's lexer. clang::tok::TokenKind Kind = clang::tok::unknown; + /// If this token is a paired bracket, the offset of the pair in the stream. + int32_t Pair = 0; }; -static_assert(sizeof(Token) <= sizeof(char *) + 16, "Careful with layout!"); +static_assert(sizeof(Token) <= sizeof(char *) + 20, "Careful with layout!"); llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Token &); /// A half-open range of tokens within a stream. @@ -155,6 +159,11 @@ class TokenStream { return tokens().slice(R.Begin, R.End - R.Begin); } + MutableArrayRef tokens() { + assert(isFinalized()); + return Tokens; + } + /// May return the end sentinel if the stream is empty. const Token &front() const { assert(isFinalized()); diff --git a/clang-tools-extra/pseudo/lib/Bracket.cpp b/clang-tools-extra/pseudo/lib/Bracket.cpp new file mode 100644 index 00000000000000..07836146ad8a58 --- /dev/null +++ b/clang-tools-extra/pseudo/lib/Bracket.cpp @@ -0,0 +1,155 @@ +//===--- Bracket.cpp - Analyze bracket structure --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The basic phases of our bracket matching are: +// +// 1) A simple "greedy" match looks for well-nested subsequences. +// +// We can't fully trust the results of this, consider: +// while (1) { // A +// if (true) { // B +// break; +// } // C +// Greedy matching will match B=C, when we should at least consider A=C. +// However for the correct parts of the file, the greedy match gives the +// right answer. It produces useful candidates for phase 2. +// +// simplePairBrackets handles this step. +// +// 2) Try to identify places where formatting indicates that the greedy match +// was correct. This is similar to how a human would scan a large file. +// +// For example: +// int foo() { // X +// // indented +// while (1) { +// // valid code +// } +// return bar(42); +// } // Y +// We can "verify" that X..Y looks like a braced block, and the greedy match +// tells us that substring is perfectly nested. +// We trust the pairings of those brackets and don't examine them further. +// However in the first example above, we do not trust B=C because the brace +// indentation is suspect. +// +// FIXME: implement this step. +// +// 3) Run full best-match optimization on remaining brackets. +// +// Conceptually, this considers all possible matchings and optimizes cost: +// - there is a cost for failing to match a bracket +// - there is a variable cost for matching two brackets. +// (For example if brace indentation doesn't match). +// +// In the first example we have three alternatives, and they are ranked: +// 1) A=C, skip B +// 2) B=C, skip A +// 3) skip A, skip B, skip C +// The cost for skipping a bracket is high, so option 3 is worst. +// B=C costs more than A=C, because the indentation doesn't match. +// +// It would be correct to run this step alone, but it would be too slow. 
+// The implementation is dynamic programming in N^3 space and N^2 time. +// Having earlier steps filter out most brackets is key to performance. +// +// FIXME: implement this step. +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/Bracket.h" + +namespace clang { +namespace pseudo { +namespace { + +struct Bracket { + using Index = unsigned; + constexpr static Index None = -1; + + enum BracketKind : char { Paren, Brace, Square } Kind; + enum Direction : bool { Open, Close } Dir; + unsigned Line; + unsigned Indent; + Token::Index Tok; + Bracket::Index Pair = None; +}; + +// Find brackets in the stream and convert to Bracket struct. +std::vector findBrackets(const TokenStream &Stream) { + std::vector Brackets; + auto Add = [&](const pseudo::Token &Tok, Bracket::BracketKind K, + Bracket::Direction D) { + Brackets.push_back( + {K, D, Tok.Line, Tok.Indent, Stream.index(Tok), Bracket::None}); + }; + for (const auto &Tok : Stream.tokens()) { + switch (Tok.Kind) { + case clang::tok::l_paren: + Add(Tok, Bracket::Paren, Bracket::Open); + break; + case clang::tok::r_paren: + Add(Tok, Bracket::Paren, Bracket::Close); + break; + case clang::tok::l_brace: + Add(Tok, Bracket::Brace, Bracket::Open); + break; + case clang::tok::r_brace: + Add(Tok, Bracket::Brace, Bracket::Close); + break; + case clang::tok::l_square: + Add(Tok, Bracket::Square, Bracket::Open); + break; + case clang::tok::r_square: + Add(Tok, Bracket::Square, Bracket::Close); + break; + default: + break; + } + } + return Brackets; +} + +// Write the bracket pairings from Brackets back to Tokens. +void applyPairings(ArrayRef Brackets, TokenStream &Tokens) { + for (const auto &B : Brackets) + Tokens.tokens()[B.Tok].Pair = + (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok; +} + +// Find perfect pairings (ignoring whitespace) via greedy algorithm. 
+// This means two brackets are paired if they match and the brackets between +// them nest perfectly, with no skipped or crossed brackets. +void simplePairBrackets(MutableArrayRef Brackets) { + std::vector Stack; + for (unsigned I = 0; I < Brackets.size(); ++I) { + if (Brackets[I].Dir == Bracket::Open) { + Stack.push_back(I); + } else if (!Stack.empty() && + Brackets[Stack.back()].Kind == Brackets[I].Kind) { + Brackets[Stack.back()].Pair = I; + Brackets[I].Pair = Stack.back(); + Stack.pop_back(); + } else { + // Unpaired closer, no brackets on stack are part of a perfect sequence. + Stack.clear(); + } + } + // Any remaining brackets on the stack stay unpaired. +} + +} // namespace + +void pairBrackets(TokenStream &Stream) { + auto Brackets = findBrackets(Stream); + simplePairBrackets(Brackets); + applyPairings(Brackets, Stream); +} + +} // namespace pseudo +} // namespace clang diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt index b11d2dd12e2801..6dc8ed5b5e7a26 100644 --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangPseudo + Bracket.cpp DirectiveTree.cpp Forest.cpp GLR.cpp diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index 5a6956df1f708f..1d3ab19b3c09d1 100644 --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "clang-pseudo/Bracket.h" #include "clang-pseudo/DirectiveTree.h" #include "clang-pseudo/GLR.h" #include "clang-pseudo/Grammar.h" @@ -89,6 +90,7 @@ int main(int argc, char *argv[]) { llvm::outs() << DirectiveStructure; ParseableStream = clang::pseudo::stripComments(cook(*Stream, LangOpts)); + pairBrackets(*ParseableStream); } if (Grammar.getNumOccurrences()) { 
diff --git a/clang-tools-extra/pseudo/unittests/BracketTest.cpp b/clang-tools-extra/pseudo/unittests/BracketTest.cpp new file mode 100644 index 00000000000000..1247ddbd49a1db --- /dev/null +++ b/clang-tools-extra/pseudo/unittests/BracketTest.cpp @@ -0,0 +1,117 @@ +//===--- BracketTest.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/Bracket.h" +#include "clang-pseudo/Token.h" +#include "clang/Basic/LangOptions.h" +#include "llvm/Testing/Support/Annotations.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace pseudo { + +// Return a version of Code with each paired bracket marked with ^. +std::string decorate(llvm::StringRef Code, const TokenStream &Stream) { + std::string Result; + const char *Pos = Code.data(); + for (const Token &Tok : Stream.tokens()) { + if (Tok.Pair == 0) + continue; + const char *NewPos = Tok.text().begin(); + assert(NewPos >= Code.begin() && NewPos < Code.end()); + Result.append(Pos, NewPos - Pos); + Result.push_back('^'); + Pos = NewPos; + } + Result.append(Pos, Code.end() - Pos); + return Result; +} + +// Checks that the brackets matched in Stream are those annotated in MarkedCode. +void verifyMatchedSet(llvm::StringRef Code, llvm::StringRef MarkedCode, + const TokenStream &Stream) { + EXPECT_EQ(MarkedCode, decorate(Code, Stream)); +} + +// Checks that paired brackets within the stream nest properly. 
+void verifyNesting(const TokenStream &Stream) { + std::vector Stack; + for (const auto &Tok : Stream.tokens()) { + if (Tok.Pair > 0) + Stack.push_back(&Tok); + else if (Tok.Pair < 0) { + ASSERT_FALSE(Stack.empty()) << Tok; + ASSERT_EQ(Stack.back(), Tok.pair()) + << *Stack.back() << " != " << *Tok.pair() << " = pair of " << Tok; + Stack.pop_back(); + } + } + ASSERT_THAT(Stack, testing::IsEmpty()); +} + +// Checks that ( pairs with a ) on its right, etc. +void verifyMatchKind(const TokenStream &Stream) { + for (const auto &Tok : Stream.tokens()) { + if (Tok.Pair == 0) + continue; + auto Want = [&]() -> std::pair { + switch (Tok.Kind) { + case tok::l_paren: + return {true, tok::r_paren}; + case tok::r_paren: + return {false, tok::l_paren}; + case tok::l_brace: + return {true, tok::r_brace}; + case tok::r_brace: + return {false, tok::l_brace}; + case tok::l_square: + return {true, tok::r_square}; + case tok::r_square: + return {false, tok::l_square}; + default: + ADD_FAILURE() << "Paired non-bracket " << Tok; + return {false, tok::eof}; + } + }(); + EXPECT_EQ(Tok.Pair > 0, Want.first) << Tok; + EXPECT_EQ(Tok.pair()->Kind, Want.second) << Tok; + } +} + +// Verifies an expected bracket pairing like: +// ^( [ ^) +// The input is annotated code, with the brackets expected to be matched marked. +// +// The input doesn't specify which bracket matches with which, but we verify: +// - exactly the marked subset are paired +// - ( is paired to a later ), etc +// - brackets properly nest +// This uniquely determines the bracket structure, so we indirectly verify it. +// If particular tests should emphasize which brackets are paired, use comments. 
+void verifyBrackets(llvm::StringRef MarkedCode) { + SCOPED_TRACE(MarkedCode); + llvm::Annotations A(MarkedCode); + std::string Code = A.code().str(); + LangOptions LangOpts; + auto Stream = lex(Code, LangOpts); + pairBrackets(Stream); + + verifyMatchedSet(Code, MarkedCode, Stream); + verifyNesting(Stream); + verifyMatchKind(Stream); +} + +TEST(Bracket, SimplePair) { + verifyBrackets("^{ ^[ ^( ^) ^( ^) ^] ^}"); + verifyBrackets(") ^{ ^[ ^] ^} ("); + verifyBrackets("{ [ ( ] }"); // FIXME +} + +} // namespace pseudo +} // namespace clang diff --git a/clang-tools-extra/pseudo/unittests/CMakeLists.txt b/clang-tools-extra/pseudo/unittests/CMakeLists.txt index aba8a166748994..73b13984d93e65 100644 --- a/clang-tools-extra/pseudo/unittests/CMakeLists.txt +++ b/clang-tools-extra/pseudo/unittests/CMakeLists.txt @@ -1,9 +1,11 @@ set(LLVM_LINK_COMPONENTS Support + TestingSupport ) add_custom_target(ClangPseudoUnitTests) add_unittest(ClangPseudoUnitTests ClangPseudoTests + BracketTest.cpp DirectiveTreeTest.cpp ForestTest.cpp GLRTest.cpp From e7ddb4f6b4e0b580f7a6f2fdc654aff0a82cffa9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 15:17:19 +0200 Subject: [PATCH 344/908] [InstCombine] Add tests for freeze of load with metadata (NFC) --- llvm/test/Transforms/InstCombine/freeze.ll | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll index b17b32addd3b18..cda37fa633d9b1 100644 --- a/llvm/test/Transforms/InstCombine/freeze.ll +++ b/llvm/test/Transforms/InstCombine/freeze.ll @@ -821,5 +821,37 @@ exit: ; preds = %loop ret void } +define i8* @freeze_load_noundef(i8** %ptr) { +; CHECK-LABEL: @freeze_load_noundef( +; CHECK-NEXT: [[P:%.*]] = load i8*, i8** [[PTR:%.*]], align 8, !noundef !0 +; CHECK-NEXT: ret i8* [[P]] +; + %p = load i8*, i8** %ptr, !noundef !0 + %p.fr = freeze i8* %p + ret i8* %p.fr +} +define i8* @freeze_load_dereferenceable(i8** %ptr) { +; 
CHECK-LABEL: @freeze_load_dereferenceable( +; CHECK-NEXT: [[P:%.*]] = load i8*, i8** [[PTR:%.*]], align 8, !dereferenceable !1 +; CHECK-NEXT: [[P_FR:%.*]] = freeze i8* [[P]] +; CHECK-NEXT: ret i8* [[P_FR]] +; + %p = load i8*, i8** %ptr, !dereferenceable !1 + %p.fr = freeze i8* %p + ret i8* %p.fr +} + +define i8* @freeze_load_dereferenceable_or_null(i8** %ptr) { +; CHECK-LABEL: @freeze_load_dereferenceable_or_null( +; CHECK-NEXT: [[P:%.*]] = load i8*, i8** [[PTR:%.*]], align 8, !dereferenceable_or_null !1 +; CHECK-NEXT: [[P_FR:%.*]] = freeze i8* [[P]] +; CHECK-NEXT: ret i8* [[P_FR]] +; + %p = load i8*, i8** %ptr, !dereferenceable_or_null !1 + %p.fr = freeze i8* %p + ret i8* %p.fr +} +!0 = !{} +!1 = !{i64 4} From b2a13d3e2ddb905a7340dae42a08b9fc985b1054 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 15:48:28 +0200 Subject: [PATCH 345/908] [InstCombine] Use IRBuilder in freeze pushing transform (PR55619) Use IRBuilder so that the newly created freeze instructions automatically gets inserted back into the IC worklist. The changed worklist processing order leads to some cosmetic differences in tests. Fixes https://github.com/llvm/llvm-project/issues/55619. 
--- .../InstCombine/InstructionCombining.cpp | 4 +- llvm/test/Transforms/InstCombine/freeze.ll | 2 +- llvm/test/Transforms/PGOProfile/chr.ll | 60 +++++++++---------- .../X86/vector-reductions-logical.ll | 34 +++++------ 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 4fc6b4de976b40..cd1e4f409d9e00 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3779,11 +3779,11 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { if (!MaybePoisonOperand) return OrigOp; - auto *FrozenMaybePoisonOperand = new FreezeInst( + Builder.SetInsertPoint(OrigOpInst); + auto *FrozenMaybePoisonOperand = Builder.CreateFreeze( MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr"); replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand); - FrozenMaybePoisonOperand->insertBefore(OrigOpInst); return OrigOp; } diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll index cda37fa633d9b1..43b4c4a90e49f4 100644 --- a/llvm/test/Transforms/InstCombine/freeze.ll +++ b/llvm/test/Transforms/InstCombine/freeze.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -passes=instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s define i32 @fold(i32 %x) { ; CHECK-LABEL: @fold( diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll index e2a2e38616609b..18fd7c0182a589 100644 --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -387,8 +387,8 @@ define i32 @test_chr_4(i32* %i, i32 %sum0) !prof !14 { ; CHECK-LABEL: @test_chr_4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[I:%.*]], align 4 -; CHECK-NEXT: [[DOTFR2:%.*]] = freeze i32 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[DOTFR2]], 3 +; CHECK-NEXT: [[DOTFR1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[DOTFR1]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 ; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: common.ret: @@ -398,11 +398,11 @@ define i32 @test_chr_4(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[TMP3]] = add i32 [[SUM0:%.*]], 85 ; CHECK-NEXT: br label [[COMMON_RET:%.*]] ; CHECK: entry.split.nonchr: -; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[DOTFR2]], 1 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SUM0]], 42 -; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[DOTNOT]], i32 [[SUM0]], i32 [[TMP5]], !prof [[PROF16]] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[DOTFR2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 42 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[DOTFR1]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[DOTNOT]], i32 [[SUM0]], i32 [[TMP4]], !prof [[PROF16]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[DOTFR1]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM1_NONCHR]], 43 ; CHECK-NEXT: [[SUM2_NONCHR]] = select i1 [[TMP7]], i32 [[SUM1_NONCHR]], i32 [[TMP8]], !prof [[PROF16]] @@ -450,8 +450,8 @@ define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 { ; CHECK-LABEL: @test_chr_5( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[DOTFR3:%.*]] = freeze i32 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[DOTFR3]], 15 +; CHECK-NEXT: [[DOTFR1:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[DOTFR1]], 15 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 15 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof 
[[PROF15]] ; CHECK: bb0: @@ -459,21 +459,21 @@ define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: entry.split.nonchr: -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[DOTFR3]], 255 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[DOTFR1]], 255 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof [[PROF16]] ; CHECK: bb0.nonchr: -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[DOTFR3]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[DOTFR1]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM0]], 42 ; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM0]], i32 [[TMP8]], !prof [[PROF16]] -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[DOTFR3]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[DOTFR1]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[SUM1_NONCHR]], 43 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP10]], i32 [[SUM1_NONCHR]], i32 [[TMP11]], !prof [[PROF16]] -; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[DOTFR3]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[DOTFR1]], 4 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[DOTFR3]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[DOTFR1]], 8 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 ; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP15]], i32 44, i32 88 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] @@ -548,10 +548,10 @@ define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 { ; CHECK-LABEL: @test_chr_5_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[DOTFR3:%.*]] = freeze i32 [[TMP0]] +; CHECK-NEXT: [[DOTFR1:%.*]] = freeze i32 [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUM0:%.*]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 
[[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[DOTFR3]], 11 +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[DOTFR1]], 11 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 11 ; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]] ; CHECK-NEXT: br i1 [[TMP5]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] @@ -560,21 +560,21 @@ define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SUM0]], 173 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: entry.split.nonchr: -; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[DOTFR3]], 255 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[DOTFR1]], 255 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof [[PROF16]] ; CHECK: bb0.nonchr: -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[DOTFR3]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[DOTFR1]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[SUM0]], 42 ; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP10]], i32 [[SUM0]], i32 [[TMP11]], !prof [[PROF16]] -; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[DOTFR3]], 2 +; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[DOTFR1]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[SUM1_NONCHR]], 43 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP13]], i32 [[SUM1_NONCHR]], i32 [[TMP14]], !prof [[PROF16]] ; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[SUM0]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[DOTFR3]], 8 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[DOTFR1]], 8 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP17]], 0 ; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP18]], i32 44, i32 88 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] @@ -1286,8 +1286,8 @@ define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 ; CHECK-NEXT: entry: ; CHECK-NEXT: 
[[Z_FR:%.*]] = freeze i32 [[Z:%.*]] ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[V1_NOT:%.*]] = icmp eq i32 [[Z_FR]], 1 -; CHECK-NEXT: br i1 [[V1_NOT]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] +; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z_FR]], 1 +; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: entry.split.nonchr: ; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z_FR]], 0 ; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]] @@ -1659,8 +1659,6 @@ define i32 @test_chr_18(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP2:%.*]], [[BB2:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[LI:%.*]] = load i32, i32* [[I:%.*]], align 4 ; CHECK-NEXT: [[LI_FR:%.*]] = freeze i32 [[LI]] -; CHECK-NEXT: [[A1:%.*]] = and i32 [[LI_FR]], 1 -; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[A1]], 0 ; CHECK-NEXT: [[SUM1:%.*]] = add i32 [[SUM0:%.*]], 42 ; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[LI_FR]], 5 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 5 @@ -1673,6 +1671,8 @@ define i32 @test_chr_18(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[CMP4_NONCHR:%.*]] = icmp eq i32 [[A4_NONCHR]], 0 ; CHECK-NEXT: br i1 [[CMP4_NONCHR]], label [[BB2]], label [[BB1_NONCHR:%.*]], !prof [[PROF16]] ; CHECK: bb1.nonchr: +; CHECK-NEXT: [[A1:%.*]] = and i32 [[LI_FR]], 1 +; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[A1]], 0 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[CMP1_NOT]], i32 [[SUM0]], i32 [[SUM1]], !prof [[PROF16]] ; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44 ; CHECK-NEXT: br label [[BB2]] @@ -1746,8 +1746,8 @@ define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 { ; CHECK-LABEL: @test_chr_19( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[DOTFR2:%.*]] = freeze i32 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[DOTFR2]], 9 +; CHECK-NEXT: [[DOTFR1:%.*]] = freeze i32 [[TMP0]] 
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[DOTFR1]], 9 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: bb0: @@ -1755,15 +1755,15 @@ define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: entry.split.nonchr: -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[DOTFR2]], 255 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[DOTFR1]], 255 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof [[PROF16]] ; CHECK: bb0.nonchr: -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[DOTFR2]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[DOTFR1]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM0]], 85 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM0]], i32 [[TMP8]], !prof [[PROF16]] -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[DOTFR2]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[DOTFR1]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 ; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP10]], i32 44, i32 88 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] @@ -1853,9 +1853,9 @@ define i32 @test_chr_20(i32* %i, i32 %sum0, i1 %j) !prof !14 { ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB4]] ; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0]], 43 ; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0_FR]], 2 ; CHECK-NEXT: [[V4_NOT:%.*]] = icmp eq i32 [[V3]], 0 -; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0]], 43 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NOT]], i32 [[SUM0]], i32 [[V8]], !prof [[PROF16]] ; CHECK-NEXT: [[V6_NONCHR:%.*]] = and i32 [[I0_FR]], 4 ; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[V6_NONCHR]], 0 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index 00ac1265b0629d..2a01774b3ad5da 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -17,7 +17,7 @@ define float @test_merge_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i64 0 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]] ; CHECK-NEXT: br label [[RETURN]] @@ -90,14 +90,14 @@ return: define float @test_merge_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_merge_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[T_FR7:%.*]] = freeze <4 x float> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR7]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[T_FR7]], +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[T_FR]], ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP3]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR7]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR7]] +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP4]], i64 0 ; 
CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00 ; CHECK-NEXT: ret float [[RETVAL_0]] @@ -177,7 +177,7 @@ define float @test_separate_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i64 0 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]] ; CHECK-NEXT: br label [[RETURN]] @@ -261,11 +261,11 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) { ; CHECK: if.end: ; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[T_FR]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 -; CHECK-NEXT: [[DOTNOT7:%.*]] = icmp eq i4 [[TMP3]], 0 +; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP4]], i64 0 -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[DOTNOT7]], float [[ADD]], float 0.000000e+00 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[DOTNOT6]], float [[ADD]], float 0.000000e+00 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[IF_END]] ] @@ -412,13 +412,13 @@ return: define float @test_merge_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_merge_anyof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[T_FR7:%.*]] = freeze <4 x i32> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i32> [[T_FR7]], +; CHECK-NEXT: [[T_FR:%.*]] 
= freeze <4 x i32> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i32> [[T_FR]], ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i1> [[TMP1]] to i4 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP2]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR7]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR7]] +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[CONV]], float 0.000000e+00 @@ -568,11 +568,11 @@ define i32 @test_separate_anyof_v4si(<4 x i32> %t) { ; CHECK: if.end: ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 -; CHECK-NEXT: [[DOTNOT7:%.*]] = icmp eq i4 [[TMP3]], 0 +; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <4 x i32> [[SHIFT]], [[T_FR]] +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <4 x i32> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP4]], i64 0 -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[DOTNOT7]], i32 [[ADD]], i32 0 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[DOTNOT6]], i32 [[ADD]], i32 0 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[IF_END]] ] From a9a6e200125e39c7cf1d3d0724bbcebbd17212aa Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 24 May 2022 09:58:04 -0400 Subject: [PATCH 346/908] [libc++] Rename the generic-singlethreaded CI job to generic-no-threads for consistency --- 
...{Generic-singlethreaded.cmake => Generic-no-threads.cmake} | 0 libcxx/utils/ci/buildkite-pipeline.yml | 2 +- libcxx/utils/ci/run-buildbot | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename libcxx/cmake/caches/{Generic-singlethreaded.cmake => Generic-no-threads.cmake} (100%) diff --git a/libcxx/cmake/caches/Generic-singlethreaded.cmake b/libcxx/cmake/caches/Generic-no-threads.cmake similarity index 100% rename from libcxx/cmake/caches/Generic-singlethreaded.cmake rename to libcxx/cmake/caches/Generic-no-threads.cmake diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index cf011d123fe6f5..d3db1e125c5cb2 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -410,7 +410,7 @@ steps: - group: "Parts disabled" steps: - label: "No threads" - command: "libcxx/utils/ci/run-buildbot generic-singlethreaded" + command: "libcxx/utils/ci/run-buildbot generic-no-threads" artifact_paths: - "**/test-results.xml" - "**/*.abilist" diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index bc57927e4aef17..0732ffc2e6f50a 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -342,9 +342,9 @@ generic-with_llvm_unwinder) -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" check-runtimes ;; -generic-singlethreaded) +generic-no-threads) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-singlethreaded.cmake" \ + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-threads.cmake" \ -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" check-runtimes ;; From 3eab6c5911415ce1f9b72738025f8f5051b55c34 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 16:09:26 +0200 Subject: [PATCH 347/908] [InstCombine] Add test for GEP difference with bitcasts (NFC) --- llvm/test/Transforms/InstCombine/sub-gep.ll | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git 
a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index 4450459329dc2f..b198e1423ccd29 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -369,3 +369,22 @@ define i64 @gep_diff_second_inbounds(i8* %foo, i64 %i, i64 %j) { %sub = sub i64 %cast1, %cast2 ret i64 %sub } + +define i64 @gep_diff_with_bitcast(i64* %p, i64 %idx) { +; CHECK-LABEL: @gep_diff_with_bitcast( +; CHECK-NEXT: [[I0:%.*]] = bitcast i64* [[P:%.*]] to [4 x i64]* +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[I0]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[I3:%.*]] = ptrtoint [4 x i64]* [[I1]] to i64 +; CHECK-NEXT: [[I4:%.*]] = ptrtoint i64* [[P]] to i64 +; CHECK-NEXT: [[I5:%.*]] = sub nuw i64 [[I3]], [[I4]] +; CHECK-NEXT: [[I6:%.*]] = lshr i64 [[I5]], 5 +; CHECK-NEXT: ret i64 [[I6]] +; + %i0 = bitcast i64* %p to [4 x i64]* + %i1 = getelementptr inbounds [4 x i64], [4 x i64]* %i0, i64 %idx + %i3 = ptrtoint [4 x i64]* %i1 to i64 + %i4 = ptrtoint i64* %p to i64 + %i5 = sub nuw i64 %i3, %i4 + %i6 = lshr i64 %i5, 5 + ret i64 %i6 +} From e6e0eb3bc89d054889c1c663ccf600581ed77c7e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 16:08:21 +0200 Subject: [PATCH 348/908] [InstCombine] Strip bitcasts in GEP diff fold Bitcasts were stripped in one case, but not the other. Of course, this no longer really matters with opaque pointers, but as I went through the trouble of tracking this down, we may as well remove one typed vs opaque pointer optimization discrepancy. 
--- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 3 ++- llvm/test/Transforms/InstCombine/getelementptr.ll | 7 +++---- llvm/test/Transforms/InstCombine/sub-gep.ll | 8 +------- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 09d2e1726b72f1..486641009ecb24 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1688,7 +1688,8 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, // Require at least one GEP with a common base pointer on both sides. if (auto *LHSGEP = dyn_cast(LHS)) { // (gep X, ...) - X - if (LHSGEP->getOperand(0) == RHS) { + if (LHSGEP->getOperand(0)->stripPointerCasts() == + RHS->stripPointerCasts()) { GEP1 = LHSGEP; } else if (auto *RHSGEP = dyn_cast(RHS)) { // (gep X, ...) - (gep X, ...) diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index d5ffd9bd5769ac..b0b964c151eb1f 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -552,7 +552,7 @@ define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) { ; CHECK-NEXT: [[T349:%.*]] = getelementptr [[STRUCT_SIGINFO_T:%.*]], %struct.siginfo_t* [[T344]], i64 0, i32 3, i32 0, i32 3, i32 0 ; CHECK-NEXT: [[T349350:%.*]] = bitcast i8** [[T349]] to i32* ; CHECK-NEXT: [[T351:%.*]] = load i32, i32* [[T349350]], align 8 -; CHECK-NEXT: [[T360:%.*]] = call i32 asm sideeffect "...", "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"(i32 [[T351]], %struct.__large_struct* elementtype(%struct.__large_struct) null, i32 -14, i32 0) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[T360:%.*]] = call i32 asm sideeffect "...", "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"(i32 [[T351]], %struct.__large_struct* elementtype([[STRUCT___LARGE_STRUCT:%.*]]) null, i32 -14, 
i32 0) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: unreachable ; entry: @@ -1300,9 +1300,8 @@ define i8* @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) { define i8* @D98588(i8* %c1, i64 %offset) { ; CHECK-LABEL: @D98588( -; CHECK-NEXT: [[C2:%.*]] = bitcast i8* [[C1:%.*]] to i64* -; CHECK-NEXT: [[C2_NEXT:%.*]] = getelementptr inbounds i64, i64* [[C2]], i64 [[OFFSET:%.*]] -; CHECK-NEXT: [[GEP:%.*]] = bitcast i64* [[C2_NEXT]] to i8* +; CHECK-NEXT: [[C2_NEXT_IDX:%.*]] = shl nsw i64 [[OFFSET:%.*]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[C1:%.*]], i64 [[C2_NEXT_IDX]] ; CHECK-NEXT: ret i8* [[GEP]] ; %c2 = bitcast i8* %c1 to i64* diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index b198e1423ccd29..5cf5941e897464 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -372,13 +372,7 @@ define i64 @gep_diff_second_inbounds(i8* %foo, i64 %i, i64 %j) { define i64 @gep_diff_with_bitcast(i64* %p, i64 %idx) { ; CHECK-LABEL: @gep_diff_with_bitcast( -; CHECK-NEXT: [[I0:%.*]] = bitcast i64* [[P:%.*]] to [4 x i64]* -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[I0]], i64 [[IDX:%.*]] -; CHECK-NEXT: [[I3:%.*]] = ptrtoint [4 x i64]* [[I1]] to i64 -; CHECK-NEXT: [[I4:%.*]] = ptrtoint i64* [[P]] to i64 -; CHECK-NEXT: [[I5:%.*]] = sub nuw i64 [[I3]], [[I4]] -; CHECK-NEXT: [[I6:%.*]] = lshr i64 [[I5]], 5 -; CHECK-NEXT: ret i64 [[I6]] +; CHECK-NEXT: ret i64 [[IDX:%.*]] ; %i0 = bitcast i64* %p to [4 x i64]* %i1 = getelementptr inbounds [4 x i64], [4 x i64]* %i0, i64 %idx From 1586e1dc957677df0f37da603bb35586e1b6a172 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Tue, 24 May 2022 15:12:45 +0100 Subject: [PATCH 349/908] [AMDGPU][MC][GFX11] Support base+soffset+offset SMEM loads. 
Reviewed By: dp Differential Revision: https://reviews.llvm.org/D126207 --- llvm/lib/Target/AMDGPU/SMInstructions.td | 23 +++++++++++-------- llvm/test/MC/AMDGPU/gfx11_asm_smem.s | 3 +++ .../MC/Disassembler/AMDGPU/gfx11_dasm_all.txt | 3 +++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index ffbc0130ab936f..35a6726e15b2c2 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -1129,16 +1129,19 @@ class SMEM_Real_gfx11 op, SM_Pseudo ps, string opName = ps.Mnemonic> : !if(ps.has_offset, !cast(SGPR_NULL_gfx11plus.HWEncoding), ?)); } -multiclass SM_Real_Loads_gfx11 op, string ps, string opName, - SM_Load_Pseudo immPs = !cast(ps#_IMM), - SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { - def _IMM_gfx11 : SMEM_Real_gfx11 { - let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } - def _SGPR_gfx11 : SMEM_Real_gfx11 { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); - } - def : MnemonicAlias, Requires<[isGFX11Plus]>; +class SMEM_Real_Load_gfx11 op, string ps, string opName, dag offsets> : + SMEM_Real_gfx11(ps), opName> { + RegisterClass BaseClass = !cast(ps).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +} + +multiclass SM_Real_Loads_gfx11 op, string ps, string opName> { + def _IMM_gfx11 : SMEM_Real_Load_gfx11; + def _SGPR_gfx11 : SMEM_Real_Load_gfx11; + def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11< + op, ps#"_SGPR_IMM", opName, (ins SReg_32:$soffset, smem_offset_mod:$offset)>; + def : MnemonicAlias(ps#"_IMM").Mnemonic, opName>, + Requires<[isGFX11Plus]>; } defm S_LOAD_B32 : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD", "s_load_b32">; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s index 89ef9dff4c3219..d230a216f8a333 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s +++ 
b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s @@ -41,6 +41,9 @@ s_load_b32 s5, s[2:3], m0 s_load_b32 s5, s[2:3], 0x0 // GFX11: encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8] +s_load_b32 s5, s[2:3], s7 offset:0x12345 +// GFX11: encoding: [0x41,0x01,0x00,0xf4,0x45,0x23,0x01,0x0e] + s_load_b32 s5, s[2:3], s0 glc // GFX11: encoding: [0x41,0x41,0x00,0xf4,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt index 9f4548d7e698ae..4a4d98529767b0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt @@ -9848,6 +9848,9 @@ # GFX11: s_load_b32 s5, s[2:3], 0x1234 glc dlc ; encoding: [0x41,0x61,0x00,0xf4,0x34,0x12,0x00,0xf8] 0x41,0x61,0x00,0xf4,0x34,0x12,0x00,0xf8 +# GFX11: s_load_b32 s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x01,0x00,0xf4,0x45,0x23,0x01,0x0e] +0x41,0x01,0x00,0xf4,0x45,0x23,0x01,0x0e + # GFX11: s_load_b32 s5, s[2:3], m0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] 0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa From 6c80267d0ff445c0c47c6ddb283da5a8bc4feb64 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 May 2022 15:17:59 +0100 Subject: [PATCH 350/908] [CostModel][X86] getScalarizationOverhead - improve extraction costs for > 128-bit vectors We were using the default getScalarizationOverhead expansion for extraction costs, which adds up all the individual element extraction costs. This is fine for 128-bit vectors, but for 256/512-bit vectors each element extraction also has to account for extracting the upper 128-bit subvector extraction before it can handle the element. For scalarization costs we only need to extract each demanded subvector once. 
Differential Revision: https://reviews.llvm.org/D125527 --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 54 +- llvm/test/Analysis/CostModel/X86/arith-fp.ll | 16 +- llvm/test/Analysis/CostModel/X86/fptoi_sat.ll | 72 +- llvm/test/Analysis/CostModel/X86/fptosi.ll | 20 +- llvm/test/Analysis/CostModel/X86/fptoui.ll | 30 +- .../CostModel/X86/gather-i16-with-i8-index.ll | 18 +- .../CostModel/X86/gather-i32-with-i8-index.ll | 2 +- .../CostModel/X86/gather-i64-with-i8-index.ll | 2 +- .../CostModel/X86/gather-i8-with-i8-index.ll | 18 +- .../X86/interleaved-load-f32-stride-2.ll | 6 +- .../X86/interleaved-load-f32-stride-3.ll | 10 +- .../X86/interleaved-load-f32-stride-4.ll | 10 +- .../X86/interleaved-load-f32-stride-5.ll | 8 +- .../X86/interleaved-load-f32-stride-6.ll | 8 +- .../X86/interleaved-load-f32-stride-7.ll | 8 +- .../X86/interleaved-load-f32-stride-8.ll | 8 +- .../X86/interleaved-load-f64-stride-2.ll | 8 +- .../X86/interleaved-load-f64-stride-3.ll | 8 +- .../X86/interleaved-load-f64-stride-4.ll | 8 +- .../X86/interleaved-load-f64-stride-5.ll | 6 +- .../X86/interleaved-load-f64-stride-6.ll | 6 +- .../X86/interleaved-load-f64-stride-7.ll | 6 +- .../X86/interleaved-load-f64-stride-8.ll | 6 +- .../X86/interleaved-load-i16-stride-2.ll | 8 +- .../X86/interleaved-load-i16-stride-3.ll | 10 +- .../X86/interleaved-load-i16-stride-4.ll | 10 +- .../X86/interleaved-load-i16-stride-5.ll | 22 +- .../X86/interleaved-load-i16-stride-6.ll | 12 +- .../X86/interleaved-load-i16-stride-7.ll | 22 +- .../X86/interleaved-load-i16-stride-8.ll | 22 +- ...nterleaved-load-i32-stride-2-indices-0u.ll | 6 +- .../X86/interleaved-load-i32-stride-2.ll | 6 +- ...terleaved-load-i32-stride-3-indices-01u.ll | 8 +- ...terleaved-load-i32-stride-3-indices-0uu.ll | 6 +- .../X86/interleaved-load-i32-stride-3.ll | 10 +- ...erleaved-load-i32-stride-4-indices-012u.ll | 10 +- ...erleaved-load-i32-stride-4-indices-01uu.ll | 10 +- .../X86/interleaved-load-i32-stride-4.ll | 10 +- 
.../X86/interleaved-load-i32-stride-5.ll | 8 +- .../X86/interleaved-load-i32-stride-6.ll | 8 +- .../X86/interleaved-load-i32-stride-7.ll | 8 +- .../X86/interleaved-load-i32-stride-8.ll | 8 +- .../X86/interleaved-load-i64-stride-2.ll | 8 +- .../X86/interleaved-load-i64-stride-3.ll | 8 +- .../X86/interleaved-load-i64-stride-4.ll | 8 +- .../X86/interleaved-load-i64-stride-5.ll | 6 +- .../X86/interleaved-load-i64-stride-6.ll | 6 +- .../X86/interleaved-load-i64-stride-7.ll | 6 +- .../X86/interleaved-load-i64-stride-8.ll | 6 +- .../X86/interleaved-load-i8-stride-2.ll | 6 +- .../X86/interleaved-load-i8-stride-3.ll | 8 +- .../X86/interleaved-load-i8-stride-4.ll | 8 +- .../X86/interleaved-load-i8-stride-5.ll | 18 +- .../X86/interleaved-load-i8-stride-6.ll | 10 +- .../X86/interleaved-load-i8-stride-7.ll | 18 +- .../X86/interleaved-load-i8-stride-8.ll | 18 +- .../X86/interleaved-store-f32-stride-2.ll | 6 +- .../X86/interleaved-store-f32-stride-3.ll | 6 +- .../X86/interleaved-store-f32-stride-4.ll | 6 +- .../X86/interleaved-store-f32-stride-5.ll | 8 +- .../X86/interleaved-store-f32-stride-6.ll | 4 +- .../X86/interleaved-store-f32-stride-7.ll | 8 +- .../X86/interleaved-store-f32-stride-8.ll | 8 +- .../X86/interleaved-store-f64-stride-2.ll | 8 +- .../X86/interleaved-store-f64-stride-3.ll | 6 +- .../X86/interleaved-store-f64-stride-4.ll | 6 +- .../X86/interleaved-store-f64-stride-5.ll | 8 +- .../X86/interleaved-store-f64-stride-6.ll | 4 +- .../X86/interleaved-store-f64-stride-7.ll | 8 +- .../X86/interleaved-store-f64-stride-8.ll | 8 +- .../X86/interleaved-store-i16-stride-2.ll | 6 +- .../X86/interleaved-store-i16-stride-3.ll | 6 +- .../X86/interleaved-store-i16-stride-4.ll | 6 +- .../X86/interleaved-store-i16-stride-5.ll | 14 +- .../X86/interleaved-store-i16-stride-6.ll | 6 +- .../X86/interleaved-store-i16-stride-7.ll | 14 +- .../X86/interleaved-store-i16-stride-8.ll | 14 +- .../X86/interleaved-store-i32-stride-2.ll | 6 +- .../X86/interleaved-store-i32-stride-3.ll | 6 +- 
.../X86/interleaved-store-i32-stride-4.ll | 6 +- .../X86/interleaved-store-i32-stride-5.ll | 8 +- .../X86/interleaved-store-i32-stride-6.ll | 4 +- .../X86/interleaved-store-i32-stride-7.ll | 8 +- .../X86/interleaved-store-i32-stride-8.ll | 8 +- .../X86/interleaved-store-i64-stride-2.ll | 8 +- .../X86/interleaved-store-i64-stride-3.ll | 6 +- .../X86/interleaved-store-i64-stride-4.ll | 6 +- .../X86/interleaved-store-i64-stride-5.ll | 8 +- .../X86/interleaved-store-i64-stride-6.ll | 4 +- .../X86/interleaved-store-i64-stride-7.ll | 8 +- .../X86/interleaved-store-i64-stride-8.ll | 8 +- .../X86/interleaved-store-i8-stride-2.ll | 4 +- .../X86/interleaved-store-i8-stride-3.ll | 4 +- .../X86/interleaved-store-i8-stride-4.ll | 4 +- .../X86/interleaved-store-i8-stride-5.ll | 8 +- .../X86/interleaved-store-i8-stride-6.ll | 4 +- .../X86/interleaved-store-i8-stride-7.ll | 8 +- .../X86/interleaved-store-i8-stride-8.ll | 8 +- .../X86/masked-gather-i32-with-i8-index.ll | 2 +- .../X86/masked-gather-i64-with-i8-index.ll | 2 +- .../X86/masked-interleaved-load-i16.ll | 12 +- .../X86/masked-interleaved-store-i16.ll | 22 +- .../X86/masked-intrinsic-cost-inseltpoison.ll | 380 ++--- .../CostModel/X86/masked-intrinsic-cost.ll | 380 ++--- .../X86/masked-scatter-i32-with-i8-index.ll | 14 +- .../X86/masked-scatter-i64-with-i8-index.ll | 18 +- .../CostModel/X86/masked-store-i16.ll | 8 +- .../Analysis/CostModel/X86/masked-store-i8.ll | 4 +- .../Analysis/CostModel/X86/reduce-fadd.ll | 36 +- .../Analysis/CostModel/X86/reduce-fmul.ll | 36 +- .../X86/scatter-i16-with-i8-index.ll | 26 +- .../X86/scatter-i32-with-i8-index.ll | 18 +- .../X86/scatter-i64-with-i8-index.ll | 18 +- .../CostModel/X86/scatter-i8-with-i8-index.ll | 26 +- .../CostModel/X86/shuffle-replication-i16.ll | 88 +- .../CostModel/X86/shuffle-replication-i32.ll | 42 +- .../CostModel/X86/shuffle-replication-i64.ll | 42 +- .../CostModel/X86/shuffle-replication-i8.ll | 88 +- llvm/test/Analysis/CostModel/X86/sitofp.ll | 12 +- 
llvm/test/Analysis/CostModel/X86/trunc.ll | 1386 ++++++++--------- 120 files changed, 1836 insertions(+), 1794 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 6b657de96a9b68..dd7c1bd8d9252f 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3779,15 +3779,19 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) { + assert(DemandedElts.getBitWidth() == + cast(Ty)->getNumElements() && + "Vector size mismatch"); + + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + MVT MScalarTy = LT.second.getScalarType(); + unsigned SizeInBits = LT.second.getSizeInBits(); + InstructionCost Cost = 0; // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. if (Insert) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); - MVT MScalarTy = LT.second.getScalarType(); - unsigned SizeInBits = LT.second.getSizeInBits(); - if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || (MScalarTy.isInteger() && ST->hasSSE41()) || (MScalarTy == MVT::f32 && ST->hasSSE41())) { @@ -3865,8 +3869,46 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, return MOVMSKCost; } - // TODO: Use default extraction for now, but we should investigate extending - // this to handle repeated subvector extraction. + if (LT.second.isVector()) { + int CostValue = *LT.first.getValue(); + assert(CostValue >= 0 && "Negative cost!"); + + unsigned NumElts = LT.second.getVectorNumElements() * CostValue; + assert(NumElts >= DemandedElts.getBitWidth() && + "Vector has been legalized to smaller element count"); + + // If we're extracting elements from a 128-bit subvector lane, we only need + // to extract each lane once, not for every element. 
+ if (SizeInBits > 128) { + assert((SizeInBits % 128) == 0 && "Illegal vector"); + unsigned NumLegal128Lanes = SizeInBits / 128; + unsigned Num128Lanes = NumLegal128Lanes * CostValue; + APInt WidenedDemandedElts = DemandedElts.zext(NumElts); + unsigned Scale = NumElts / Num128Lanes; + + // Add cost for each demanded 128-bit subvector extraction. + // Luckily this is a lot easier than for insertion. + APInt DemandedUpper128Lanes = + APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes); + auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale); + for (unsigned I = 0; I != Num128Lanes; ++I) + if (DemandedUpper128Lanes[I]) + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, + I * Scale, Ty128); + + // Add all the demanded element extractions together, but adjust the + // index to use the equivalent of the bottom 128 bit lane. + for (unsigned I = 0; I != NumElts; ++I) + if (WidenedDemandedElts[I]) { + unsigned Idx = I % Scale; + Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, Idx); + } + + return Cost; + } + } + + // Fallback to default extraction. 
Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract); } diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index 2b9080ad92064d..1f41569d63fc1f 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -663,23 +663,23 @@ define i32 @frem(i32 %arg) { ; AVX-LABEL: 'frem' ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'frem' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %V4F32 = frem <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'frem' diff --git a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll index 2ed34d07f11f2f..c83d1fb430d2a0 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll @@ -266,8 +266,8 @@ define void @casts() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> 
@llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -276,8 +276,8 @@ define void @casts() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x 
double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -286,8 +286,8 @@ define void @casts() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an 
estimated cost of 28 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -296,8 +296,8 @@ define void @casts() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -306,8 +306,8 @@ define void @casts() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; 
AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -316,8 +316,8 @@ define void @casts() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v16f64u64 = call <16 x i64> 
@llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'casts' @@ -369,8 +369,8 @@ define void @casts() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -379,8 +379,8 @@ 
define void @casts() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -389,8 +389,8 @@ define void @casts() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX2-NEXT: Cost 
Model: Found an estimated cost of 44 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -399,8 +399,8 @@ define void @casts() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 
for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -409,8 +409,8 @@ define void @casts() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated 
cost of 38 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -419,8 +419,8 @@ define void @casts() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'casts' @@ -472,8 +472,8 @@ define void @casts() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 20 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -482,8 +482,8 @@ define void @casts() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 22 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -492,8 +492,8 @@ define void @casts() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -502,8 +502,8 @@ define void @casts() { ; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -512,8 +512,8 @@ define void @casts() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 
x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -522,8 +522,8 @@ define void @casts() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 74 for instruction: 
%v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'casts' diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index a3d86e203be678..cba6b0139b3463 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -28,15 +28,15 @@ define i32 @fptosi_double_i64(i32 %arg) { ; AVX-LABEL: 'fptosi_double_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_double_i64' @@ -216,17 +216,17 @@ define i32 @fptosi_float_i64(i32 %arg) { ; AVX-LABEL: 'fptosi_float_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_float_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x 
i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_float_i64' diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index 26f3ccbc8cceeb..061fe9d0eb06d9 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -28,22 +28,22 @@ define i32 @fptoui_double_i64(i32 %arg) { ; AVX1-LABEL: 'fptoui_double_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fptoui_double_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui double undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x 
double> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptoui_double_i64' @@ -223,25 +223,25 @@ define i32 @fptoui_float_i64(i32 %arg) { ; AVX1-LABEL: 'fptoui_float_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for 
instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fptoui_float_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui float undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_float_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptoui_float_i64' diff --git a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll index ce87bded62719b..a8f51f534464cd 100644 --- a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll @@ -42,19 +42,19 @@ define void @test() { ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 28 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 58 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 116 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For 
instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 108 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 30 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 62 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 124 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 248 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 56 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 112 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 224 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll index aa161ddf5a00fc..556c1f59d1e2e8 100644 --- 
a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll @@ -57,7 +57,7 @@ define void @test() { ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll index 248375326a1a5f..cf09f84d4f4023 100644 --- a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll @@ -57,7 +57,7 @@ define void @test() { ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX512: LV: Found an estimated cost of 16 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 15 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated 
cost of 40 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll index c26891440fc1e2..3e00679c82ac8f 100644 --- a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll @@ -49,19 +49,19 @@ define void @test() { ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 28 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 56 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 114 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 52 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 30 for 
VF 8 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 60 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 122 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 244 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 110 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 220 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll index 1c276e74decc95..67e1ad712c0efc 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 32 for VF 8 
For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll index efdd3bcd1d5f6c..7de0b6d51ea7ca 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll @@ -21,11 +21,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an 
estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll index b2a5b0d5558e91..77e5ebb1d3f18b 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll @@ -21,11 +21,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll index 0b470aae97ff10..abdc8603447652 100644 --- 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 41 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll index 435cb2f70e4063..552014c451b0a7 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: 
LV: Found an estimated cost of 114 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll index add73d68ebbcc2..4620fcd70bbe72 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 58 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; 
AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll index 9f6ff5e3162c43..50a4613aadf4a1 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll index cc2818e14c3586..0966411f282031 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll @@ -22,10 +22,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll index af01ed53a107cf..ab956e28eb6ec6 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: 
%v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll index 8eaa6353585015..c062193be0e781 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 8 For 
instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll index 551ce2fbccb31c..7517c92916cc8b 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll index 124c864320a30e..402ff0fb8f0200 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 
For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll index 572795bd90f82c..03d484b34c6b04 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an 
estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll index 0643d645f0f482..2b6b408694548a 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll index 1be85b1e7a40cc..7f07bb1f9ed42c 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll @@ -24,9 +24,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = 
load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 86 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 10 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 20 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 372 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll index 4096f7774a95f6..073236a244779c 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll @@ -23,10 +23,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: 
Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 31 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 58 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 129 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 59 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 558 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll index 50fe75474adaeb..54d195cd27e84b 100644 --- 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll @@ -23,10 +23,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 41 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 82 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 77 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 154 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an 
estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll index 87e410f72ba3c3..87eb848ee2edc2 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll @@ -22,11 +22,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 99 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 215 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 430 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 43 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -38,12 +38,12 @@ define void @test() { ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = 
load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 55 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 106 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 229 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 465 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 930 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll index 5fd171b2645fc8..8cf147611ab0f4 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll @@ -22,11 +22,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 31 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 58 
for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 123 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 516 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 109 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 218 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1116 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll index 97a2f0e7c56e5a..1f4c8f7c7243c6 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll @@ -22,11 +22,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 39 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 301 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 602 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -38,12 +38,12 @@ define void @test() { ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 39 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 81 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 156 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 322 for VF 16 For instruction: %v0 = load i16, ptr %in0, 
align 2 -; AVX512DQ: LV: Found an estimated cost of 651 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1302 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll index 8169980d47a6d6..84248b0aabbe13 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll @@ -22,11 +22,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 41 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 82 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 164 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 688 for VF 32 For instruction: %v0 
= load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -38,12 +38,12 @@ define void @test() { ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 41 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 89 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 178 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 372 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1488 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For 
instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll index f2ecd028cd6206..4f9fbfdc5a9233 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll index fd2f7ba4a1ea28..446c0e1e56eb1b 100644 --- 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 46 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll index 69ffef585783e1..27b79fac10bbb1 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll @@ -22,10 +22,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: 
Found an estimated cost of 47 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 94 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 188 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll index a823bfaa906fbe..cb460163f9ee48 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; 
AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll index 1a7fb70d64b6e2..7da728a294054d 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll @@ -21,11 +21,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 69 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff 
--git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll index fab78ed8d14580..5229013be0516a 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll @@ -21,11 +21,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 32 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 62 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 124 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 248 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll index baa954df3355a8..2ddf97fc943f05 100644 --- 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll @@ -21,11 +21,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 20 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll index 885024894c64b3..8009c768e7da97 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll @@ -21,11 +21,11 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, 
align 4 -; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll index 83a98b404fb6d5..6e0eb980aa8445 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 115 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found 
an estimated cost of 230 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll index 8e7e7d9bfd9a52..e72006d8ee6555 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an 
estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll index 07745d4fe390e3..c0e1cdcd42fb2e 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 161 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 322 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll index 53b0810619e30f..3d69b27fcf79e5 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For 
instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 42 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll index 964bfcb8355da2..e11644d9bb4987 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll @@ -22,10 +22,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 26 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 32 For instruction: %v0 = load i64, 
ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll index d3e8241c84c5a2..7570c5fdddb052 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 39 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr 
%in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll index 03ed8ba667a90e..dc8d07ab0533b2 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll @@ -20,10 +20,10 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 22 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll index 4fb2a1067532c9..dd1f3aed6b3ca8 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated 
cost of 27 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 130 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll index 2bc423853dfaa5..ca68ab5c380a87 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll index 76bb8892b97c09..acf0b380df0f2c 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 91 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 182 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll index 807b6df9617ccc..a89e6b211223c9 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll @@ -19,9 +19,9 @@ define void @test() { ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 44 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 8 For 
instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll index 9c81d7aef45dde..e60d75003da67c 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll @@ -25,8 +25,8 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 81 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 166 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 7 for VF 32 For 
instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 362 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll index 977fe6b56ad09c..7546410cc16f76 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll @@ -24,9 +24,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 59 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 249 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 
for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 543 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll index 9645fea8d0399f..468437c2b45392 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll @@ -24,9 +24,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 81 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 162 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 332 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an 
estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 25 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 58 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 724 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll index c1d22742ff05ca..894c49244d1d56 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll @@ -23,10 +23,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 195 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 415 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 83 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 340 
for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -39,11 +39,11 @@ define void @test() { ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 107 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 445 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 905 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll index e36240e834a392..b7b1edc18f9907 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll @@ 
-23,10 +23,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 59 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 243 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 498 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 99 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 45 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 85 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1086 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll index ce52d84ac5f0db..4e973fef9823dc 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll @@ -23,10 +23,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 73 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 276 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 581 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -39,11 +39,11 @@ define void @test() { ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 73 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 157 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 
308 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 626 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1267 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll index 62b49001745733..4e3fd72425ecab 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll @@ -23,10 +23,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 81 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 162 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 324 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 664 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 66 
for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -39,11 +39,11 @@ define void @test() { ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 81 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 177 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 354 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 724 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1448 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git 
a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll index ab904c895e57ae..63bb0262d3d890 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: store float %v1, ptr %out1, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll index 05f55e6d18aba9..a618f586d0cf10 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 ; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store 
float %v2, ptr %out2, align 4 ; AVX1: LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store float %v2, ptr %out2, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll index cf9cbaaf468c14..164b0759046cdc 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: store float %v3, ptr 
%out3, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: store float %v3, ptr %out3, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll index 52e553f9562d8c..087aece3f48719 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll @@ -22,15 +22,15 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 95 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 190 for VF 16 For instruction: store float %v4, 
ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll index 9d692b7f4ac2f8..9b5e4d367aa043 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll @@ -22,8 +22,8 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store float %v5, ptr %out5, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll index 291ee8781ac4f3..59ffea30614a9f 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll @@ -22,15 +22,15 @@ define void @test() { ; AVX1: LV: 
Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 ; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 133 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 266 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll index 702444afafc91c..a5e166cf874189 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll @@ -22,15 +22,15 @@ define void @test() { ; 
AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 152 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 304 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll index 73505eca286103..55f9f43b81f3dd 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll @@ -22,10 +22,10 @@ define void 
@test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 ; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 32 For instruction: store double %v1, ptr %out1, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll index 88b425b5ddc52d..2b02db5f3fa3d5 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll @@ -21,9 +21,9 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 ; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 54 for VF 8 For 
instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store double %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store double %v2, ptr %out2, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll index dec47f2bc3c170..f7c9456772365b 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll @@ -21,9 +21,9 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 ; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: store double %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store double %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 8 For instruction: store double %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 16 For instruction: store double %v3, ptr %out3, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 diff --git 
a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll index e75502aed80d21..e5484827e851ed 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll @@ -20,14 +20,14 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 ; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: store double %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 ; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 98 for VF 8 For instruction: store double %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll index b5fefd7cdd7f6b..2f66b1d236170c 
100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll @@ -20,8 +20,8 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 ; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store double %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store double %v5, ptr %out5, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll index 13326f082833c7..cfa862029beabe 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll @@ -20,14 +20,14 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 ; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 59 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 118 for VF 8 For instruction: store double %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr 
%out6, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 ; AVX2: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 59 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 118 for VF 8 For instruction: store double %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll index dc91cd3149e77b..e9cbbe01fbd22a 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll @@ -20,14 +20,14 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 ; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: store double %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 ; AVX2: LV: Found an estimated 
cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: store double %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll index 24fecd1ed25d2d..a7506722b9b7d2 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll @@ -25,8 +25,8 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX1: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 86 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 
2 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX512DQ: LV: Found an estimated cost of 10 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 372 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll index ceca668e789dc5..7f721028a91c80 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll @@ -25,8 +25,8 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 129 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 15 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For 
instruction: store i16 %v2, ptr %out2, align 2 ; AVX512DQ: LV: Found an estimated cost of 57 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 558 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll index 9527912ab239c0..551ef68537d488 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll @@ -25,8 +25,8 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 11 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX512DQ: LV: Found an estimated cost of 34 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX512DQ: 
LV: Found an estimated cost of 68 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll index d6f300e3c5eb00..ce4bfa5baa5ffd 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll @@ -25,25 +25,25 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 215 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 430 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX2: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: 
Found an estimated cost of 215 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 430 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX512DQ: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 465 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 930 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 178 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll index 639bc8a3d2587b..186b32757aca2c 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll @@ -25,8 +25,8 @@ 
define void @test() { ; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 516 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX512DQ: LV: Found an estimated cost of 61 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX512DQ: LV: Found an estimated cost of 96 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 1116 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll index 8e4f8730c77e1b..ab5d2e56a4f85b 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll @@ -25,25 +25,25 @@ define void @test() { ; AVX1: LV: Found an estimated 
cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX1: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 301 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 602 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX2: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX2: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 301 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 602 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX512DQ: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX512DQ: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 
-; AVX512DQ: LV: Found an estimated cost of 298 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 651 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1302 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 249 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll index 194ec83e3f0565..515fa75c2fd00e 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll @@ -25,25 +25,25 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX1: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 688 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 
%v7, ptr %out7, align 2 ; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 344 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 688 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX512DQ: LV: Found an estimated cost of 69 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX512DQ: LV: Found an estimated cost of 138 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 340 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 1488 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 284 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 diff 
--git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll index 0fcd96667c2035..61f9595f842f77 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 46 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll index ecacdc06d7d010..2055df64f8bfe3 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 ; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v2, ptr %out2, 
align 4 ; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 69 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll index 6fdc9282d6b504..2010d505c3f723 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll @@ -23,9 +23,9 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 
160 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll index 7a1921faacc44d..323cd44fc31da6 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll @@ -22,15 +22,15 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 115 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 230 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 115 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 230 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 100 for VF 8 For 
instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll index 44bc4436a76e1f..c1eb4369a06f03 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll @@ -22,8 +22,8 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 57 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll index 72aa6e9f938c1b..626a2d206bf3d4 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll @@ -22,15 +22,15 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX1: LV: 
Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 161 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 322 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 161 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 322 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll index fa655335d3e903..c95cb010a957b2 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll @@ -22,15 +22,15 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated 
cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 184 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 368 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll index 5a5ca679ca6ee9..0bb786e74a39b9 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll @@ -22,10 +22,10 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 ; AVX1: LV: Found an estimated 
cost of 11 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 26 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll index 0cac57729eb871..e6135d41798da1 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll @@ -21,9 +21,9 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 39 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For 
instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll index 62aaef625e1199..269234d8158ad5 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll @@ -21,9 +21,9 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 ; AVX1: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll index 0e21b5bd3af28b..b65ae04f449c10 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll @@ -20,14 +20,14 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 ; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 130 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 ; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 65 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 130 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll index 56bc67ba2cfa93..0ddeab032c62ca 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll @@ -20,8 +20,8 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, 
ptr %out5, align 8 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll index 13f4123ee8c11f..5a7d4e3b4498b4 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll @@ -20,14 +20,14 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 ; AVX1: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 91 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 182 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 ; AVX2: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 91 for VF 4 For instruction: store i64 %v6, ptr %out6, 
align 8 -; AVX2: LV: Found an estimated cost of 182 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll index 4d871ef84c0b19..351f8677d5c447 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll @@ -20,14 +20,14 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 ; AVX1: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 ; AVX2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 104 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 208 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, 
align 8 +; AVX2: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll index a4f19d0a45fa2c..0a5dca98c21243 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll @@ -26,7 +26,7 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX1: LV: Found an estimated cost of 67 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 166 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX512DQ: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 362 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 diff --git 
a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll index 7809ff49ee36cd..dce22b131875af 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll @@ -26,7 +26,7 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX1: LV: Found an estimated cost of 54 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX1: LV: Found an estimated cost of 101 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 249 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX512DQ: LV: Found an estimated cost of 15 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 543 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll index f0c55338680069..73e66704fab1f3 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll @@ -26,7 +26,7 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX1: LV: Found an estimated cost of 67 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX1: LV: Found an estimated cost of 134 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 332 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX512DQ: LV: Found an estimated cost of 9 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 724 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll index 334ea248232842..91203d587563a5 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll @@ -26,7 +26,7 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 
%v4, ptr %out4, align 1 ; AVX1: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 415 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 @@ -34,7 +34,7 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX2: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 415 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 @@ -42,8 +42,8 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX512DQ: LV: Found an estimated cost of 167 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 413 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 905 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 338 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost 
of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll index 795c7dc70b2ccc..7ed278e1aae230 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll @@ -26,7 +26,7 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX1: LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX1: LV: Found an estimated cost of 201 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 498 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 @@ -43,7 +43,7 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 19 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX512DQ: LV: Found an estimated cost of 93 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1086 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll index 3d07d969afbf13..2cfbd1db549f61 100644 --- 
a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll @@ -26,7 +26,7 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX1: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX1: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 581 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 @@ -34,7 +34,7 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX2: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX2: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 581 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 @@ -42,8 +42,8 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX512DQ: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 578 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an 
estimated cost of 1267 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 473 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll index 492bbe006b00f7..b82b2e709bcf62 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll @@ -26,7 +26,7 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX1: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX1: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 664 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 @@ -34,7 +34,7 @@ define void @test() { ; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX2: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX2: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 664 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 ; ; 
AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 @@ -42,8 +42,8 @@ define void @test() { ; AVX512DQ: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX512DQ: LV: Found an estimated cost of 133 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX512DQ: LV: Found an estimated cost of 266 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 660 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1448 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 540 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll index 155397c14002be..9e9e7bc460fb91 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll @@ -50,7 +50,7 @@ define void @test() { ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4 +; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = 
load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll index 36360dc77a6589..52ec7043315da2 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll @@ -50,7 +50,7 @@ define void @test() { ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8 -; AVX512: LV: Found an estimated cost of 24 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll index 4552b32055dff0..f7ba91352eb8e4 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll @@ -24,12 +24,12 @@ define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readon ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, 
i16* %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 62 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 62 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test1' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 
diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll index 739f89eb027dca..b186ac59b94521 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll @@ -24,12 +24,12 @@ define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readon ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an 
estimated cost of 27 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test1' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 @@ -81,11 +81,11 @@ define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 50 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 43 for VF 16 For instruction: store i16 %0, i16* 
%arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test2' @@ -148,14 +148,14 @@ define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonl ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 0dc3f986ae690e..60bdd601eeb3dd 100644 --- 
a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -80,12 +80,12 @@ define i32 @masked_load() { ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 292 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> 
@llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -107,12 +107,12 @@ define i32 @masked_load() { ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> 
@llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -249,12 +249,12 @@ define i32 @masked_store() { ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 
x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; AVX-NEXT: 
Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -276,12 +276,12 @@ define i32 @masked_store() { ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 168 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x 
i16>* undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 352 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -402,57 +402,57 @@ define i32 @masked_gather() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_gather' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = 
call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> 
@llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> 
@llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_gather' -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 
x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> 
undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = 
call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_gather' @@ -472,41 +472,41 @@ define i32 @masked_gather() { ; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKL-NEXT: Cost 
Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> 
@llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_gather' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x 
i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 
14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, 
<32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_gather' @@ -526,14 +526,14 @@ define i32 @masked_gather() { ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> 
@llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; 
SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) @@ -625,57 +625,57 @@ define i32 @masked_scatter() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX-LABEL: 'masked_scatter' -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) 
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 
for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void 
@llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> 
undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_scatter' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> 
undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; 
KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_scatter' @@ -695,14 +695,14 @@ define i32 @masked_scatter() { ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void 
@llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> 
undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) @@ -936,109 +936,109 @@ define i32 @masked_compressstore() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_compressstore' -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void 
@llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* 
undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX1-NEXT: Cost Model: Found an 
estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_compressstore' -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void 
@llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* 
undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX2-NEXT: Cost Model: Found an 
estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_compressstore' -; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void 
@llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x 
i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 33 for 
instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void 
@llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void 
@llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void 
@llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1333,11 +1333,11 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> % ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1345,7 +1345,7 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> % ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for 
instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32' @@ -1366,11 +1366,11 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' @@ -1378,7 +1378,7 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call 
<4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' @@ -1405,13 +1405,13 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; 
AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' @@ -1449,13 +1449,13 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1493,13 +1493,13 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 ; AVX1-LABEL: 
'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1543,7 +1543,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -1551,7 +1551,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -1602,7 +1602,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX1-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1610,7 +1610,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -1618,7 +1618,7 @@ define void 
@test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -1648,7 +1648,7 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -1669,11 +1669,11 @@ define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void 
@llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKX-LABEL: 'test_scatter_4i32' @@ -1700,13 +1700,13 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; 
AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' @@ -1718,7 +1718,7 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; KNL-LABEL: 'test_gather_4f32' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32' @@ -1750,13 +1750,13 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' @@ -1768,7 +1768,7 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' diff 
--git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index 02a279e68aeedd..86e3abfbd5bed9 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -176,12 +176,12 @@ define i32 @masked_load() { ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 292 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, 
i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -235,12 +235,12 @@ define i32 @masked_load() { ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, 
i32 1, <32 x i1> undef, <32 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -537,12 +537,12 @@ define i32 @masked_store() { ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, 
i32 1, <3 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 
130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -596,12 +596,12 @@ define i32 @masked_store() { ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 168 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; 
KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 352 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -786,57 +786,57 @@ define i32 @masked_gather() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_gather' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x 
double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> 
undef, <2 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: 
%V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> 
undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_gather' -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x 
double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 
x i1> undef, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for 
instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> 
undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_gather' @@ -856,41 +856,41 @@ define i32 @masked_gather() { ; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 
= call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) 
+; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_gather' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> 
undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an 
estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x 
i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost 
of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_gather' @@ -910,14 +910,14 @@ define i32 @masked_gather() { ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> 
undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x 
i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) @@ -1009,57 +1009,57 @@ define i32 @masked_scatter() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX-LABEL: 'masked_scatter' -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call 
void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> 
undef, <1 x i64*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; 
AVX-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for 
instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_scatter' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, 
i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost 
of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void 
@llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_scatter' @@ -1079,14 +1079,14 @@ define i32 @masked_scatter() { ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, 
<4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an 
estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) @@ -1320,109 +1320,109 @@ define i32 @masked_compressstore() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_compressstore' -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x 
i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void 
@llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_compressstore' -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 
x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void 
@llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_compressstore' -; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> 
undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for 
instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, 
i8* undef, <32 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call 
void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1717,11 +1717,11 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> % ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1729,7 +1729,7 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> % ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for 
instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32' @@ -1750,11 +1750,11 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' @@ -1762,7 +1762,7 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call 
<4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' @@ -1789,13 +1789,13 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; 
AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' @@ -1833,13 +1833,13 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1877,13 +1877,13 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 ; AVX1-LABEL: 
'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1927,7 +1927,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -1935,7 +1935,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -1986,7 +1986,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX1-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1994,7 +1994,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -2002,7 +2002,7 @@ define void 
@test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -2032,7 +2032,7 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -2053,11 +2053,11 @@ define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void 
@llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKX-LABEL: 'test_scatter_4i32' @@ -2084,13 +2084,13 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; 
AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' @@ -2102,7 +2102,7 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; KNL-LABEL: 'test_gather_4f32' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32' @@ -2134,13 +2134,13 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' @@ -2152,7 +2152,7 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' diff 
--git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll index 39c6086ef110eb..41d1dbbee351b1 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll @@ -34,22 +34,22 @@ define void @test() { ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 ; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 20 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 ; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 40 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an 
estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX512: LV: Found an estimated cost of 10 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll index d0c3f2ee9a3f05..f403c6896552de 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll @@ -33,23 +33,23 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; 
AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX512: LV: Found an estimated cost of 11 for VF 4 For 
instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll index 01eaa41f5d37c1..2fb3469501be32 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll @@ -27,16 +27,16 @@ define void @test([1024 x i16]* %C) { ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 ; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 ; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: 
Found an estimated cost of 40 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll index 6c1dbf3e738b2b..7483b7769e85b2 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll @@ -35,7 +35,7 @@ define void @test([1024 x i8]* %C) { ; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 ; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 ; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 @@ -43,7 +43,7 @@ define void @test([1024 x i8]* %C) { ; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 ; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 ; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 40 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX512-LABEL: 'test' 
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll index 0f8d8b5580ce32..225d79b82ab77d 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll @@ -45,25 +45,25 @@ define void @reduce_f64(double %arg) { ; AVX1-LABEL: 'reduce_f64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double %arg, <2 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double %arg, <2 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double %arg, <2 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for 
instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) @@ -115,27 +115,27 @@ define void @reduce_f32(float %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x 
float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call 
float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll index 7f79cce3b1692c..14da7f5e539a2d 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll @@ -45,25 +45,25 @@ define void @reduce_f64(double %arg) { ; AVX1-LABEL: 'reduce_f64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double %arg, <2 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) -; AVX1-NEXT: Cost 
Model: Found an estimated cost of 24 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double %arg, <2 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 28 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double %arg, <2 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) @@ -115,27 +115,27 @@ define void @reduce_f32(float %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float %arg, <2 x float> 
undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float %arg, <4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float %arg, <2 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float %arg, <4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 
72 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float %arg, <2 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float %arg, <4 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call 
float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll index 4058f3fe25e50a..c509069ca1b69d 100644 --- a/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll @@ -33,27 +33,27 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 426 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %valB, i16* %out, 
align 2 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 53 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 106 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 30 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 144 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 288 for VF 64 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 
222 for VF 64 For instruction: store i16 %valB, i16* %out, align 2 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll index cf18faa01fc5e6..8d7da669f4092a 100644 --- a/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll @@ -33,23 +33,23 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 107 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 214 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 428 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 32 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an 
estimated cost of 64 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 27 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 54 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 108 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll index 8a7356ddd88646..783de9ffd68fcf 100644 --- a/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll @@ -33,23 +33,23 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an 
estimated cost of 56 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 16 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 32 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 112 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: 
store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX512: LV: Found an estimated cost of 16 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll index ae7b6364f9a04f..0a7200903c78bc 100644 --- a/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll @@ -33,27 +33,27 @@ define void @test() { ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 212 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: 
Found an estimated cost of 425 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 52 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 105 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 30 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 60 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 136 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 288 for VF 64 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, i8* 
%out, align 1 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: store i8 %valB, i8* %out, align 1 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll index b7f0b384d66cc8..29ada5b60ff108 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll @@ -67,9 +67,9 @@ define void @replication_i16_stride2() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride2' @@ -178,9 +178,9 @@ define void @replication_i16_stride3() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride3' @@ -196,11 +196,11 @@ define void @replication_i16_stride3() nounwind "min-legal-vector-width"="256" { ; AVX512FVEC256-LABEL: 'replication_i16_stride3' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride3' @@ -289,9 +289,9 @@ define void @replication_i16_stride4() nounwind "min-legal-vector-width"="256" { 
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride4' @@ -400,9 +400,9 @@ define void @replication_i16_stride5() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = 
shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 428 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride5' @@ -417,12 +417,12 @@ define void @replication_i16_stride5() nounwind "min-legal-vector-width"="256" { ; ; AVX512FVEC256-LABEL: 'replication_i16_stride5' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 223 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 892 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 752 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride5' @@ -511,9 +511,9 @@ define void @replication_i16_stride6() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x 
i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride6' @@ -528,12 +528,12 @@ define void @replication_i16_stride6() nounwind "min-legal-vector-width"="256" { ; ; AVX512FVEC256-LABEL: 'replication_i16_stride6' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 267 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 534 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1068 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 
31 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 225 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 450 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 900 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride6' @@ -622,9 +622,9 @@ define void @replication_i16_stride7() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 143 for 
instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride7' @@ -639,12 +639,12 @@ define void @replication_i16_stride7() nounwind "min-legal-vector-width"="256" { ; ; AVX512FVEC256-LABEL: 'replication_i16_stride7' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 311 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1244 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf4 = shufflevector <4 x i16> undef, 
<4 x i16> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1048 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride7' @@ -733,9 +733,9 @@ define void @replication_i16_stride8() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 161 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> 
poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 644 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll index 0bd7673e1ad989..5dfac79d25c81c 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll @@ -53,9 +53,9 @@ define void @replication_i32_stride2() nounwind "min-legal-vector-width"="256" { ; AVX-LABEL: 'replication_i32_stride2' ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride2' @@ 
-126,9 +126,9 @@ define void @replication_i32_stride3() nounwind "min-legal-vector-width"="256" { ; AVX-LABEL: 'replication_i32_stride3' ; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride3' @@ -199,9 +199,9 @@ define void @replication_i32_stride4() nounwind "min-legal-vector-width"="256" { ; AVX-LABEL: 'replication_i32_stride4' ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 104 
for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride4' @@ -272,9 +272,9 @@ define void @replication_i32_stride5() nounwind "min-legal-vector-width"="256" { ; AVX-LABEL: 'replication_i32_stride5' ; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = 
shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride5' @@ -345,9 +345,9 @@ define void @replication_i32_stride6() nounwind "min-legal-vector-width"="256" { ; AVX-LABEL: 'replication_i32_stride6' ; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride6' @@ -418,9 +418,9 @@ define void @replication_i32_stride7() nounwind "min-legal-vector-width"="256" { ; AVX-LABEL: 'replication_i32_stride7' ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 
x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride7' @@ -491,9 +491,9 @@ define void @replication_i32_stride8() nounwind "min-legal-vector-width"="256" { ; AVX-LABEL: 'replication_i32_stride8' ; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: 
Found an estimated cost of 178 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll index 5cb10e86cc5933..f8ed3b4ae0375d 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll @@ -47,9 +47,9 @@ define void @replication_i64_stride2() nounwind "min-legal-vector-width"="256" { ; ; AVX-LABEL: 'replication_i64_stride2' ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride2' @@ -111,9 +111,9 @@ define void 
@replication_i64_stride3() nounwind "min-legal-vector-width"="256" { ; ; AVX-LABEL: 'replication_i64_stride3' ; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride3' @@ -175,9 +175,9 @@ define void @replication_i64_stride4() nounwind "min-legal-vector-width"="256" { ; ; AVX-LABEL: 'replication_i64_stride4' ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = 
shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride4' @@ -239,9 +239,9 @@ define void @replication_i64_stride5() nounwind "min-legal-vector-width"="256" { ; ; AVX-LABEL: 'replication_i64_stride5' ; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride5' @@ -303,9 +303,9 @@ define void @replication_i64_stride6() nounwind "min-legal-vector-width"="256" { ; ; AVX-LABEL: 'replication_i64_stride6' ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x 
i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride6' @@ -367,9 +367,9 @@ define void @replication_i64_stride7() nounwind "min-legal-vector-width"="256" { ; ; AVX-LABEL: 'replication_i64_stride7' ; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride7' @@ -431,9 +431,9 @@ define void @replication_i64_stride8() nounwind "min-legal-vector-width"="256" { ; ; AVX-LABEL: 'replication_i64_stride8' ; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll index 70296bd8318c2a..a1ee064534df81 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll @@ -67,9 +67,9 @@ define void @replication_i8_stride2() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x 
i8> undef, <4 x i8> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride2' @@ -178,9 +178,9 @@ define void @replication_i8_stride3() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost 
of 300 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride3' @@ -196,11 +196,11 @@ define void @replication_i8_stride3() nounwind "min-legal-vector-width"="256" { ; AVX512FVEC256-LABEL: 'replication_i8_stride3' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride3' @@ -289,9 +289,9 @@ define void @replication_i8_stride4() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 169 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost 
Model: Found an estimated cost of 338 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 676 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride4' @@ -400,9 +400,9 @@ define void @replication_i8_stride5() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 406 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 812 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride5' @@ -417,12 +417,12 @@ define void @replication_i8_stride5() nounwind "min-legal-vector-width"="256" { 
; ; AVX512FVEC256-LABEL: 'replication_i8_stride5' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 435 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 870 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1740 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 720 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1440 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 
x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride5' @@ -511,9 +511,9 @@ define void @replication_i8_stride6() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 474 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 948 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride6' @@ -528,12 +528,12 @@ define void @replication_i8_stride6() nounwind "min-legal-vector-width"="256" { ; ; AVX512FVEC256-LABEL: 'replication_i8_stride6' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an 
estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 521 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1042 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2084 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 215 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 431 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 862 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1724 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride6' @@ -622,9 +622,9 @@ define void @replication_i8_stride7() nounwind 
"min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1144 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 542 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1084 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride7' @@ -639,12 +639,12 @@ define void @replication_i8_stride7() nounwind "min-legal-vector-width"="256" { ; ; AVX512FVEC256-LABEL: 'replication_i8_stride7' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %vf8 = shufflevector <8 x i8> undef, 
<8 x i8> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 295 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 607 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1214 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2428 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 502 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1004 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2008 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride7' @@ -733,9 +733,9 @@ define void @replication_i8_stride8() nounwind "min-legal-vector-width"="256" { ; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: 
%vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 305 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 610 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1220 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index 17657bc66519db..f3c489397c62a7 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -157,15 +157,15 @@ define i32 @sitofp_i64_double() { ; AVX-LABEL: 'sitofp_i64_double' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f64 = sitofp <8 
x i64> undef to <8 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_double' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sitofp_i64_double' @@ -358,8 +358,8 @@ define i32 @sitofp_i64_float() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x 
i64> undef to <16 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sitofp_i64_float' diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll index c39f683dffad24..5cbb3da50d91d4 100644 --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -315,29 +315,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for 
instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX1-NEXT: Cost 
Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -347,29 +347,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost 
of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x 
i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX1-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -382,29 +382,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 
for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 
x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -414,29 +414,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = 
trunc <6 x i32> undef to <6 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 101 
for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef 
to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -516,29 +516,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 25 for 
instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: 
Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -548,29 +548,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 101 for 
instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> 
undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -650,29 +650,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x 
i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; 
AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -682,29 +682,29 @@ 
define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; 
AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512DQVEC256-NEXT: Cost Model: 
Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512DQVEC256-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -784,29 +784,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found 
an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for 
instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost 
of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -816,29 +816,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 73 
for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x 
i32> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc 
<384 x i32> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -851,29 +851,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 73 for 
instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x 
i64> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V512i64 
= trunc <512 x i64> undef to <512 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -883,29 +883,29 @@ define i32 @trunc_vXi16() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = 
trunc <20 x i32> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x 
i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1295,29 
+1295,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX1-NEXT: Cost Model: 
Found an estimated cost of 82 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 984 for instruction: 
%V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1328,29 +1328,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 
10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX1-NEXT: Cost Model: 
Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for 
instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1361,29 +1361,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 
47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX1-NEXT: Cost 
Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 
for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1397,29 +1397,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for 
instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX2-NEXT: Cost 
Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost 
of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1430,29 +1430,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for 
instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX2-NEXT: 
Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1463,29 +1463,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for 
instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found 
an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1601,29 +1601,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 141 for 
instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to 
<160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1634,29 +1634,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated 
cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for 
instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1667,29 +1667,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> 
-; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1805,29 +1805,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> 
; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an 
estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an 
estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1838,29 +1838,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512DQVEC256-NEXT: 
Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for 
instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 144 for 
instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1871,29 +1871,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found 
an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc 
<80 x i16> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> 
undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2009,29 +2009,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: 
%V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -2042,29 +2042,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = 
trunc <16 x i32> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512BWVEC256-NEXT: 
Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found 
an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -2075,29 +2075,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 
x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost 
of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: 
%V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2111,29 +2111,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i64 = trunc <64 x 
i64> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to 
<384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -2144,29 +2144,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 
for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> 
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -2177,29 +2177,29 @@ define i32 @trunc_vXi8() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = 
trunc <16 x i16> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: 
Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2751,28 +2751,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; 
AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = 
trunc <128 x i64> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 
x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -2786,28 +2786,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; 
AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for 
instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = 
trunc <896 x i32> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -2821,28 +2821,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> 
+; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 660 
for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> @@ -2894,28 +2894,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an 
estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; 
AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -2929,28 +2929,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found 
an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x 
i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX2-NEXT: Cost Model: 
Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -2964,28 +2964,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef 
to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX2-NEXT: 
Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> @@ -3895,28 +3895,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated 
cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -3930,28 +3930,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated 
cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for 
instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -3965,28 +3965,28 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: 
%V112i16 = trunc <112 x i16> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: 
%V448i16 = trunc <448 x i16> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> From 89aaa2d033270d6eeeb82429c0bb88a78ae030fa Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Tue, 24 May 2022 14:16:00 +0000 Subject: [PATCH 351/908] [mlir][vector] Add new lowering mode to vector.contractionOp Add lowering for cases where the reduction dimension is fully unrolled. It is common to unroll the reduction dimension, therefore we would want to lower the contractions to an elementwise vector op in this case. 
Differential Revision: https://reviews.llvm.org/D126120 --- .../Vector/Transforms/VectorRewritePatterns.h | 3 + .../Vector/Transforms/VectorTransforms.cpp | 215 ++++++++++++++---- .../Vector/vector-contract-transforms.mlir | 52 +++++ .../Dialect/Vector/TestVectorTransforms.cpp | 13 ++ 4 files changed, 242 insertions(+), 41 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h index 0522ef58bc812b..e7226f4a6ac0ff 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h @@ -49,6 +49,9 @@ enum class VectorContractLowering { Matmul = 1, /// Lower to `vector.outerproduct`. OuterProduct = 2, + /// Lower contract with all reduction dimensions unrolled to 1 to a vector + /// elementwise operations. + ParallelArith = 3, }; /// Enum to control the splitting of `vector.transfer` operations into /// in-bounds and out-of-bounds variants. diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index f84fc2da08f23d..e1f4cffb93552d 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -144,6 +144,59 @@ static SmallVector extractVector(ArrayAttr arrayAttr) { [](IntegerAttr attr) { return static_cast(attr.getInt()); })); } +/// Helper to create arithmetic operation associated with a kind of contraction. +static Optional createContractArithOp(Location loc, Value x, Value y, + Value acc, + vector::CombiningKind kind, + PatternRewriter &rewriter, + bool isInt) { + using vector::CombiningKind; + Value mul; + if (isInt) { + if (kind == CombiningKind::MINF || kind == CombiningKind::MAXF) + // Only valid for floating point types. + return Optional(); + mul = rewriter.create(loc, x, y); + } else { + // Float case. 
+ if (kind == CombiningKind::AND || kind == CombiningKind::MINUI || + kind == CombiningKind::MINSI || kind == CombiningKind::MAXUI || + kind == CombiningKind::MAXSI || kind == CombiningKind::OR || + kind == CombiningKind::XOR) + // Only valid for integer types. + return Optional(); + // Special case for fused multiply-add. + if (acc && acc.getType().isa() && kind == CombiningKind::ADD) { + return Optional(rewriter.create(loc, x, y, acc)); + } + mul = rewriter.create(loc, x, y); + } + if (!acc) + return Optional(mul); + return makeArithReduction(rewriter, loc, kind, mul, acc); +} + +/// Return the positions of the reductions in the given map. +static SmallVector getReductionIndex(AffineMap map, + ArrayAttr iteratorTypes) { + SmallVector dimsIdx; + for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { + if (isReductionIterator(iteratorTypes[map.getDimPosition(i)])) + dimsIdx.push_back(i); + } + return dimsIdx; +} + +/// Look for a given dimension in an affine map and return its position. Return +/// llvm::None if the dimension is not in the map results. +static llvm::Optional getDimPosition(AffineMap map, unsigned dim) { + for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { + if (map.getDimPosition(i) == dim) + return i; + } + return llvm::None; +} + namespace { /// ShapeCastOpFolder folds cancelling ShapeCastOps away. @@ -498,9 +551,8 @@ class OuterProductOpLowering : public OpRewritePattern { if (!rhsType) { // Special case: AXPY operation. Value b = rewriter.create(loc, lhsType, op.getRhs()); - Optional mult = - isInt ? genMultI(loc, op.getLhs(), b, acc, kind, rewriter) - : genMultF(loc, op.getLhs(), b, acc, kind, rewriter); + Optional mult = createContractArithOp(loc, op.getLhs(), b, acc, + kind, rewriter, isInt); if (!mult.hasValue()) return failure(); rewriter.replaceOp(op, mult.getValue()); @@ -518,8 +570,7 @@ class OuterProductOpLowering : public OpRewritePattern { if (acc) r = rewriter.create(loc, rhsType, acc, pos); Optional m = - isInt ? 
genMultI(loc, a, op.getRhs(), r, kind, rewriter) - : genMultF(loc, a, op.getRhs(), r, kind, rewriter); + createContractArithOp(loc, a, op.getRhs(), r, kind, rewriter, isInt); if (!m.hasValue()) return failure(); result = rewriter.create(loc, resType, m.getValue(), @@ -528,48 +579,127 @@ class OuterProductOpLowering : public OpRewritePattern { rewriter.replaceOp(op, result); return success(); } +}; -private: - static Optional genMultI(Location loc, Value x, Value y, Value acc, - vector::CombiningKind kind, - PatternRewriter &rewriter) { - using vector::CombiningKind; - - auto mul = rewriter.create(loc, x, y); - if (!acc) - return Optional(mul); - - if (kind == CombiningKind::MINF || kind == CombiningKind::MAXF) - // Only valid for floating point types. - return Optional(); - - return makeArithReduction(rewriter, loc, kind, mul, acc); +/// Lower vector.contract with all size one reduction dimensions to +/// elementwise ops when possible. +struct ContractOpToElementwise + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + using FilterConstraintType = + std::function; + static LogicalResult defaultFilter(vector::ContractionOp op) { + return success(); } + ContractOpToElementwise( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, + const FilterConstraintType &constraint = defaultFilter) + : OpRewritePattern(context), + vectorTransformOptions(vectorTransformOptions), filter(defaultFilter) {} - static Optional genMultF(Location loc, Value x, Value y, Value acc, - vector::CombiningKind kind, - PatternRewriter &rewriter) { - using vector::CombiningKind; - - // Special case for fused multiply-add. 
- if (acc && kind == CombiningKind::ADD) { - return Optional(rewriter.create(loc, x, y, acc)); - } - - auto mul = rewriter.create(loc, x, y); - - if (!acc) - return Optional(mul); + LogicalResult matchAndRewrite(vector::ContractionOp contractOp, + PatternRewriter &rewriter) const override { + // TODO: implement masks + if (llvm::size(contractOp.getMasks()) != 0) + return failure(); - if (kind == CombiningKind::ADD || kind == CombiningKind::AND || - kind == CombiningKind::MINUI || kind == CombiningKind::MINSI || - kind == CombiningKind::MAXUI || kind == CombiningKind::MAXSI || - kind == CombiningKind::OR || kind == CombiningKind::XOR) - // Already handled or only valid for integer types. - return Optional(); + if (failed(filter(contractOp))) + return failure(); - return makeArithReduction(rewriter, loc, kind, mul, acc); + if (vectorTransformOptions.vectorContractLowering != + vector::VectorContractLowering::ParallelArith) + return failure(); + ArrayRef lhsShape = contractOp.getLhsType().getShape(); + ArrayRef rhsShape = contractOp.getRhsType().getShape(); + AffineMap lhsMap = contractOp.getIndexingMaps()[0]; + AffineMap rhsMap = contractOp.getIndexingMaps()[1]; + SmallVector lhsReductionDims = + getReductionIndex(lhsMap, contractOp.getIteratorTypes()); + SmallVector rhsReductionDims = + getReductionIndex(rhsMap, contractOp.getIteratorTypes()); + // All the reduction dimensions must be a size 1. 
+ for (int64_t dim : lhsReductionDims) { + if (lhsShape[dim] != 1) + return failure(); + } + for (int64_t dim : rhsReductionDims) { + if (rhsShape[dim] != 1) + return failure(); + } + AffineMap accMap = contractOp.getIndexingMaps()[2]; + unsigned numParallelDims = accMap.getNumResults(); + unsigned numLhsDimToBroadcast = + numParallelDims - (lhsMap.getNumResults() - lhsReductionDims.size()); + unsigned numRhsDimToBroadcast = + numParallelDims - (rhsMap.getNumResults() - rhsReductionDims.size()); + SmallVector lhsDims; + SmallVector lhsTranspose; + SmallVector rhsDims; + SmallVector rhsTranspose; + for (int64_t dim : lhsReductionDims) + lhsTranspose.push_back(numLhsDimToBroadcast + dim); + for (int64_t dim : rhsReductionDims) + rhsTranspose.push_back(numRhsDimToBroadcast + dim); + // Loop through the parallel dimensions to calculate the dimensions to + // broadcast and to permute in order to extract only parallel dimensions. + for (unsigned i = 0; i < numParallelDims; i++) { + llvm::Optional lhsDim = + getDimPosition(lhsMap, accMap.getDimPosition(i)); + if (lhsDim) { + lhsTranspose.push_back(numLhsDimToBroadcast + *lhsDim); + } else { + // If the parallel dimension doesn't exist we will have to broadcast it. + lhsDims.push_back( + contractOp.getResultType().cast().getDimSize(i)); + lhsTranspose.push_back(lhsDims.size() - 1); + } + llvm::Optional rhsDim = + getDimPosition(rhsMap, accMap.getDimPosition(i)); + if (rhsDim) { + rhsTranspose.push_back(numRhsDimToBroadcast + *rhsDim); + } else { + // If the parallel dimension doesn't exist we will have to broadcast it. 
+ rhsDims.push_back( + contractOp.getResultType().cast().getDimSize(i)); + rhsTranspose.push_back(rhsDims.size() - 1); + } + } + Value newLhs = contractOp.getLhs(); + Value newRhs = contractOp.getRhs(); + Location loc = contractOp.getLoc(); + if (!lhsDims.empty()) { + lhsDims.append(lhsShape.begin(), lhsShape.end()); + auto expandedType = + VectorType::get(lhsDims, contractOp.getLhsType().getElementType()); + newLhs = rewriter.create(loc, expandedType, newLhs); + } + if (!rhsDims.empty()) { + rhsDims.append(rhsShape.begin(), rhsShape.end()); + auto expandedType = + VectorType::get(rhsDims, contractOp.getRhsType().getElementType()); + newRhs = rewriter.create(loc, expandedType, newRhs); + } + bool isInt = contractOp.getLhsType().getElementType().isIntOrIndex(); + newLhs = rewriter.create(loc, newLhs, lhsTranspose); + newRhs = rewriter.create(loc, newRhs, rhsTranspose); + SmallVector lhsOffsets(lhsReductionDims.size(), 0); + SmallVector rhsOffsets(rhsReductionDims.size(), 0); + newLhs = rewriter.create( + loc, newLhs, rewriter.getI64ArrayAttr(lhsOffsets)); + newRhs = rewriter.create( + loc, newRhs, rewriter.getI64ArrayAttr(rhsOffsets)); + Optional result = + createContractArithOp(loc, newLhs, newRhs, contractOp.getAcc(), + contractOp.getKind(), rewriter, isInt); + rewriter.replaceOp(contractOp, {*result}); + return success(); } + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; + FilterConstraintType filter; }; /// Progressive lowering of ConstantMaskOp. @@ -1594,6 +1724,9 @@ ContractionOpLowering::matchAndRewrite(vector::ContractionOp op, ContractionOpToDotLowering pat3(vectorTransformOptions, ctx); if (succeeded(pat3.matchAndRewrite(op, rewriter))) return success(); + ContractOpToElementwise pat4(vectorTransformOptions, ctx); + if (succeeded(pat4.matchAndRewrite(op, rewriter))) + return success(); // Find first batch dimension in LHS/RHS, and lower when found. 
std::vector> batchDimMap = op.getBatchDimMap(); diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 989dee42f8a1c6..e725d1883e9bd7 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -2,6 +2,7 @@ // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-filter-outerproduct=1 | FileCheck %s --check-prefix=FILTEROUTERPRODUCT +// RUN: mlir-opt %s -test-vector-contraction-lowering=vector-parallel-arith=1 | FileCheck %s --check-prefix=PARALLEL #dotp_accesses = [ affine_map<(i) -> (i)>, @@ -1104,3 +1105,54 @@ func.func @matmul_4_not_filtered(%arg0: vector<3x4xf32>, %arg1: vector<4x4xf32>, : vector<3x4xf32>, vector<4x4xf32> into vector<3x4xf32> return %0 : vector<3x4xf32> } + +// PARALLEL-LABEL: func @parrallel_contract_lowering +// PARALLEL: %[[E0:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32> +// PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32> +// PARALLEL: %[[F:.*]] = vector.fma %[[E0]], %[[E1]], %{{.*}} : vector<4xf32> +// PARALLEL: return %[[F]] : vector<4xf32> +func.func @parrallel_contract_lowering(%arg0: vector<1x1x4xf32>, %arg1: vector<1x1x4xf32>, %arg2: vector<4xf32>) -> vector<4xf32> { + %0 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"], kind = #vector.kind} %arg0, %arg1, %arg2 : vector<1x1x4xf32>, vector<1x1x4xf32> into vector<4xf32> + return %0 : vector<4xf32> +} + +// PARALLEL-LABEL: func @parrallel_contract_lowering_broadcast +// PARALLEL: %[[B:.*]] = 
vector.broadcast %{{.*}} : vector<1x1xf32> to vector<4x1x1xf32> +// PARALLEL: %[[T:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<4x1x1xf32> to vector<1x1x4xf32> +// PARALLEL: %[[E0:.*]] = vector.extract %[[T]][0, 0] : vector<1x1x4xf32> +// PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32> +// PARALLEL: %[[F:.*]] = vector.fma %[[E0]], %[[E1]], %{{.*}} : vector<4xf32> +// PARALLEL: return %[[F]] : vector<4xf32> +func.func @parrallel_contract_lowering_broadcast(%arg0: vector<1x1xf32>, %arg1: vector<1x1x4xf32>, %arg2: vector<4xf32>) -> vector<4xf32> { + %0 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"], kind = #vector.kind} %arg0, %arg1, %arg2 : vector<1x1xf32>, vector<1x1x4xf32> into vector<4xf32> + return %0 : vector<4xf32> +} + +// PARALLEL-LABEL: func @parrallel_contract_lowering +// PARALLEL: %[[B:.*]] = vector.broadcast %{{.*}} : vector<1x1xf32> to vector<4x1x1xf32> +// PARALLEL: %[[T0:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<4x1x1xf32> to vector<1x1x4xf32> +// PARALLEL: %[[T1:.*]] = vector.transpose %{{.*}}, [0, 2, 1] : vector<1x4x1xf32> to vector<1x1x4xf32> +// PARALLEL: %[[E0:.*]] = vector.extract %[[T0]][0, 0] : vector<1x1x4xf32> +// PARALLEL: %[[E1:.*]] = vector.extract %[[T1]][0, 0] : vector<1x1x4xf32> +// PARALLEL: %[[F:.*]] = vector.fma %[[E0]], %[[E1]], %arg2 : vector<4xf32> +// PARALLEL: return %[[F]] : vector<4xf32> +func.func @parrallel_contract_lowering_transpose(%arg0: vector<1x1xf32>, %arg1: vector<1x4x1xf32>, %arg2: vector<4xf32>) -> vector<4xf32> { + %0 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"], kind = #vector.kind} %arg0, %arg1, %arg2 : vector<1x1xf32>, vector<1x4x1xf32> into vector<4xf32> 
+ return %0 : vector<4xf32> +} + +// PARALLEL-LABEL: func @parrallel_contract_lowering_scalar +// PARALLEL: %[[E0:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1xf32> +// PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1xf32> +// PARALLEL: %[[M:.*]] = arith.mulf %[[E0]], %[[E1]] : f32 +// PARALLEL: %[[A:.*]] = arith.addf %[[M]], %{{.*}} : f32 +// PARALLEL: return %[[A]] : f32 +func.func @parrallel_contract_lowering_scalar(%arg0: vector<1x1xf32>, %arg1: vector<1x1xf32>, %arg2: f32) -> f32 { + %0 = vector.contract { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> ()>], + iterator_types = ["reduction", "reduction"], kind = #vector.kind} + %arg0, %arg1, %arg2 : vector<1x1xf32>, vector<1x1xf32> into f32 + return %0 : f32 +} diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index f2514fc932727f..a81aa536df4add 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -135,6 +135,10 @@ struct TestVectorContractionLowering llvm::cl::desc("Lower vector.contract to vector.outerproduct but not for " "vectors of size 4."), llvm::cl::init(false)}; + Option lowerToParallelArith{ + *this, "vector-parallel-arith", + llvm::cl::desc("Lower vector.contract to elementwise vector ops."), + llvm::cl::init(false)}; void runOnOperation() override { RewritePatternSet patterns(&getContext()); @@ -165,6 +169,15 @@ struct TestVectorContractionLowering return; } + if (lowerToParallelArith) { + vector::populateVectorContractLoweringPatterns( + patterns, + vector::VectorTransformsOptions().setVectorTransformsOptions( + vector::VectorContractLowering::ParallelArith)); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + return; + } + // Test on all contract lowering patterns. 
VectorContractLowering contractLowering = VectorContractLowering::Dot; if (lowerToFlatMatrix) From e0fe9785d35203d6b76670cb2b8c6449db4fa576 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 24 May 2022 15:33:21 +0100 Subject: [PATCH 352/908] [TypePromotion] Avoid unnecessary trunc zext pairs Any zext 'sink' should already have an operand that is in the legal value, so avoid using a trunc and just use the trunc operand instead. Differential Revision: https://reviews.llvm.org/D118905 --- llvm/lib/CodeGen/TypePromotion.cpp | 27 +++++++------------ .../Transforms/TypePromotion/ARM/casts.ll | 10 +++---- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 8752a081623aab..c72088e853fe77 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -102,7 +102,6 @@ static cl::opt DisablePromotion("disable-type-promotion", cl::Hidden, namespace { class IRPromoter { LLVMContext &Ctx; - IntegerType *OrigTy = nullptr; unsigned PromotedWidth = 0; SetVector &Visited; SetVector &Sources; @@ -122,16 +121,13 @@ class IRPromoter { void Cleanup(); public: - IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, + IRPromoter(LLVMContext &C, unsigned Width, SetVector &visited, SetVector &sources, SetVector &sinks, SmallPtrSetImpl &wrap) - : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), + : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources), Sinks(sinks), SafeWrap(wrap) { ExtTy = IntegerType::get(Ctx, PromotedWidth); - assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() < - ExtTy->getPrimitiveSizeInBits().getFixedSize() && - "Original type not smaller than extended type"); } void Mutate(); @@ -244,7 +240,7 @@ bool TypePromotion::isSource(Value *V) { bool TypePromotion::isSink(Value *V) { // TODO The truncate also isn't actually necessary because we would already // proved that the data value is kept within the range of the 
original data - // type. + // type. We currently remove any truncs inserted for handling zext sinks. // Sinks are: // - points where the value in the register is being observed, such as an @@ -591,11 +587,9 @@ void IRPromoter::Cleanup() { continue; } - // Unless they produce a value that is narrower than ExtTy, we can - // replace the result of the zext with the input of a newly inserted - // trunc. - if (NewInsts.count(Src) && isa(Src) && - Src->getType() == OrigTy) { + // We've inserted a trunc for a zext sink, but we already know that the + // input is in range, negating the need for the trunc. + if (NewInsts.count(Src) && isa(Src)) { auto *Trunc = cast(Src); assert(Trunc->getOperand(0)->getType() == ExtTy && "expected inserted trunc to be operating on i32"); @@ -636,9 +630,8 @@ void IRPromoter::ConvertTruncs() { } void IRPromoter::Mutate() { - LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from " - << OrigTy->getBitWidth() << " to " << PromotedWidth - << "-bits\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to " + << PromotedWidth << "-bits\n"); // Cache original types of the values that will likely need truncating for (auto *I : Sinks) { @@ -878,8 +871,8 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) return false; - IRPromoter Promoter(*Ctx, cast(OrigTy), PromotedWidth, - CurrentVisited, Sources, Sinks, SafeWrap); + IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks, + SafeWrap); Promoter.Mutate(); return true; } diff --git a/llvm/test/Transforms/TypePromotion/ARM/casts.ll b/llvm/test/Transforms/TypePromotion/ARM/casts.ll index 4a68be49fb38f5..34a519fa64d60a 100644 --- a/llvm/test/Transforms/TypePromotion/ARM/casts.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/casts.ll @@ -996,8 +996,8 @@ entry: ret i1 %tobool } -define i32 @dont_replace_trunc_2(i16* %a, i8* %b) { -; CHECK-LABEL: @dont_replace_trunc_2( 
+define i32 @dont_return_inserted_trunc(i16* %a, i8* %b) { +; CHECK-LABEL: @dont_return_inserted_trunc( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i32 @@ -1010,8 +1010,7 @@ define i32 @dont_replace_trunc_2(i16* %a, i8* %b) { ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[OR]] to i8 ; CHECK-NEXT: store i8 [[TMP5]], i8* [[B]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[OR]] to i8 -; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[TMP6]] to i32 -; CHECK-NEXT: ret i32 [[CONV5]] +; CHECK-NEXT: ret i32 [[OR]] ; entry: %0 = load i16, i16* %a, align 2 @@ -1037,10 +1036,9 @@ define i32 @replace_trunk_with_mask(i16* %a) { ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255 ; CHECK-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 -; CHECK-NEXT: [[PHITMP:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: br label [[COND_END]] ; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[PHITMP]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP4]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[COND]] ; entry: From ebe2e4f5b773e73e3a0d50804e3c72aed6b90b9f Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 24 May 2022 15:47:41 +0200 Subject: [PATCH 353/908] [libcxx] Add sort.bench.cpp to libcxx/benchmarks/CMakeLists.txt It was forgotten in D124740. 
Differential revision: https://reviews.llvm.org/D126297 --- libcxx/benchmarks/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 71b73158f4670f..7c13a5a8f65c2a 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -160,11 +160,12 @@ endfunction() # Register Benchmark tests #============================================================================== set(BENCHMARK_TESTS - algorithms/make_heap_then_sort_heap.bench.cpp algorithms/make_heap.bench.cpp + algorithms/make_heap_then_sort_heap.bench.cpp algorithms/min_max_element.bench.cpp algorithms/pop_heap.bench.cpp algorithms/push_heap.bench.cpp + algorithms/sort.bench.cpp algorithms/sort_heap.bench.cpp algorithms/stable_sort.bench.cpp algorithms.partition_point.bench.cpp From 11455e47588940bd397e72e0c50811001c09295e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 May 2022 15:44:44 +0100 Subject: [PATCH 354/908] [DAG] Unroll vectorized FPOW instructions before widening that will scalarize to libcalls anyway Followup to D125988 - FPOW is similar to FREM and will most likely scalarize to libcalls, so unroll before widening to prevent us making additional libcalls with UNDEF args. 
--- .../SelectionDAG/LegalizeVectorTypes.cpp | 2 +- llvm/test/CodeGen/X86/pow-libcall.ll | 29 +++++-------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 835d5644cfb4f5..ffb36ee155ecea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3694,6 +3694,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FPOW: case ISD::FREM: if (unrollExpandedOp()) break; @@ -3704,7 +3705,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FADD: case ISD::FMUL: - case ISD::FPOW: case ISD::FSUB: case ISD::FDIV: case ISD::SDIV: diff --git a/llvm/test/CodeGen/X86/pow-libcall.ll b/llvm/test/CodeGen/X86/pow-libcall.ll index e90155e5978cf3..492418fffbbadb 100644 --- a/llvm/test/CodeGen/X86/pow-libcall.ll +++ b/llvm/test/CodeGen/X86/pow-libcall.ll @@ -1,45 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s -; FIXME: Ensure vectorized FPOWs are not widened/unrolled such that they get lowered +; Ensure vectorized FPOWs are not widened/unrolled such that they get lowered ; into libcalls on undef elements. 
define float @test_fpow_v2f32_multiuse(<2 x float> %a0, <2 x float> %a1, <2 x float> *%p3) nounwind { ; CHECK-LABEL: test_fpow_v2f32_multiuse: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $64, %rsp +; CHECK-NEXT: subq $48, %rsp ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] -; CHECK-NEXT: callq powf@PLT ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] -; CHECK-NEXT: callq powf@PLT -; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: callq powf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] ; CHECK-NEXT: callq powf@PLT -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; CHECK-NEXT: movaps %xmm2, %xmm1 ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-NEXT: addss %xmm1, %xmm0 +; 
CHECK-NEXT: addss %xmm2, %xmm0 ; CHECK-NEXT: movlps %xmm1, (%rbx) -; CHECK-NEXT: addq $64, %rsp +; CHECK-NEXT: addq $48, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq %fp = call <2 x float> @llvm.pow.v2f32(<2 x float> %a0, <2 x float> %a1) From 6fc0bc5b0fa7b87960a8b326853643df503d76f7 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Tue, 24 May 2022 18:30:13 +0700 Subject: [PATCH 355/908] Fix behavior of is_fp_class on empty class set The second argument to is_fp_class specifies the set of floating-point class to test against. It can be zero, in this case the intrinsic is expected to return zero value. Differential Revision: https://reviews.llvm.org/D112025 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++- llvm/test/CodeGen/X86/is_fpclass.ll | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5af1cd8d4f5350..5cf5aa9c298083 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7446,7 +7446,9 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, assert(OperandVT.isFloatingPoint()); // Degenerated cases. 
- if (Test == 0 || (Test & fcAllFlags) == fcAllFlags) + if (Test == 0) + return DAG.getBoolConstant(false, DL, ResultVT, OperandVT); + if ((Test & fcAllFlags) == fcAllFlags) return DAG.getBoolConstant(true, DL, ResultVT, OperandVT); // PPC double double is a pair of doubles, of which the higher part determines diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll index 3929141f692212..6ef60023eac7b4 100644 --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -935,6 +935,37 @@ entry: ret <4 x i1> %0 } +define i1 @isnone_f(float %x) { +; CHECK-32-LABEL: isnone_f: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xorl %eax, %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: isnone_f: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xorl %eax, %eax +; CHECK-64-NEXT: retq +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) + ret i1 %0 +} + +define i1 @isany_f(float %x) { +; CHECK-32-LABEL: isany_f: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: movb $1, %al +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: isany_f: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: movb $1, %al +; CHECK-64-NEXT: retq +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) + ret i1 %0 +} + + declare i1 @llvm.is.fpclass.f32(float, i32) declare i1 @llvm.is.fpclass.f64(double, i32) From deb62f5ad64931c6d47ef3cbf9bb53f4f651b3ae Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 11 May 2022 17:08:21 -0700 Subject: [PATCH 356/908] [flang][runtime] Clean up asynchronous I/O APIs Now that the requirements and implementation of asynchronous I/O are better understood, adjust their I/O runtime APIs. In particular: 1) Remove the BeginAsynchronousOutput/Input APIs; they're not needed, since any data transfer statement might have ASYNCHRONOUS= and (if ASYNCHRONOUS='YES') ID= control list specifiers that need to at least be checked. 
2) Add implementations for BeginWait(All) to check for the error case of a bad unit number and nonzero ID=. 3) Rearrange and comment SetAsynchronous so that it's clear that it can be called for READ/WRITE as well as for OPEN. The implementation remains completely synchronous, but should be conforming. Where opportunities make sense for true asynchronous implementations of some big block transfers without SIZE= in the future, we'll need to add a GetAsynchronousId API to capture ID= on a READ or WRITE; add sourceFile and sourceLine arguments to BeginWait(All) for good error reporting; track pending operations in unit.h; and add code to force synchronization to non-asynchronous I/O operations. Lowering should call SetAsynchronous when ASYNCHRONOUS= appears as a control list specifier. It should also set ID=x variables to 0 until such time as we support asynchronous operations, if ever. This patch only removes the removed APIs from lowering. Differential Revision: https://reviews.llvm.org/D126143 --- flang/include/flang/Runtime/io-api.h | 21 +++++---- flang/include/flang/Runtime/iostat.h | 2 + flang/lib/Lower/IO.cpp | 14 ++---- flang/runtime/io-api.cpp | 66 ++++++++++++++++------------ flang/runtime/io-error.h | 3 -- flang/runtime/io-stmt.h | 2 +- flang/runtime/iostat.cpp | 5 +++ flang/runtime/unit.h | 3 +- 8 files changed, 61 insertions(+), 55 deletions(-) diff --git a/flang/include/flang/Runtime/io-api.h b/flang/include/flang/Runtime/io-api.h index eca378d71ab80e..66dd1a7a18b7bf 100644 --- a/flang/include/flang/Runtime/io-api.h +++ b/flang/include/flang/Runtime/io-api.h @@ -143,14 +143,9 @@ Cookie IONAME(BeginUnformattedOutput)(ExternalUnit = DefaultUnit, Cookie IONAME(BeginUnformattedInput)(ExternalUnit = DefaultUnit, const char *sourceFile = nullptr, int sourceLine = 0); -// Asynchronous I/O is supported (at most) for unformatted direct access -// block transfers. 
-AsynchronousId IONAME(BeginAsynchronousOutput)(ExternalUnit, std::int64_t REC, - const char *, std::size_t, const char *sourceFile = nullptr, - int sourceLine = 0); -AsynchronousId IONAME(BeginAsynchronousInput)(ExternalUnit, std::int64_t REC, - char *, std::size_t, const char *sourceFile = nullptr, int sourceLine = 0); +// WAIT(ID=) Cookie IONAME(BeginWait)(ExternalUnit, AsynchronousId); +// WAIT(no ID=) Cookie IONAME(BeginWaitAll)(ExternalUnit); // Other I/O statements @@ -199,6 +194,9 @@ Cookie IONAME(BeginInquireIoLength)( void IONAME(EnableHandlers)(Cookie, bool hasIoStat = false, bool hasErr = false, bool hasEnd = false, bool hasEor = false, bool hasIoMsg = false); +// ASYNCHRONOUS='YES' or 'NO' with no ID= on READ/WRITE/OPEN +bool IONAME(SetAsynchronous)(Cookie, const char *, std::size_t); + // Control list options. These return false on a error that the // Begin...() call has specified will be handled by the caller. // The interfaces that pass a default-kind CHARACTER argument @@ -270,14 +268,12 @@ bool IONAME(InputNamelist)(Cookie, const NamelistGroup &); // Additional specifier interfaces for the connection-list of // on OPEN statement (only). SetBlank(), SetDecimal(), -// SetDelim(), GetIoMsg(), SetPad(), SetRound(), & SetSign() -// are also acceptable for OPEN. +// SetDelim(), GetIoMsg(), SetPad(), SetRound(), SetSign(), +// & SetAsynchronous() are also acceptable for OPEN. // ACCESS=SEQUENTIAL, DIRECT, STREAM bool IONAME(SetAccess)(Cookie, const char *, std::size_t); // ACTION=READ, WRITE, or READWRITE bool IONAME(SetAction)(Cookie, const char *, std::size_t); -// ASYNCHRONOUS=YES, NO -bool IONAME(SetAsynchronous)(Cookie, const char *, std::size_t); // CARRIAGECONTROL=LIST, FORTRAN, NONE bool IONAME(SetCarriagecontrol)(Cookie, const char *, std::size_t); // CONVERT=NATIVE, LITTLE_ENDIAN, BIG_ENDIAN, or SWAP @@ -310,6 +306,9 @@ std::size_t IONAME(GetIoLength)(Cookie); // end-of-record/file condition is present. 
void IONAME(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= +// TODO: for ID= on READ/WRITE(ASYNCHRONOUS='YES') +// int IONAME(GetAsynchronousId)(Cookie); + // INQUIRE() specifiers are mostly identified by their NUL-terminated // case-insensitive names. // ACCESS, ACTION, ASYNCHRONOUS, BLANK, CONVERT, DECIMAL, DELIM, DIRECT, diff --git a/flang/include/flang/Runtime/iostat.h b/flang/include/flang/Runtime/iostat.h index f09f25a738e2ea..33952074682374 100644 --- a/flang/include/flang/Runtime/iostat.h +++ b/flang/include/flang/Runtime/iostat.h @@ -70,6 +70,8 @@ enum Iostat { IostatUnitOverflow, IostatBadRealInput, IostatBadScaleFactor, + IostatBadAsynchronous, + IostatBadWaitUnit, }; const char *IostatErrorString(int); diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index 75f23927c69b07..b8f704d448f2e3 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -75,8 +75,7 @@ static constexpr std::tuple< mkIOKey(BeginInternalFormattedInput), mkIOKey(BeginExternalListOutput), mkIOKey(BeginExternalListInput), mkIOKey(BeginExternalFormattedOutput), mkIOKey(BeginExternalFormattedInput), mkIOKey(BeginUnformattedOutput), - mkIOKey(BeginUnformattedInput), mkIOKey(BeginAsynchronousOutput), - mkIOKey(BeginAsynchronousInput), mkIOKey(BeginWait), mkIOKey(BeginWaitAll), + mkIOKey(BeginUnformattedInput), mkIOKey(BeginWait), mkIOKey(BeginWaitAll), mkIOKey(BeginClose), mkIOKey(BeginFlush), mkIOKey(BeginBackspace), mkIOKey(BeginEndfile), mkIOKey(BeginRewind), mkIOKey(BeginOpenUnit), mkIOKey(BeginOpenNewUnit), mkIOKey(BeginInquireUnit), @@ -1784,8 +1783,6 @@ getBeginDataTransferFunc(mlir::Location loc, fir::FirOpBuilder &builder, bool isFormatted, bool isListOrNml, bool isInternal, bool isInternalWithDesc, bool isAsync) { if constexpr (isInput) { - if (isAsync) - return getIORuntimeFunc(loc, builder); if (isFormatted || isListOrNml) { if (isInternal) { if (isInternalWithDesc) { @@ -1808,8 +1805,6 @@ getBeginDataTransferFunc(mlir::Location loc, fir::FirOpBuilder 
&builder, } return getIORuntimeFunc(loc, builder); } else { - if (isAsync) - return getIORuntimeFunc(loc, builder); if (isFormatted || isListOrNml) { if (isInternal) { if (isInternalWithDesc) { @@ -1873,12 +1868,9 @@ void genBeginDataTransferCallArgs( getDefaultScratch(builder, loc, ioFuncTy.getInput(ioArgs.size()))); ioArgs.push_back( // buffer length getDefaultScratchLen(builder, loc, ioFuncTy.getInput(ioArgs.size()))); - } else if (isAsync) { // unit; REC; buffer and length - ioArgs.push_back(getIOUnit(converter, loc, stmt, - ioFuncTy.getInput(ioArgs.size()), csi, - stmtCtx)); - TODO(loc, "asynchronous"); } else { // external IO - maybe explicit format; unit + if (isAsync) + TODO(loc, "asynchronous"); maybeGetFormatArgs(); ioArgs.push_back(getIOUnit(converter, loc, stmt, ioFuncTy.getInput(ioArgs.size()), csi, diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index d5ef844ec557cf..18e1a075855a79 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -337,6 +337,21 @@ Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) unit, false /*was an existing file*/, sourceFile, sourceLine); } +Cookie IONAME(BeginWait)(ExternalUnit unitNumber, AsynchronousId id) { + // TODO: add and use sourceFile & sourceLine here + Terminator oom; + // TODO: add and use sourceFile & sourceLine here + auto &io{ + New{oom}(nullptr, 0).release()->ioStatementState()}; + if (id != 0 && !ExternalFileUnit::LookUp(unitNumber)) { + io.GetIoErrorHandler().SetPendingError(IostatBadWaitUnit); + } + return &io; +} +Cookie IONAME(BeginWaitAll)(ExternalUnit unitNumber) { + return IONAME(BeginWait)(unitNumber, 0 /*no ID=*/); +} + Cookie IONAME(BeginClose)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { if (ExternalFileUnit * unit{ExternalFileUnit::LookUpForClose(unitNumber)}) { @@ -475,15 +490,14 @@ static bool YesOrNo(const char *keyword, std::size_t length, const char *what, bool IONAME(SetAdvance)( Cookie cookie, const char *keyword, std::size_t 
length) { IoStatementState &io{*cookie}; - bool nonAdvancing{ - !YesOrNo(keyword, length, "ADVANCE", io.GetIoErrorHandler())}; + IoErrorHandler &handler{io.GetIoErrorHandler()}; + bool nonAdvancing{!YesOrNo(keyword, length, "ADVANCE", handler)}; if (nonAdvancing && io.GetConnectionState().access == Access::Direct) { - io.GetIoErrorHandler().SignalError( - "Non-advancing I/O attempted on direct access file"); + handler.SignalError("Non-advancing I/O attempted on direct access file"); } else { io.mutableModes().nonAdvancing = nonAdvancing; } - return true; + return !handler.InError(); } bool IONAME(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { @@ -543,9 +557,9 @@ bool IONAME(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { bool IONAME(SetPad)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; - io.mutableModes().pad = - YesOrNo(keyword, length, "PAD", io.GetIoErrorHandler()); - return true; + IoErrorHandler &handler{io.GetIoErrorHandler()}; + io.mutableModes().pad = YesOrNo(keyword, length, "PAD", handler); + return !handler.InError(); } bool IONAME(SetPos)(Cookie cookie, std::int64_t pos) { @@ -713,27 +727,23 @@ bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { bool IONAME(SetAsynchronous)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; - auto *open{io.get_if()}; - if (!open) { - io.GetIoErrorHandler().Crash( - "SetAsynchronous() called when not in an OPEN statement"); - } else if (open->completedOperation()) { - io.GetIoErrorHandler().Crash( - "SetAsynchronous() called after GetNewUnit() for an OPEN statement"); - } - static const char *keywords[]{"YES", "NO", nullptr}; - switch (IdentifyValue(keyword, length, keywords)) { - case 0: - open->unit().set_mayAsynchronous(true); - return true; - case 1: - open->unit().set_mayAsynchronous(false); - return true; - default: - open->SignalError(IostatErrorInKeyword, "Invalid 
ASYNCHRONOUS='%.*s'", - static_cast(length), keyword); - return false; + IoErrorHandler &handler{io.GetIoErrorHandler()}; + bool isYes{YesOrNo(keyword, length, "ASYNCHRONOUS", handler)}; + if (auto *open{io.get_if()}) { + if (open->completedOperation()) { + handler.Crash( + "SetAsynchronous() called after GetNewUnit() for an OPEN statement"); + } + open->unit().set_mayAsynchronous(isYes); + } else if (ExternalFileUnit * unit{io.GetExternalFileUnit()}) { + if (isYes && !unit->mayAsynchronous()) { + handler.SignalError(IostatBadAsynchronous); + } + } else { + handler.Crash("SetAsynchronous() called when not in an OPEN or external " + "I/O statement"); } + return !handler.InError(); } bool IONAME(SetCarriagecontrol)( diff --git a/flang/runtime/io-error.h b/flang/runtime/io-error.h index 4186ee68af0847..565e7153351e7e 100644 --- a/flang/runtime/io-error.h +++ b/flang/runtime/io-error.h @@ -32,9 +32,6 @@ class IoErrorHandler : public Terminator { void HasEndLabel() { flags_ |= hasEnd; } void HasEorLabel() { flags_ |= hasEor; } void HasIoMsg() { flags_ |= hasIoMsg; } - void HandleAnything() { - flags_ = hasIoStat | hasErr | hasEnd | hasEor | hasIoMsg; - } bool InError() const { return ioStat_ != IostatOk || pendingError_ != IostatOk; diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 4dcc147a8d2109..c9bda49458f067 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -582,7 +582,7 @@ class CloseStatementState : public ExternalIoStatementBase { CloseStatus status_{CloseStatus::Keep}; }; -// For CLOSE(bad unit) and INQUIRE(unconnected unit) +// For CLOSE(bad unit), WAIT(bad unit, ID=nonzero) and INQUIRE(unconnected unit) class NoUnitIoStatementState : public IoStatementBase { public: IoStatementState &ioStatementState() { return ioStatementState_; } diff --git a/flang/runtime/iostat.cpp b/flang/runtime/iostat.cpp index e375b28a4c7344..de9658fd23d2bb 100644 --- a/flang/runtime/iostat.cpp +++ b/flang/runtime/iostat.cpp @@ -83,6 +83,11 @@ 
const char *IostatErrorString(int iostat) { return "Bad REAL input value"; case IostatBadScaleFactor: return "Bad REAL output scale factor (kP)"; + case IostatBadAsynchronous: + return "READ/WRITE(ASYNCHRONOUS='YES') on unit without " + "OPEN(ASYNCHRONOUS='YES')"; + case IostatBadWaitUnit: + return "WAIT(ID=nonzero) for a bad unit number"; default: return nullptr; } diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index 9b9d78c6a108b5..f38f4cac19360e 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -124,7 +124,8 @@ class ExternalFileUnit : public ConnectionState, Lock lock_; - // When an I/O statement is in progress on this unit, holds its state. + // When a synchronous I/O statement is in progress on this unit, holds its + // state. std::variant, ExternalFormattedIoStatementState, From 57d239e4ad110c052fe0c05fb25a588ec5a8de14 Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Wed, 18 May 2022 13:59:08 -0700 Subject: [PATCH 357/908] [mlir] Breakdown diagnostic string literals This commit breaks down diagnostic string literals so that the attribute name and enumerator names can be shared with the stringify utility function and the "expected ", " to be one of ", and ", " can be shared between different enum-related diagnostics. 
Differential Revision: https://reviews.llvm.org/D125938 --- mlir/include/mlir/IR/EnumAttr.td | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/IR/EnumAttr.td b/mlir/include/mlir/IR/EnumAttr.td index a655d3456acd6f..ef2fd33b64e920 100644 --- a/mlir/include/mlir/IR/EnumAttr.td +++ b/mlir/include/mlir/IR/EnumAttr.td @@ -222,9 +222,10 @@ class IntEnumAttr Date: Tue, 24 May 2022 15:03:12 +0000 Subject: [PATCH 358/908] [mlir] Rename mlir::SmallVector -> llvm::SmallVector --- mlir/include/mlir/Interfaces/ControlFlowInterfaces.td | 2 +- mlir/include/mlir/Interfaces/InferTypeOpInterface.td | 4 ++-- mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td | 2 +- mlir/include/mlir/Interfaces/VectorInterfaces.td | 4 ++-- mlir/include/mlir/Interfaces/ViewLikeInterface.td | 6 +++--- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 4 ++-- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 6 +++--- mlir/tools/mlir-tblgen/RewriterGen.cpp | 8 ++++---- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td index 3eace4606fa6bc..9266bc33b49245 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td @@ -161,7 +161,7 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> { "void", "getSuccessorRegions", (ins "::mlir::Optional":$index, "::mlir::ArrayRef<::mlir::Attribute>":$operands, - "::mlir::SmallVectorImpl<::mlir::RegionSuccessor> &":$regions) + "::llvm::SmallVectorImpl<::mlir::RegionSuccessor> &":$regions) >, InterfaceMethod<[{ Populates `invocationBounds` with the minimum and maximum number of diff --git a/mlir/include/mlir/Interfaces/InferTypeOpInterface.td b/mlir/include/mlir/Interfaces/InferTypeOpInterface.td index ef41a08bdab338..09bbba68be52bb 100644 --- a/mlir/include/mlir/Interfaces/InferTypeOpInterface.td +++ 
b/mlir/include/mlir/Interfaces/InferTypeOpInterface.td @@ -100,7 +100,7 @@ def InferShapedTypeOpInterface : OpInterface<"InferShapedTypeOpInterface"> { "::mlir::ValueShapeRange":$operands, "::mlir::DictionaryAttr":$attributes, "::mlir::RegionRange":$regions, - "::mlir::SmallVectorImpl<::mlir::ShapedTypeComponents>&": + "::llvm::SmallVectorImpl<::mlir::ShapedTypeComponents>&": $inferredReturnShapes), /*methodBody=*/[{}], /*defaultImplementation=*/[{ return ::mlir::failure(); }] @@ -126,7 +126,7 @@ def InferShapedTypeOpInterface : OpInterface<"InferShapedTypeOpInterface"> { /*methodName=*/"reifyReturnTypeShapes", /*args=*/(ins "::mlir::OpBuilder&":$builder, "::mlir::ValueRange":$operands, - "::mlir::SmallVectorImpl<::mlir::Value> &":$reifiedReturnShapes), + "::llvm::SmallVectorImpl<::mlir::Value> &":$reifiedReturnShapes), /*methodBody=*/[{}], /*defaultImplementation=*/[{ return ::mlir::failure(); }] > diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td b/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td index 3bfde2eb0d7669..0bb9ad6c7febb5 100644 --- a/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td +++ b/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td @@ -52,7 +52,7 @@ class EffectOpInterfaceBase Collects all of the operation's effects into `effects`. }], "void", "getEffects", - (ins "::mlir::SmallVectorImpl<::mlir::SideEffects::EffectInstance<" + (ins "::llvm::SmallVectorImpl<::mlir::SideEffects::EffectInstance<" # baseEffect # ">> &":$effects) >, ]; diff --git a/mlir/include/mlir/Interfaces/VectorInterfaces.td b/mlir/include/mlir/Interfaces/VectorInterfaces.td index 00e69e2b89c1bf..c36d6d1d7aa609 100644 --- a/mlir/include/mlir/Interfaces/VectorInterfaces.td +++ b/mlir/include/mlir/Interfaces/VectorInterfaces.td @@ -28,7 +28,7 @@ def VectorUnrollOpInterface : OpInterface<"VectorUnrollOpInterface"> { `targetShape`. Return `None` if the op cannot be unrolled to the target vector shape. 
}], - /*retTy=*/"::mlir::Optional<::mlir::SmallVector>", + /*retTy=*/"::mlir::Optional<::llvm::SmallVector>", /*methodName=*/"getShapeForUnroll", /*args=*/(ins), /*methodBody=*/"", @@ -38,7 +38,7 @@ def VectorUnrollOpInterface : OpInterface<"VectorUnrollOpInterface"> { template dyn_cast<::mlir::VectorType>(); if (!vt) return ::mlir::None; - ::mlir::SmallVector res(vt.getShape().begin(), vt.getShape().end()); + ::llvm::SmallVector res(vt.getShape().begin(), vt.getShape().end()); return res; }] >, diff --git a/mlir/include/mlir/Interfaces/ViewLikeInterface.td b/mlir/include/mlir/Interfaces/ViewLikeInterface.td index e3cbd2c667b71b..d389bab840a6ad 100644 --- a/mlir/include/mlir/Interfaces/ViewLikeInterface.td +++ b/mlir/include/mlir/Interfaces/ViewLikeInterface.td @@ -160,7 +160,7 @@ def OffsetSizeAndStrideOpInterface : OpInterface<"OffsetSizeAndStrideOpInterface /*desc=*/[{ Return a vector of all the static or dynamic sizes of the op. }], - /*retTy=*/"::mlir::SmallVector<::mlir::OpFoldResult, 4>", + /*retTy=*/"::llvm::SmallVector<::mlir::OpFoldResult, 4>", /*methodName=*/"getMixedOffsets", /*args=*/(ins), /*methodBody=*/"", @@ -173,7 +173,7 @@ def OffsetSizeAndStrideOpInterface : OpInterface<"OffsetSizeAndStrideOpInterface /*desc=*/[{ Return a vector of all the static or dynamic sizes of the op. }], - /*retTy=*/"::mlir::SmallVector<::mlir::OpFoldResult, 4>", + /*retTy=*/"::llvm::SmallVector<::mlir::OpFoldResult, 4>", /*methodName=*/"getMixedSizes", /*args=*/(ins), /*methodBody=*/"", @@ -185,7 +185,7 @@ def OffsetSizeAndStrideOpInterface : OpInterface<"OffsetSizeAndStrideOpInterface /*desc=*/[{ Return a vector of all the static or dynamic strides of the op. 
}], - /*retTy=*/"::mlir::SmallVector<::mlir::OpFoldResult, 4>", + /*retTy=*/"::llvm::SmallVector<::mlir::OpFoldResult, 4>", /*methodName=*/"getMixedStrides", /*args=*/(ins), /*methodBody=*/"", diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 2028fe417d2c46..a3626b5d3f791a 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -1631,7 +1631,7 @@ void OpEmitter::genInferredTypeCollectiveParamBuilder() { // Result types body << formatv(R"( - ::mlir::SmallVector<::mlir::Type, 2> inferredReturnTypes; + ::llvm::SmallVector<::mlir::Type, 2> inferredReturnTypes; if (::mlir::succeeded({0}::inferReturnTypes(odsBuilder.getContext(), {1}.location, operands, {1}.attributes.getDictionary({1}.getContext()), @@ -2271,7 +2271,7 @@ void OpEmitter::genSideEffectInterfaceMethods() { for (auto &it : interfaceEffects) { // Generate the 'getEffects' method. - std::string type = llvm::formatv("::mlir::SmallVectorImpl<::mlir::" + std::string type = llvm::formatv("::llvm::SmallVectorImpl<::mlir::" "SideEffects::EffectInstance<{0}>> &", it.first()) .str(); diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 5044f67f422874..0eda318f894a72 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -793,7 +793,7 @@ static void genElementParserStorage(FormatElement *element, const Operator &op, genElementParserStorage(paramElement, op, body); } else if (isa(element)) { - body << " ::mlir::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> " + body << " ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> " "allOperands;\n"; } else if (isa(element)) { @@ -812,7 +812,7 @@ static void genElementParserStorage(FormatElement *element, const Operator &op, StringRef name = operand->getVar()->name; if (operand->getVar()->isVariableLength()) { body - << " 
::mlir::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> " + << " ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> " << name << "Operands;\n"; if (operand->getVar()->isVariadicOfVariadic()) { body << " llvm::SmallVector " << name @@ -855,7 +855,7 @@ static void genElementParserStorage(FormatElement *element, const Operator &op, ArgumentLengthKind lengthKind; StringRef name = getTypeListName(dir->getArg(), lengthKind); if (lengthKind != ArgumentLengthKind::Single) - body << " ::mlir::SmallVector<::mlir::Type, 1> " << name << "Types;\n"; + body << " ::llvm::SmallVector<::mlir::Type, 1> " << name << "Types;\n"; else body << llvm::formatv(" ::mlir::Type {0}RawTypes[1];\n", name) << llvm::formatv( diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index d7dfa7315684de..d55e199f25a916 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -1485,7 +1485,7 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // Then prepare the result types. We need to specify the types for all // results. 
- os.indent() << formatv("::mlir::SmallVector<::mlir::Type, 4> tblgen_types; " + os.indent() << formatv("::llvm::SmallVector<::mlir::Type, 4> tblgen_types; " "(void)tblgen_types;\n"); int numResults = resultOp.getNumResults(); if (tail.returnType) { @@ -1531,7 +1531,7 @@ void PatternEmitter::createSeparateLocalVarsForOpArgs( std::string varName; if (operand->isVariadic()) { varName = std::string(formatv("tblgen_values_{0}", valueIndex++)); - os << formatv("::mlir::SmallVector<::mlir::Value, 4> {0};\n", varName); + os << formatv("::llvm::SmallVector<::mlir::Value, 4> {0};\n", varName); std::string range; if (node.isNestedDagArg(argIndex)) { range = childNodeNames[argIndex]; @@ -1614,9 +1614,9 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( Operator &resultOp = node.getDialectOp(opMap); auto scope = os.scope(); - os << formatv("::mlir::SmallVector<::mlir::Value, 4> " + os << formatv("::llvm::SmallVector<::mlir::Value, 4> " "tblgen_values; (void)tblgen_values;\n"); - os << formatv("::mlir::SmallVector<::mlir::NamedAttribute, 4> " + os << formatv("::llvm::SmallVector<::mlir::NamedAttribute, 4> " "tblgen_attrs; (void)tblgen_attrs;\n"); const char *addAttrCmd = From dc8a9a03ecdba4280ae15b890725ae30ad55ff60 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 24 May 2022 11:22:43 -0400 Subject: [PATCH 359/908] [libc++][NFC] Move definitions around in string.cpp to reduce _LIBCPP_HAS_NO_WIDE_CHARACTERS blocks --- libcxx/src/string.cpp | 100 ++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 57 deletions(-) diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp index 9d1de0cf6ca1a6..d847ad506845c2 100644 --- a/libcxx/src/string.cpp +++ b/libcxx/src/string.cpp @@ -288,98 +288,42 @@ stoi(const string& str, size_t* idx, int base) return as_integer( "stoi", str, idx, base ); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -int -stoi(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoi", str, idx, base ); -} -#endif - long 
stol(const string& str, size_t* idx, int base) { return as_integer( "stol", str, idx, base ); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -long -stol(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stol", str, idx, base ); -} -#endif - unsigned long stoul(const string& str, size_t* idx, int base) { return as_integer( "stoul", str, idx, base ); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -unsigned long -stoul(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoul", str, idx, base ); -} -#endif - long long stoll(const string& str, size_t* idx, int base) { return as_integer( "stoll", str, idx, base ); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -long long -stoll(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoll", str, idx, base ); -} -#endif - unsigned long long stoull(const string& str, size_t* idx, int base) { return as_integer( "stoull", str, idx, base ); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -unsigned long long -stoull(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoull", str, idx, base ); -} -#endif - float stof(const string& str, size_t* idx) { return as_float( "stof", str, idx ); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -float -stof(const wstring& str, size_t* idx) -{ - return as_float( "stof", str, idx ); -} -#endif - double stod(const string& str, size_t* idx) { return as_float( "stod", str, idx ); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -double -stod(const wstring& str, size_t* idx) -{ - return as_float( "stod", str, idx ); -} -#endif - long double stold(const string& str, size_t* idx) { @@ -387,12 +331,54 @@ stold(const string& str, size_t* idx) } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +int +stoi(const wstring& str, size_t* idx, int base) +{ + return as_integer( "stoi", str, idx, base ); +} + +long +stol(const wstring& str, size_t* idx, int base) +{ + return as_integer( "stol", str, idx, base ); +} + +unsigned long +stoul(const wstring& str, size_t* idx, int base) +{ + 
return as_integer( "stoul", str, idx, base ); +} + +long long +stoll(const wstring& str, size_t* idx, int base) +{ + return as_integer( "stoll", str, idx, base ); +} + +unsigned long long +stoull(const wstring& str, size_t* idx, int base) +{ + return as_integer( "stoull", str, idx, base ); +} + +float +stof(const wstring& str, size_t* idx) +{ + return as_float( "stof", str, idx ); +} + +double +stod(const wstring& str, size_t* idx) +{ + return as_float( "stod", str, idx ); +} + long double stold(const wstring& str, size_t* idx) { return as_float( "stold", str, idx ); } -#endif +#endif // !_LIBCPP_HAS_NO_WIDE_CHARACTERS // to_string From 63251840494216af0d063503faee21f58a915c3c Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 24 May 2022 11:31:31 -0400 Subject: [PATCH 360/908] [libc++][NFC] Whitespace refactoring of string.cpp for consistency and legibility --- libcxx/src/string.cpp | 276 +++++++++++++----------------------------- 1 file changed, 84 insertions(+), 192 deletions(-) diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp index d847ad506845c2..8db129520ee501 100644 --- a/libcxx/src/string.cpp +++ b/libcxx/src/string.cpp @@ -56,42 +56,33 @@ void __basic_string_common::__throw_out_of_range() const { #endif #undef _LIBCPP_EXTERN_TEMPLATE_DEFINE -template string operator+, allocator >(char const*, string const&); +template string operator+, allocator>(char const*, string const&); namespace { template -inline -void throw_helper( const string& msg ) -{ +inline void throw_helper(const string& msg) { #ifndef _LIBCPP_NO_EXCEPTIONS - throw T( msg ); + throw T(msg); #else fprintf(stderr, "%s\n", msg.c_str()); _VSTD::abort(); #endif } -inline -void throw_from_string_out_of_range( const string& func ) -{ +inline void throw_from_string_out_of_range(const string& func) { throw_helper(func + ": out of range"); } -inline -void throw_from_string_invalid_arg( const string& func ) -{ +inline void throw_from_string_invalid_arg(const string& func) { 
throw_helper(func + ": no conversion"); } // as_integer template -inline -V -as_integer_helper(const string& func, const S& str, size_t* idx, int base, F f) -{ +inline V as_integer_helper(const string& func, const S& str, size_t* idx, int base, F f) { typename S::value_type* ptr = nullptr; const typename S::value_type* const p = str.c_str(); typename remove_reference::type errno_save = errno; @@ -108,109 +99,77 @@ as_integer_helper(const string& func, const S& str, size_t* idx, int base, F f) } template -inline -V -as_integer(const string& func, const S& s, size_t* idx, int base); +inline V as_integer(const string& func, const S& s, size_t* idx, int base); // string template<> -inline -int -as_integer(const string& func, const string& s, size_t* idx, int base ) -{ +inline int as_integer(const string& func, const string& s, size_t* idx, int base) { // Use long as no Standard string to integer exists. - long r = as_integer_helper( func, s, idx, base, strtol ); + long r = as_integer_helper(func, s, idx, base, strtol); if (r < numeric_limits::min() || numeric_limits::max() < r) throw_from_string_out_of_range(func); return static_cast(r); } template<> -inline -long -as_integer(const string& func, const string& s, size_t* idx, int base ) -{ - return as_integer_helper( func, s, idx, base, strtol ); +inline long as_integer(const string& func, const string& s, size_t* idx, int base) { + return as_integer_helper(func, s, idx, base, strtol); } template<> -inline -unsigned long -as_integer( const string& func, const string& s, size_t* idx, int base ) -{ - return as_integer_helper( func, s, idx, base, strtoul ); +inline unsigned long as_integer(const string& func, const string& s, size_t* idx, int base) { + return as_integer_helper(func, s, idx, base, strtoul); } template<> -inline -long long -as_integer( const string& func, const string& s, size_t* idx, int base ) -{ - return as_integer_helper( func, s, idx, base, strtoll ); +inline long long as_integer(const string& func, 
const string& s, size_t* idx, int base) { + return as_integer_helper(func, s, idx, base, strtoll); } template<> -inline -unsigned long long -as_integer( const string& func, const string& s, size_t* idx, int base ) -{ - return as_integer_helper( func, s, idx, base, strtoull ); +inline unsigned long long as_integer(const string& func, const string& s, size_t* idx, int base) { + return as_integer_helper(func, s, idx, base, strtoull); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS // wstring template<> -inline -int -as_integer( const string& func, const wstring& s, size_t* idx, int base ) -{ +inline int as_integer(const string& func, const wstring& s, size_t* idx, int base) { // Use long as no Stantard string to integer exists. - long r = as_integer_helper( func, s, idx, base, wcstol ); + long r = as_integer_helper(func, s, idx, base, wcstol); if (r < numeric_limits::min() || numeric_limits::max() < r) throw_from_string_out_of_range(func); return static_cast(r); } template<> -inline -long -as_integer( const string& func, const wstring& s, size_t* idx, int base ) -{ - return as_integer_helper( func, s, idx, base, wcstol ); +inline long as_integer(const string& func, const wstring& s, size_t* idx, int base) { + return as_integer_helper(func, s, idx, base, wcstol); } template<> inline unsigned long -as_integer( const string& func, const wstring& s, size_t* idx, int base ) +as_integer(const string& func, const wstring& s, size_t* idx, int base) { - return as_integer_helper( func, s, idx, base, wcstoul ); + return as_integer_helper(func, s, idx, base, wcstoul); } template<> -inline -long long -as_integer( const string& func, const wstring& s, size_t* idx, int base ) -{ - return as_integer_helper( func, s, idx, base, wcstoll ); +inline long long as_integer(const string& func, const wstring& s, size_t* idx, int base) { + return as_integer_helper(func, s, idx, base, wcstoll); } template<> -inline -unsigned long long -as_integer( const string& func, const wstring& s, size_t* idx, int 
base ) -{ - return as_integer_helper( func, s, idx, base, wcstoull ); +inline unsigned long long as_integer(const string& func, const wstring& s, size_t* idx, int base) { + return as_integer_helper(func, s, idx, base, wcstoull); } #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS // as_float template -inline -V -as_float_helper(const string& func, const S& str, size_t* idx, F f ) -{ +inline V as_float_helper(const string& func, const S& str, size_t* idx, F f) { typename S::value_type* ptr = nullptr; const typename S::value_type* const p = str.c_str(); typename remove_reference::type errno_save = errno; @@ -227,156 +186,105 @@ as_float_helper(const string& func, const S& str, size_t* idx, F f ) } template -inline -V as_float( const string& func, const S& s, size_t* idx = nullptr ); +inline V as_float(const string& func, const S& s, size_t* idx = nullptr); template<> -inline -float -as_float( const string& func, const string& s, size_t* idx ) -{ - return as_float_helper( func, s, idx, strtof ); +inline float as_float(const string& func, const string& s, size_t* idx) { + return as_float_helper(func, s, idx, strtof); } template<> -inline -double -as_float(const string& func, const string& s, size_t* idx ) -{ - return as_float_helper( func, s, idx, strtod ); +inline double as_float(const string& func, const string& s, size_t* idx) { + return as_float_helper(func, s, idx, strtod); } template<> -inline -long double -as_float( const string& func, const string& s, size_t* idx ) -{ - return as_float_helper( func, s, idx, strtold ); +inline long double as_float(const string& func, const string& s, size_t* idx) { + return as_float_helper(func, s, idx, strtold); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template<> -inline -float -as_float( const string& func, const wstring& s, size_t* idx ) -{ - return as_float_helper( func, s, idx, wcstof ); +inline float as_float(const string& func, const wstring& s, size_t* idx) { + return as_float_helper(func, s, idx, wcstof); } template<> -inline 
-double -as_float( const string& func, const wstring& s, size_t* idx ) -{ - return as_float_helper( func, s, idx, wcstod ); +inline double as_float(const string& func, const wstring& s, size_t* idx) { + return as_float_helper(func, s, idx, wcstod); } template<> -inline -long double -as_float( const string& func, const wstring& s, size_t* idx ) -{ - return as_float_helper( func, s, idx, wcstold ); +inline long double as_float(const string& func, const wstring& s, size_t* idx) { + return as_float_helper(func, s, idx, wcstold); } #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS } // unnamed namespace -int -stoi(const string& str, size_t* idx, int base) -{ - return as_integer( "stoi", str, idx, base ); +int stoi(const string& str, size_t* idx, int base) { + return as_integer("stoi", str, idx, base); } -long -stol(const string& str, size_t* idx, int base) -{ - return as_integer( "stol", str, idx, base ); +long stol(const string& str, size_t* idx, int base) { + return as_integer("stol", str, idx, base); } -unsigned long -stoul(const string& str, size_t* idx, int base) -{ - return as_integer( "stoul", str, idx, base ); +unsigned long stoul(const string& str, size_t* idx, int base) { + return as_integer("stoul", str, idx, base); } -long long -stoll(const string& str, size_t* idx, int base) -{ - return as_integer( "stoll", str, idx, base ); +long long stoll(const string& str, size_t* idx, int base) { + return as_integer("stoll", str, idx, base); } -unsigned long long -stoull(const string& str, size_t* idx, int base) -{ - return as_integer( "stoull", str, idx, base ); +unsigned long long stoull(const string& str, size_t* idx, int base) { + return as_integer("stoull", str, idx, base); } -float -stof(const string& str, size_t* idx) -{ - return as_float( "stof", str, idx ); +float stof(const string& str, size_t* idx) { + return as_float("stof", str, idx); } -double -stod(const string& str, size_t* idx) -{ - return as_float( "stod", str, idx ); +double stod(const string& str, size_t* 
idx) { + return as_float("stod", str, idx); } -long double -stold(const string& str, size_t* idx) -{ - return as_float( "stold", str, idx ); +long double stold(const string& str, size_t* idx) { + return as_float("stold", str, idx); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -int -stoi(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoi", str, idx, base ); +int stoi(const wstring& str, size_t* idx, int base) { + return as_integer("stoi", str, idx, base); } -long -stol(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stol", str, idx, base ); +long stol(const wstring& str, size_t* idx, int base) { + return as_integer("stol", str, idx, base); } -unsigned long -stoul(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoul", str, idx, base ); +unsigned long stoul(const wstring& str, size_t* idx, int base) { + return as_integer("stoul", str, idx, base); } -long long -stoll(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoll", str, idx, base ); +long long stoll(const wstring& str, size_t* idx, int base) { + return as_integer("stoll", str, idx, base); } -unsigned long long -stoull(const wstring& str, size_t* idx, int base) -{ - return as_integer( "stoull", str, idx, base ); +unsigned long long stoull(const wstring& str, size_t* idx, int base) { + return as_integer("stoull", str, idx, base); } -float -stof(const wstring& str, size_t* idx) -{ - return as_float( "stof", str, idx ); +float stof(const wstring& str, size_t* idx) { + return as_float("stof", str, idx); } -double -stod(const wstring& str, size_t* idx) -{ - return as_float( "stod", str, idx ); +double stod(const wstring& str, size_t* idx) { + return as_float("stod", str, idx); } -long double -stold(const wstring& str, size_t* idx) -{ - return as_float( "stold", str, idx ); +long double stold(const wstring& str, size_t* idx) { + return as_float("stold", str, idx); } #endif // !_LIBCPP_HAS_NO_WIDE_CHARACTERS @@ -388,21 +296,15 @@ namespace // 
as_string template -inline -S -as_string(P sprintf_like, S s, const typename S::value_type* fmt, V a) -{ +inline S as_string(P sprintf_like, S s, const typename S::value_type* fmt, V a) { typedef typename S::size_type size_type; size_type available = s.size(); - while (true) - { + while (true) { int status = sprintf_like(&s[0], available + 1, fmt, a); - if ( status >= 0 ) - { + if (status >= 0) { size_type used = static_cast(status); - if ( used <= available ) - { - s.resize( used ); + if (used <= available) { + s.resize(used); break; } available = used; // Assume this is advice of how much space we need. @@ -418,11 +320,8 @@ template struct initial_string; template <> -struct initial_string -{ - string - operator()() const - { +struct initial_string { + string operator()() const { string s; s.resize(s.capacity()); return s; @@ -431,11 +330,8 @@ struct initial_string #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <> -struct initial_string -{ - wstring - operator()() const - { +struct initial_string { + wstring operator()() const { wstring s(20, wchar_t()); s.resize(s.capacity()); return s; @@ -444,10 +340,7 @@ struct initial_string typedef int (*wide_printf)(wchar_t* __restrict, size_t, const wchar_t*__restrict, ...); -inline -wide_printf -get_swprintf() -{ +inline wide_printf get_swprintf() { #ifndef _LIBCPP_MSVCRT return swprintf; #else @@ -457,8 +350,7 @@ get_swprintf() #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS template -S i_to_string(V v) -{ +S i_to_string(V v) { // numeric_limits::digits10 returns value less on 1 than desired for unsigned numbers. // For example, for 1-byte unsigned value digits10 is 2 (999 can not be represented), // so we need +1 here. 
From 154f93ca90e088ae64fc215a6a98ec60892f1c8a Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 24 May 2022 11:40:40 -0400 Subject: [PATCH 361/908] [gn build] (semi-automatically) port 0360b9f1599b --- llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn | 1 + .../gn/secondary/clang-tools-extra/pseudo/unittests/BUILD.gn | 2 ++ 2 files changed, 3 insertions(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn index 5bca8359f43a61..670b5c3263910b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn @@ -8,6 +8,7 @@ static_library("lib") { ] include_dirs = [ "../include" ] sources = [ + "Bracket.cpp", "DirectiveTree.cpp", "Forest.cpp", "GLR.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/unittests/BUILD.gn index 143c1ce3f4345e..96db81ad3ac0bf 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/unittests/BUILD.gn @@ -8,9 +8,11 @@ unittest("ClangPseudoTests") { "//clang/lib/Lex", "//clang/lib/Testing", "//llvm/lib/Support", + "//llvm/lib/Testing/Support", ] include_dirs = [ "../include" ] sources = [ + "BracketTest.cpp", "DirectiveTreeTest.cpp", "ForestTest.cpp", "GLRTest.cpp", From a1a14e817eeb5a0663b1342a125674348b8aac06 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Tue, 24 May 2022 06:24:02 -0700 Subject: [PATCH 362/908] [Clang] Avoid misleading 'conflicting types' diagnostic with no-prototype decls. 
Clang has recently started diagnosing prototype redeclaration errors like [rG385e7df33046](https://reviews.llvm.org/rG385e7df33046d7292612ee1e3ac00a59d8bc0441) This flagged legitimate issues in a codebase but was confusing to resolve because it actually conflicted with a function declaration from a system header and not from the one emitted with "note: ". This patch updates the error handling to use the canonical declaration's source location instead to avoid misleading errors like the one described. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D126258 --- clang/lib/Sema/SemaDecl.cpp | 2 ++ clang/test/Sema/prototype-redecls.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index df3e8804c7d555..b150a88e0aa21a 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -3911,6 +3911,8 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S, // ASTContext::typesAreCompatible(). 
if (Old->hasPrototype() && !New->hasWrittenPrototype() && NewDeclIsDefn && Old->getNumParams() != New->getNumParams()) { + if (Old->hasInheritedPrototype()) + Old = Old->getCanonicalDecl(); Diag(New->getLocation(), diag::err_conflicting_types) << New; Diag(Old->getLocation(), PrevDiag) << Old << Old->getType(); return true; diff --git a/clang/test/Sema/prototype-redecls.c b/clang/test/Sema/prototype-redecls.c index ca3355f79d69a8..ed569b5223ce2a 100644 --- a/clang/test/Sema/prototype-redecls.c +++ b/clang/test/Sema/prototype-redecls.c @@ -12,6 +12,10 @@ void blarg() {} // expected-error {{conflicting types for 'blarg'}} void blerp(short); // expected-note {{previous}} void blerp(x) int x; {} // expected-error {{conflicting types for 'blerp'}} +void foo(int); // expected-note {{previous}} +void foo(); +void foo() {} // expected-error {{conflicting types for 'foo'}} + void glerp(int); void glerp(x) short x; {} // Okay, promoted type is fine From 574f9dfee86a8084b8fca43c850f8a3387c6c68b Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 19 May 2022 09:55:29 -0700 Subject: [PATCH 363/908] [flang] Extension: Accept Hollerith actual arguments as if they were BOZ When a Hollerith (or short character) literal is presented as an actual argument that corresponds to a dummy argument for which a BOZ literal would be acceptable, treat the Hollerith as if it had been a BOZ literal in the same way -- and with the same code -- as f18 already does for the similar extension in DATA statements. 
Differential Revision: https://reviews.llvm.org/D126144 --- flang/include/flang/Evaluate/tools.h | 5 +++++ flang/lib/Evaluate/tools.cpp | 19 ++++++++++++++++++ flang/lib/Semantics/check-call.cpp | 28 +++++++++++++++++++-------- flang/lib/Semantics/check-call.h | 2 +- flang/lib/Semantics/data-to-inits.cpp | 23 +++++----------------- 5 files changed, 50 insertions(+), 27 deletions(-) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 403625cc4cf03b..2b56da846dd927 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1076,6 +1076,11 @@ Constant PackageConstant(std::vector> &&elements, std::optional> DataConstantConversionExtension( FoldingContext &, const DynamicType &, const Expr &); +// Convert Hollerith or short character to a another type as if the +// Hollerith data had been BOZ. +std::optional> HollerithToBOZ( + FoldingContext &, const Expr &, const DynamicType &); + } // namespace Fortran::evaluate namespace Fortran::semantics { diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index bc8b7087164ba5..293c91b700502d 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1125,6 +1125,25 @@ bool MayBePassedAsAbsentOptional( IsAllocatableOrPointerObject(expr, context); } +std::optional> HollerithToBOZ(FoldingContext &context, + const Expr &expr, const DynamicType &type) { + if (std::optional chValue{GetScalarConstantValue(expr)}) { + // Pad on the right with spaces when short, truncate the right if long. + // TODO: big-endian targets + auto bytes{static_cast( + ToInt64(type.MeasureSizeInBytes(context, false)).value())}; + BOZLiteralConstant bits{0}; + for (std::size_t j{0}; j < bytes; ++j) { + char ch{j >= chValue->size() ? 
' ' : chValue->at(j)}; + BOZLiteralConstant chBOZ{static_cast(ch)}; + bits = bits.IOR(chBOZ.SHIFTL(8 * j)); + } + return ConvertToType(type, Expr{bits}); + } else { + return std::nullopt; + } +} + } // namespace Fortran::evaluate namespace Fortran::semantics { diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index e2725f49547db5..19edc232fede93 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -167,15 +167,27 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, characteristics::TypeAndShape &actualType, bool isElemental, evaluate::FoldingContext &context, const Scope *scope, const evaluate::SpecificIntrinsic *intrinsic, - bool allowIntegerConversions) { + bool allowActualArgumentConversions) { // Basic type & rank checking parser::ContextualMessages &messages{context.messages()}; CheckCharacterActual(actual, dummy.type, actualType, context, messages); - if (allowIntegerConversions) { + if (allowActualArgumentConversions) { ConvertIntegerActual(actual, dummy.type, actualType, messages); } bool typesCompatible{dummy.type.type().IsTkCompatibleWith(actualType.type())}; + if (!typesCompatible && dummy.type.Rank() == 0 && + allowActualArgumentConversions) { + // Extension: pass Hollerith literal to scalar as if it had been BOZ + if (auto converted{ + evaluate::HollerithToBOZ(context, actual, dummy.type.type())}) { + messages.Say( + "passing Hollerith or character literal as if it were BOZ"_port_en_US); + actual = *converted; + actualType.type() = dummy.type.type(); + typesCompatible = true; + } + } if (typesCompatible) { if (isElemental) { } else if (dummy.type.attrs().test( @@ -683,7 +695,7 @@ static void CheckExplicitInterfaceArg(evaluate::ActualArgument &arg, const characteristics::DummyArgument &dummy, const characteristics::Procedure &proc, evaluate::FoldingContext &context, const Scope *scope, const evaluate::SpecificIntrinsic *intrinsic, - bool 
allowIntegerConversions) { + bool allowActualArgumentConversions) { auto &messages{context.messages()}; std::string dummyName{"dummy argument"}; if (!dummy.name.empty()) { @@ -714,7 +726,7 @@ static void CheckExplicitInterfaceArg(evaluate::ActualArgument &arg, object.type.Rank() == 0 && proc.IsElemental()}; CheckExplicitDataArg(object, dummyName, *expr, *type, isElemental, context, scope, intrinsic, - allowIntegerConversions); + allowActualArgumentConversions); } else if (object.type.type().IsTypelessIntrinsicArgument() && IsBOZLiteral(*expr)) { // ok @@ -867,7 +879,7 @@ static parser::Messages CheckExplicitInterface( const characteristics::Procedure &proc, evaluate::ActualArguments &actuals, const evaluate::FoldingContext &context, const Scope *scope, const evaluate::SpecificIntrinsic *intrinsic, - bool allowIntegerConversions) { + bool allowActualArgumentConversions) { parser::Messages buffer; parser::ContextualMessages messages{context.messages().at(), &buffer}; RearrangeArguments(proc, actuals, messages); @@ -878,7 +890,7 @@ static parser::Messages CheckExplicitInterface( const auto &dummy{proc.dummyArguments.at(index++)}; if (actual) { CheckExplicitInterfaceArg(*actual, dummy, proc, localContext, scope, - intrinsic, allowIntegerConversions); + intrinsic, allowActualArgumentConversions); } else if (!dummy.IsOptional()) { if (dummy.name.empty()) { messages.Say( @@ -909,9 +921,9 @@ parser::Messages CheckExplicitInterface(const characteristics::Procedure &proc, bool CheckInterfaceForGeneric(const characteristics::Procedure &proc, evaluate::ActualArguments &actuals, const evaluate::FoldingContext &context, - bool allowIntegerConversions) { + bool allowActualArgumentConversions) { return !CheckExplicitInterface( - proc, actuals, context, nullptr, nullptr, allowIntegerConversions) + proc, actuals, context, nullptr, nullptr, allowActualArgumentConversions) .AnyFatalError(); } diff --git a/flang/lib/Semantics/check-call.h b/flang/lib/Semantics/check-call.h index 
7c68f2bd8e2aa3..f3a26f59249d45 100644 --- a/flang/lib/Semantics/check-call.h +++ b/flang/lib/Semantics/check-call.h @@ -46,6 +46,6 @@ parser::Messages CheckExplicitInterface( // Checks actual arguments for the purpose of resolving a generic interface. bool CheckInterfaceForGeneric(const evaluate::characteristics::Procedure &, evaluate::ActualArguments &, const evaluate::FoldingContext &, - bool allowIntegerConversions = false); + bool allowActualArgumentConversions = false); } // namespace Fortran::semantics #endif diff --git a/flang/lib/Semantics/data-to-inits.cpp b/flang/lib/Semantics/data-to-inits.cpp index f392288bffb424..b3c786c18c9400 100644 --- a/flang/lib/Semantics/data-to-inits.cpp +++ b/flang/lib/Semantics/data-to-inits.cpp @@ -274,24 +274,11 @@ DataInitializationCompiler::ConvertElement( if (auto converted{evaluate::ConvertToType(type, SomeExpr{expr})}) { return {std::make_pair(std::move(*converted), false)}; } - if (std::optional chValue{ - evaluate::GetScalarConstantValue(expr)}) { - // Allow DATA initialization with Hollerith and kind=1 CHARACTER like - // (most) other Fortran compilers do. Pad on the right with spaces - // when short, truncate the right if long. - // TODO: big-endian targets - auto bytes{static_cast(evaluate::ToInt64( - type.MeasureSizeInBytes(exprAnalyzer_.GetFoldingContext(), false)) - .value())}; - evaluate::BOZLiteralConstant bits{0}; - for (std::size_t j{0}; j < bytes; ++j) { - char ch{j >= chValue->size() ? ' ' : chValue->at(j)}; - evaluate::BOZLiteralConstant chBOZ{static_cast(ch)}; - bits = bits.IOR(chBOZ.SHIFTL(8 * j)); - } - if (auto converted{evaluate::ConvertToType(type, SomeExpr{bits})}) { - return {std::make_pair(std::move(*converted), true)}; - } + // Allow DATA initialization with Hollerith and kind=1 CHARACTER like + // (most) other Fortran compilers do. 
+ if (auto converted{evaluate::HollerithToBOZ( + exprAnalyzer_.GetFoldingContext(), expr, type)}) { + return {std::make_pair(std::move(*converted), true)}; } SemanticsContext &context{exprAnalyzer_.context()}; if (context.IsEnabled(common::LanguageFeature::LogicalIntegerAssignment)) { From 1b976f2cb22fb022421380caec8254f20f6754b5 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 24 May 2022 11:42:34 -0400 Subject: [PATCH 364/908] [gn build] Reformat all build files Ran: git ls-files '*.gn' '*.gni' | xargs llvm/utils/gn/gn.py format --- .../secondary/clang-tools-extra/pseudo/tool/BUILD.gn | 2 +- .../clang-tools-extra/unittests/clang-tidy/BUILD.gn | 2 +- .../clang/tools/clang-offload-packager/BUILD.gn | 2 +- llvm/utils/gn/secondary/compiler-rt/BUILD.gn | 3 ++- .../gn/secondary/compiler-rt/lib/builtins/BUILD.gn | 2 +- llvm/utils/gn/secondary/lld/test/BUILD.gn | 3 +-- .../lldb/source/Plugins/Platform/gdb-server/BUILD.gn | 1 + .../gn/secondary/llvm/lib/Support/BLAKE3/BUILD.gn | 10 +++++----- llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn | 4 ++-- 10 files changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/tool/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/tool/BUILD.gn index 6bdae018884f13..6fbe7d94ddae45 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/tool/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/tool/BUILD.gn @@ -1,8 +1,8 @@ executable("clang-pseudo") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ - "//clang/lib/Basic", "//clang-tools-extra/pseudo/lib", + "//clang/lib/Basic", "//llvm/lib/Support", ] include_dirs = [ "../include" ] diff --git a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn index e71136aba6d61a..6ae3d632a28972 100644 --- 
a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn @@ -6,8 +6,8 @@ unittest("ClangTidyTests") { "//clang-tools-extra/clang-tidy", "//clang-tools-extra/clang-tidy/android", "//clang-tools-extra/clang-tidy/google", - "//clang-tools-extra/clang-tidy/modernize", "//clang-tools-extra/clang-tidy/llvm", + "//clang-tools-extra/clang-tidy/modernize", "//clang-tools-extra/clang-tidy/objc", "//clang-tools-extra/clang-tidy/readability", "//clang-tools-extra/clang-tidy/utils", diff --git a/llvm/utils/gn/secondary/clang/tools/clang-offload-packager/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/clang-offload-packager/BUILD.gn index 82051a43b87eef..b33b534fde46b3 100644 --- a/llvm/utils/gn/secondary/clang/tools/clang-offload-packager/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/clang-offload-packager/BUILD.gn @@ -2,9 +2,9 @@ executable("clang-offload-packager") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/Basic", - "//llvm/lib/Target:TargetsToBuild", "//llvm/lib/Object", "//llvm/lib/Support", + "//llvm/lib/Target:TargetsToBuild", ] sources = [ "ClangOffloadPackager.cpp" ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/BUILD.gn index 05053e770e0d56..edadd265833b93 100644 --- a/llvm/utils/gn/secondary/compiler-rt/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/BUILD.gn @@ -13,7 +13,8 @@ if (current_os == "win") { } supported_toolchains += supported_android_toolchains if (llvm_build_AArch64) { - supported_toolchains += [ "//llvm/utils/gn/build/toolchain:stage2_baremetal_aarch64" ] + supported_toolchains += + [ "//llvm/utils/gn/build/toolchain:stage2_baremetal_aarch64" ] } group("compiler-rt") { deps = [ "//compiler-rt/include($host_toolchain)" ] diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 
39154c25f67290..0248511dfb8a09 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -435,9 +435,9 @@ static_library("builtins") { if (current_cpu == "avr") { sources += [ - "avr/exit.S", "avr/divmodhi4.S", "avr/divmodqi4.S", + "avr/exit.S", "avr/mulhi3.S", "avr/mulqi3.S", "avr/udivmodhi4.S", diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn index 569880a3c3d8da..585ef0449d0e4d 100644 --- a/llvm/utils/gn/secondary/lld/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn @@ -14,8 +14,7 @@ write_cmake_config("lit_site_cfg") { values = [ "LIT_SITE_CFG_IN_HEADER=## Autogenerated from $input, do not edit", - "LLD_BINARY_DIR=" + - rebase_path(get_label_info("//lld", "target_out_dir")), + "LLD_BINARY_DIR=" + rebase_path(get_label_info("//lld", "target_out_dir")), "CURRENT_LIBS_DIR=", # FIXME: for shared builds only (?) "CURRENT_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), "LLD_SOURCE_DIR=" + rebase_path("//lld"), diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn index c1f99e94b47ec9..bcd800a17d24fe 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn @@ -9,6 +9,7 @@ static_library("gdb-server") { "//lldb/source/Core", "//lldb/source/Host", "//lldb/source/Plugins/Process/Utility", + #"//lldb/source/Plugins/Process/gdb-remote", # 4-deep dependency cycle "//lldb/source/Target", ] diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BLAKE3/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BLAKE3/BUILD.gn index 01a85ba249e55b..ae28e967ad1498 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BLAKE3/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BLAKE3/BUILD.gn @@ -6,11 +6,11 @@ 
static_library("BLAKE3") { "blake3_portable.c", ] defines = [ - "BLAKE3_NO_AVX512", - "BLAKE3_NO_AVX2", - "BLAKE3_NO_SSE41", - "BLAKE3_NO_SSE2", - "BLAKE3_USE_NEON=0", + "BLAKE3_NO_AVX512", + "BLAKE3_NO_AVX2", + "BLAKE3_NO_SSE41", + "BLAKE3_NO_SSE2", + "BLAKE3_USE_NEON=0", ] } diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index 412557df6015fb..09bf4d391eb13d 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -27,9 +27,9 @@ executable("llvm-tblgen") { "DAGISelMatcherEmitter.cpp", "DAGISelMatcherGen.cpp", "DAGISelMatcherOpt.cpp", - "DecoderEmitter.cpp", "DFAEmitter.cpp", "DFAPacketizerEmitter.cpp", + "DecoderEmitter.cpp", "DirectiveEmitter.cpp", "DisassemblerEmitter.cpp", "ExegesisEmitter.cpp", diff --git a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn index b317b969312b18..819f6c91d02708 100644 --- a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn @@ -21,12 +21,12 @@ write_cmake_config("llvm-lit") { deps = [ "//llvm/tools/llvm-config" ] deps += [ - "//clang-tools-extra/test:lit_site_cfg", - "//clang-tools-extra/test:lit_unit_site_cfg", "//clang-tools-extra/clangd/test:lit_site_cfg", "//clang-tools-extra/clangd/test:lit_unit_site_cfg", "//clang-tools-extra/pseudo/test:lit_site_cfg", "//clang-tools-extra/pseudo/test:lit_unit_site_cfg", + "//clang-tools-extra/test:lit_site_cfg", + "//clang-tools-extra/test:lit_unit_site_cfg", "//clang/test:lit_site_cfg", "//clang/test:lit_unit_site_cfg", "//lld/test:lit_site_cfg", From cddeb78e8d5b7b924c920fb702d3d883b2661b12 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 May 2022 09:41:00 -0700 Subject: [PATCH 365/908] [RISCV] Add test cases showing failure to remove mask on rotate amounts. 
This is similar to tests I added in e2f410feeab27a8bb2c015fc02bb8527702e401f that had to be reverted. I've modified them to avoid the bug that is being fixed by D126036. --- llvm/test/CodeGen/RISCV/rotl-rotr.ll | 310 +++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index fd5437e3cb1474..0cd4866515ad39 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -550,3 +550,313 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { %d = or i64 %b, %c ret i64 %d } + +; Test that we're able to remove a mask on the rotate amount that has more than +; one use. +define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; RV32I-LABEL: rotl_32_mask_shared: +; RV32I: # %bb.0: +; RV32I-NEXT: sll a3, a0, a2 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotl_32_mask_shared: +; RV64I: # %bb.0: +; RV64I-NEXT: sllw a3, a0, a2 +; RV64I-NEXT: negw a4, a2 +; RV64I-NEXT: srlw a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: sllw a1, a1, a2 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotl_32_mask_shared: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi a3, a2, 31 +; RV32ZBB-NEXT: rol a0, a0, a3 +; RV32ZBB-NEXT: sll a1, a1, a2 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: rotl_32_mask_shared: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rolw a0, a0, a2 +; RV64ZBB-NEXT: sllw a1, a1, a2 +; RV64ZBB-NEXT: addw a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = shl i32 %b, %maskedamt + %3 = add i32 %1, %2 + ret i32 %3 +} +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 
signext %amt) nounwind { +; RV32I-LABEL: rotl_64_mask_shared: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a5, a4, 26 +; RV32I-NEXT: srli a5, a5, 31 +; RV32I-NEXT: mv a7, a0 +; RV32I-NEXT: bnez a5, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a7, a1 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: andi a6, a4, 63 +; RV32I-NEXT: sll t0, a7, a4 +; RV32I-NEXT: bnez a5, .LBB9_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB9_4: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: not t1, a4 +; RV32I-NEXT: srl a0, a0, t1 +; RV32I-NEXT: or a5, t0, a0 +; RV32I-NEXT: sll a1, a1, a4 +; RV32I-NEXT: srli a0, a7, 1 +; RV32I-NEXT: srl a7, a0, t1 +; RV32I-NEXT: addi a0, a6, -32 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: bltz a0, .LBB9_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: sll a3, a2, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: j .LBB9_7 +; RV32I-NEXT: .LBB9_6: +; RV32I-NEXT: sll a0, a3, a4 +; RV32I-NEXT: srli a3, a2, 1 +; RV32I-NEXT: xori a6, a6, 31 +; RV32I-NEXT: srl a3, a3, a6 +; RV32I-NEXT: or a3, a0, a3 +; RV32I-NEXT: sll a0, a2, a4 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: .LBB9_7: +; RV32I-NEXT: sltu a1, a0, a1 +; RV32I-NEXT: add a2, a5, a3 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotl_64_mask_shared: +; RV64I: # %bb.0: +; RV64I-NEXT: sll a3, a0, a2 +; RV64I-NEXT: neg a4, a2 +; RV64I-NEXT: srl a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: sll a1, a1, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotl_64_mask_shared: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: slli a5, a4, 26 +; RV32ZBB-NEXT: srli a5, a5, 31 +; RV32ZBB-NEXT: mv a7, a0 +; RV32ZBB-NEXT: bnez a5, .LBB9_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: mv a7, a1 +; RV32ZBB-NEXT: .LBB9_2: +; RV32ZBB-NEXT: andi a6, a4, 63 +; RV32ZBB-NEXT: sll t0, a7, a4 +; RV32ZBB-NEXT: bnez a5, .LBB9_4 +; RV32ZBB-NEXT: # %bb.3: +; RV32ZBB-NEXT: mv a1, a0 +; RV32ZBB-NEXT: .LBB9_4: +; RV32ZBB-NEXT: srli a0, a1, 1 +; RV32ZBB-NEXT: not t1, a4 +; RV32ZBB-NEXT: srl a0, a0, 
t1 +; RV32ZBB-NEXT: or a5, t0, a0 +; RV32ZBB-NEXT: sll a1, a1, a4 +; RV32ZBB-NEXT: srli a0, a7, 1 +; RV32ZBB-NEXT: srl a7, a0, t1 +; RV32ZBB-NEXT: addi a0, a6, -32 +; RV32ZBB-NEXT: or a1, a1, a7 +; RV32ZBB-NEXT: bltz a0, .LBB9_6 +; RV32ZBB-NEXT: # %bb.5: +; RV32ZBB-NEXT: sll a3, a2, a0 +; RV32ZBB-NEXT: mv a0, a1 +; RV32ZBB-NEXT: j .LBB9_7 +; RV32ZBB-NEXT: .LBB9_6: +; RV32ZBB-NEXT: sll a0, a3, a4 +; RV32ZBB-NEXT: srli a3, a2, 1 +; RV32ZBB-NEXT: xori a6, a6, 31 +; RV32ZBB-NEXT: srl a3, a3, a6 +; RV32ZBB-NEXT: or a3, a0, a3 +; RV32ZBB-NEXT: sll a0, a2, a4 +; RV32ZBB-NEXT: add a0, a1, a0 +; RV32ZBB-NEXT: .LBB9_7: +; RV32ZBB-NEXT: sltu a1, a0, a1 +; RV32ZBB-NEXT: add a2, a5, a3 +; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: rotl_64_mask_shared: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a3, a2, 63 +; RV64ZBB-NEXT: rol a0, a0, a3 +; RV64ZBB-NEXT: sll a1, a1, a2 +; RV64ZBB-NEXT: add a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i64 %amt, 63 + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt) + %2 = shl i64 %b, %maskedamt + %3 = add i64 %1, %2 + ret i64 %3 +} +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; RV32I-LABEL: rotr_32_mask_shared: +; RV32I: # %bb.0: +; RV32I-NEXT: srl a3, a0, a2 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: sll a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotr_32_mask_shared: +; RV64I: # %bb.0: +; RV64I-NEXT: srlw a3, a0, a2 +; RV64I-NEXT: negw a4, a2 +; RV64I-NEXT: sllw a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: sllw a1, a1, a2 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotr_32_mask_shared: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi a3, a2, 31 +; RV32ZBB-NEXT: ror a0, a0, a3 +; RV32ZBB-NEXT: sll a1, a1, a2 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; 
RV64ZBB-LABEL: rotr_32_mask_shared: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rorw a0, a0, a2 +; RV64ZBB-NEXT: sllw a1, a1, a2 +; RV64ZBB-NEXT: addw a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = shl i32 %b, %maskedamt + %3 = add i32 %1, %2 + ret i32 %3 +} +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 signext %amt) nounwind { +; RV32I-LABEL: rotr_64_mask_shared: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a7, a4, 32 +; RV32I-NEXT: mv a6, a1 +; RV32I-NEXT: beqz a7, .LBB11_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: andi a5, a4, 63 +; RV32I-NEXT: srl t0, a6, a4 +; RV32I-NEXT: beqz a7, .LBB11_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB11_4: +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: not a7, a4 +; RV32I-NEXT: sll a1, a1, a7 +; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: srl t0, a0, a4 +; RV32I-NEXT: slli a0, a6, 1 +; RV32I-NEXT: sll a6, a0, a7 +; RV32I-NEXT: addi a0, a5, -32 +; RV32I-NEXT: or a6, a6, t0 +; RV32I-NEXT: bltz a0, .LBB11_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: sll a3, a2, a0 +; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: j .LBB11_7 +; RV32I-NEXT: .LBB11_6: +; RV32I-NEXT: sll a0, a3, a4 +; RV32I-NEXT: srli a3, a2, 1 +; RV32I-NEXT: xori a5, a5, 31 +; RV32I-NEXT: srl a3, a3, a5 +; RV32I-NEXT: or a3, a0, a3 +; RV32I-NEXT: sll a0, a2, a4 +; RV32I-NEXT: add a0, a6, a0 +; RV32I-NEXT: .LBB11_7: +; RV32I-NEXT: sltu a2, a0, a6 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotr_64_mask_shared: +; RV64I: # %bb.0: +; RV64I-NEXT: srl a3, a0, a2 +; RV64I-NEXT: neg a4, a2 +; RV64I-NEXT: sll a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: sll a1, a1, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotr_64_mask_shared: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi a7, a4, 
32 +; RV32ZBB-NEXT: mv a6, a1 +; RV32ZBB-NEXT: beqz a7, .LBB11_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: mv a6, a0 +; RV32ZBB-NEXT: .LBB11_2: +; RV32ZBB-NEXT: andi a5, a4, 63 +; RV32ZBB-NEXT: srl t0, a6, a4 +; RV32ZBB-NEXT: beqz a7, .LBB11_4 +; RV32ZBB-NEXT: # %bb.3: +; RV32ZBB-NEXT: mv a0, a1 +; RV32ZBB-NEXT: .LBB11_4: +; RV32ZBB-NEXT: slli a1, a0, 1 +; RV32ZBB-NEXT: not a7, a4 +; RV32ZBB-NEXT: sll a1, a1, a7 +; RV32ZBB-NEXT: or a1, a1, t0 +; RV32ZBB-NEXT: srl t0, a0, a4 +; RV32ZBB-NEXT: slli a0, a6, 1 +; RV32ZBB-NEXT: sll a6, a0, a7 +; RV32ZBB-NEXT: addi a0, a5, -32 +; RV32ZBB-NEXT: or a6, a6, t0 +; RV32ZBB-NEXT: bltz a0, .LBB11_6 +; RV32ZBB-NEXT: # %bb.5: +; RV32ZBB-NEXT: sll a3, a2, a0 +; RV32ZBB-NEXT: mv a0, a6 +; RV32ZBB-NEXT: j .LBB11_7 +; RV32ZBB-NEXT: .LBB11_6: +; RV32ZBB-NEXT: sll a0, a3, a4 +; RV32ZBB-NEXT: srli a3, a2, 1 +; RV32ZBB-NEXT: xori a5, a5, 31 +; RV32ZBB-NEXT: srl a3, a3, a5 +; RV32ZBB-NEXT: or a3, a0, a3 +; RV32ZBB-NEXT: sll a0, a2, a4 +; RV32ZBB-NEXT: add a0, a6, a0 +; RV32ZBB-NEXT: .LBB11_7: +; RV32ZBB-NEXT: sltu a2, a0, a6 +; RV32ZBB-NEXT: add a1, a1, a3 +; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: rotr_64_mask_shared: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a3, a2, 63 +; RV64ZBB-NEXT: ror a0, a0, a3 +; RV64ZBB-NEXT: sll a1, a1, a2 +; RV64ZBB-NEXT: add a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i64 %amt, 63 + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt) + %2 = shl i64 %b, %maskedamt + %3 = add i64 %1, %2 + ret i64 %3 +} +declare i64 @llvm.fshr.i64(i64, i64, i64) From 415b9f595d4d2fae947f86b239777ec54c3824dc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 May 2022 09:41:04 -0700 Subject: [PATCH 366/908] Recommit "[RISCV] Use selectShiftMaskXLen ComplexPattern for isel of rotates." This reverts commit dfe513ae1bb6e788ead93b850d80d77d54cf29d3. Tests have been changed to avoid the type legalization bug being fixed in D126036. 
Original commit message: This will remove masks on the shift amount. We usually get this with SimplifyDemandedBits in DAGCombine, but that's restricted to cases where the AND has a single use. selectShiftMaskXLen does not have that restriction. --- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 8 ++++---- llvm/test/CodeGen/RISCV/rotl-rotr.ll | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index b0e82e45e9eced..a1e795819b1a84 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -817,8 +817,8 @@ def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { -def : PatGprGpr; -def : PatGprGpr; +def : PatGprGpr, ROL>; +def : PatGprGpr, ROR>; def : PatGprImm; // There's no encoding for roli in the the 'B' extension as it can be @@ -828,8 +828,8 @@ def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt), } // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { -def : PatGprGpr; -def : PatGprGpr; +def : PatGprGpr, ROLW>; +def : PatGprGpr, RORW>; def : PatGprImm; def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index 0cd4866515ad39..58ec0b0b0c170a 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -576,8 +576,7 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; ; RV32ZBB-LABEL: rotl_32_mask_shared: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a3, a2, 31 -; RV32ZBB-NEXT: rol a0, a0, a3 +; RV32ZBB-NEXT: rol a0, a0, a2 ; RV32ZBB-NEXT: sll a1, a1, a2 ; RV32ZBB-NEXT: add a0, a0, a1 ; RV32ZBB-NEXT: ret @@ -695,8 +694,7 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, 
i64 signext %b, i64 sign ; ; RV64ZBB-LABEL: rotl_64_mask_shared: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andi a3, a2, 63 -; RV64ZBB-NEXT: rol a0, a0, a3 +; RV64ZBB-NEXT: rol a0, a0, a2 ; RV64ZBB-NEXT: sll a1, a1, a2 ; RV64ZBB-NEXT: add a0, a0, a1 ; RV64ZBB-NEXT: ret @@ -731,8 +729,7 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; ; RV32ZBB-LABEL: rotr_32_mask_shared: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a3, a2, 31 -; RV32ZBB-NEXT: ror a0, a0, a3 +; RV32ZBB-NEXT: ror a0, a0, a2 ; RV32ZBB-NEXT: sll a1, a1, a2 ; RV32ZBB-NEXT: add a0, a0, a1 ; RV32ZBB-NEXT: ret @@ -848,8 +845,7 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; ; RV64ZBB-LABEL: rotr_64_mask_shared: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andi a3, a2, 63 -; RV64ZBB-NEXT: ror a0, a0, a3 +; RV64ZBB-NEXT: ror a0, a0, a2 ; RV64ZBB-NEXT: sll a1, a1, a2 ; RV64ZBB-NEXT: add a0, a0, a1 ; RV64ZBB-NEXT: ret From 28432b0f655641df7f9d079cf69ba235038d6340 Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Tue, 24 May 2022 09:47:22 -0700 Subject: [PATCH 367/908] [PS5] Verify defaults to -fno-stack-size-section --- clang/test/Driver/stack-size-section.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/test/Driver/stack-size-section.c b/clang/test/Driver/stack-size-section.c index 461f0b5c9aee67..18fb55f4ac9f59 100644 --- a/clang/test/Driver/stack-size-section.c +++ b/clang/test/Driver/stack-size-section.c @@ -1,4 +1,5 @@ // RUN: %clang -target x86_64-unknown %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT +// RUN: %clang -target x86_64-sie-ps5 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT // RUN: %clang -target x86_64-scei-ps4 -fno-stack-size-section %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT // CHECK-ABSENT-NOT: -fstack-size-section @@ -6,4 +7,9 @@ // RUN: %clang -target x86_64-scei-ps4 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PRESENT // CHECK-PRESENT: -fstack-size-section +// RUN: %clang 
-target x86_64-unknown -fstack-size-section -fno-stack-size-section -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ABSENT +// RUN: %clang -target x86_64-unknown -fnostack-size-section -fstack-size-section -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PRESENT + int foo() { return 42; } From d90e866a191fb77bb20140c6a955a4b3258c6ee3 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 18 May 2022 13:23:39 -0700 Subject: [PATCH 368/908] [flang][runtime] INQUIRE(UNIT=666,NUMBER=n) must set n=666 Whether a unit number in an inquire-by-unit statement is valid or not, it should be the value to which the NUMBER= variable is set, not -1. -1 should be returned to NUMBER= only for an inquire-by-file statement when the FILE= is not connected to any unit. Differential Revision: https://reviews.llvm.org/D126145 --- flang/runtime/io-api.cpp | 6 +++--- flang/runtime/io-stmt.cpp | 15 +++++++++------ flang/runtime/io-stmt.h | 16 +++++++++++----- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 18e1a075855a79..a50f01131b7af5 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -360,7 +360,7 @@ Cookie IONAME(BeginClose)( } else { // CLOSE(UNIT=bad unit) is just a no-op Terminator oom{sourceFile, sourceLine}; - return &New{oom}(sourceFile, sourceLine) + return &New{oom}(sourceFile, sourceLine, unitNumber) .release() ->ioStatementState(); } @@ -374,7 +374,7 @@ Cookie IONAME(BeginFlush)( } else { // FLUSH(UNIT=unknown) is a no-op Terminator oom{sourceFile, sourceLine}; - return &New{oom}(sourceFile, sourceLine) + return &New{oom}(sourceFile, sourceLine, unitNumber) .release() ->ioStatementState(); } @@ -420,7 +420,7 @@ Cookie IONAME(BeginInquireUnit)( } else { // INQUIRE(UNIT=unrecognized unit) Terminator oom{sourceFile, sourceLine}; - return &New{oom}(sourceFile, sourceLine) + return &New{oom}(sourceFile, sourceLine, unitNumber) .release() ->ioStatementState(); } diff 
--git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 237d2648b61d61..78fae20a173c99 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -1270,7 +1270,7 @@ bool InquireUnitState::Inquire( } return true; case HashInquiryKeyword("NUMBER"): - result = unit().IsConnected() ? unit().unitNumber() : -1; + result = unit().unitNumber(); return true; case HashInquiryKeyword("POS"): result = unit().InquirePos(); @@ -1300,8 +1300,9 @@ bool InquireUnitState::Inquire( } } -InquireNoUnitState::InquireNoUnitState(const char *sourceFile, int sourceLine) - : NoUnitIoStatementState{sourceFile, sourceLine, *this} {} +InquireNoUnitState::InquireNoUnitState( + const char *sourceFile, int sourceLine, int badUnitNumber) + : NoUnitIoStatementState{*this, sourceFile, sourceLine, badUnitNumber} {} bool InquireNoUnitState::Inquire( InquiryKeywordHash inquiry, char *result, std::size_t length) { @@ -1370,8 +1371,10 @@ bool InquireNoUnitState::Inquire( bool InquireNoUnitState::Inquire( InquiryKeywordHash inquiry, std::int64_t &result) { switch (inquiry) { - case HashInquiryKeyword("NEXTREC"): case HashInquiryKeyword("NUMBER"): + result = badUnitNumber(); + return true; + case HashInquiryKeyword("NEXTREC"): case HashInquiryKeyword("POS"): case HashInquiryKeyword("RECL"): case HashInquiryKeyword("SIZE"): @@ -1385,7 +1388,7 @@ bool InquireNoUnitState::Inquire( InquireUnconnectedFileState::InquireUnconnectedFileState( OwningPtr &&path, const char *sourceFile, int sourceLine) - : NoUnitIoStatementState{sourceFile, sourceLine, *this}, path_{std::move( + : NoUnitIoStatementState{*this, sourceFile, sourceLine}, path_{std::move( path)} {} bool InquireUnconnectedFileState::Inquire( @@ -1491,7 +1494,7 @@ bool InquireUnconnectedFileState::Inquire( InquireIOLengthState::InquireIOLengthState( const char *sourceFile, int sourceLine) - : NoUnitIoStatementState{sourceFile, sourceLine, *this} {} + : NoUnitIoStatementState{*this, sourceFile, sourceLine} {} bool 
InquireIOLengthState::Emit(const char *, std::size_t n, std::size_t) { bytes_ += n; diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index c9bda49458f067..16a0e031403f76 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -588,23 +588,28 @@ class NoUnitIoStatementState : public IoStatementBase { IoStatementState &ioStatementState() { return ioStatementState_; } MutableModes &mutableModes() { return connection_.modes; } ConnectionState &GetConnectionState() { return connection_; } + int badUnitNumber() const { return badUnitNumber_; } void CompleteOperation(); int EndIoStatement(); protected: template - NoUnitIoStatementState(const char *sourceFile, int sourceLine, A &stmt) - : IoStatementBase{sourceFile, sourceLine}, ioStatementState_{stmt} {} + NoUnitIoStatementState(A &stmt, const char *sourceFile = nullptr, + int sourceLine = 0, int badUnitNumber = -1) + : IoStatementBase{sourceFile, sourceLine}, ioStatementState_{stmt}, + badUnitNumber_{badUnitNumber} {} private: IoStatementState ioStatementState_; // points to *this ConnectionState connection_; + int badUnitNumber_; }; class NoopStatementState : public NoUnitIoStatementState { public: - NoopStatementState(const char *sourceFile, int sourceLine) - : NoUnitIoStatementState{sourceFile, sourceLine, *this} {} + NoopStatementState( + const char *sourceFile = nullptr, int sourceLine = 0, int unitNumber = -1) + : NoUnitIoStatementState{*this, sourceFile, sourceLine, unitNumber} {} void set_status(CloseStatus) {} // discards }; @@ -656,7 +661,8 @@ class InquireUnitState : public ExternalIoStatementBase { class InquireNoUnitState : public NoUnitIoStatementState { public: - InquireNoUnitState(const char *sourceFile = nullptr, int sourceLine = 0); + InquireNoUnitState(const char *sourceFile = nullptr, int sourceLine = 0, + int badUnitNumber = -1); bool Inquire(InquiryKeywordHash, char *, std::size_t); bool Inquire(InquiryKeywordHash, bool &); bool Inquire(InquiryKeywordHash, std::int64_t, 
bool &); From 11dd508bd43d606b8a2f52f4d0cf7471f78aca16 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Tue, 24 May 2022 11:51:00 -0500 Subject: [PATCH 369/908] NFC. Clang-formatting. Since the rest of the DirectX backend is pretty well clang-format clean, this file should be too. --- .../DirectX/DXILWriter/DXILBitcodeWriter.cpp | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 1dbfafe87d553a..260242ebeb62dc 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -122,9 +122,8 @@ class DXILBitcodeWriter { /// writing to the provided \p Buffer. DXILBitcodeWriter(const Module &M, SmallVectorImpl &Buffer, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream) - : Stream(Stream), StrtabBuilder(StrtabBuilder), M(M), - VE(M, true), Buffer(Buffer), - BitcodeStartBit(Stream.GetCurrentBitNo()) { + : Stream(Stream), StrtabBuilder(StrtabBuilder), M(M), VE(M, true), + Buffer(Buffer), BitcodeStartBit(Stream.GetCurrentBitNo()) { GlobalValueId = VE.getValues().size(); } @@ -342,7 +341,8 @@ using namespace llvm::dxil; /// Begin dxil::BitcodeWriter Implementation //////////////////////////////////////////////////////////////////////////////// -dxil::BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer, raw_fd_stream *FS) +dxil::BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer, + raw_fd_stream *FS) : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS, 512)) { // Emit the file header. 
Stream->Emit((unsigned)'B', 8); @@ -794,29 +794,40 @@ unsigned DXILBitcodeWriter::getEncodedLinkage(const GlobalValue &GV) { unsigned DXILBitcodeWriter::getEncodedVisibility(const GlobalValue &GV) { switch (GV.getVisibility()) { - case GlobalValue::DefaultVisibility: return 0; - case GlobalValue::HiddenVisibility: return 1; - case GlobalValue::ProtectedVisibility: return 2; + case GlobalValue::DefaultVisibility: + return 0; + case GlobalValue::HiddenVisibility: + return 1; + case GlobalValue::ProtectedVisibility: + return 2; } llvm_unreachable("Invalid visibility"); } unsigned DXILBitcodeWriter::getEncodedDLLStorageClass(const GlobalValue &GV) { switch (GV.getDLLStorageClass()) { - case GlobalValue::DefaultStorageClass: return 0; - case GlobalValue::DLLImportStorageClass: return 1; - case GlobalValue::DLLExportStorageClass: return 2; + case GlobalValue::DefaultStorageClass: + return 0; + case GlobalValue::DLLImportStorageClass: + return 1; + case GlobalValue::DLLExportStorageClass: + return 2; } llvm_unreachable("Invalid DLL storage class"); } unsigned DXILBitcodeWriter::getEncodedThreadLocalMode(const GlobalValue &GV) { switch (GV.getThreadLocalMode()) { - case GlobalVariable::NotThreadLocal: return 0; - case GlobalVariable::GeneralDynamicTLSModel: return 1; - case GlobalVariable::LocalDynamicTLSModel: return 2; - case GlobalVariable::InitialExecTLSModel: return 3; - case GlobalVariable::LocalExecTLSModel: return 4; + case GlobalVariable::NotThreadLocal: + return 0; + case GlobalVariable::GeneralDynamicTLSModel: + return 1; + case GlobalVariable::LocalDynamicTLSModel: + return 2; + case GlobalVariable::InitialExecTLSModel: + return 3; + case GlobalVariable::LocalExecTLSModel: + return 4; } llvm_unreachable("Invalid TLS model"); } From efebb27b745a0d677ad2ea9aefff242c12aef29c Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Tue, 24 May 2022 09:59:57 -0700 Subject: [PATCH 370/908] Revert "[PS5] Verify defaults to -fno-stack-size-section" This reverts commit 
28432b0f655641df7f9d079cf69ba235038d6340. Caused some unexpected buildbot failures. --- clang/test/Driver/stack-size-section.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/clang/test/Driver/stack-size-section.c b/clang/test/Driver/stack-size-section.c index 18fb55f4ac9f59..461f0b5c9aee67 100644 --- a/clang/test/Driver/stack-size-section.c +++ b/clang/test/Driver/stack-size-section.c @@ -1,5 +1,4 @@ // RUN: %clang -target x86_64-unknown %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT -// RUN: %clang -target x86_64-sie-ps5 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT // RUN: %clang -target x86_64-scei-ps4 -fno-stack-size-section %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT // CHECK-ABSENT-NOT: -fstack-size-section @@ -7,9 +6,4 @@ // RUN: %clang -target x86_64-scei-ps4 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PRESENT // CHECK-PRESENT: -fstack-size-section -// RUN: %clang -target x86_64-unknown -fstack-size-section -fno-stack-size-section -### 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-ABSENT -// RUN: %clang -target x86_64-unknown -fnostack-size-section -fstack-size-section -### 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-PRESENT - int foo() { return 42; } From ee8524087c78a673fcf5486ded69ee597a85e0f1 Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Thu, 19 May 2022 16:09:36 -0700 Subject: [PATCH 371/908] [libclang] add supporting for indexing/visiting C++ concepts This commit builds upon recently added indexing support for C++ concepts from https://reviews.llvm.org/D124441 by extending libclang to support indexing and visiting concepts, constraints and requires expressions as well. 
Differential Revision: https://reviews.llvm.org/D126031 --- clang/include/clang-c/Index.h | 22 ++- clang/lib/Sema/SemaCodeComplete.cpp | 3 + clang/test/Index/index-concept-kind.cpp | 9 + clang/test/Index/index-concepts.cpp | 186 +++++++++++++++++++ clang/tools/c-index-test/c-index-test.c | 2 + clang/tools/libclang/CIndex.cpp | 141 +++++++++++++- clang/tools/libclang/CXCursor.cpp | 10 +- clang/tools/libclang/CXIndexDataConsumer.cpp | 14 +- clang/tools/libclang/CXIndexDataConsumer.h | 2 + clang/tools/libclang/CursorVisitor.h | 9 + 10 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/Index/index-concept-kind.cpp create mode 100644 clang/test/Index/index-concepts.cpp diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index c4da7df6595d10..70de5195d0580a 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2189,7 +2189,17 @@ enum CXCursorKind { */ CXCursor_CXXAddrspaceCastExpr = 152, - CXCursor_LastExpr = CXCursor_CXXAddrspaceCastExpr, + /** + * Expression that references a C++20 concept. + */ + CXCursor_ConceptSpecializationExpr = 153, + + /** + * Expression that references a C++20 requires expression. + */ + CXCursor_RequiresExpr = 154, + + CXCursor_LastExpr = CXCursor_RequiresExpr, /* Statements */ CXCursor_FirstStmt = 200, @@ -2700,8 +2710,13 @@ enum CXCursorKind { * a friend declaration. */ CXCursor_FriendDecl = 603, + /** + * a concept declaration. + */ + CXCursor_ConceptDecl = 604, + CXCursor_FirstExtraDecl = CXCursor_ModuleImportDecl, - CXCursor_LastExtraDecl = CXCursor_FriendDecl, + CXCursor_LastExtraDecl = CXCursor_ConceptDecl, /** * A code completion overload candidate. 
@@ -6319,7 +6334,8 @@ typedef enum { CXIdxEntity_CXXDestructor = 23, CXIdxEntity_CXXConversionFunction = 24, CXIdxEntity_CXXTypeAlias = 25, - CXIdxEntity_CXXInterface = 26 + CXIdxEntity_CXXInterface = 26, + CXIdxEntity_CXXConcept = 27 } CXIdxEntityKind; diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 30720c197d7e7e..d35e9c6e42bf24 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -4044,6 +4044,9 @@ CXCursorKind clang::getCursorKindForDecl(const Decl *D) { case Decl::ObjCTypeParam: return CXCursor_TemplateTypeParameter; + case Decl::Concept: + return CXCursor_ConceptDecl; + default: if (const auto *TD = dyn_cast(D)) { switch (TD->getTagKind()) { diff --git a/clang/test/Index/index-concept-kind.cpp b/clang/test/Index/index-concept-kind.cpp new file mode 100644 index 00000000000000..7aaf814f5f989a --- /dev/null +++ b/clang/test/Index/index-concept-kind.cpp @@ -0,0 +1,9 @@ +// RUN: c-index-test -index-file %s -std=gnu++20 | FileCheck %s + +template +concept LargeType = sizeof(T) > 8; +// CHECK: [indexDeclaration]: kind: concept | name: LargeType | USR: c:@CT@LargeType | lang: C | cursor: ConceptDecl=LargeType:[[@LINE-1]]:9 (Definition) | loc: [[@LINE-1]]:9 | semantic-container: [TU] | lexical-container: [TU] | isRedecl: 0 | isDef: 1 | isContainer: 0 | isImplicit: 0 + +template +// CHECK: [indexEntityReference]: kind: concept | name: LargeType | USR: c:@CT@LargeType | lang: C | cursor: TemplateRef=LargeType:4:9 | loc: [[@LINE-1]]:11 | :: kind: function-template | name: f | USR: c:@FT@>1#Tf#v# | lang: C++ | container: [<>] | refkind: direct | role: ref +void f(); diff --git a/clang/test/Index/index-concepts.cpp b/clang/test/Index/index-concepts.cpp new file mode 100644 index 00000000000000..9887e9fdc6541c --- /dev/null +++ b/clang/test/Index/index-concepts.cpp @@ -0,0 +1,186 @@ +// RUN: c-index-test -test-load-source all %s -std=gnu++20 -fno-delayed-template-parsing | FileCheck %s + 
+template +struct type_trait { + const static bool value = false; +}; + +template<> +struct type_trait { + const static bool value = true; +}; + +template +requires (type_trait::value) +// CHECK: index-concepts.cpp:[[@LINE-1]]:10: ParenExpr= Extent=[[[@LINE-1]]:10 - [[@LINE-1]]:32] +// CHECK: index-concepts.cpp:[[@LINE-2]]:11: DeclRefExpr= Extent=[[[@LINE-2]]:11 - [[@LINE-2]]:31] +// CHECK: index-concepts.cpp:[[@LINE-3]]:11: TemplateRef=type_trait:4:8 Extent=[[[@LINE-3]]:11 - [[@LINE-3]]:21] +// CHECK: index-concepts.cpp:[[@LINE-4]]:22: TypeRef=T:13:17 Extent=[[[@LINE-4]]:22 - [[@LINE-4]]:23] +void indexRequiresClause() { +} + +template +requires (type_trait::value) +// CHECK: index-concepts.cpp:[[@LINE-1]]:10: ParenExpr= Extent=[[[@LINE-1]]:10 - [[@LINE-1]]:32] +// CHECK: index-concepts.cpp:[[@LINE-2]]:11: DeclRefExpr= Extent=[[[@LINE-2]]:11 - [[@LINE-2]]:31] +// CHECK: index-concepts.cpp:[[@LINE-3]]:11: TemplateRef=type_trait:4:8 Extent=[[[@LINE-3]]:11 - [[@LINE-3]]:21] +// CHECK: index-concepts.cpp:[[@LINE-4]]:22: TypeRef=T:22:16 Extent=[[[@LINE-4]]:22 - [[@LINE-4]]:23] +class IndexRequiresClauseInClass {}; + +template +concept Con1 = type_trait::value; +// CHECK: index-concepts.cpp:[[@LINE-1]]:9: ConceptDecl=Con1:[[@LINE-1]]:9 (Definition) Extent=[[[@LINE-2]]:1 - [[@LINE-1]]:36] +// CHECK: index-concepts.cpp:[[@LINE-3]]:17: TemplateTypeParameter=T:[[@LINE-3]]:17 (Definition) Extent=[[[@LINE-3]]:11 - [[@LINE-3]]:18] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-3]]:16: DeclRefExpr= Extent=[[[@LINE-3]]:16 - [[@LINE-3]]:36] +// CHECK: index-concepts.cpp:[[@LINE-4]]:16: TemplateRef=type_trait:4:8 Extent=[[[@LINE-4]]:16 - [[@LINE-4]]:26] +// CHECK: index-concepts.cpp:[[@LINE-5]]:27: TypeRef=T:30:17 Extent=[[[@LINE-5]]:27 - [[@LINE-5]]:28] + +constexpr int sizeFunc() { return 4; } + +template +concept ConWithLogicalAnd = Con1 && sizeof(T) > sizeFunc(); +// CHECK: index-concepts.cpp:[[@LINE-1]]:9: ConceptDecl=ConWithLogicalAnd:[[@LINE-1]]:9 (Definition) 
Extent=[[[@LINE-2]]:1 - [[@LINE-1]]:62] +// CHECK: index-concepts.cpp:[[@LINE-3]]:17: TemplateTypeParameter=T:[[@LINE-3]]:17 (Definition) Extent=[[[@LINE-3]]:11 - [[@LINE-3]]:18] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-3]]:29: BinaryOperator= Extent=[[[@LINE-3]]:29 - [[@LINE-3]]:62] +// CHECK: index-concepts.cpp:[[@LINE-4]]:29: ConceptSpecializationExpr= Extent=[[[@LINE-4]]:29 - [[@LINE-4]]:36] +// CHECK: index-concepts.cpp:[[@LINE-5]]:29: TemplateRef=Con1:31:9 Extent=[[[@LINE-5]]:29 - [[@LINE-5]]:33] +// CHECK: index-concepts.cpp:[[@LINE-6]]:40: BinaryOperator= Extent=[[[@LINE-6]]:40 - [[@LINE-6]]:62] +// CHECK: index-concepts.cpp:[[@LINE-7]]:40: UnaryExpr= Extent=[[[@LINE-7]]:40 - [[@LINE-7]]:49] +// CHECK: index-concepts.cpp:[[@LINE-8]]:47: TypeRef=T:40:17 Extent=[[[@LINE-8]]:47 - [[@LINE-8]]:48] +// CHECK: index-concepts.cpp:[[@LINE-9]]:52: UnexposedExpr=sizeFunc:38:15 Extent=[[[@LINE-9]]:52 - [[@LINE-9]]:62] +// CHECK: index-concepts.cpp:[[@LINE-10]]:52: CallExpr=sizeFunc:38:15 Extent=[[[@LINE-10]]:52 - [[@LINE-10]]:62] +// CHECK: index-concepts.cpp:[[@LINE-11]]:52: UnexposedExpr=sizeFunc:38:15 Extent=[[[@LINE-11]]:52 - [[@LINE-11]]:60] +// CHECK: index-concepts.cpp:[[@LINE-12]]:52: DeclRefExpr=sizeFunc:38:15 Extent=[[[@LINE-12]]:52 - [[@LINE-12]]:60] + +namespace ns { + +template +concept ConInNamespace = sizeof(T) > 4; + +} + +template +concept ConTwoTemplateParams = ns::ConInNamespace && ConWithLogicalAnd; +// CHECK: index-concepts.cpp:[[@LINE-1]]:9: ConceptDecl=ConTwoTemplateParams:[[@LINE-1]]:9 (Definition) Extent=[[[@LINE-2]]:1 - [[@LINE-1]]:79] +// CHECK: index-concepts.cpp:[[@LINE-3]]:17: TemplateTypeParameter=T1:[[@LINE-3]]:17 (Definition) Extent=[[[@LINE-3]]:11 - [[@LINE-3]]:19] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-4]]:27: TemplateTypeParameter=T2:[[@LINE-4]]:27 (Definition) Extent=[[[@LINE-4]]:21 - [[@LINE-4]]:29] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-4]]:36: BinaryOperator= Extent=[[[@LINE-4]]:36 - 
[[@LINE-4]]:79] +// CHECK: index-concepts.cpp:[[@LINE-5]]:36: ConceptSpecializationExpr= Extent=[[[@LINE-5]]:36 - [[@LINE-5]]:54] +// CHECK: index-concepts.cpp:[[@LINE-6]]:32: NamespaceRef=ns:55:11 Extent=[[[@LINE-6]]:32 - [[@LINE-6]]:34] +// CHECK: index-concepts.cpp:[[@LINE-7]]:36: TemplateRef=ConInNamespace:58:9 Extent=[[[@LINE-7]]:36 - [[@LINE-7]]:50] +// CHECK: index-concepts.cpp:[[@LINE-8]]:58: ConceptSpecializationExpr= Extent=[[[@LINE-8]]:58 - [[@LINE-8]]:79] +// CHECK: index-concepts.cpp:[[@LINE-9]]:58: TemplateRef=ConWithLogicalAnd:41:9 Extent=[[[@LINE-9]]:58 - [[@LINE-9]]:75] + + +struct ConcreteType {}; + +template +requires ConTwoTemplateParams +struct UsesConceptInRequires {}; +// CHECK: index-concepts.cpp:[[@LINE-1]]:8: ClassTemplate=UsesConceptInRequires:[[@LINE-1]]:8 (Definition) Extent=[[[@LINE-3]]:1 - [[@LINE-1]]:32] +// CHECK: index-concepts.cpp:[[@LINE-4]]:16: TemplateTypeParameter=T:[[@LINE-4]]:16 (Definition) Extent=[[[@LINE-4]]:10 - [[@LINE-4]]:17] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-4]]:10: ConceptSpecializationExpr= Extent=[[[@LINE-4]]:10 - [[@LINE-4]]:47] +// CHECK: index-concepts.cpp:[[@LINE-5]]:10: TemplateRef=ConTwoTemplateParams:63:9 Extent=[[[@LINE-5]]:10 - [[@LINE-5]]:30] +// CHECK: index-concepts.cpp:[[@LINE-6]]:31: TypeRef=T:[[@LINE-7]]:16 Extent=[[[@LINE-6]]:31 - [[@LINE-6]]:32] +// CHECK: index-concepts.cpp:[[@LINE-7]]:34: TypeRef=struct ConcreteType:[[@LINE-10]]:8 Extent=[[[@LINE-7]]:34 - [[@LINE-7]]:46] + + +template +struct UsesConceptInTemplateArg {}; +// CHECK: index-concepts.cpp:[[@LINE-1]]:8: ClassTemplate=UsesConceptInTemplateArg:[[@LINE-1]]:8 (Definition) Extent=[[[@LINE-2]]:1 - [[@LINE-1]]:35] +// CHECK: index-concepts.cpp:[[@LINE-3]]:28: TemplateTypeParameter=T:[[@LINE-3]]:28 (Definition) Extent=[[[@LINE-3]]:10 - [[@LINE-3]]:29] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-4]]:10: TemplateRef=ConWithLogicalAnd:41:9 Extent=[[[@LINE-4]]:10 - [[@LINE-4]]:27] + +void 
usesConceptInAutoParam(ns::ConInNamespace auto x) {} +// CHECK: index-concepts.cpp:[[@LINE-1]]:6: FunctionTemplate=usesConceptInAutoParam:[[@LINE-1]]:6 (Definition) +// CHECK: index-concepts.cpp:[[@LINE-2]]:53: ParmDecl=x:[[@LINE-2]]:53 (Definition) Extent=[[[@LINE-2]]:29 - [[@LINE-2]]:54] +// CHECK: index-concepts.cpp:[[@LINE-3]]:29: NamespaceRef=ns:55:11 Extent=[[[@LINE-3]]:29 - [[@LINE-3]]:31] +// CHECK: index-concepts.cpp:[[@LINE-4]]:33: TemplateRef=ConInNamespace:58:9 Extent=[[[@LINE-4]]:33 - [[@LINE-4]]:47] +// CHECK: index-concepts.cpp:[[@LINE-5]]:48: TypeRef=ns::ConInNamespace auto:[[@LINE-5]]:53 Extent=[[[@LINE-5]]:48 - [[@LINE-5]]:52] +// CHECK: index-concepts.cpp:[[@LINE-6]]:56: CompoundStmt= Extent=[[[@LINE-6]]:56 - [[@LINE-6]]:58] + + +template +void testTrailingRequires(const T &x) +requires ns::ConInNamespace && ConTwoTemplateParams {} +// CHECK: index-concepts.cpp:[[@LINE-2]]:6: FunctionTemplate=testTrailingRequires:[[@LINE-2]]:6 (Definition) Extent=[[[@LINE-3]]:1 - [[@LINE-1]]:75] +// CHECK: index-concepts.cpp:[[@LINE-4]]:16: TemplateTypeParameter=T:[[@LINE-4]]:16 (Definition) Extent=[[[@LINE-4]]:10 - [[@LINE-4]]:17] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-4]]:36: ParmDecl=x:[[@LINE-4]]:36 (Definition) Extent=[[[@LINE-4]]:27 - [[@LINE-4]]:37] +// CHECK: index-concepts.cpp:[[@LINE-5]]:33: TypeRef=T:[[@LINE-6]]:16 Extent=[[[@LINE-5]]:33 - [[@LINE-5]]:34] +// CHECK: index-concepts.cpp:[[@LINE-5]]:14: ConceptSpecializationExpr= Extent=[[[@LINE-5]]:14 - [[@LINE-5]]:31] +// CHECK: index-concepts.cpp:[[@LINE-6]]:10: NamespaceRef=ns:55:11 Extent=[[[@LINE-6]]:10 - [[@LINE-6]]:12] +// CHECK: index-concepts.cpp:[[@LINE-7]]:14: TemplateRef=ConInNamespace:58:9 Extent=[[[@LINE-7]]:14 - [[@LINE-7]]:28] +// CHECK: index-concepts.cpp:[[@LINE-8]]:29: TypeRef=T:[[@LINE-10]]:16 Extent=[[[@LINE-8]]:29 - [[@LINE-8]]:30] +// CHECK: index-concepts.cpp:[[@LINE-9]]:35: ConceptSpecializationExpr= Extent=[[[@LINE-9]]:35 - [[@LINE-9]]:72] +// CHECK: 
index-concepts.cpp:[[@LINE-10]]:35: TemplateRef=ConTwoTemplateParams:63:9 Extent=[[[@LINE-10]]:35 - [[@LINE-10]]:55] +// CHECK: index-concepts.cpp:[[@LINE-11]]:56: TypeRef=T:[[@LINE-13]]:16 Extent=[[[@LINE-11]]:56 - [[@LINE-11]]:57] +// CHECK: index-concepts.cpp:[[@LINE-12]]:59: TypeRef=struct ConcreteType:75:8 Extent=[[[@LINE-12]]:59 - [[@LINE-12]]:71] + + +void concreteFunc(ConcreteType); + +template +void genericFunc(const T&x); + +template +concept ConWithRequires = requires(const T& x, ConcreteType value) { + concreteFunc(value); + genericFunc(x); +}; +// CHECK: index-concepts.cpp:[[@LINE-4]]:9: ConceptDecl=ConWithRequires:[[@LINE-4]]:9 (Definition) Extent=[[[@LINE-5]]:1 - [[@LINE-1]]:2] +// CHECK: index-concepts.cpp:[[@LINE-6]]:16: TemplateTypeParameter=T:[[@LINE-6]]:16 (Definition) Extent=[[[@LINE-6]]:10 - [[@LINE-6]]:17] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-6]]:27: RequiresExpr= Extent=[[[@LINE-6]]:27 - [[@LINE-3]]:2] +// CHECK: index-concepts.cpp:[[@LINE-7]]:61: ParmDecl=value:[[@LINE-7]]:61 (Definition) Extent=[[[@LINE-7]]:48 - [[@LINE-7]]:66] +// CHECK: index-concepts.cpp:[[@LINE-8]]:48: TypeRef=struct ConcreteType:75:8 Extent=[[[@LINE-8]]:48 - [[@LINE-8]]:60] +// CHECK: index-concepts.cpp:[[@LINE-9]]:45: ParmDecl=x:[[@LINE-9]]:45 (Definition) Extent=[[[@LINE-9]]:36 - [[@LINE-9]]:46] +// CHECK: index-concepts.cpp:[[@LINE-10]]:42: TypeRef=T:[[@LINE-11]]:16 Extent=[[[@LINE-10]]:42 - [[@LINE-10]]:43] +// CHECK: index-concepts.cpp:[[@LINE-10]]:3: UnexposedExpr=concreteFunc:[[@LINE-17]]:6 Extent=[[[@LINE-10]]:3 - [[@LINE-10]]:15] +// CHECK: index-concepts.cpp:[[@LINE-11]]:3: DeclRefExpr=concreteFunc:[[@LINE-18]]:6 Extent=[[[@LINE-11]]:3 - [[@LINE-11]]:15] +// CHECK: index-concepts.cpp:[[@LINE-12]]:16: CallExpr=ConcreteType:75:8 Extent=[[[@LINE-12]]:16 - [[@LINE-12]]:21] +// CHECK: index-concepts.cpp:[[@LINE-13]]:16: UnexposedExpr=value:[[@LINE-14]]:61 Extent=[[[@LINE-13]]:16 - [[@LINE-13]]:21] +// CHECK: index-concepts.cpp:[[@LINE-14]]:16: 
DeclRefExpr=value:[[@LINE-15]]:61 Extent=[[[@LINE-14]]:16 - [[@LINE-14]]:21] +// CHECK: index-concepts.cpp:[[@LINE-14]]:3: DeclRefExpr=[[[@LINE-19]]:6] Extent=[[[@LINE-14]]:3 - [[@LINE-14]]:14] +// CHECK: index-concepts.cpp:[[@LINE-15]]:3: OverloadedDeclRef=genericFunc[[[@LINE-20]]:6] Extent=[[[@LINE-15]]:3 - [[@LINE-15]]:14] +// CHECK: index-concepts.cpp:[[@LINE-16]]:15: DeclRefExpr=x:[[@LINE-18]]:45 Extent=[[[@LINE-16]]:15 - [[@LINE-16]]:16] + +template +concept ConWithCompRequires = requires { + { genericFunc(T()) } -> ns::ConInNamespace; + { genericFunc(T()) } -> ConTwoTemplateParams; +}; +// CHECK: index-concepts.cpp:[[@LINE-4]]:9: ConceptDecl=ConWithCompRequires:[[@LINE-4]]:9 (Definition) Extent=[[[@LINE-5]]:1 - [[@LINE-1]]:2] +// CHECK: index-concepts.cpp:[[@LINE-6]]:16: TemplateTypeParameter=T:[[@LINE-6]]:16 (Definition) Extent=[[[@LINE-6]]:10 - [[@LINE-6]]:17] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-6]]:31: RequiresExpr= Extent=[[[@LINE-6]]:31 - [[@LINE-3]]:2] +// CHECK: index-concepts.cpp:[[@LINE-6]]:5: DeclRefExpr=[123:6] Extent=[[[@LINE-6]]:5 - [[@LINE-6]]:16] +// CHECK: index-concepts.cpp:[[@LINE-7]]:5: OverloadedDeclRef=genericFunc[123:6] Extent=[[[@LINE-7]]:5 - [[@LINE-7]]:16] +// CHECK: index-concepts.cpp:[[@LINE-8]]:17: CallExpr= Extent=[[[@LINE-8]]:17 - [[@LINE-8]]:20] +// CHECK: index-concepts.cpp:[[@LINE-9]]:17: TypeRef=T:[[@LINE-11]]:16 Extent=[[[@LINE-9]]:17 - [[@LINE-9]]:18] +// CHECK: index-concepts.cpp:[[@LINE-10]]:27: NamespaceRef=ns:55:11 Extent=[[[@LINE-10]]:27 - [[@LINE-10]]:29] +// CHECK: index-concepts.cpp:[[@LINE-11]]:31: TemplateRef=ConInNamespace:58:9 Extent=[[[@LINE-11]]:31 - [[@LINE-11]]:45] +// CHECK: index-concepts.cpp:[[@LINE-11]]:5: DeclRefExpr=[123:6] Extent=[[[@LINE-11]]:5 - [[@LINE-11]]:16] +// CHECK: index-concepts.cpp:[[@LINE-12]]:5: OverloadedDeclRef=genericFunc[123:6] Extent=[[[@LINE-12]]:5 - [[@LINE-12]]:16] +// CHECK: index-concepts.cpp:[[@LINE-13]]:17: CallExpr= Extent=[[[@LINE-13]]:17 - 
[[@LINE-13]]:20] +// CHECK: index-concepts.cpp:[[@LINE-14]]:17: TypeRef=T:[[@LINE-17]]:16 Extent=[[[@LINE-14]]:17 - [[@LINE-14]]:18] +// CHECK: index-concepts.cpp:[[@LINE-15]]:27: TemplateRef=ConTwoTemplateParams:63:9 Extent=[[[@LINE-15]]:27 - [[@LINE-15]]:47] +// CHECK: index-concepts.cpp:[[@LINE-16]]:48: TypeRef=struct ConcreteType:75:8 Extent=[[[@LINE-16]]:48 - [[@LINE-16]]:60] + +template +concept ConWithTypeReq = requires { + typename type_trait; +}; +// CHECK: index-concepts.cpp:[[@LINE-3]]:9: ConceptDecl=ConWithTypeReq:[[@LINE-3]]:9 (Definition) Extent=[[[@LINE-4]]:1 - [[@LINE-1]]:2] +// CHECK: index-concepts.cpp:[[@LINE-5]]:16: TemplateTypeParameter=T:[[@LINE-5]]:16 (Definition) Extent=[[[@LINE-5]]:10 - [[@LINE-5]]:17] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-5]]:26: RequiresExpr= Extent=[[[@LINE-5]]:26 - [[@LINE-3]]:2] +// CHECK: index-concepts.cpp:[[@LINE-5]]:12: TemplateRef=type_trait:4:8 Extent=[[[@LINE-5]]:12 - [[@LINE-5]]:22] +// CHECK: index-concepts.cpp:[[@LINE-6]]:23: TypeRef=T:[[@LINE-8]]:16 Extent=[[[@LINE-6]]:23 - [[@LINE-6]]:24] + +template +concept ConWithNestedReq = requires { + requires ns::ConInNamespace; +}; +// CHECK: index-concepts.cpp:[[@LINE-3]]:9: ConceptDecl=ConWithNestedReq:[[@LINE-3]]:9 (Definition) Extent=[[[@LINE-4]]:1 - [[@LINE-1]]:2] +// CHECK: index-concepts.cpp:[[@LINE-5]]:16: TemplateTypeParameter=T:[[@LINE-5]]:16 (Definition) Extent=[[[@LINE-5]]:10 - [[@LINE-5]]:17] [access=public] +// CHECK: index-concepts.cpp:[[@LINE-5]]:28: RequiresExpr= Extent=[[[@LINE-5]]:28 - [[@LINE-3]]:2] +// CHECK: index-concepts.cpp:[[@LINE-5]]:12: NamespaceRef=ns:55:11 Extent=[[[@LINE-5]]:12 - [[@LINE-5]]:14] +// CHECK: index-concepts.cpp:[[@LINE-6]]:16: TemplateRef=ConInNamespace:58:9 Extent=[[[@LINE-6]]:16 - [[@LINE-6]]:30] +// CHECK: index-concepts.cpp:[[@LINE-7]]:31: TypeRef=T:[[@LINE-9]]:16 Extent=[[[@LINE-7]]:31 - [[@LINE-7]]:32] diff --git a/clang/tools/c-index-test/c-index-test.c b/clang/tools/c-index-test/c-index-test.c 
index 7d0d6e8e6e2f73..8eac69de3f1a2f 100644 --- a/clang/tools/c-index-test/c-index-test.c +++ b/clang/tools/c-index-test/c-index-test.c @@ -3504,6 +3504,8 @@ static const char *getEntityKindString(CXIdxEntityKind kind) { case CXIdxEntity_CXXConversionFunction: return "conversion-func"; case CXIdxEntity_CXXTypeAlias: return "type-alias"; case CXIdxEntity_CXXInterface: return "c++-__interface"; + case CXIdxEntity_CXXConcept: + return "concept"; } assert(0 && "Garbage entity kind"); return 0; diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 235ca3e9c060bc..97ce1c7533f23a 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -761,10 +761,10 @@ bool CursorVisitor::VisitClassTemplatePartialSpecializationDecl( } bool CursorVisitor::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) { - if (const auto *TC = D->getTypeConstraint()) - if (Visit(MakeCXCursor(TC->getImmediatelyDeclaredConstraint(), StmtParent, - TU, RegionOfInterest))) + if (const auto *TC = D->getTypeConstraint()) { + if (VisitTypeConstraint(*TC)) return true; + } // Visit the default argument. if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) @@ -863,6 +863,11 @@ bool CursorVisitor::VisitFunctionDecl(FunctionDecl *ND) { // FIXME: Attributes? } + if (auto *E = ND->getTrailingRequiresClause()) { + if (Visit(E)) + return true; + } + if (ND->doesThisDeclarationHaveABody() && !ND->isLateTemplateParsed()) { if (CXXConstructorDecl *Constructor = dyn_cast(ND)) { // Find the initializers that were written in the source. 
@@ -1310,6 +1315,75 @@ bool CursorVisitor::VisitDecompositionDecl(DecompositionDecl *D) { return VisitVarDecl(D); } +bool CursorVisitor::VisitConceptDecl(ConceptDecl *D) { + if (VisitTemplateParameters(D->getTemplateParameters())) + return true; + + if (auto *E = D->getConstraintExpr()) { + if (Visit(MakeCXCursor(E, D, TU, RegionOfInterest))) + return true; + } + return false; +} + +bool CursorVisitor::VisitTypeConstraint(const TypeConstraint &TC) { + if (TC.getNestedNameSpecifierLoc()) { + if (VisitNestedNameSpecifierLoc(TC.getNestedNameSpecifierLoc())) + return true; + } + if (TC.getNamedConcept()) { + if (Visit(MakeCursorTemplateRef(TC.getNamedConcept(), + TC.getConceptNameLoc(), TU))) + return true; + } + if (auto Args = TC.getTemplateArgsAsWritten()) { + for (const auto &Arg : Args->arguments()) { + if (VisitTemplateArgumentLoc(Arg)) + return true; + } + } + return false; +} + +bool CursorVisitor::VisitConceptRequirement(const concepts::Requirement &R) { + using namespace concepts; + switch (R.getKind()) { + case Requirement::RK_Type: { + const TypeRequirement &TR = cast(R); + if (!TR.isSubstitutionFailure()) { + if (Visit(TR.getType()->getTypeLoc())) + return true; + } + break; + } + case Requirement::RK_Simple: + case Requirement::RK_Compound: { + const ExprRequirement &ER = cast(R); + if (!ER.isExprSubstitutionFailure()) { + if (Visit(ER.getExpr())) + return true; + } + if (ER.getKind() == Requirement::RK_Compound) { + const auto &RTR = ER.getReturnTypeRequirement(); + if (RTR.isTypeConstraint()) { + if (const auto *Cons = RTR.getTypeConstraint()) + VisitTypeConstraint(*Cons); + } + } + break; + } + case Requirement::RK_Nested: { + const NestedRequirement &NR = cast(R); + if (!NR.isSubstitutionFailure()) { + if (Visit(NR.getConstraintExpr())) + return true; + } + break; + } + } + return false; +} + bool CursorVisitor::VisitDeclarationNameInfo(DeclarationNameInfo Name) { switch (Name.getName().getNameKind()) { case clang::DeclarationName::Identifier: @@ 
-1436,6 +1510,12 @@ bool CursorVisitor::VisitTemplateParameters( return true; } + if (const auto *E = Params->getRequiresClause()) { + if (Visit(MakeCXCursor(Params->getRequiresClause(), nullptr, TU, + RegionOfInterest))) + return true; + } + return false; } @@ -1594,6 +1674,11 @@ bool CursorVisitor::VisitTagTypeLoc(TagTypeLoc TL) { } bool CursorVisitor::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) { + if (const auto *TC = TL.getDecl()->getTypeConstraint()) { + if (VisitTypeConstraint(*TC)) + return true; + } + return Visit(MakeCursorTypeRef(TL.getDecl(), TL.getNameLoc(), TU)); } @@ -1869,6 +1954,9 @@ DEF_JOB(DeclRefExprParts, DeclRefExpr, DeclRefExprPartsKind) DEF_JOB(OverloadExprParts, OverloadExpr, OverloadExprPartsKind) DEF_JOB(SizeOfPackExprParts, SizeOfPackExpr, SizeOfPackExprPartsKind) DEF_JOB(LambdaExprParts, LambdaExpr, LambdaExprPartsKind) +DEF_JOB(ConceptSpecializationExprVisit, ConceptSpecializationExpr, + ConceptSpecializationExprVisitKind) +DEF_JOB(RequiresExprVisit, RequiresExpr, RequiresExprVisitKind) DEF_JOB(PostChildrenVisit, void, PostChildrenVisitKind) #undef DEF_JOB @@ -2044,6 +2132,8 @@ class EnqueueVisitor : public ConstStmtVisitor { void VisitPseudoObjectExpr(const PseudoObjectExpr *E); void VisitOpaqueValueExpr(const OpaqueValueExpr *E); void VisitLambdaExpr(const LambdaExpr *E); + void VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E); + void VisitRequiresExpr(const RequiresExpr *E); void VisitOMPExecutableDirective(const OMPExecutableDirective *D); void VisitOMPLoopBasedDirective(const OMPLoopBasedDirective *D); void VisitOMPLoopDirective(const OMPLoopDirective *D); @@ -2887,6 +2977,15 @@ void EnqueueVisitor::VisitLambdaExpr(const LambdaExpr *E) { AddStmt(E->getBody()); WL.push_back(LambdaExprParts(E, Parent)); } +void EnqueueVisitor::VisitConceptSpecializationExpr( + const ConceptSpecializationExpr *E) { + WL.push_back(ConceptSpecializationExprVisit(E, Parent)); +} +void 
EnqueueVisitor::VisitRequiresExpr(const RequiresExpr *E) { + WL.push_back(RequiresExprVisit(E, Parent)); + for (ParmVarDecl *VD : E->getLocalParameters()) + AddDecl(VD); +} void EnqueueVisitor::VisitPseudoObjectExpr(const PseudoObjectExpr *E) { // Treat the expression like its syntactic form. Visit(E->getSyntacticForm()); @@ -3380,6 +3479,36 @@ bool CursorVisitor::RunVisitorWorkList(VisitorWorkList &WL) { break; } + case VisitorJob::ConceptSpecializationExprVisitKind: { + const ConceptSpecializationExpr *E = + cast(&LI)->get(); + if (NestedNameSpecifierLoc QualifierLoc = + E->getNestedNameSpecifierLoc()) { + if (VisitNestedNameSpecifierLoc(QualifierLoc)) + return true; + } + + if (E->getNamedConcept() && + Visit(MakeCursorTemplateRef(E->getNamedConcept(), + E->getConceptNameLoc(), TU))) + return true; + + if (auto Args = E->getTemplateArgsAsWritten()) { + for (const auto &Arg : Args->arguments()) { + if (VisitTemplateArgumentLoc(Arg)) + return true; + } + } + break; + } + + case VisitorJob::RequiresExprVisitKind: { + const RequiresExpr *E = cast(&LI)->get(); + for (const concepts::Requirement *R : E->getRequirements()) + VisitConceptRequirement(*R); + break; + } + case VisitorJob::PostChildrenVisitKind: if (PostChildrenVisitor(Parent, ClientData)) return true; @@ -5401,6 +5530,10 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("ObjCMessageExpr"); case CXCursor_BuiltinBitCastExpr: return cxstring::createRef("BuiltinBitCastExpr"); + case CXCursor_ConceptSpecializationExpr: + return cxstring::createRef("ConceptSpecializationExpr"); + case CXCursor_RequiresExpr: + return cxstring::createRef("RequiresExpr"); case CXCursor_UnexposedStmt: return cxstring::createRef("UnexposedStmt"); case CXCursor_DeclStmt: @@ -5751,6 +5884,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("attribute(warn_unused_result)"); case CXCursor_AlignedAttr: return cxstring::createRef("attribute(aligned)"); + 
case CXCursor_ConceptDecl: + return cxstring::createRef("ConceptDecl"); } llvm_unreachable("Unhandled CXCursorKind"); diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index fbe30516e507d5..1de76ab5d6b6f4 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -299,8 +299,6 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::BinaryConditionalOperatorClass: case Stmt::TypeTraitExprClass: case Stmt::CoawaitExprClass: - case Stmt::ConceptSpecializationExprClass: - case Stmt::RequiresExprClass: case Stmt::DependentCoawaitExprClass: case Stmt::CoyieldExprClass: case Stmt::CXXBindTemporaryExprClass: @@ -637,6 +635,14 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, return getSelectorIdentifierCursor(SelectorIdIndex, C); } + case Stmt::ConceptSpecializationExprClass: + K = CXCursor_ConceptSpecializationExpr; + break; + + case Stmt::RequiresExprClass: + K = CXCursor_RequiresExpr; + break; + case Stmt::MSDependentExistsStmtClass: K = CXCursor_UnexposedStmt; break; diff --git a/clang/tools/libclang/CXIndexDataConsumer.cpp b/clang/tools/libclang/CXIndexDataConsumer.cpp index 2e87689f656615..979acaef8d00d4 100644 --- a/clang/tools/libclang/CXIndexDataConsumer.cpp +++ b/clang/tools/libclang/CXIndexDataConsumer.cpp @@ -146,6 +146,11 @@ class IndexingDeclVisitor : public ConstDeclVisitor { DataConsumer.importedModule(D); return true; } + + bool VisitConceptDecl(const ConceptDecl *D) { + DataConsumer.handleConcept(D); + return true; + } }; CXSymbolRole getSymbolRole(SymbolRoleSet Role) { @@ -883,6 +888,12 @@ bool CXIndexDataConsumer::handleTypeAliasTemplate(const TypeAliasTemplateDecl *D return handleDecl(D, D->getLocation(), getCursor(D), DInfo); } +bool CXIndexDataConsumer::handleConcept(const ConceptDecl *D) { + DeclInfo DInfo(/*isRedeclaration=*/!D->isCanonicalDecl(), + /*isDefinition=*/true, /*isContainer=*/false); + return handleDecl(D, D->getLocation(), 
getCursor(D), DInfo); +} + bool CXIndexDataConsumer::handleReference(const NamedDecl *D, SourceLocation Loc, CXCursor Cursor, const NamedDecl *Parent, @@ -1249,7 +1260,6 @@ static CXIdxEntityKind getEntityKindFromSymbolKind(SymbolKind K, SymbolLanguage case SymbolKind::TemplateTypeParm: case SymbolKind::TemplateTemplateParm: case SymbolKind::NonTypeTemplateParm: - case SymbolKind::Concept: return CXIdxEntity_Unexposed; case SymbolKind::Enum: return CXIdxEntity_Enum; @@ -1289,6 +1299,8 @@ static CXIdxEntityKind getEntityKindFromSymbolKind(SymbolKind K, SymbolLanguage case SymbolKind::Destructor: return CXIdxEntity_CXXDestructor; case SymbolKind::ConversionFunction: return CXIdxEntity_CXXConversionFunction; case SymbolKind::Parameter: return CXIdxEntity_Variable; + case SymbolKind::Concept: + return CXIdxEntity_CXXConcept; } llvm_unreachable("invalid symbol kind"); } diff --git a/clang/tools/libclang/CXIndexDataConsumer.h b/clang/tools/libclang/CXIndexDataConsumer.h index f93b06c8d0c229..04c64cab1952e8 100644 --- a/clang/tools/libclang/CXIndexDataConsumer.h +++ b/clang/tools/libclang/CXIndexDataConsumer.h @@ -409,6 +409,8 @@ class CXIndexDataConsumer : public index::IndexDataConsumer { bool handleFunctionTemplate(const FunctionTemplateDecl *D); bool handleTypeAliasTemplate(const TypeAliasTemplateDecl *D); + bool handleConcept(const ConceptDecl *D); + bool handleReference(const NamedDecl *D, SourceLocation Loc, CXCursor Cursor, const NamedDecl *Parent, const DeclContext *DC, diff --git a/clang/tools/libclang/CursorVisitor.h b/clang/tools/libclang/CursorVisitor.h index 364d9fdebdbc74..74190bd7f01526 100644 --- a/clang/tools/libclang/CursorVisitor.h +++ b/clang/tools/libclang/CursorVisitor.h @@ -19,6 +19,10 @@ namespace clang { class PreprocessingRecord; class ASTUnit; +namespace concepts { +class Requirement; +} + namespace cxcursor { class VisitorJob { @@ -37,6 +41,8 @@ class VisitorJob { MemberRefVisitKind, SizeOfPackExprPartsKind, LambdaExprPartsKind, + 
ConceptSpecializationExprVisitKind, + RequiresExprVisitKind, PostChildrenVisitKind }; @@ -242,6 +248,9 @@ class CursorVisitor : public DeclVisitor, bool VisitStaticAssertDecl(StaticAssertDecl *D); bool VisitFriendDecl(FriendDecl *D); bool VisitDecompositionDecl(DecompositionDecl *D); + bool VisitConceptDecl(ConceptDecl *D); + bool VisitTypeConstraint(const TypeConstraint &TC); + bool VisitConceptRequirement(const concepts::Requirement &R); // Name visitor bool VisitDeclarationNameInfo(DeclarationNameInfo Name); From 5df6669d45bc72df8a9ac7fb0f4f4cfc00444e0d Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 18 May 2022 12:22:38 -0700 Subject: [PATCH 372/908] [AMDGPU] Enforce alignment of image vaddr on gfx90a Even though single address image instructions only use a single VGPR HW accesses 4 or 5 which creates alignment requirement. Fixes: SWDEV-316648 Differential Revision: https://reviews.llvm.org/D126009 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 24 +- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 2 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 31 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 77 +- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 + .../llvm.amdgcn.image.atomic.dim.ll | 1264 ++++++++++++++++- .../AMDGPU/alloc-aligned-tuples-gfx90a.mir | 2 +- .../AMDGPU/llvm.amdgcn.image.atomic.dim.ll | 25 +- .../AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll | 27 +- .../llvm.amdgcn.image.sample.dim.gfx90a.ll | 17 +- .../AMDGPU/verify-image-vaddr-align.mir | 27 + 11 files changed, 1436 insertions(+), 65 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 25e947dfb022db..2e507b31ec813e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1434,23 +1434,7 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, if 
(HasVSrc) { Register VSrc = MI.getOperand(1).getReg(); - - if (STI.needsAlignedVGPRs()) { - // Add implicit aligned super-reg to force alignment on the data operand. - Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); - Register NewVR = - MRI->createVirtualRegister(&AMDGPU::VReg_64_Align2RegClass); - BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), NewVR) - .addReg(VSrc, 0, MI.getOperand(1).getSubReg()) - .addImm(AMDGPU::sub0) - .addReg(Undef) - .addImm(AMDGPU::sub1); - MIB.addReg(NewVR, 0, AMDGPU::sub0); - MIB.addReg(NewVR, RegState::Implicit); - } else { - MIB.addReg(VSrc); - } + MIB.addReg(VSrc); if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI)) return false; @@ -1459,6 +1443,8 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, MIB.addImm(ImmOffset) .cloneMemRefs(MI); + TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0); + MI.eraseFromParent(); return true; } @@ -1753,7 +1739,9 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( } MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr); + return true; } bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 9427355a1fbc85..6358547a983e34 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -478,6 +478,7 @@ multiclass MIMG_Store_Addr_Helper ; + let hasPostISelHook = 1 in def _V1_gfx90a : MIMG_Store_Helper_gfx90a ; def _V1_gfx10 : MIMG_Store_gfx10 ; + let hasPostISelHook = 1 in def _V1_gfx90a : MIMG_Atomic_gfx90a ; } if op.HAS_BASE then { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 
3b65454967e1ed..933f7b34954d13 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4257,29 +4257,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::DS_GWS_INIT: case AMDGPU::DS_GWS_SEMA_BR: case AMDGPU::DS_GWS_BARRIER: - if (Subtarget->needsAlignedVGPRs()) { - // Add implicit aligned super-reg to force alignment on the data operand. - const DebugLoc &DL = MI.getDebugLoc(); - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); - MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::data0); - Register DataReg = Op->getReg(); - bool IsAGPR = TRI->isAGPR(MRI, DataReg); - Register Undef = MRI.createVirtualRegister( - IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), Undef); - Register NewVR = - MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass - : &AMDGPU::VReg_64_Align2RegClass); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), NewVR) - .addReg(DataReg, 0, Op->getSubReg()) - .addImm(AMDGPU::sub0) - .addReg(Undef) - .addImm(AMDGPU::sub1); - Op->setReg(NewVR); - Op->setSubReg(AMDGPU::sub0); - MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); - } + TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0); LLVM_FALLTHROUGH; case AMDGPU::DS_GWS_SEMA_V: case AMDGPU::DS_GWS_SEMA_P: @@ -11832,8 +11810,11 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, return; } - if (TII->isMIMG(MI) && !MI.mayStore()) - AddIMGInit(MI); + if (TII->isMIMG(MI)) { + if (!MI.mayStore()) + AddIMGInit(MI); + TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr); + } } static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index b23e884a5d0e1f..a0b2a6d7439bd1 100644 --- 
a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4619,25 +4619,36 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } - if (ST.needsAlignedVGPRs() && - (MI.getOpcode() == AMDGPU::DS_GWS_INIT || - MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || - MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) { - const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0); - Register Reg = Op->getReg(); - bool Aligned = true; - if (Reg.isPhysical()) { - Aligned = !(RI.getHWRegIndex(Reg) & 1); - } else { + if (ST.needsAlignedVGPRs()) { + const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool { + const MachineOperand *Op = getNamedOperand(MI, OpName); + if (!Op) + return true; + Register Reg = Op->getReg(); + if (Reg.isPhysical()) + return !(RI.getHWRegIndex(Reg) & 1); const TargetRegisterClass &RC = *MRI.getRegClass(Reg); - Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && - !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); + return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && + !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); + }; + + if (MI.getOpcode() == AMDGPU::DS_GWS_INIT || + MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || + MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) { + + if (!isAlignedReg(AMDGPU::OpName::data0)) { + ErrInfo = "Subtarget requires even aligned vector registers " + "for DS_GWS instructions"; + return false; + } } - if (!Aligned) { - ErrInfo = "Subtarget requires even aligned vector registers " - "for DS_GWS instructions"; - return false; + if (isMIMG(MI)) { + if (!isAlignedReg(AMDGPU::OpName::vaddr)) { + ErrInfo = "Subtarget requires even aligned vector registers " + "for vaddr operand of image instructions"; + return false; + } } } @@ -8427,3 +8438,37 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return false; } + +void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI, + unsigned OpName) const { + if 
(!ST.needsAlignedVGPRs()) + return; + + int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); + if (OpNo < 0) + return; + MachineOperand &Op = MI.getOperand(OpNo); + if (getOpSize(MI, OpNo) > 4) + return; + + // Add implicit aligned super-reg to force alignment on the data operand. + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock *BB = MI.getParent(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + Register DataReg = Op.getReg(); + bool IsAGPR = RI.isAGPR(MRI, DataReg); + Register Undef = MRI.createVirtualRegister( + IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); + BuildMI(*BB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef); + Register NewVR = + MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass + : &AMDGPU::VReg_64_Align2RegClass); + BuildMI(*BB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewVR) + .addReg(DataReg, 0, Op.getSubReg()) + .addImm(AMDGPU::sub0) + .addReg(Undef) + .addImm(AMDGPU::sub1); + Op.setReg(NewVR); + Op.setSubReg(AMDGPU::sub0); + MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); +} diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a3357be4ff512e..6c6da687369370 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1150,6 +1150,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { static unsigned getDSShaderTypeValue(const MachineFunction &MF); const TargetSchedModel &getSchedModel() const { return SchedModel; } + + // Enforce operand's \p OpName even alignment if required by target. + // This is used if an operand is a 32 bit register but needs to be aligned + // regardless. 
+ void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const; }; /// \brief Returns true if a reg:subreg pair P has a TRC class diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll index a617ed96e79bed..4315f34724e512 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll @@ -1,9 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s - define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { ; GFX6-LABEL: atomic_swap_i32_1d: ; GFX6: ; %bb.0: ; %main_body @@ -33,6 +34,35 @@ define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_swap_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_swap_i32_1d: +; GFX90A: ; %bb.0: ; 
%main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_swap_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -81,6 +111,35 @@ define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -129,6 +188,35 @@ define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; 
return to shader part epilog ; +; GFX900-LABEL: atomic_sub_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_sub_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_sub_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -177,6 +265,35 @@ define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smin_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smin_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 
s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smin_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -225,6 +342,35 @@ define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umin_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umin_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umin_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -273,6 +419,35 @@ define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smax_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 
+; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smax_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smax_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -321,6 +496,35 @@ define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umax_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umax_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: 
s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umax_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -369,6 +573,35 @@ define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_and_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_and_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_and_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -417,6 +650,35 @@ define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 % ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_or_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, 
s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_or_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_or_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -465,6 +727,35 @@ define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_xor_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_xor_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt 
vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_xor_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -513,6 +804,35 @@ define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_inc_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_inc_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_inc_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -561,6 +881,35 @@ define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_dec_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_dec v0, v1, s[0:7] 
dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_dec_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_dec_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -609,6 +958,34 @@ define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_cmpswap_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_cmpswap_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_cmpswap_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 
@@ -655,6 +1032,32 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc ; GFX8-NEXT: s_endpgm ; +; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_endpgm +; +; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_endpgm +; ; GFX10-LABEL: atomic_cmpswap_i32_1d_no_return: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -701,6 +1104,36 @@ define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_2d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: 
s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -749,6 +1182,37 @@ define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_3d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_3d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_3d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -797,6 +1261,37 @@ define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, 
i32 %data, i3 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_cube: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_cube: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_cube: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -845,6 +1340,36 @@ define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_1darray: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: 
atomic_add_i32_1darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_1darray: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -893,6 +1418,37 @@ define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2darray: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_2darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2darray: ; GFX10: ; 
%bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -941,6 +1497,37 @@ define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2dmsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_2dmsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2dmsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -989,6 +1576,38 @@ define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %d ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2darraymsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:4], 
s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_2darraymsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v6, v1 +; GFX90A-NEXT: v_mov_b32_e32 v7, v2 +; GFX90A-NEXT: v_mov_b32_e32 v8, v3 +; GFX90A-NEXT: v_mov_b32_e32 v9, v4 +; GFX90A-NEXT: image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2darraymsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1037,6 +1656,35 @@ define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_1d_slc: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_1d_slc: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc +; 
GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_1d_slc: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1085,6 +1733,34 @@ define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %dat ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_swap_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_swap_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_swap_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1133,6 +1809,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add 
v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1181,6 +1885,34 @@ define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_sub_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_sub_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_sub_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1229,6 +1961,34 @@ define 
amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %dat ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smin_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smin_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smin_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1277,6 +2037,34 @@ define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %dat ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umin_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umin_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: 
s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umin_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1325,6 +2113,34 @@ define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %dat ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smax_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smax_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smax_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1373,6 +2189,34 @@ define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %dat ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umax_i64_1d: +; GFX900: ; 
%bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umax_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umax_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1421,6 +2265,34 @@ define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_and_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_and_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, 
s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_and_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1469,6 +2341,34 @@ define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_or_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_or_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_or_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1517,6 +2417,34 @@ define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_xor_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; 
GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_xor_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_xor_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1565,6 +2493,34 @@ define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_inc_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_inc_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: 
atomic_inc_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1613,6 +2569,34 @@ define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_dec_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_dec_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_dec_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1661,6 +2645,34 @@ define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 % ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_cmpswap_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to 
shader part epilog +; +; GFX90A-LABEL: atomic_cmpswap_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_cmpswap_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1707,6 +2719,32 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc ; GFX8-NEXT: s_endpgm ; +; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX900-NEXT: s_endpgm +; +; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: s_endpgm +; ; GFX10-LABEL: atomic_cmpswap_i64_1d_no_return: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1753,6 +2791,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to 
shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1801,6 +2867,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_3d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_3d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: 
s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_3d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1849,6 +2943,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %da ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_cube: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_cube: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_cube: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1897,6 +3019,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_1darray: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 
s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_1darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_1darray: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1945,6 +3095,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2darray: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add 
v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2darray: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1993,6 +3171,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 % ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2dmsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2dmsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2dmsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -2041,6 +3247,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2darraymsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 
+; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2darraymsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2darraymsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -2089,6 +3323,34 @@ define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 % ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_1d_slc: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_1d_slc: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; 
GFX10-LABEL: atomic_add_i64_1d_slc: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir index 5aea86567b4987..4ba5fa09f58783 100644 --- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir @@ -73,7 +73,7 @@ body: | %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vgpr_32 = COPY $vgpr2 - %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %0.sub0, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll index 40bd98fae3e65a..d05a98336fb231 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll @@ -1,10 +1,11 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX6789 %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX6789 %s -; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX90A %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s ; GCN-LABEL: {{^}}atomic_swap_1d: ; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_swap v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm 
glc{{$}} ; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -15,6 +16,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_swap_1d_i64: ; GFX6789: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} +; GFX90A: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} ; GFX10: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps <2 x float> @atomic_swap_1d_i64(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { main_body: @@ -25,6 +27,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_1d: ; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -35,6 +38,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_sub_1d: ; GFX6789: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_sub v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -45,6 +49,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_smin_1d: ; GFX6789: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_smin v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -55,6 +60,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_umin_1d: ; GFX6789: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_umin v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_umin v0, v1, s[0:7] 
dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -65,6 +71,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_smax_1d: ; GFX6789: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_smax v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -75,6 +82,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_umax_1d: ; GFX6789: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_umax v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -85,6 +93,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_and_1d: ; GFX6789: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_and v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -95,6 +104,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_or_1d: ; GFX6789: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_or v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -105,6 +115,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_xor_1d: ; GFX6789: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_xor v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float 
@atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -115,6 +126,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_inc_1d: ; GFX6789: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_inc v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -125,6 +137,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_dec_1d: ; GFX6789: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_dec v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -135,6 +148,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_cmpswap_1d: ; GFX6789: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} +; GFX90A: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} ; GFX10: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) { main_body: @@ -145,6 +159,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_cmpswap_1d_64: ; GFX6789: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}} +; GFX90A: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}} ; GFX10: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps <2 x float> @atomic_cmpswap_1d_64(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) { main_body: @@ -155,6 +170,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_2d: ; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[13579]}}], s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc ; 
define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) { main_body: @@ -165,6 +181,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_3d: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) { main_body: @@ -175,6 +192,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_cube: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ; define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) { main_body: @@ -185,6 +203,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_1darray: ; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[13579]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) { main_body: @@ -195,6 +214,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_2darray: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) { main_body: @@ -205,6 +225,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_2dmsaa: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc{{$}} +; 
GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) { main_body: @@ -215,6 +236,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_2darraymsaa: ; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[13579]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { main_body: @@ -225,6 +247,7 @@ main_body: ; GCN-LABEL: {{^}}atomic_add_1d_slc: ; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc{{$}} +; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc slc{{$}} ; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc ; define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll index d5e5bc1d052c6b..15269a88e61c71 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}load_1d: ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} @@ -97,6 +98,16 @@ main_body: ret <4 x float> %v } +; GCN-LABEL: {{^}}load_1d_addr_align: +; GCN: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; GCN: image_load v[0:3], [[VADDR]], s[0:7] dmask:0xf unorm{{$}} 
+define amdgpu_ps <4 x float> @load_1d_addr_align(<8 x i32> inreg %rsrc, <2 x i32> %s) { +main_body: + %s1 = extractelement <2 x i32> %s, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s1, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + ; GCN-LABEL: {{^}}store_1d: ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}} define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { @@ -178,7 +189,8 @@ main_body: } ; GCN-LABEL: {{^}}store_1d_V1: -; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}} +; GCN: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; GCN: image_store v0, [[VADDR]], s[0:7] dmask:0x2 unorm{{$}} define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) { main_body: call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -256,7 +268,7 @@ main_body: } ; GCN-LABEL: image_load_mmo -; GCN: image_load v1, v[2:3], s[0:7] dmask:0x1 unorm +; GCN: image_load v1, v[{{[0-9:]+}}], s[0:7] dmask:0x1 unorm define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 { store float 0.000000e+00, float addrspace(3)* %lds %c0 = extractelement <2 x i32> %c, i32 0 @@ -267,6 +279,15 @@ define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3) ret float %tex } +; GCN: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; GCN: image_get_resinfo v[0:3], [[VADDR]], s[0:7] dmask:0xf unorm +define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i32> %s) { +main_body: + %s1 = extractelement <2 x i32> %s, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %s1, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare {<2 x float>,i32} 
@llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1 @@ -301,6 +322,8 @@ declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i3 declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0 declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2 + attributes #0 = { nounwind } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll index dead4ecbaae7d4..e712a18b74df35 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s ; GFX90A-LABEL: {{^}}sample_1d: ; GFX90A-NOT: s_wqm_b64 @@ -66,6 +67,20 @@ main_body: ret <4 x float> %v } +; Address register must be even aligned. 
+ +; GFX90A-LABEL: {{^}}sample_1d_addr_align: +; GFX90A: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; SDAG: image_sample v{{[0-9]+}}, [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0x1 +; GISEL: image_sample v[{{[0-9:]+}}], [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf +define amdgpu_ps float @sample_1d_addr_align(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x float> %s) { +main_body: + %s1 = extractelement <2 x float> %s, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v1 = extractelement <4 x float> %v, i32 0 + ret float %v1 +} + declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir b/llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir new file mode 100644 index 00000000000000..07f4067ad0b19d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir @@ -0,0 +1,27 @@ +# RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX90A-ERR %s + +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: %4:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx90a %0.sub1:vreg_128_align2 +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: $vgpr0 = IMAGE_SAMPLE_V1_V1_gfx90a $vgpr1, +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: %5:vgpr_32 = IMAGE_LOAD_V1_V1_gfx90a 
%0.sub1:vreg_128_align2 +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: IMAGE_STORE_V1_V1_gfx90a $vgpr1, +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: %6:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %6:vgpr_32(tied-def 0), $vgpr1, +--- +name: image_sample_odd_vgpr +body: | + bb.0: + %0:vreg_128_align2 = IMPLICIT_DEF + %1:areg_128_align2 = IMPLICIT_DEF + %2:sgpr_256 = IMPLICIT_DEF + %3:sgpr_128 = IMPLICIT_DEF + + %4:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx90a %0.sub1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr0 = IMAGE_SAMPLE_V1_V1_gfx90a $vgpr1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %5:vgpr_32 = IMAGE_LOAD_V1_V1_gfx90a %0.sub1, %2, 8, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") + IMAGE_STORE_V1_V1_gfx90a $vgpr1, %5, %2, 2, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource") + %6:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %6:vgpr_32, $vgpr1, %2, 1, -1, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") +... From 4a3e2aff5f849278f6c49678fd21f8ca5e59c4e7 Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Tue, 24 May 2022 10:03:20 -0700 Subject: [PATCH 373/908] Reland "[PS5] Verify defaults to -fno-stack-size-section" This reverts commit efebb27b745a0d677ad2ea9aefff242c12aef29c. Fixes typos (accidentally omitted %s from some RUN lines). 
--- clang/test/Driver/stack-size-section.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/test/Driver/stack-size-section.c b/clang/test/Driver/stack-size-section.c index 461f0b5c9aee67..f50c9767e707a9 100644 --- a/clang/test/Driver/stack-size-section.c +++ b/clang/test/Driver/stack-size-section.c @@ -1,4 +1,5 @@ // RUN: %clang -target x86_64-unknown %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT +// RUN: %clang -target x86_64-sie-ps5 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT // RUN: %clang -target x86_64-scei-ps4 -fno-stack-size-section %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT // CHECK-ABSENT-NOT: -fstack-size-section @@ -6,4 +7,9 @@ // RUN: %clang -target x86_64-scei-ps4 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-PRESENT // CHECK-PRESENT: -fstack-size-section +// RUN: %clang -target x86_64-unknown -fstack-size-section -fno-stack-size-section %s -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ABSENT +// RUN: %clang -target x86_64-unknown -fno-stack-size-section -fstack-size-section %s -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PRESENT + int foo() { return 42; } From 175833ed6f62b697ce6248930da54e4ddb7f4c00 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Tue, 24 May 2022 02:12:53 -0400 Subject: [PATCH 374/908] [clangd] Handle '--' in QueryDriverDatabase Fixes https://github.com/clangd/clangd/issues/1100, a regression from D116721. 
Differential Revision: https://reviews.llvm.org/D126274 --- clang-tools-extra/clangd/QueryDriverDatabase.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp index 477087323c7d3f..e96912c6290c7b 100644 --- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp +++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/Regex.h" #include "llvm/Support/ScopedPrinter.h" #include +#include #include #include #include @@ -238,10 +239,17 @@ extractSystemIncludesAndTarget(llvm::SmallString<128> Driver, tooling::CompileCommand & addSystemIncludes(tooling::CompileCommand &Cmd, llvm::ArrayRef SystemIncludes) { + std::vector ToAppend; for (llvm::StringRef Include : SystemIncludes) { // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl" - Cmd.CommandLine.push_back("-isystem"); - Cmd.CommandLine.push_back(Include.str()); + ToAppend.push_back("-isystem"); + ToAppend.push_back(Include.str()); + } + if (!ToAppend.empty()) { + // Just append when `--` isn't present. + auto InsertAt = llvm::find(Cmd.CommandLine, "--"); + Cmd.CommandLine.insert(InsertAt, std::make_move_iterator(ToAppend.begin()), + std::make_move_iterator(ToAppend.end())); } return Cmd; } @@ -254,7 +262,9 @@ tooling::CompileCommand &setTarget(tooling::CompileCommand &Cmd, if (Arg == "-target" || Arg.startswith("--target=")) return Cmd; } - Cmd.CommandLine.push_back("--target=" + Target); + // Just append when `--` isn't present. 
+ auto InsertAt = llvm::find(Cmd.CommandLine, "--"); + Cmd.CommandLine.insert(InsertAt, "--target=" + Target); } return Cmd; } From c8644ea88ea220a28ef41b7364ca2eb1071552f9 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Tue, 24 May 2022 10:22:46 -0700 Subject: [PATCH 375/908] [compiler-rt][lsan] Update CanBeAHeapPointer for AArch64 While attempting to get the 64-bit lsan allocator working for Fuchsia, I noticed this function would incorrectly return false for pointers returned by the 64-bit allocator. On AArch64, this function attempts to get the VMA size dynamically by counting the number of leading zeros from the function frame address. This will fail if the frame address is significantly below an allocated pointer (that is, the frame address has more leading zeros than an allocated pointer). This is possible on Fuchsia and linux (when not called from the initial thread stack). It seems the intended use of this function is to speed up pointer scanning by filtering out addresses that user code might not be able to access. Other platforms this check is done on seem to hardcode the VMA size/shift, so it seems appropriate to do this for aarch64 as well. This implies pointers on aarch64 where the VMA size is <64 will pass through, but bad pointers will still be caught by subsequent scan checks. This patch also renames the function to something more fitting of what it's trying to do. 
Differential Revision: https://reviews.llvm.org/D123814 --- compiler-rt/lib/lsan/lsan_common.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 8d1bf11fdab626..a23b2b4e1d29aa 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -240,7 +240,7 @@ class Decorator : public __sanitizer::SanitizerCommonDecorator { const char *Leak() { return Blue(); } }; -static inline bool CanBeAHeapPointer(uptr p) { +static inline bool MaybeUserPointer(uptr p) { // Since our heap is located in mmap-ed memory, we can assume a sensible lower // bound on heap addresses. const uptr kMinAddress = 4 * 4096; @@ -252,8 +252,8 @@ static inline bool CanBeAHeapPointer(uptr p) { # elif defined(__mips64) return ((p >> 40) == 0); # elif defined(__aarch64__) - unsigned runtimeVMA = (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1); - return ((p >> runtimeVMA) == 0); + // Accept up to 48 bit VMA. + return ((p >> 48) == 0); # else return true; # endif @@ -276,7 +276,7 @@ void ScanRangeForPointers(uptr begin, uptr end, Frontier *frontier, pp = pp + alignment - pp % alignment; for (; pp + sizeof(void *) <= end; pp += alignment) { void *p = *reinterpret_cast(pp); - if (!CanBeAHeapPointer(reinterpret_cast(p))) + if (!MaybeUserPointer(reinterpret_cast(p))) continue; uptr chunk = PointsIntoChunk(p); if (!chunk) From 8527f9e4437fbecb529babb674777eb2cab99ca5 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 18 May 2022 13:40:33 -0700 Subject: [PATCH 376/908] [flang][runtime] Handle BACKSPACE after reading past EOF An external READ(END=) that hits the end of the file must also note the virtual position of the endfile record that has just been discovered, so that a later BACKSPACE statement won't end up at the wrong record. 
Differential Revision: https://reviews.llvm.org/D126146 --- flang/runtime/unit.cpp | 23 ++++++++++++----------- flang/runtime/unit.h | 1 + 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 7b4a0ea27bf923..c2f9d37bb12245 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -348,10 +348,7 @@ bool ExternalFileUnit::Receive(char *data, std::size_t bytes, furthestPositionInRecord = furthestAfter; return true; } else { - handler.SignalEnd(); - if (IsRecordFile() && access != Access::Direct) { - endfileRecordNumber = currentRecordNumber; - } + HitEndOnRead(handler); return false; } } @@ -384,10 +381,7 @@ const char *ExternalFileUnit::FrameNextInput( if (got >= need) { return Frame() + at; } - handler.SignalEnd(); - if (IsRecordFile() && access != Access::Direct) { - endfileRecordNumber = currentRecordNumber; - } + HitEndOnRead(handler); } return nullptr; } @@ -422,7 +416,7 @@ bool ExternalFileUnit::BeginReadingRecord(IoErrorHandler &handler) { recordLength = openRecl; } else { recordLength.reset(); - handler.SignalEnd(); + HitEndOnRead(handler); } } else { recordLength.reset(); @@ -667,7 +661,7 @@ void ExternalFileUnit::BeginSequentialVariableUnformattedInputRecord( const char *error{nullptr}; if (got < need) { if (got == recordOffsetInFrame_) { - handler.SignalEnd(); + HitEndOnRead(handler); } else { error = "Unformatted variable-length sequential file input failed at " "record #%jd (file offset %jd): truncated record header"; @@ -722,7 +716,7 @@ void ExternalFileUnit::BeginVariableFormattedInputRecord( recordLength = length; unterminatedRecord = true; } else { - handler.SignalEnd(); + HitEndOnRead(handler); } break; } @@ -878,6 +872,13 @@ bool ExternalFileUnit::CheckDirectAccess(IoErrorHandler &handler) { return true; } +void ExternalFileUnit::HitEndOnRead(IoErrorHandler &handler) { + handler.SignalEnd(); + if (IsRecordFile() && access != Access::Direct) { + endfileRecordNumber = 
currentRecordNumber; + } +} + ChildIo &ExternalFileUnit::PushChildIo(IoStatementState &parent) { OwningPtr current{std::move(child_)}; Terminator &terminator{parent.GetIoErrorHandler()}; diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index f38f4cac19360e..567aa740244ac5 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -115,6 +115,7 @@ class ExternalFileUnit : public ConnectionState, void DoEndfile(IoErrorHandler &); void CommitWrites(); bool CheckDirectAccess(IoErrorHandler &); + void HitEndOnRead(IoErrorHandler &); int unitNumber_{-1}; Direction direction_{Direction::Output}; From 9ba937112fa6d4076e4a98b587a334786b6c0d9c Mon Sep 17 00:00:00 2001 From: Mike Rice Date: Tue, 17 May 2022 10:11:00 -0700 Subject: [PATCH 377/908] [OpenMP] Add parsing/sema support for omp_all_memory reserved locator Adds support for the reserved locator 'omp_all_memory' for use in depend clauses with 'out' or 'inout' dependence-types. Differential Revision: https://reviews.llvm.org/D125828 --- clang/include/clang/AST/OpenMPClause.h | 59 +++++++----- .../clang/Basic/DiagnosticParseKinds.td | 6 ++ clang/include/clang/Basic/OpenMPKinds.def | 2 + clang/include/clang/Parse/Parser.h | 30 ++---- clang/include/clang/Sema/Sema.h | 46 ++++++--- clang/lib/AST/OpenMPClause.cpp | 32 +++++-- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 + clang/lib/Parse/ParseOpenMP.cpp | 66 +++++++++---- clang/lib/Sema/SemaOpenMP.cpp | 96 ++++++++++--------- clang/lib/Sema/TreeTransform.h | 21 ++-- clang/lib/Serialization/ASTReader.cpp | 1 + clang/lib/Serialization/ASTWriter.cpp | 1 + clang/test/OpenMP/task_ast_print.cpp | 24 +++-- clang/test/OpenMP/task_depend_messages.cpp | 10 +- 14 files changed, 239 insertions(+), 157 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 3103f61d4248de..a745df11434685 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -4746,14 +4746,24 @@ class 
OMPDependClause final friend OMPVarListClause; friend TrailingObjects; - /// Dependency type (one of in, out, inout). - OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown; +public: + struct DependDataTy final { + /// Dependency type (one of in, out, inout). + OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown; - /// Dependency type location. - SourceLocation DepLoc; + /// Dependency type location. + SourceLocation DepLoc; - /// Colon location. - SourceLocation ColonLoc; + /// Colon location. + SourceLocation ColonLoc; + + /// Location of 'omp_all_memory'. + SourceLocation OmpAllMemoryLoc; + }; + +private: + /// Dependency type and source locations. + DependDataTy Data; /// Number of loops, associated with the depend clause. unsigned NumLoops = 0; @@ -4784,13 +4794,16 @@ class OMPDependClause final NumLoops(NumLoops) {} /// Set dependency kind. - void setDependencyKind(OpenMPDependClauseKind K) { DepKind = K; } + void setDependencyKind(OpenMPDependClauseKind K) { Data.DepKind = K; } /// Set dependency kind and its location. - void setDependencyLoc(SourceLocation Loc) { DepLoc = Loc; } + void setDependencyLoc(SourceLocation Loc) { Data.DepLoc = Loc; } /// Set colon location. - void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; } + void setColonLoc(SourceLocation Loc) { Data.ColonLoc = Loc; } + + /// Set the 'omp_all_memory' location. + void setOmpAllMemoryLoc(SourceLocation Loc) { Data.OmpAllMemoryLoc = Loc; } /// Sets optional dependency modifier. void setModifier(Expr *DepModifier); @@ -4802,18 +4815,15 @@ class OMPDependClause final /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. - /// \param DepKind Dependency type. - /// \param DepLoc Location of the dependency type. - /// \param ColonLoc Colon location. + /// \param Data Dependency type and source locations. /// \param VL List of references to the variables. 
/// \param NumLoops Number of loops that is associated with this depend /// clause. static OMPDependClause *Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc, Expr *DepModifier, - OpenMPDependClauseKind DepKind, - SourceLocation DepLoc, SourceLocation ColonLoc, - ArrayRef VL, unsigned NumLoops); + SourceLocation EndLoc, DependDataTy Data, + Expr *DepModifier, ArrayRef VL, + unsigned NumLoops); /// Creates an empty clause with \a N variables. /// @@ -4825,7 +4835,16 @@ class OMPDependClause final unsigned NumLoops); /// Get dependency type. - OpenMPDependClauseKind getDependencyKind() const { return DepKind; } + OpenMPDependClauseKind getDependencyKind() const { return Data.DepKind; } + + /// Get dependency type location. + SourceLocation getDependencyLoc() const { return Data.DepLoc; } + + /// Get colon location. + SourceLocation getColonLoc() const { return Data.ColonLoc; } + + /// Get 'omp_all_memory' location. + SourceLocation getOmpAllMemoryLoc() const { return Data.OmpAllMemoryLoc; } /// Return optional depend modifier. Expr *getModifier(); @@ -4833,12 +4852,6 @@ class OMPDependClause final return const_cast(this)->getModifier(); } - /// Get dependency type location. - SourceLocation getDependencyLoc() const { return DepLoc; } - - /// Get colon location. - SourceLocation getColonLoc() const { return ColonLoc; } - /// Get number of loops associated with the clause. 
unsigned getNumLoops() const { return NumLoops; } diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index bd74674325fbe2..74930a4690d7b7 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1464,6 +1464,12 @@ def warn_omp51_compat_attributes : Warning< def err_omp_expected_colon : Error<"missing ':' in %0">; def err_omp_expected_context_selector : Error<"expected valid context selector in %0">; +def err_omp_requires_out_inout_depend_type : Error< + "reserved locator 'omp_all_memory' requires 'out' or 'inout' " + "dependency types">; +def warn_omp_more_one_omp_all_memory : Warning< + "reserved locator 'omp_all_memory' cannot be specified more than once">, + InGroup; // Pragma loop support. def err_pragma_loop_missing_argument : Error< diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index 0c0c4c2f7ed92e..4c0884e0a64244 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -106,6 +106,8 @@ OPENMP_DEPEND_KIND(depobj) OPENMP_DEPEND_KIND(source) OPENMP_DEPEND_KIND(sink) OPENMP_DEPEND_KIND(inoutset) +OPENMP_DEPEND_KIND(outallmemory) +OPENMP_DEPEND_KIND(inoutallmemory) // Modifiers for 'linear' clause. OPENMP_LINEAR_KIND(val) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index e44ef6bb8f19d7..a20866e410aa49 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3332,30 +3332,14 @@ class Parser : public CodeCompletionHandler { ExprResult ParseOpenMPParensExpr(StringRef ClauseName, SourceLocation &RLoc, bool IsAddressOfOperand = false); - /// Data used for parsing list of variables in OpenMP clauses. 
- struct OpenMPVarListDataTy { - Expr *DepModOrTailExpr = nullptr; - SourceLocation ColonLoc; - SourceLocation RLoc; - CXXScopeSpec ReductionOrMapperIdScopeSpec; - DeclarationNameInfo ReductionOrMapperId; - int ExtraModifier = -1; ///< Additional modifier for linear, map, depend or - ///< lastprivate clause. - SmallVector - MapTypeModifiers; - SmallVector - MapTypeModifiersLoc; - SmallVector - MotionModifiers; - SmallVector MotionModifiersLoc; - bool IsMapTypeImplicit = false; - SourceLocation ExtraModifierLoc; - }; - + /// Parses a reserved locator like 'omp_all_memory'. + bool ParseOpenMPReservedLocator(OpenMPClauseKind Kind, + Sema::OpenMPVarListDataTy &Data, + const LangOptions &LangOpts); /// Parses clauses with list. bool ParseOpenMPVarList(OpenMPDirectiveKind DKind, OpenMPClauseKind Kind, SmallVectorImpl &Vars, - OpenMPVarListDataTy &Data); + Sema::OpenMPVarListDataTy &Data); bool ParseUnqualifiedId(CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHadErrors, bool EnteringContext, bool AllowDestructorName, bool AllowConstructorName, @@ -3363,11 +3347,11 @@ class Parser : public CodeCompletionHandler { SourceLocation *TemplateKWLoc, UnqualifiedId &Result); /// Parses the mapper modifier in map, to, and from clauses. - bool parseMapperModifier(OpenMPVarListDataTy &Data); + bool parseMapperModifier(Sema::OpenMPVarListDataTy &Data); /// Parses map-type-modifiers in map clause. /// map([ [map-type-modifier[,] [map-type-modifier[,] ...] 
map-type : ] list) /// where, map-type-modifier ::= always | close | mapper(mapper-identifier) - bool parseMapTypeModifiers(OpenMPVarListDataTy &Data); + bool parseMapTypeModifiers(Sema::OpenMPVarListDataTy &Data); private: //===--------------------------------------------------------------------===// diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 976ee59f868475..94440ff9ce55e3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11435,16 +11435,31 @@ class Sema final { OpenMPAtomicDefaultMemOrderClauseKind Kind, SourceLocation KindLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); - OMPClause *ActOnOpenMPVarListClause( - OpenMPClauseKind Kind, ArrayRef Vars, Expr *DepModOrTailExpr, - const OMPVarListLocTy &Locs, SourceLocation ColonLoc, - CXXScopeSpec &ReductionOrMapperIdScopeSpec, - DeclarationNameInfo &ReductionOrMapperId, int ExtraModifier, - ArrayRef MapTypeModifiers, - ArrayRef MapTypeModifiersLoc, bool IsMapTypeImplicit, - SourceLocation ExtraModifierLoc, - ArrayRef MotionModifiers, - ArrayRef MotionModifiersLoc); + /// Data used for processing a list of variables in OpenMP clauses. + struct OpenMPVarListDataTy final { + Expr *DepModOrTailExpr = nullptr; + SourceLocation ColonLoc; + SourceLocation RLoc; + CXXScopeSpec ReductionOrMapperIdScopeSpec; + DeclarationNameInfo ReductionOrMapperId; + int ExtraModifier = -1; ///< Additional modifier for linear, map, depend or + ///< lastprivate clause. + SmallVector + MapTypeModifiers; + SmallVector + MapTypeModifiersLoc; + SmallVector + MotionModifiers; + SmallVector MotionModifiersLoc; + bool IsMapTypeImplicit = false; + SourceLocation ExtraModifierLoc; + SourceLocation OmpAllMemoryLoc; + }; + + OMPClause *ActOnOpenMPVarListClause(OpenMPClauseKind Kind, + ArrayRef Vars, + const OMPVarListLocTy &Locs, + OpenMPVarListDataTy &Data); /// Called on well-formed 'inclusive' clause. 
OMPClause *ActOnOpenMPInclusiveClause(ArrayRef VarList, SourceLocation StartLoc, @@ -11535,11 +11550,12 @@ class Sema final { SourceLocation LParenLoc, SourceLocation EndLoc); /// Called on well-formed 'depend' clause. - OMPClause * - ActOnOpenMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind, - SourceLocation DepLoc, SourceLocation ColonLoc, - ArrayRef VarList, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc); + OMPClause *ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, + Expr *DepModifier, + ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'device' clause. OMPClause *ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier, Expr *Device, SourceLocation StartLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 9f95c642060281..dc2d90e366bc76 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1040,19 +1040,19 @@ OMPDepobjClause *OMPDepobjClause::CreateEmpty(const ASTContext &C) { OMPDependClause * OMPDependClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, - Expr *DepModifier, OpenMPDependClauseKind DepKind, - SourceLocation DepLoc, SourceLocation ColonLoc, + DependDataTy Data, Expr *DepModifier, ArrayRef VL, unsigned NumLoops) { void *Mem = C.Allocate( totalSizeToAlloc(VL.size() + /*depend-modifier*/ 1 + NumLoops), alignof(OMPDependClause)); OMPDependClause *Clause = new (Mem) OMPDependClause(StartLoc, LParenLoc, EndLoc, VL.size(), NumLoops); - Clause->setVarRefs(VL); - Clause->setDependencyKind(DepKind); - Clause->setDependencyLoc(DepLoc); - Clause->setColonLoc(ColonLoc); + Clause->setDependencyKind(Data.DepKind); + Clause->setDependencyLoc(Data.DepLoc); + Clause->setColonLoc(Data.ColonLoc); + Clause->setOmpAllMemoryLoc(Data.OmpAllMemoryLoc); Clause->setModifier(DepModifier); + Clause->setVarRefs(VL); 
for (unsigned I = 0 ; I < NumLoops; ++I) Clause->setLoopData(I, nullptr); return Clause; @@ -2183,11 +2183,23 @@ void OMPClausePrinter::VisitOMPDependClause(OMPDependClause *Node) { DepModifier->printPretty(OS, nullptr, Policy); OS << ", "; } - OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(), - Node->getDependencyKind()); - if (!Node->varlist_empty()) { + OpenMPDependClauseKind DepKind = Node->getDependencyKind(); + OpenMPDependClauseKind PrintKind = DepKind; + bool IsOmpAllMemory = false; + if (PrintKind == OMPC_DEPEND_outallmemory) { + PrintKind = OMPC_DEPEND_out; + IsOmpAllMemory = true; + } else if (PrintKind == OMPC_DEPEND_inoutallmemory) { + PrintKind = OMPC_DEPEND_inout; + IsOmpAllMemory = true; + } + OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(), PrintKind); + if (!Node->varlist_empty() || IsOmpAllMemory) OS << " :"; - VisitOMPClauseList(Node, ' '); + VisitOMPClauseList(Node, ' '); + if (IsOmpAllMemory) { + OS << (Node->varlist_empty() ? " " : ","); + OS << "omp_all_memory"; } OS << ")"; } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index f197c331e6cfc2..c1982859c06099 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4534,6 +4534,8 @@ static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { case OMPC_DEPEND_source: case OMPC_DEPEND_sink: case OMPC_DEPEND_depobj: + case OMPC_DEPEND_outallmemory: + case OMPC_DEPEND_inoutallmemory: case OMPC_DEPEND_unknown: llvm_unreachable("Unknown task dependence type"); } diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index e3594e17610850..48ee1b02921e2c 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -744,7 +744,7 @@ static bool parseDeclareSimdClauses( OpenMPClauseKind CKind = getOpenMPClauseKind(ClauseName); if (CKind == OMPC_uniform || CKind == OMPC_aligned || CKind == OMPC_linear) { - Parser::OpenMPVarListDataTy 
Data; + Sema::OpenMPVarListDataTy Data; SmallVectorImpl *Vars = &Uniforms; if (CKind == OMPC_aligned) { Vars = &Aligneds; @@ -1437,7 +1437,7 @@ void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, case OMPC_adjust_args: { AdjustArgsLoc = Tok.getLocation(); ConsumeToken(); - Parser::OpenMPVarListDataTy Data; + Sema::OpenMPVarListDataTy Data; SmallVector Vars; IsError = ParseOpenMPVarList(OMPD_declare_variant, OMPC_adjust_args, Vars, Data); @@ -3893,7 +3893,7 @@ static OpenMPMapModifierKind isMapModifier(Parser &P) { } /// Parse the mapper modifier in map, to, and from clauses. -bool Parser::parseMapperModifier(OpenMPVarListDataTy &Data) { +bool Parser::parseMapperModifier(Sema::OpenMPVarListDataTy &Data) { // Parse '('. BalancedDelimiterTracker T(*this, tok::l_paren, tok::colon); if (T.expectAndConsume(diag::err_expected_lparen_after, "mapper")) { @@ -3925,7 +3925,7 @@ bool Parser::parseMapperModifier(OpenMPVarListDataTy &Data) { /// map([ [map-type-modifier[,] [map-type-modifier[,] ...] map-type : ] list) /// where, map-type-modifier ::= always | close | mapper(mapper-identifier) | /// present -bool Parser::parseMapTypeModifiers(OpenMPVarListDataTy &Data) { +bool Parser::parseMapTypeModifiers(Sema::OpenMPVarListDataTy &Data) { while (getCurToken().isNot(tok::colon)) { OpenMPMapModifierKind TypeModifier = isMapModifier(*this); if (TypeModifier == OMPC_MAP_MODIFIER_always || @@ -3981,7 +3981,7 @@ static OpenMPMapClauseKind isMapType(Parser &P) { /// Parse map-type in map clause. /// map([ [map-type-modifier[,] [map-type-modifier[,] ...] 
map-type : ] list) /// where, map-type ::= to | from | tofrom | alloc | release | delete -static void parseMapType(Parser &P, Parser::OpenMPVarListDataTy &Data) { +static void parseMapType(Parser &P, Sema::OpenMPVarListDataTy &Data) { Token Tok = P.getCurToken(); if (Tok.is(tok::colon)) { P.Diag(Tok, diag::err_omp_map_type_missing); @@ -4100,11 +4100,38 @@ ExprResult Parser::ParseOpenMPIteratorsExpr() { Data); } +bool Parser::ParseOpenMPReservedLocator(OpenMPClauseKind Kind, + Sema::OpenMPVarListDataTy &Data, + const LangOptions &LangOpts) { + // Currently the only reserved locator is 'omp_all_memory' which is only + // allowed on a depend clause. + if (Kind != OMPC_depend || LangOpts.OpenMP < 51) + return false; + + if (Tok.is(tok::identifier) && + Tok.getIdentifierInfo()->isStr("omp_all_memory")) { + + if (Data.ExtraModifier == OMPC_DEPEND_outallmemory || + Data.ExtraModifier == OMPC_DEPEND_inoutallmemory) + Diag(Tok, diag::warn_omp_more_one_omp_all_memory); + else if (Data.ExtraModifier != OMPC_DEPEND_out && + Data.ExtraModifier != OMPC_DEPEND_inout) + Diag(Tok, diag::err_omp_requires_out_inout_depend_type); + else + Data.ExtraModifier = Data.ExtraModifier == OMPC_DEPEND_out + ? OMPC_DEPEND_outallmemory + : OMPC_DEPEND_inoutallmemory; + ConsumeToken(); + return true; + } + return false; +} + /// Parses clauses with list. 
bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, OpenMPClauseKind Kind, SmallVectorImpl &Vars, - OpenMPVarListDataTy &Data) { + Sema::OpenMPVarListDataTy &Data) { UnqualifiedId UnqualifiedReductionId; bool InvalidReductionId = false; bool IsInvalidMapperModifier = false; @@ -4360,14 +4387,16 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, Tok.isNot(tok::annot_pragma_openmp_end))) { ParseScope OMPListScope(this, Scope::OpenMPDirectiveScope); ColonProtectionRAIIObject ColonRAII(*this, MayHaveTail); - // Parse variable - ExprResult VarExpr = - Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()); - if (VarExpr.isUsable()) { - Vars.push_back(VarExpr.get()); - } else { - SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end, - StopBeforeMatch); + if (!ParseOpenMPReservedLocator(Kind, Data, getLangOpts())) { + // Parse variable + ExprResult VarExpr = + Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()); + if (VarExpr.isUsable()) { + Vars.push_back(VarExpr.get()); + } else { + SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end, + StopBeforeMatch); + } } // Skip ',' if any IsComma = Tok.is(tok::comma); @@ -4476,7 +4505,7 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind, SourceLocation Loc = Tok.getLocation(); SourceLocation LOpen = ConsumeToken(); SmallVector Vars; - OpenMPVarListDataTy Data; + Sema::OpenMPVarListDataTy Data; if (ParseOpenMPVarList(DKind, Kind, Vars, Data)) return nullptr; @@ -4484,10 +4513,5 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind, if (ParseOnly) return nullptr; OMPVarListLocTy Locs(Loc, LOpen, Data.RLoc); - return Actions.ActOnOpenMPVarListClause( - Kind, Vars, Data.DepModOrTailExpr, Locs, Data.ColonLoc, - Data.ReductionOrMapperIdScopeSpec, Data.ReductionOrMapperId, - Data.ExtraModifier, Data.MapTypeModifiers, Data.MapTypeModifiersLoc, - Data.IsMapTypeImplicit, Data.ExtraModifierLoc, Data.MotionModifiers, - 
Data.MotionModifiersLoc); + return Actions.ActOnOpenMPVarListClause(Kind, Vars, Locs, Data); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 42c3dbc181911e..ce65fe467c3fa0 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16158,8 +16158,9 @@ OMPClause *Sema::ActOnOpenMPUpdateClause(OpenMPDependClauseKind Kind, SourceLocation EndLoc) { if (Kind == OMPC_DEPEND_unknown || Kind == OMPC_DEPEND_source || Kind == OMPC_DEPEND_sink || Kind == OMPC_DEPEND_depobj) { - SmallVector Except = {OMPC_DEPEND_source, OMPC_DEPEND_sink, - OMPC_DEPEND_depobj}; + SmallVector Except = { + OMPC_DEPEND_source, OMPC_DEPEND_sink, OMPC_DEPEND_depobj, + OMPC_DEPEND_outallmemory, OMPC_DEPEND_inoutallmemory}; if (LangOpts.OpenMP < 51) Except.push_back(OMPC_DEPEND_inoutset); Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) @@ -16969,20 +16970,17 @@ OMPClause *Sema::ActOnOpenMPFilterClause(Expr *ThreadID, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPVarListClause( - OpenMPClauseKind Kind, ArrayRef VarList, Expr *DepModOrTailExpr, - const OMPVarListLocTy &Locs, SourceLocation ColonLoc, - CXXScopeSpec &ReductionOrMapperIdScopeSpec, - DeclarationNameInfo &ReductionOrMapperId, int ExtraModifier, - ArrayRef MapTypeModifiers, - ArrayRef MapTypeModifiersLoc, bool IsMapTypeImplicit, - SourceLocation ExtraModifierLoc, - ArrayRef MotionModifiers, - ArrayRef MotionModifiersLoc) { +OMPClause *Sema::ActOnOpenMPVarListClause(OpenMPClauseKind Kind, + ArrayRef VarList, + const OMPVarListLocTy &Locs, + OpenMPVarListDataTy &Data) { SourceLocation StartLoc = Locs.StartLoc; SourceLocation LParenLoc = Locs.LParenLoc; SourceLocation EndLoc = Locs.EndLoc; OMPClause *Res = nullptr; + int ExtraModifier = Data.ExtraModifier; + SourceLocation ExtraModifierLoc = Data.ExtraModifierLoc; + SourceLocation ColonLoc = Data.ColonLoc; switch (Kind) { case OMPC_private: Res = ActOnOpenMPPrivateClause(VarList, StartLoc, LParenLoc, EndLoc); @@ 
-17006,28 +17004,28 @@ OMPClause *Sema::ActOnOpenMPVarListClause( Res = ActOnOpenMPReductionClause( VarList, static_cast(ExtraModifier), StartLoc, LParenLoc, ExtraModifierLoc, ColonLoc, EndLoc, - ReductionOrMapperIdScopeSpec, ReductionOrMapperId); + Data.ReductionOrMapperIdScopeSpec, Data.ReductionOrMapperId); break; case OMPC_task_reduction: - Res = ActOnOpenMPTaskReductionClause(VarList, StartLoc, LParenLoc, ColonLoc, - EndLoc, ReductionOrMapperIdScopeSpec, - ReductionOrMapperId); + Res = ActOnOpenMPTaskReductionClause( + VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, + Data.ReductionOrMapperIdScopeSpec, Data.ReductionOrMapperId); break; case OMPC_in_reduction: - Res = ActOnOpenMPInReductionClause(VarList, StartLoc, LParenLoc, ColonLoc, - EndLoc, ReductionOrMapperIdScopeSpec, - ReductionOrMapperId); + Res = ActOnOpenMPInReductionClause( + VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, + Data.ReductionOrMapperIdScopeSpec, Data.ReductionOrMapperId); break; case OMPC_linear: assert(0 <= ExtraModifier && ExtraModifier <= OMPC_LINEAR_unknown && "Unexpected linear modifier."); Res = ActOnOpenMPLinearClause( - VarList, DepModOrTailExpr, StartLoc, LParenLoc, + VarList, Data.DepModOrTailExpr, StartLoc, LParenLoc, static_cast(ExtraModifier), ExtraModifierLoc, ColonLoc, EndLoc); break; case OMPC_aligned: - Res = ActOnOpenMPAlignedClause(VarList, DepModOrTailExpr, StartLoc, + Res = ActOnOpenMPAlignedClause(VarList, Data.DepModOrTailExpr, StartLoc, LParenLoc, ColonLoc, EndLoc); break; case OMPC_copyin: @@ -17043,26 +17041,30 @@ OMPClause *Sema::ActOnOpenMPVarListClause( assert(0 <= ExtraModifier && ExtraModifier <= OMPC_DEPEND_unknown && "Unexpected depend modifier."); Res = ActOnOpenMPDependClause( - DepModOrTailExpr, static_cast(ExtraModifier), - ExtraModifierLoc, ColonLoc, VarList, StartLoc, LParenLoc, EndLoc); + {static_cast(ExtraModifier), ExtraModifierLoc, + ColonLoc, Data.OmpAllMemoryLoc}, + Data.DepModOrTailExpr, VarList, StartLoc, LParenLoc, EndLoc); break; case 
OMPC_map: assert(0 <= ExtraModifier && ExtraModifier <= OMPC_MAP_unknown && "Unexpected map modifier."); Res = ActOnOpenMPMapClause( - MapTypeModifiers, MapTypeModifiersLoc, ReductionOrMapperIdScopeSpec, - ReductionOrMapperId, static_cast(ExtraModifier), - IsMapTypeImplicit, ExtraModifierLoc, ColonLoc, VarList, Locs); + Data.MapTypeModifiers, Data.MapTypeModifiersLoc, + Data.ReductionOrMapperIdScopeSpec, Data.ReductionOrMapperId, + static_cast(ExtraModifier), Data.IsMapTypeImplicit, + ExtraModifierLoc, ColonLoc, VarList, Locs); break; case OMPC_to: - Res = ActOnOpenMPToClause(MotionModifiers, MotionModifiersLoc, - ReductionOrMapperIdScopeSpec, ReductionOrMapperId, - ColonLoc, VarList, Locs); + Res = + ActOnOpenMPToClause(Data.MotionModifiers, Data.MotionModifiersLoc, + Data.ReductionOrMapperIdScopeSpec, + Data.ReductionOrMapperId, ColonLoc, VarList, Locs); break; case OMPC_from: - Res = ActOnOpenMPFromClause(MotionModifiers, MotionModifiersLoc, - ReductionOrMapperIdScopeSpec, - ReductionOrMapperId, ColonLoc, VarList, Locs); + Res = ActOnOpenMPFromClause(Data.MotionModifiers, Data.MotionModifiersLoc, + Data.ReductionOrMapperIdScopeSpec, + Data.ReductionOrMapperId, ColonLoc, VarList, + Locs); break; case OMPC_use_device_ptr: Res = ActOnOpenMPUseDevicePtrClause(VarList, Locs); @@ -17077,7 +17079,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( Res = ActOnOpenMPHasDeviceAddrClause(VarList, Locs); break; case OMPC_allocate: - Res = ActOnOpenMPAllocateClause(DepModOrTailExpr, VarList, StartLoc, + Res = ActOnOpenMPAllocateClause(Data.DepModOrTailExpr, VarList, StartLoc, LParenLoc, ColonLoc, EndLoc); break; case OMPC_nontemporal: @@ -17091,7 +17093,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( break; case OMPC_affinity: Res = ActOnOpenMPAffinityClause(StartLoc, LParenLoc, ColonLoc, EndLoc, - DepModOrTailExpr, VarList); + Data.DepModOrTailExpr, VarList); break; case OMPC_if: case OMPC_depobj: @@ -19684,10 +19686,12 @@ OMPClause *Sema::ActOnOpenMPDepobjClause(Expr *Depobj, 
SourceLocation StartLoc, } OMPClause * -Sema::ActOnOpenMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind, - SourceLocation DepLoc, SourceLocation ColonLoc, - ArrayRef VarList, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc) { +Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, + Expr *DepModifier, ArrayRef VarList, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) { + OpenMPDependClauseKind DepKind = Data.DepKind; + SourceLocation DepLoc = Data.DepLoc; if (DSAStack->getCurrentDirective() == OMPD_ordered && DepKind != OMPC_DEPEND_source && DepKind != OMPC_DEPEND_sink) { Diag(DepLoc, diag::err_omp_unexpected_clause_value) @@ -19706,9 +19710,9 @@ Sema::ActOnOpenMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind, ((LangOpts.OpenMP < 50 || DSAStack->getCurrentDirective() == OMPD_depobj) && DepKind == OMPC_DEPEND_depobj))) { - SmallVector Except; - Except.push_back(OMPC_DEPEND_source); - Except.push_back(OMPC_DEPEND_sink); + SmallVector Except = {OMPC_DEPEND_source, OMPC_DEPEND_sink, + OMPC_DEPEND_outallmemory, + OMPC_DEPEND_inoutallmemory}; if (LangOpts.OpenMP < 50 || DSAStack->getCurrentDirective() == OMPD_depobj) Except.push_back(OMPC_DEPEND_depobj); if (LangOpts.OpenMP < 51) @@ -19934,12 +19938,14 @@ Sema::ActOnOpenMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind, << 1 << DSAStack->getParentLoopControlVariable(VarList.size() + 1); } if (DepKind != OMPC_DEPEND_source && DepKind != OMPC_DEPEND_sink && - Vars.empty()) + DepKind != OMPC_DEPEND_outallmemory && + DepKind != OMPC_DEPEND_inoutallmemory && Vars.empty()) return nullptr; - auto *C = OMPDependClause::Create(Context, StartLoc, LParenLoc, EndLoc, - DepModifier, DepKind, DepLoc, ColonLoc, - Vars, TotalDepCount.getZExtValue()); + auto *C = OMPDependClause::Create( + Context, StartLoc, LParenLoc, EndLoc, + {DepKind, DepLoc, Data.ColonLoc, Data.OmpAllMemoryLoc}, DepModifier, Vars, + 
TotalDepCount.getZExtValue()); if ((DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source) && DSAStack->isParentOrderedRegion()) DSAStack->addDoacrossDependClause(C, OpsOffs); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 3d7279c47f7c34..fae712e04db865 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1919,14 +1919,13 @@ class TreeTransform { /// /// By default, performs semantic analysis to build the new OpenMP clause. /// Subclasses may override this routine to provide different behavior. - OMPClause * - RebuildOMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind, - SourceLocation DepLoc, SourceLocation ColonLoc, - ArrayRef VarList, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDependClause(DepModifier, DepKind, DepLoc, - ColonLoc, VarList, StartLoc, - LParenLoc, EndLoc); + OMPClause *RebuildOMPDependClause(OMPDependClause::DependDataTy Data, + Expr *DepModifier, ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().ActOnOpenMPDependClause(Data, DepModifier, VarList, + StartLoc, LParenLoc, EndLoc); } /// Build a new OpenMP 'device' clause. 
@@ -10040,9 +10039,9 @@ TreeTransform::TransformOMPDependClause(OMPDependClause *C) { Vars.push_back(EVar.get()); } return getDerived().RebuildOMPDependClause( - DepModifier, C->getDependencyKind(), C->getDependencyLoc(), - C->getColonLoc(), Vars, C->getBeginLoc(), C->getLParenLoc(), - C->getEndLoc()); + {C->getDependencyKind(), C->getDependencyLoc(), C->getColonLoc(), + C->getOmpAllMemoryLoc()}, + DepModifier, Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); } template diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 055e8550bd2728..76629a516cddd8 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -12490,6 +12490,7 @@ void OMPClauseReader::VisitOMPDependClause(OMPDependClause *C) { static_cast(Record.readInt())); C->setDependencyLoc(Record.readSourceLocation()); C->setColonLoc(Record.readSourceLocation()); + C->setOmpAllMemoryLoc(Record.readSourceLocation()); unsigned NumVars = C->varlist_size(); SmallVector Vars; Vars.reserve(NumVars); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index f064a93766c056..61be9265db0cce 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6592,6 +6592,7 @@ void OMPClauseWriter::VisitOMPDependClause(OMPDependClause *C) { Record.push_back(C->getDependencyKind()); Record.AddSourceLocation(C->getDependencyLoc()); Record.AddSourceLocation(C->getColonLoc()); + Record.AddSourceLocation(C->getOmpAllMemoryLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 41e61dadc1ce72..b469c1e5254cdb 100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s -// RUN: 
%clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -188,6 +188,18 @@ int main(int argc, char **argv) { // CHECK-NEXT: foo(); // CHECK-NEXT: #pragma omp task in_reduction(+: arr1) #pragma omp task in_reduction(+: arr1) + foo(); + // CHECK-NEXT: foo(); + // CHECK-NEXT: #pragma omp task depend(out : arr,omp_all_memory) +#pragma omp task depend(out: omp_all_memory, arr) + foo(); + // CHECK-NEXT: foo(); + // CHECK-NEXT: #pragma omp task depend(inout : b,arr,a,x,omp_all_memory) +#pragma omp task depend(inout: b, arr, omp_all_memory, a, x) + foo(); + // CHECK-NEXT: foo(); + // CHECK-NEXT: #pragma omp task depend(inout : omp_all_memory) +#pragma omp task depend(inout: omp_all_memory) foo(); // CHECK-NEXT: foo(); return tmain(b, &b) + tmain(x, &x); diff --git a/clang/test/OpenMP/task_depend_messages.cpp b/clang/test/OpenMP/task_depend_messages.cpp index 
dc45233b6e97b6..1e20335a1ee333 100644 --- a/clang/test/OpenMP/task_depend_messages.cpp +++ b/clang/test/OpenMP/task_depend_messages.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-version=45 -fopenmp -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=50 -fopenmp -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-version=51 -fopenmp -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp51,omp51warn -fopenmp-version=51 -fopenmp -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-version=45 -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=50 -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-version=51 -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp51,omp51warn -fopenmp-version=51 -fopenmp-simd -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized typedef void *omp_depend_t; @@ -85,6 +85,10 @@ int main(int argc, char **argv, char *env[]) { argc = 0; #pragma omp task depend(iterator(i = 0:10, i = 0:10), in : argv[i]) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} omp45-error {{use of undeclared identifier 'i'}} omp50-error {{redefinition of 'i'}} omp50-note {{previous definition is here}} omp51-error {{redefinition of 'i'}} omp51-note {{previous definition is here}} i = 0; // expected-error {{use of undeclared identifier 'i'}} - +#pragma omp task depend(in: argc, omp_all_memory) // omp45-error {{use of undeclared identifier 'omp_all_memory'}} omp50-error {{use of undeclared identifier 'omp_all_memory'}} omp51-error {{reserved locator 'omp_all_memory' requires 'out' or 'inout' dependency types}} 
+#pragma omp task depend(out: omp_all_memory, argc, omp_all_memory) // omp45-error {{use of undeclared identifier 'omp_all_memory'}} omp45-error {{use of undeclared identifier 'omp_all_memory'}} omp50-error {{use of undeclared identifier 'omp_all_memory'}} omp50-error {{use of undeclared identifier 'omp_all_memory'}} omp51warn-warning {{reserved locator 'omp_all_memory' cannot be specified more than once}} + // expected-error@+1 {{use of undeclared identifier 'omp_all_memory'}} +#pragma omp task depend(out: argc) private(argc) allocate(argc, omp_all_memory) + argc = 0; return 0; } From 3723868d9e07de5d4a7468a4c2c74fc8517afc14 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 24 May 2022 13:43:52 -0400 Subject: [PATCH 378/908] [OpenMP] Fix file arguments for embedding bitcode in the linker wrapper Summary: The linker wrapper supports embedding bitcode images instead of linked device images to facilitate JIT in the device runtime. However, we were incorrectly passing in the file twice when this option was set. This patch makes sure we only use the intermediate result of the LTO pass and don't add the final output to the full job. In the future we will want to add both of these and handle that accordingly to allow the runtime to either use the AoT compiled version or JIT compile the bitcode version if available. --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index a06409fdab610e..74c03a26b884fc 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1012,8 +1012,14 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, if (Error Err = LTOBackend->run(AddStream)) return Err; + // If we are embedding bitcode we only need the intermediate output. 
+ if (EmbedBitcode) { + InputFiles = NewInputFiles; + return Error::success(); + } + // Is we are compiling for NVPTX we need to run the assembler first. - if (TheTriple.isNVPTX() && !EmbedBitcode) { + if (TheTriple.isNVPTX()) { for (auto &File : Files) { auto FileOrErr = nvptx::assemble(File, TheTriple, Arch, !WholeProgram); if (!FileOrErr) From b159108bc5ebd7f37373896ece1da8f49612f7a6 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Fri, 20 May 2022 20:35:28 +0000 Subject: [PATCH 379/908] Sema: adjust assertion to account for deduced types Previous changes for the BTF attributes introduced a new sub-tree visitation. That uncovered that when accessing the typespec location we would assert that the type specification is either a type declaration or `typename`. However, `typename` was explicitly permitted. This change predates the introduction of newer deduced type representations such as `__underlying_type` from C++ and the addition of the GNU `__typeof__` expression. Thanks to aaron.ballman for the valuable discussion and pointer to `isTypeRep`. 
Differential Revision: https://reviews.llvm.org/D126093 Reviewed By: aaron.ballman, yonghong-song --- clang/include/clang/Sema/DeclSpec.h | 3 ++- clang/test/Sema/typerep-typespec.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 clang/test/Sema/typerep-typespec.c diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index 6a7fbe8282d85d..c03ead9c79b341 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -516,7 +516,8 @@ class DeclSpec { SourceLocation getTypeSpecSatLoc() const { return TSSatLoc; } SourceLocation getTypeSpecTypeNameLoc() const { - assert(isDeclRep((TST) TypeSpecType) || TypeSpecType == TST_typename); + assert(isDeclRep((TST)TypeSpecType) || isTypeRep((TST)TypeSpecType) || + isExprRep((TST)TypeSpecType)); return TSTNameLoc; } diff --git a/clang/test/Sema/typerep-typespec.c b/clang/test/Sema/typerep-typespec.c new file mode 100644 index 00000000000000..001131306a4a9f --- /dev/null +++ b/clang/test/Sema/typerep-typespec.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -std=c11 %s -fsyntax-only -verify +// REQUIRES: asserts + +struct dispatch_object_s; +void _dispatch_queue_get_head(struct dispatch_object_s *volatile dq_items_head) { + (_Atomic __typeof__(dq_items_head) *)0; // expected-warning{{expression result unused}} +} +void g(void) { + (_Atomic __typeof__(struct dispatch_object_s *volatile) *)0; // expected-warning{{expression result unused}} +} From f6038cdca03115da22b9e6ada5c25de4df5f42d2 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Tue, 24 May 2022 10:51:42 -0700 Subject: [PATCH 380/908] [compiler-rt][scudo] Simplify TBI checks Differential Revision: https://reviews.llvm.org/D111080 --- compiler-rt/lib/scudo/standalone/memtag.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index 7f14a30fee12b3..13eb4bb8d22775 100644 --- 
a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -18,19 +18,16 @@ namespace scudo { -#if (__clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__)) || \ - defined(SCUDO_FUZZ) - // We assume that Top-Byte Ignore is enabled if the architecture supports memory // tagging. Not all operating systems enable TBI, so we only claim architectural // support for memory tagging if the operating system enables TBI. // HWASan uses the top byte for its own purpose and Scudo should not touch it. -#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) && \ - !__has_feature(hwaddress_sanitizer) +#if (__clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__) && \ + SCUDO_LINUX && \ + !defined(SCUDO_DISABLE_TBI) && !__has_feature(hwaddress_sanitizer)) || \ + defined(SCUDO_FUZZ) + inline constexpr bool archSupportsMemoryTagging() { return true; } -#else -inline constexpr bool archSupportsMemoryTagging() { return false; } -#endif inline constexpr uptr archMemoryTagGranuleSize() { return 16; } From 26c4cf23915a598ec4bba01342f4166902bd5dbe Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 24 May 2022 10:47:18 -0700 Subject: [PATCH 381/908] [AMDGPU] Disable newly added gfx90a global isel image tests. NFC. This fixed build failure with expensive checks after D126009. The change has added new run lines for Global ISel which has uncovered a pre-existing problem: it does not select a correct flavor of these image instructions. 
--- .../CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll | 4 +++- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll | 4 +++- .../CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll index 4315f34724e512..dbb479edc6e26a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll @@ -2,9 +2,11 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s +; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s +; FIXME: global isel does not select _gfx90a flavor of image instructions. 
+ define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { ; GFX6-LABEL: atomic_swap_i32_1d: ; GFX6: ; %bb.0: ; %main_body diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll index 15269a88e61c71..cecec75916139d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; XUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +; FIXME: global isel does not select _gfx90a flavor of image instructions. ; GCN-LABEL: {{^}}load_1d: ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll index e712a18b74df35..164dde00ac6774 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s +; XUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s + +; FIXME: global isel does not select _gfx90a flavor of image instructions. 
; GFX90A-LABEL: {{^}}sample_1d: ; GFX90A-NOT: s_wqm_b64 From 1e1f60c605a9b1c803f3bbb1a1339c9bb1af4e34 Mon Sep 17 00:00:00 2001 From: V Donaldson Date: Tue, 24 May 2022 10:06:24 -0700 Subject: [PATCH 382/908] [flang] Alternate entry points with unused arguments A dummy argument in an entry point of a subprogram with multiple entry points need not be defined in other entry points. It is only legal to reference such an argument when calling an entry point that does have a definition. An entry point without such a definition needs a local "substitute" definition sufficient to generate code. It is nonconformant to reference such a definition at runtime. Most such definitions and associated code will be deleted as dead code at compile time. However, that is not always possible, as in the following code. This code is conformant if all calls to entry point ss set m=3, and all calls to entry point ee set n=3. subroutine ss(a, b, m, d, k) ! no x, y, n integer :: a(m), b(a(m)), m, d(k) integer :: x(n), y(x(n)), n integer :: k 1 print*, m, k print*, a print*, b print*, d if (m == 3) return entry ee(x, y, n, d, k) ! no a, b, m print*, n, k print*, x print*, y print*, d if (n /= 3) goto 1 end integer :: xx(3), yy(5), zz(3) xx = 5 yy = 7 zz = 9 call ss(xx, yy, 3, zz, 3) call ss(xx, yy, 3, zz, 3) end Lowering currently generates fir::UndefOp's for all unused arguments. This is usually ok, but cases such as the one here incorrectly access unused UndefOp arguments for m and n from an entry point that doesn't have a proper definition. The problem is addressed by creating a more complete definition of an unused argument in most cases. This is implemented in large part by moving the definition of an unused argument from mapDummiesAndResults to mapSymbolAttributes. The code in mapSymbolAttributes then chooses one of three code generation options, depending on information available there. 
This patch deals with dummy procedures in alternate entries, and adds a TODO for procedure pointers (the PFTBuilder is modified to analyze procedure pointer symbols so that they are not silently ignored, and instead hit proper TODOs). BoxAnalyzer is also changed because assumed-size arrays were wrongfully categorized as constant shape arrays. This had no impact, except when there were unused entry points. Co-authored-by: jeanPerier Differential Revision: https://reviews.llvm.org/D125867 --- flang/include/flang/Lower/BoxAnalyzer.h | 8 +- flang/include/flang/Lower/PFTBuilder.h | 2 - flang/lib/Lower/Bridge.cpp | 31 +- flang/lib/Lower/ConvertType.cpp | 2 + flang/lib/Lower/ConvertVariable.cpp | 94 ++++- flang/lib/Lower/PFTBuilder.cpp | 56 ++- flang/test/Lower/dummy-procedure-in-entry.f90 | 90 +++++ flang/test/Lower/entry-statement.f90 | 331 +++++++++++++++++- 8 files changed, 531 insertions(+), 83 deletions(-) create mode 100644 flang/test/Lower/dummy-procedure-in-entry.f90 diff --git a/flang/include/flang/Lower/BoxAnalyzer.h b/flang/include/flang/Lower/BoxAnalyzer.h index 8baf23e13ec571..e347d5299c6acd 100644 --- a/flang/include/flang/Lower/BoxAnalyzer.h +++ b/flang/include/flang/Lower/BoxAnalyzer.h @@ -236,6 +236,10 @@ inline bool isExplicitShape(const Fortran::semantics::Symbol &sym) { return det && det->IsArray() && det->shape().IsExplicitShape(); } +inline bool isAssumedSize(const Fortran::semantics::Symbol &sym) { + return Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); +} + //===----------------------------------------------------------------------===// // Perform analysis to determine a box's parameter values //===----------------------------------------------------------------------===// @@ -378,7 +382,7 @@ class BoxAnalyzer : public fir::details::matcher { /// Run the analysis on `sym`. 
void analyze(const Fortran::semantics::Symbol &sym) { if (symIsArray(sym)) { - bool isConstant = true; + bool isConstant = !isAssumedSize(sym); llvm::SmallVector lbounds; llvm::SmallVector shapes; llvm::SmallVector bounds; @@ -396,6 +400,8 @@ class BoxAnalyzer : public fir::details::matcher { continue; } } else if (subs.ubound().isStar()) { + assert(Fortran::semantics::IsNamedConstant(sym) && + "expect implied shape constant"); shapes.push_back(fir::SequenceType::getUnknownExtent()); continue; } diff --git a/flang/include/flang/Lower/PFTBuilder.h b/flang/include/flang/Lower/PFTBuilder.h index 0c9aba6d2e2f24..2fcbdcadeea1e4 100644 --- a/flang/include/flang/Lower/PFTBuilder.h +++ b/flang/include/flang/Lower/PFTBuilder.h @@ -668,8 +668,6 @@ struct FunctionLikeUnit : public ProgramUnit { entryPointList{std::pair{nullptr, nullptr}}; /// Current index into entryPointList. Index 0 is the primary entry point. int activeEntry = 0; - /// Dummy arguments that are not universal across entry points. - llvm::SmallVector nonUniversalDummyArguments; /// Primary result for function subprograms with alternate entries. This /// is one of the largest result values, not necessarily the first one. const semantics::Symbol *primaryResult{nullptr}; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index e41f525e8217a5..28dbc8b8b0b4a2 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2392,19 +2392,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { for (const Fortran::lower::CalleeInterface::PassedEntity &arg : callee.getPassedArguments()) mapPassedEntity(arg); - - // Allocate local skeleton instances of dummies from other entry points. - // Most of these locals will not survive into final generated code, but - // some will. It is illegal to reference them at run time if they do. 
- for (const Fortran::semantics::Symbol *arg : - funit.nonUniversalDummyArguments) { - if (lookupSymbol(*arg)) - continue; - mlir::Type type = genType(*arg); - // TODO: Account for VALUE arguments (and possibly other variants). - type = builder->getRefType(type); - addSymbol(*arg, builder->create(toLocation(), type)); - } if (std::optional passedResult = callee.getPassedResult()) { mapPassedEntity(*passedResult); @@ -2491,15 +2478,10 @@ class FirConverter : public Fortran::lower::AbstractConverter { } } - // If this is a host procedure with host associations, then create the tuple - // of pointers for passing to the internal procedures. - if (!funit.getHostAssoc().empty()) - funit.getHostAssoc().hostProcedureBindings(*this, localSymbols); - - /// TODO: should use same mechanism as equivalence? - /// One blocking point is character entry returns that need special handling - /// since they are not locally allocated but come as argument. CHARACTER(*) - /// is not something that fit wells with equivalence lowering. + // TODO: should use same mechanism as equivalence? + // One blocking point is character entry returns that need special handling + // since they are not locally allocated but come as argument. CHARACTER(*) + // is not something that fits well with equivalence lowering. for (const Fortran::lower::pft::Variable &altResult : deferredFuncResultList) { if (std::optional @@ -2510,6 +2492,11 @@ class FirConverter : public Fortran::lower::AbstractConverter { stmtCtx, primaryFuncResultStorage); } + // If this is a host procedure with host associations, then create the tuple + // of pointers for passing to the internal procedures. + if (!funit.getHostAssoc().empty()) + funit.getHostAssoc().hostProcedureBindings(*this, localSymbols); + // Create most function blocks in advance. 
createEmptyBlocks(funit.evaluationList); diff --git a/flang/lib/Lower/ConvertType.cpp b/flang/lib/Lower/ConvertType.cpp index 056490ec41986c..389cb87d297f85 100644 --- a/flang/lib/Lower/ConvertType.cpp +++ b/flang/lib/Lower/ConvertType.cpp @@ -225,6 +225,8 @@ struct TypeBuilder { // links, the fir type is built based on the ultimate symbol. This relies // on the fact volatile and asynchronous are not reflected in fir types. const Fortran::semantics::Symbol &ultimate = symbol.GetUltimate(); + if (Fortran::semantics::IsProcedurePointer(ultimate)) + TODO(loc, "procedure pointers"); if (const Fortran::semantics::DeclTypeSpec *type = ultimate.GetType()) { if (const Fortran::semantics::IntrinsicTypeSpec *tySpec = type->AsIntrinsic()) { diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 88b0e69bf7f1ff..86f7fac38c836e 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -122,7 +122,7 @@ static fir::GlobalOp declareGlobal(Fortran::lower::AbstractConverter &converter, // symbol is an object of a function pointer. const Fortran::semantics::Symbol &ultimate = sym.GetUltimate(); if (!ultimate.has() && - !ultimate.has()) + !Fortran::semantics::IsProcedurePointer(ultimate)) mlir::emitError(loc, "lowering global declaration: symbol '") << toStringRef(sym.name()) << "' has unexpected details\n"; return builder.createGlobal(loc, converter.genType(var), globalName, linkage, @@ -378,6 +378,10 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, if (global && globalIsInitialized(global)) return global; + + if (Fortran::semantics::IsProcedurePointer(sym)) + TODO(loc, "procedure pointer globals"); + // If this is an array, check to see if we can use a dense attribute // with a tensor mlir type. This optimization currently only supports // rank-1 Fortran arrays of integer, real, or logical. 
The tensor @@ -1187,11 +1191,10 @@ static mlir::Value genExtentValue(fir::FirOpBuilder &builder, } /// Lower specification expressions and attributes of variable \p var and -/// add it to the symbol map. -/// For global and aliases, the address must be pre-computed and provided -/// in \p preAlloc. -/// Dummy arguments must have already been mapped to mlir block arguments -/// their mapping may be updated here. +/// add it to the symbol map. For a global or an alias, the address must be +/// pre-computed and provided in \p preAlloc. A dummy argument for the current +/// entry point has already been mapped to an mlir block argument in +/// mapDummiesAndResults. Its mapping may be updated here. void Fortran::lower::mapSymbolAttributes( AbstractConverter &converter, const Fortran::lower::pft::Variable &var, Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx, @@ -1200,14 +1203,32 @@ void Fortran::lower::mapSymbolAttributes( const Fortran::semantics::Symbol &sym = var.getSymbol(); const mlir::Location loc = converter.genLocation(sym.name()); mlir::IndexType idxTy = builder.getIndexType(); - const bool isDummy = Fortran::semantics::IsDummy(sym); + const bool isDeclaredDummy = Fortran::semantics::IsDummy(sym); + // An active dummy from the current entry point. + const bool isDummy = isDeclaredDummy && symMap.lookupSymbol(sym).getAddr(); + // An unused dummy from another entry point. + const bool isUnusedEntryDummy = isDeclaredDummy && !isDummy; const bool isResult = Fortran::semantics::IsFunctionResult(sym); const bool replace = isDummy || isResult; fir::factory::CharacterExprHelper charHelp{builder, loc}; + + if (Fortran::semantics::IsProcedure(sym)) { + if (isUnusedEntryDummy) { + // Additional discussion below. 
+ mlir::Type dummyProcType = + Fortran::lower::getDummyProcedureType(sym, converter); + mlir::Value undefOp = builder.create(loc, dummyProcType); + symMap.addSymbol(sym, undefOp); + } + if (Fortran::semantics::IsPointer(sym)) + TODO(loc, "procedure pointers"); + return; + } + Fortran::lower::BoxAnalyzer ba; ba.analyze(sym); - // First deal with pointers an allocatables, because their handling here + // First deal with pointers and allocatables, because their handling here // is the same regardless of their rank. if (Fortran::semantics::IsAllocatableOrPointer(sym)) { // Get address of fir.box describing the entity. @@ -1263,6 +1284,42 @@ void Fortran::lower::mapSymbolAttributes( } } + // A dummy from another entry point that is not declared in the current + // entry point requires a skeleton definition. Most such "unused" dummies + // will not survive into final generated code, but some will. It is illegal + // to reference one at run time if it does. Such a dummy is mapped to a + // value in one of three ways: + // + // - Generate a fir::UndefOp value. This is lightweight, easy to clean up, + // and often valid, but it may fail for a dummy with dynamic bounds, + // or a dummy used to define another dummy. Information to distinguish + // valid cases is not generally available here, with the exception of + // dummy procedures. See the first function exit above. + // + // - Allocate an uninitialized stack slot. This is an intermediate-weight + // solution that is harder to clean up. It is often valid, but may fail + // for an object with dynamic bounds. This option is "automatically" + // used by default for cases that do not use one of the other options. + // + // - Allocate a heap box/descriptor, initialized to zero. This always + // works, but is more heavyweight and harder to clean up. It is used + // for dynamic objects via calls to genUnusedEntryPointBox. 
+ + auto genUnusedEntryPointBox = [&]() { + if (isUnusedEntryDummy) { + assert(!Fortran::semantics::IsAllocatableOrPointer(sym) && + "handled above"); + // The box is read right away because lowering code does not expect + // a non pointer/allocatable symbol to be mapped to a MutableBox. + symMap.addSymbol(sym, fir::factory::genMutableBoxRead( + builder, loc, + fir::factory::createTempMutableBox( + builder, loc, converter.genType(var)))); + return true; + } + return false; + }; + // Helper to generate scalars for the symbol properties. auto genValue = [&](const Fortran::lower::SomeExpr &expr) { return genScalarValue(converter, loc, expr, symMap, stmtCtx); @@ -1412,6 +1469,8 @@ void Fortran::lower::mapSymbolAttributes( //===--------------------------------------------------------------===// [&](const Fortran::lower::details::ScalarDynamicChar &x) { + if (genUnusedEntryPointBox()) + return; // type is a CHARACTER, determine the LEN value auto charLen = x.charLen(); if (replace) { @@ -1419,17 +1478,8 @@ void Fortran::lower::mapSymbolAttributes( mlir::Value boxAddr = symBox.getAddr(); mlir::Value len; mlir::Type addrTy = boxAddr.getType(); - if (addrTy.isa() || addrTy.isa()) { + if (addrTy.isa() || addrTy.isa()) std::tie(boxAddr, len) = charHelp.createUnboxChar(symBox.getAddr()); - } else { - // dummy from an other entry case: we cannot get a dynamic length - // for it, it's illegal for the user program to use it. However, - // since we are lowering all function unit statements regardless - // of whether the execution will reach them or not, we need to - // fill a value for the length here. 
- len = builder.createIntegerConstant( - loc, builder.getCharacterLengthType(), 1); - } // Override LEN with an expression if (charLen) len = genExplicitCharLen(charLen); @@ -1484,6 +1534,8 @@ void Fortran::lower::mapSymbolAttributes( //===--------------------------------------------------------------===// [&](const Fortran::lower::details::DynamicArray &x) { + if (genUnusedEntryPointBox()) + return; // cast to the known constant parts from the declaration mlir::Type varType = converter.genType(var); mlir::Value addr = symMap.lookupSymbol(sym).getAddr(); @@ -1587,6 +1639,8 @@ void Fortran::lower::mapSymbolAttributes( //===--------------------------------------------------------------===// [&](const Fortran::lower::details::StaticArrayDynamicChar &x) { + if (genUnusedEntryPointBox()) + return; mlir::Value addr; mlir::Value len; [[maybe_unused]] bool mustBeDummy = false; @@ -1656,6 +1710,8 @@ void Fortran::lower::mapSymbolAttributes( //===--------------------------------------------------------------===// [&](const Fortran::lower::details::DynamicArrayStaticChar &x) { + if (genUnusedEntryPointBox()) + return; mlir::Value addr; mlir::Value len; mlir::Value argBox; @@ -1714,6 +1770,8 @@ void Fortran::lower::mapSymbolAttributes( //===--------------------------------------------------------------===// [&](const Fortran::lower::details::DynamicArrayDynamicChar &x) { + if (genUnusedEntryPointBox()) + return; mlir::Value addr; mlir::Value len; mlir::Value argBox; diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index 3be7ebedb3da63..70774425760a65 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -975,30 +975,28 @@ class PFTBuilder { } } - /// For multiple entry subprograms, build a list of the dummy arguments that - /// appear in some, but not all entry points. 
For those that are functions, - /// also find one of the largest function results, since a single result - /// container holds the result for all entries. + /// Do processing specific to subprograms with multiple entry points. void processEntryPoints() { lower::pft::Evaluation *initialEval = &evaluationListStack.back()->front(); lower::pft::FunctionLikeUnit *unit = initialEval->getOwningProcedure(); int entryCount = unit->entryPointList.size(); if (entryCount == 1) return; - llvm::DenseMap dummyCountMap; + + // The first executable statement in the subprogram is preceded by a + // branch to the entry point, so it starts a new block. + if (initialEval->hasNestedEvaluations()) + initialEval = &initialEval->getFirstNestedEvaluation(); + else if (initialEval->isA()) + initialEval = initialEval->lexicalSuccessor; + initialEval->isNewBlock = true; + + // All function entry points share a single result container. + // Find one of the largest results. for (int entryIndex = 0; entryIndex < entryCount; ++entryIndex) { unit->setActiveEntry(entryIndex); const auto &details = unit->getSubprogramSymbol().get(); - for (semantics::Symbol *arg : details.dummyArgs()) { - if (!arg) - continue; // alternate return specifier (no actual argument) - const auto iter = dummyCountMap.find(arg); - if (iter == dummyCountMap.end()) - dummyCountMap.try_emplace(arg, 1); - else - ++iter->second; - } if (details.isFunction()) { const semantics::Symbol *resultSym = &details.result(); assert(resultSym && "missing result symbol"); @@ -1008,16 +1006,6 @@ class PFTBuilder { } } unit->setActiveEntry(0); - for (auto arg : dummyCountMap) - if (arg.second < entryCount) - unit->nonUniversalDummyArguments.push_back(arg.first); - // The first executable statement in the subprogram is preceded by a - // branch to the entry point, so it starts a new block. 
- if (initialEval->hasNestedEvaluations()) - initialEval = &initialEval->getFirstNestedEvaluation(); - else if (initialEval->isA()) - initialEval = initialEval->lexicalSuccessor; - initialEval->isNewBlock = true; } std::unique_ptr pgm; @@ -1401,10 +1389,14 @@ struct SymbolDependenceDepth { LLVM_DEBUG(llvm::dbgs() << "analyze symbol: " << sym << '\n'); if (!done.second) return 0; - if (semantics::IsProcedure(sym)) { - // TODO: add declaration? + const bool isProcedurePointerOrDummy = + semantics::IsProcedurePointer(sym) || + (semantics::IsProcedure(sym) && IsDummy(sym)); + // A procedure argument in a subprogram with multiple entry points might + // need a vars list entry to trigger creation of a symbol map entry in + // some cases. Non-dummy procedures don't. + if (semantics::IsProcedure(sym) && !isProcedurePointerOrDummy) return 0; - } semantics::Symbol ultimate = sym.GetUltimate(); if (const auto *details = ultimate.detailsIf()) { @@ -1414,7 +1406,7 @@ struct SymbolDependenceDepth { return 0; } if (!ultimate.has() && - !ultimate.has()) + !isProcedurePointerOrDummy) return 0; if (sym.has()) @@ -1422,15 +1414,14 @@ struct SymbolDependenceDepth { // Symbol must be something lowering will have to allocate. int depth = 0; - const semantics::DeclTypeSpec *symTy = sym.GetType(); - assert(symTy && "symbol must have a type"); - // Analyze symbols appearing in object entity specification expression. This // ensures these symbols will be instantiated before the current one. // This is not done for object entities that are host associated because // they must be instantiated from the value of the host symbols (the // specification expressions should not be re-evaluated). 
if (const auto *details = sym.detailsIf()) { + const semantics::DeclTypeSpec *symTy = sym.GetType(); + assert(symTy && "symbol must have a type"); // check CHARACTER's length if (symTy->category() == semantics::DeclTypeSpec::Character) if (auto e = symTy->characterTypeSpec().length().GetExplicit()) @@ -1471,9 +1462,8 @@ struct SymbolDependenceDepth { // If there are alias sets, then link the participating variables to their // aggregate stores when constructing the new variable on the list. - if (lower::pft::Variable::AggregateStore *store = findStoreIfAlias(sym)) { + if (lower::pft::Variable::AggregateStore *store = findStoreIfAlias(sym)) vars[depth].back().setAlias(store->getOffset()); - } return depth; } diff --git a/flang/test/Lower/dummy-procedure-in-entry.f90 b/flang/test/Lower/dummy-procedure-in-entry.f90 new file mode 100644 index 00000000000000..7e9cacc5228632 --- /dev/null +++ b/flang/test/Lower/dummy-procedure-in-entry.f90 @@ -0,0 +1,90 @@ +! Test dummy procedures that are not an argument in every entry. +! This requires creating a mock value in the entries where it is +! not an argument. +! RUN: bbc -emit-fir %s -o - | FileCheck %s + +subroutine dummy_with_iface() + interface + real function x() + end function + end interface + entry dummy_with_iface_entry(x) + call takes_real(x()) +end subroutine +! CHECK-LABEL: func @_QPdummy_with_iface() { +! CHECK: %[[VAL_0:.*]] = fir.alloca f32 {adapt.valuebyref} +! CHECK: %[[VAL_1:.*]] = fir.undefined !fir.boxproc<() -> ()> +! CHECK: br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.boxproc<() -> ()>) -> (() -> f32) +! CHECK: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> f32 +! CHECK: fir.store %[[VAL_3]] to %[[VAL_0]] : !fir.ref +! CHECK: fir.call @_QPtakes_real(%[[VAL_0]]) : (!fir.ref) -> () + +! CHECK-LABEL: func @_QPdummy_with_iface_entry( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.boxproc<() -> ()>) { +! CHECK: %[[VAL_1:.*]] = fir.alloca f32 {adapt.valuebyref} +! CHECK: br ^bb1 +! 
CHECK: ^bb1: +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_0]] : (!fir.boxproc<() -> ()>) -> (() -> f32) +! CHECK: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> f32 +! CHECK: fir.store %[[VAL_3]] to %[[VAL_1]] : !fir.ref +! CHECK: fir.call @_QPtakes_real(%[[VAL_1]]) : (!fir.ref) -> () + +subroutine subroutine_dummy() + entry subroutine_dummy_entry(x) + call x() +end subroutine +! CHECK-LABEL: func @_QPsubroutine_dummy() { +! CHECK: %[[VAL_0:.*]] = fir.undefined !fir.boxproc<() -> ()> +! CHECK: br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_1:.*]] = fir.box_addr %[[VAL_0]] : (!fir.boxproc<() -> ()>) -> (() -> ()) +! CHECK: fir.call %[[VAL_1]]() : () -> () + +! CHECK-LABEL: func @_QPsubroutine_dummy_entry( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.boxproc<() -> ()>) { +! CHECK: br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_1:.*]] = fir.box_addr %[[VAL_0]] : (!fir.boxproc<() -> ()>) -> (() -> ()) +! CHECK: fir.call %[[VAL_1]]() : () -> () + +subroutine character_dummy() + external :: c + character(*) :: c + entry character_dummy_entry(c) + call takes_char(c()) +end subroutine +! CHECK-LABEL: func @_QPcharacter_dummy() { +! CHECK: %[[VAL_0:.*]] = fir.undefined tuple ()>, i64> +! CHECK: br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_1:.*]] = fir.extract_value %[[VAL_0]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.boxproc<() -> ()>) -> (() -> ()) +! CHECK: %[[VAL_3:.*]] = fir.extract_value %[[VAL_0]], [1 : index] : (tuple ()>, i64>) -> i64 +! CHECK: %[[VAL_4:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_3]] : i64) {bindc_name = ".result"} +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_8:.*]] = fir.call %[[VAL_6]](%[[VAL_5]], %[[VAL_7]]) : (!fir.ref>, index) -> !fir.boxchar<1> +! 
CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_10:.*]] = fir.emboxchar %[[VAL_5]], %[[VAL_9]] : (!fir.ref>, index) -> !fir.boxchar<1> +! CHECK: fir.call @_QPtakes_char(%[[VAL_10]]) : (!fir.boxchar<1>) -> () +! CHECK: fir.call @llvm.stackrestore(%[[VAL_4]]) : (!fir.ref) -> () + +! CHECK-LABEL: func @_QPcharacter_dummy_entry( +! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) { +! CHECK: br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_1:.*]] = fir.extract_value %[[VAL_0]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.boxproc<() -> ()>) -> (() -> ()) +! CHECK: %[[VAL_3:.*]] = fir.extract_value %[[VAL_0]], [1 : index] : (tuple ()>, i64>) -> i64 +! CHECK: %[[VAL_4:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_3]] : i64) {bindc_name = ".result"} +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_8:.*]] = fir.call %[[VAL_6]](%[[VAL_5]], %[[VAL_7]]) : (!fir.ref>, index) -> !fir.boxchar<1> +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_10:.*]] = fir.emboxchar %[[VAL_5]], %[[VAL_9]] : (!fir.ref>, index) -> !fir.boxchar<1> +! CHECK: fir.call @_QPtakes_char(%[[VAL_10]]) : (!fir.boxchar<1>) -> () +! 
CHECK: fir.call @llvm.stackrestore(%[[VAL_4]]) : (!fir.ref) -> () diff --git a/flang/test/Lower/entry-statement.f90 b/flang/test/Lower/entry-statement.f90 index c4d202200e9d56..4d826cf34c1672 100644 --- a/flang/test/Lower/entry-statement.f90 +++ b/flang/test/Lower/entry-statement.f90 @@ -16,10 +16,26 @@ subroutine compare1(x, c1, c2) end program entries + character c(3) character(10) hh, qq, m character(len=4) s1, s2 - integer mm + integer mm, x(3), y(5) logical r + complex xx(3) + character(5), external :: f1, f2, f3 + + interface + subroutine ashapec(asc) + character asc(:) + end subroutine + subroutine ashapei(asi) + integer asi(:) + end subroutine + subroutine ashapex(asx) + complex asx(:) + end subroutine + end interface + s1 = 'a111' s2 = 'a222' call compare1(r, s1, s2); print*, r @@ -37,6 +53,16 @@ program entries call dd2 call dd3(6) 6 continue + x = 5 + y = 7 + call level3a(x, y, 3) + call level3b(x, y, 3) + call ashapec(c); print*, c + call ashapei(x); print*, x + call ashapex(xx); print*, xx + print *, f1(1) + print *, f2(2) + print *, f3() end ! CHECK-LABEL: func @_QPss( @@ -110,8 +136,7 @@ function char_array() ! CHECK-LABEL: func @_QPdd1() subroutine dd1 - ! CHECK: %[[kk:[0-9]*]] = fir.alloca i32 {bindc_name = "kk", uniq_name = - ! "_QFdd1Ekk"} + ! CHECK: %[[kk:[0-9]*]] = fir.alloca i32 {bindc_name = "kk", uniq_name = "_QFdd1Ekk"} ! CHECK: br ^bb1 ! CHECK: ^bb1: // pred: ^bb0 ! CHECK: %[[ten:.*]] = arith.constant 10 : i32 @@ -126,8 +151,7 @@ subroutine dd1 kk = 10 ! CHECK-LABEL: func @_QPdd2() - ! CHECK: %[[kk:[0-9]*]] = fir.alloca i32 {bindc_name = "kk", uniq_name = - ! "_QFdd1Ekk"} + ! CHECK: %[[kk:[0-9]*]] = fir.alloca i32 {bindc_name = "kk", uniq_name = "_QFdd1Ekk"} ! CHECK: br ^bb1 ! CHECK: ^bb1: // pred: ^bb0 ! CHECK: %[[twenty:.*]] = arith.constant 20 : i32 @@ -141,8 +165,7 @@ subroutine dd1 ! CHECK-LABEL: func @_QPdd3 ! CHECK: %[[dd3:[0-9]*]] = fir.alloca index {bindc_name = "dd3"} - ! 
CHECK: %[[kk:[0-9]*]] = fir.alloca i32 {bindc_name = "kk", uniq_name = - ! "_QFdd1Ekk"} + ! CHECK: %[[kk:[0-9]*]] = fir.alloca i32 {bindc_name = "kk", uniq_name = "_QFdd1Ekk"} ! CHECK: %[[zero:.*]] = arith.constant 0 : index ! CHECK: fir.store %[[zero:.*]] to %[[dd3]] : !fir.ref ! CHECK: br ^bb1 @@ -156,3 +179,297 @@ subroutine dd1 entry dd3(*) kk = 30 end + +! CHECK-LABEL: func @_QPashapec( +subroutine ashapec(asc) + ! CHECK: %[[asx:[0-9]*]] = fir.alloca !fir.box>>> + ! CHECK: %[[asi:[0-9]*]] = fir.alloca !fir.box>> + ! CHECK: %[[zeroi:[0-9]*]] = fir.zero_bits !fir.heap> + ! CHECK: %[[shapei:[0-9]*]] = fir.shape %c0{{.*}} : (index) -> !fir.shape<1> + ! CHECK: %[[boxi:[0-9]*]] = fir.embox %[[zeroi]](%[[shapei]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + ! CHECK: fir.store %[[boxi]] to %[[asi]] : !fir.ref>>> + ! CHECK: %[[zerox:[0-9]*]] = fir.zero_bits !fir.heap>> + ! CHECK: %[[shapex:[0-9]*]] = fir.shape %c0{{.*}} : (index) -> !fir.shape<1> + ! CHECK: %[[boxx:[0-9].*]] = fir.embox %[[zerox]](%[[shapex]]) : (!fir.heap>>, !fir.shape<1>) -> !fir.box>>> + ! CHECK: fir.store %[[boxx]] to %[[asx]] : !fir.ref>>>> + character asc(:) + integer asi(:) + complex asx(:) + asc = '?' + return +! CHECK-LABEL: func @_QPashapei( +entry ashapei(asi) + ! CHECK: %[[asx:[0-9]*]] = fir.alloca !fir.box>>> + ! CHECK: %[[asc:[0-9]*]] = fir.alloca !fir.box>>> + ! CHECK: %[[zeroc:[0-9]*]] = fir.zero_bits !fir.heap>> + ! CHECK: %[[shapec:[0-9]*]] = fir.shape %c0{{.*}} : (index) -> !fir.shape<1> + ! CHECK: %[[boxc:[0-9]*]] = fir.embox %[[zeroc]](%[[shapec]]) : (!fir.heap>>, !fir.shape<1>) -> !fir.box>>> + ! CHECK: fir.store %[[boxc]] to %[[asc]] : !fir.ref>>>> + ! CHECK: %[[zerox:[0-9]*]] = fir.zero_bits !fir.heap>> + ! CHECK: %[[shapex:[0-9]*]] = fir.shape %c0{{.*}} : (index) -> !fir.shape<1> + ! CHECK: %[[boxx:[0-9].*]] = fir.embox %[[zerox]](%[[shapex]]) : (!fir.heap>>, !fir.shape<1>) -> !fir.box>>> + ! CHECK: fir.store %[[boxx]] to %[[asx]] : !fir.ref>>>> + asi = 3 + return +! 
CHECK-LABEL: func @_QPashapex( +entry ashapex(asx) + ! CHECK: %[[asi:[0-9]*]] = fir.alloca !fir.box>> + ! CHECK: %[[asc:[0-9]*]] = fir.alloca !fir.box>>> + ! CHECK: %[[zeroc:[0-9]*]] = fir.zero_bits !fir.heap>> + ! CHECK: %[[shapec:[0-9]*]] = fir.shape %c0{{.*}} : (index) -> !fir.shape<1> + ! CHECK: %[[boxc:[0-9]*]] = fir.embox %[[zeroc]](%[[shapec]]) : (!fir.heap>>, !fir.shape<1>) -> !fir.box>>> + ! CHECK: fir.store %[[boxc]] to %[[asc]] : !fir.ref>>>> + ! CHECK: %[[zeroi:[0-9]*]] = fir.zero_bits !fir.heap> + ! CHECK: %[[shapei:[0-9]*]] = fir.shape %c0{{.*}} : (index) -> !fir.shape<1> + ! CHECK: %[[boxi:[0-9].*]] = fir.embox %[[zeroi]](%[[shapei]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + ! CHECK: fir.store %[[boxi]] to %[[asi]] : !fir.ref>>> + asx = (2.0,-2.0) +end + +! CHECK-LABEL: func @_QPlevel3a( +subroutine level3a(a, b, m) + ! CHECK: fir.alloca !fir.box>> + ! CHECK: fir.alloca !fir.box>> + ! CHECK: fir.alloca i32 {bindc_name = "n", uniq_name = "_QFlevel3aEn"} + integer :: a(m), b(a(m)), m + integer :: x(n), y(x(n)), n +1 print*, m + print*, a + print*, b + if (m == 3) return +! CHECK-LABEL: func @_QPlevel3b( +entry level3b(x, y, n) + ! CHECK: fir.alloca !fir.box>> + ! CHECK: fir.alloca !fir.box>> + ! CHECK: fir.alloca i32 {bindc_name = "m", uniq_name = "_QFlevel3aEm"} + print*, n + print*, x + print*, y + if (n /= 3) goto 1 +end + +! CHECK-LABEL: @_QPf1 +function f1(n1) result(res1) + ! CHECK: %[[V_0:[0-9]+]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> + ! CHECK: %[[V_1:[0-9]+]] = fir.alloca i32 {bindc_name = "n2", uniq_name = "_QFf1En2"} + ! CHECK: %[[V_2:[0-9]+]] = fir.alloca tuple, !fir.boxchar<1>> + ! CHECK: %[[V_3:[0-9]+]] = fir.coordinate_of %[[V_2]], %c0{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_4:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: fir.store %[[V_4]] to %[[V_3]] : !fir.ref> + ! 
CHECK: %[[V_5:[0-9]+]] = fir.coordinate_of %[[V_2]], %c1{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_6:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: fir.store %[[V_6]] to %[[V_5]] : !fir.ref> + ! CHECK: br ^bb1 + ! CHECK: ^bb1: // pred: ^bb0 + ! CHECK: %[[V_7:[0-9]+]] = fir.address_of(@_QQcl.6120612061) : !fir.ref> + ! CHECK: %[[V_8:[0-9]+]] = arith.cmpi slt, %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_9:[0-9]+]] = arith.select %[[V_8]], %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_10:[0-9]+]] = fir.convert %[[V_9]] : (index) -> i64 + ! CHECK: %[[V_11:[0-9]+]] = arith.muli %c1{{.*}}_i64, %[[V_10]] : i64 + ! CHECK: %[[V_12:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_13:[0-9]+]] = fir.convert %[[V_7]] : (!fir.ref>) -> !fir.ref + ! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[V_12]], %[[V_13]], %[[V_11]], %false{{.*}}) : (!fir.ref, !fir.ref, i64, i1) -> () + ! CHECK: %[[V_14:[0-9]+]] = arith.subi %c5{{.*}}, %c1{{.*}} : index + ! CHECK: %[[V_15:[0-9]+]] = fir.undefined !fir.char<1> + ! CHECK: %[[V_16:[0-9]+]] = fir.insert_value %[[V_15]], %c32{{.*}}_i8, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> + ! CHECK: fir.do_loop %arg3 = %[[V_9]] to %[[V_14]] step %c1{{.*}} { + ! CHECK: %[[V_32:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref>> + ! CHECK: %[[V_33:[0-9]+]] = fir.coordinate_of %[[V_32]], %arg3 : (!fir.ref>>, index) -> !fir.ref> + ! CHECK: fir.store %[[V_16]] to %[[V_33]] : !fir.ref> + ! CHECK: } + ! CHECK: %[[V_17:[0-9]+]] = fir.load %arg2 : !fir.ref + ! CHECK: %[[V_18:[0-9]+]] = arith.cmpi eq, %[[V_17]], %c1{{.*}}_i32_4 : i32 + ! CHECK: cond_br %[[V_18]], ^bb2, ^bb3 + ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb3 + ! CHECK: br ^bb5 + ! CHECK: ^bb3: // pred: ^bb1 + ! CHECK: fir.call @_QFf1Ps2(%[[V_2]]) : (!fir.ref, !fir.boxchar<1>>>) -> () + ! CHECK: %[[V_19:[0-9]+]] = fir.load %[[V_1]] : !fir.ref + ! 
CHECK: %[[V_20:[0-9]+]] = arith.cmpi eq, %[[V_19]], %c2{{.*}}_i32 : i32 + ! CHECK: cond_br %[[V_20]], ^bb2, ^bb4 + ! CHECK: ^bb4: // pred: ^bb3 + ! CHECK: %[[V_21:[0-9]+]] = fir.address_of(@_QQcl.4320432043) : !fir.ref> + ! CHECK: %[[V_22:[0-9]+]] = arith.cmpi slt, %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_23:[0-9]+]] = arith.select %[[V_22]], %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_24:[0-9]+]] = fir.convert %[[V_23]] : (index) -> i64 + ! CHECK: %[[V_25:[0-9]+]] = arith.muli %c1{{.*}}_i64_6, %[[V_24]] : i64 + ! CHECK: %[[V_26:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_27:[0-9]+]] = fir.convert %[[V_21]] : (!fir.ref>) -> !fir.ref + ! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[V_26]], %[[V_27]], %[[V_25]], %false{{.*}}) : (!fir.ref, !fir.ref, i64, i1) -> () + ! CHECK: %[[V_28:[0-9]+]] = arith.subi %c5{{.*}}, %c1{{.*}} : index + ! CHECK: %[[V_29:[0-9]+]] = fir.undefined !fir.char<1> + ! CHECK: %[[V_30:[0-9]+]] = fir.insert_value %[[V_29]], %c32{{.*}}_i8_9, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> + ! CHECK: fir.do_loop %arg3 = %[[V_23]] to %[[V_28]] step %c1{{.*}} { + ! CHECK: %[[V_32]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref>> + ! CHECK: %[[V_33]] = fir.coordinate_of %[[V_32]], %arg3 : (!fir.ref>>, index) -> !fir.ref> + ! CHECK: fir.store %[[V_30]] to %[[V_33]] : !fir.ref> + ! CHECK: } + ! CHECK: fir.call @_QFf1Ps3(%[[V_2]]) : (!fir.ref, !fir.boxchar<1>>>) -> () + ! CHECK: br ^bb5 + ! CHECK: ^bb5: // 2 preds: ^bb2, ^bb4 + ! CHECK: %[[V_31:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: return %[[V_31]] : !fir.boxchar<1> + ! CHECK: } + character(5) res1, f2, f3 + res1 = 'a a a' + if (n1 == 1) return + +! CHECK-LABEL: @_QPf2 +entry f2(n2) + ! CHECK: %[[V_0:[0-9]+]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> + ! CHECK: %[[V_1:[0-9]+]] = fir.alloca i32 {bindc_name = "n1", uniq_name = "_QFf1En1"} + ! 
CHECK: %[[V_2:[0-9]+]] = fir.alloca tuple, !fir.boxchar<1>> + ! CHECK: %[[V_3:[0-9]+]] = fir.coordinate_of %[[V_2]], %c0{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_4:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: fir.store %[[V_4]] to %[[V_3]] : !fir.ref> + ! CHECK: %[[V_5:[0-9]+]] = fir.coordinate_of %[[V_2]], %c1{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_6:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: fir.store %[[V_6]] to %[[V_5]] : !fir.ref> + ! CHECK: br ^bb1 + ! CHECK: ^bb1: // pred: ^bb0 + ! CHECK: fir.call @_QFf1Ps2(%[[V_2]]) : (!fir.ref, !fir.boxchar<1>>>) -> () + ! CHECK: %[[V_7:[0-9]+]] = fir.load %arg2 : !fir.ref + ! CHECK: %[[V_8:[0-9]+]] = arith.cmpi eq, %[[V_7]], %c2{{.*}}_i32 : i32 + ! CHECK: cond_br %[[V_8]], ^bb2, ^bb3 + ! CHECK: ^bb2: // pred: ^bb1 + ! CHECK: br ^bb4 + ! CHECK: ^bb3: // pred: ^bb1 + ! CHECK: %[[V_9:[0-9]+]] = fir.address_of(@_QQcl.4320432043) : !fir.ref> + ! CHECK: %[[V_10:[0-9]+]] = arith.cmpi slt, %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_11:[0-9]+]] = arith.select %[[V_10]], %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_12:[0-9]+]] = fir.convert %[[V_11]] : (index) -> i64 + ! CHECK: %[[V_13:[0-9]+]] = arith.muli %c1{{.*}}_i64, %[[V_12]] : i64 + ! CHECK: %[[V_14:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_15:[0-9]+]] = fir.convert %[[V_9]] : (!fir.ref>) -> !fir.ref + ! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[V_14]], %[[V_15]], %[[V_13]], %false{{.*}}) : (!fir.ref, !fir.ref, i64, i1) -> () + ! CHECK: %[[V_16:[0-9]+]] = arith.subi %c5{{.*}}, %c1{{.*}} : index + ! CHECK: %[[V_17:[0-9]+]] = fir.undefined !fir.char<1> + ! CHECK: %[[V_18:[0-9]+]] = fir.insert_value %[[V_17]], %c32{{.*}}_i8, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> + ! CHECK: fir.do_loop %arg3 = %[[V_11]] to %[[V_16]] step %c1{{.*}} { + ! 
CHECK: %[[V_20:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref>> + ! CHECK: %[[V_21:[0-9]+]] = fir.coordinate_of %[[V_20]], %arg3 : (!fir.ref>>, index) -> !fir.ref> + ! CHECK: fir.store %[[V_18]] to %[[V_21]] : !fir.ref> + ! CHECK: } + ! CHECK: fir.call @_QFf1Ps3(%[[V_2]]) : (!fir.ref, !fir.boxchar<1>>>) -> () + ! CHECK: br ^bb4 + ! CHECK: ^bb4: // 2 preds: ^bb2, ^bb3 + ! CHECK: %[[V_19:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: return %[[V_19]] : !fir.boxchar<1> + ! CHECK: } + call s2 + if (n2 == 2) return + +! CHECK-LABEL: @_QPf3 +entry f3 + ! CHECK: %[[V_0:[0-9]+]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> + ! CHECK: %[[V_1:[0-9]+]] = fir.alloca i32 {bindc_name = "n1", uniq_name = "_QFf1En1"} + ! CHECK: %[[V_2:[0-9]+]] = fir.alloca i32 {bindc_name = "n2", uniq_name = "_QFf1En2"} + ! CHECK: %[[V_3:[0-9]+]] = fir.alloca tuple, !fir.boxchar<1>> + ! CHECK: %[[V_4:[0-9]+]] = fir.coordinate_of %[[V_3]], %c0{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_5:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: fir.store %[[V_5]] to %[[V_4]] : !fir.ref> + ! CHECK: %[[V_6:[0-9]+]] = fir.coordinate_of %[[V_3]], %c1{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_7:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: fir.store %[[V_7]] to %[[V_6]] : !fir.ref> + ! CHECK: br ^bb1 + ! CHECK: ^bb1: // pred: ^bb0 + ! CHECK: %[[V_8:[0-9]+]] = fir.address_of(@_QQcl.4320432043) : !fir.ref> + ! CHECK: %[[V_9:[0-9]+]] = arith.cmpi slt, %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_10:[0-9]+]] = arith.select %[[V_9]], %c5{{.*}}, %c5{{.*}} : index + ! CHECK: %[[V_11:[0-9]+]] = fir.convert %[[V_10]] : (index) -> i64 + ! CHECK: %[[V_12:[0-9]+]] = arith.muli %c1{{.*}}_i64, %[[V_11]] : i64 + ! CHECK: %[[V_13:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref + ! 
CHECK: %[[V_14:[0-9]+]] = fir.convert %[[V_8]] : (!fir.ref>) -> !fir.ref + ! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[V_13]], %[[V_14]], %[[V_12]], %false{{.*}}) : (!fir.ref, !fir.ref, i64, i1) -> () + ! CHECK: %[[V_15:[0-9]+]] = arith.subi %c5{{.*}}, %c1{{.*}} : index + ! CHECK: %[[V_16:[0-9]+]] = fir.undefined !fir.char<1> + ! CHECK: %[[V_17:[0-9]+]] = fir.insert_value %[[V_16]], %c32{{.*}}_i8, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> + ! CHECK: fir.do_loop %arg2 = %[[V_10]] to %[[V_15]] step %c1{{.*}} { + ! CHECK: %[[V_19:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref>) -> !fir.ref>> + ! CHECK: %[[V_20:[0-9]+]] = fir.coordinate_of %[[V_19]], %arg2 : (!fir.ref>>, index) -> !fir.ref> + ! CHECK: fir.store %[[V_17]] to %[[V_20]] : !fir.ref> + ! CHECK: } + ! CHECK: fir.call @_QFf1Ps3(%[[V_3]]) : (!fir.ref, !fir.boxchar<1>>>) -> () + ! CHECK: br ^bb2 + ! CHECK: ^bb2: // pred: ^bb1 + ! CHECK: %[[V_18:[0-9]+]] = fir.emboxchar %[[V_0]], %c5{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> + ! CHECK: return %[[V_18]] : !fir.boxchar<1> + ! CHECK: } + f3 = "C C C" + call s3 +contains + ! CHECK-LABEL: @_QFf1Ps2 + subroutine s2 + ! CHECK: %[[V_0:[0-9]+]] = fir.coordinate_of %arg0, %c0{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_1:[0-9]+]] = fir.load %[[V_0]] : !fir.ref> + ! CHECK: %[[V_2:[0-9]+]]:2 = fir.unboxchar %[[V_1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) + ! CHECK: %[[V_3:[0-9]+]] = fir.address_of(@_QQcl.6220622062) : !fir.ref> + ! CHECK: %[[V_4:[0-9]+]] = arith.cmpi slt, %[[V_2]]#1, %c5{{.*}} : index + ! CHECK: %[[V_5:[0-9]+]] = arith.select %[[V_4]], %[[V_2]]#1, %c5{{.*}} : index + ! CHECK: %[[V_6:[0-9]+]] = fir.convert %[[V_5]] : (index) -> i64 + ! CHECK: %[[V_7:[0-9]+]] = arith.muli %c1{{.*}}_i64, %[[V_6]] : i64 + ! CHECK: %[[V_8:[0-9]+]] = fir.convert %[[V_2]]#0 : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_9:[0-9]+]] = fir.convert %[[V_3]] : (!fir.ref>) -> !fir.ref + ! 
CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[V_8]], %[[V_9]], %[[V_7]], %false{{.*}}) : (!fir.ref, !fir.ref, i64, i1) -> () + ! CHECK: %[[V_10:[0-9]+]] = arith.subi %[[V_2]]#1, %c1{{.*}} : index + ! CHECK: %[[V_11:[0-9]+]] = fir.undefined !fir.char<1> + ! CHECK: %[[V_12:[0-9]+]] = fir.insert_value %[[V_11]], %c32{{.*}}_i8, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> + ! CHECK: fir.do_loop %arg1 = %[[V_5]] to %[[V_10]] step %c1{{.*}} { + ! CHECK: %[[V_13:[0-9]+]] = fir.convert %[[V_2]]#0 : (!fir.ref>) -> !fir.ref>> + ! CHECK: %[[V_14:[0-9]+]] = fir.coordinate_of %[[V_13]], %arg1 : (!fir.ref>>, index) -> !fir.ref> + ! CHECK: fir.store %[[V_12]] to %[[V_14]] : !fir.ref> + ! CHECK: } + ! CHECK: return + ! CHECK: } + f2 = 'b b b' + end + + ! CHECK-LABEL: @_QFf1Ps3 + subroutine s3 + ! CHECK: %[[V_0:[0-9]+]] = fir.coordinate_of %arg0, %c1{{.*}}_i32 : (!fir.ref, !fir.boxchar<1>>>, i32) -> !fir.ref> + ! CHECK: %[[V_1:[0-9]+]] = fir.load %[[V_0]] : !fir.ref> + ! CHECK: %[[V_2:[0-9]+]]:2 = fir.unboxchar %[[V_1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) + ! CHECK: %[[V_3:[0-9]+]] = fir.address_of(@_QQcl.6320632063) : !fir.ref> + ! CHECK: %[[V_4:[0-9]+]] = arith.cmpi slt, %[[V_2]]#1, %c5{{.*}} : index + ! CHECK: %[[V_5:[0-9]+]] = arith.select %[[V_4]], %[[V_2]]#1, %c5{{.*}} : index + ! CHECK: %[[V_6:[0-9]+]] = fir.convert %[[V_5]] : (index) -> i64 + ! CHECK: %[[V_7:[0-9]+]] = arith.muli %c1{{.*}}_i64, %[[V_6]] : i64 + ! CHECK: %[[V_8:[0-9]+]] = fir.convert %[[V_2]]#0 : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_9:[0-9]+]] = fir.convert %[[V_3]] : (!fir.ref>) -> !fir.ref + ! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[V_8]], %[[V_9]], %[[V_7]], %false{{.*}}) : (!fir.ref, !fir.ref, i64, i1) -> () + ! CHECK: %[[V_10:[0-9]+]] = arith.subi %[[V_2]]#1, %c1{{.*}} : index + ! CHECK: %[[V_11:[0-9]+]] = fir.undefined !fir.char<1> + ! CHECK: %[[V_12:[0-9]+]] = fir.insert_value %[[V_11]], %c32{{.*}}_i8, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> + ! 
CHECK: fir.do_loop %arg1 = %[[V_5]] to %[[V_10]] step %c1{{.*}} { + ! CHECK: %[[V_13:[0-9]+]] = fir.convert %[[V_2]]#0 : (!fir.ref>) -> !fir.ref>> + ! CHECK: %[[V_14:[0-9]+]] = fir.coordinate_of %[[V_13]], %arg1 : (!fir.ref>>, index) -> !fir.ref> + ! CHECK: fir.store %[[V_12]] to %[[V_14]] : !fir.ref> + ! CHECK: } + ! CHECK: return + ! CHECK: } + f3 = 'c c c' + end +end + +! CHECK-LABEL: func @_QPassumed_size() { +subroutine assumed_size() + real :: x(*) +! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>> +! CHECK: %[[VAL_1:.*]] = fir.zero_bits !fir.heap> +! CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_1]](%[[VAL_3]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +! CHECK: fir.store %[[VAL_4]] to %[[VAL_0]] : !fir.ref>>> +! CHECK: br ^bb1 +! CHECK: ^bb1: +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func @_QPentry_with_assumed_size( + entry entry_with_assumed_size(x) +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "x"}) { +! CHECK: br ^bb1 +! CHECK: ^bb1: +! CHECK: return +! CHECK: } +end subroutine From 03095bd97b81b40d74762713b6ed72adb59df658 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 18 May 2022 17:48:34 -0700 Subject: [PATCH 383/908] [flang] Fix crash in semantics after PDT instantiation The code in semantics that reinitializes symbol table pointers in the parse tree of a parameterized derived type prior to a new instantiation of the type was processing the symbols of the derived type instantiation scope in arbitrary address order, which could fail if a reference to a type parameter inherited from an ancestor type was processed prior to the parent component sequence. Fix by instantiating components of PDT instantiations in declaration order. 
Differential Revision: https://reviews.llvm.org/D126147 --- flang/lib/Semantics/type.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp index f6888a7198e243..98ad4fd6d0361b 100644 --- a/flang/lib/Semantics/type.cpp +++ b/flang/lib/Semantics/type.cpp @@ -373,8 +373,11 @@ void DerivedTypeSpec::Instantiate(Scope &containingScope) { } void InstantiateHelper::InstantiateComponents(const Scope &fromScope) { - for (const auto &pair : fromScope) { - InstantiateComponent(*pair.second); + // Instantiate symbols in declaration order; this ensures that + // parent components and type parameters of ancestor types exist + // by the time that they're needed. + for (SymbolRef ref : fromScope.GetSymbols()) { + InstantiateComponent(*ref); } ComputeOffsets(context(), scope_); } @@ -396,7 +399,7 @@ class ComponentInitResetHelper { void Post(const parser::Name &name) { if (name.symbol && name.symbol->has()) { - name.symbol = scope_.FindSymbol(name.source); + name.symbol = scope_.FindComponent(name.source); } } From 67be40df6eefe375a0c97341940922f0ef7775a4 Mon Sep 17 00:00:00 2001 From: Sotiris Apostolakis Date: Mon, 23 May 2022 23:04:20 -0400 Subject: [PATCH 384/908] Recommit "[SelectOpti][5/5] Optimize select-to-branch transformation" Use container::size_type directly to avoid type mismatch causing build failures in Windows. Original commit message: This patch optimizes the transformation of selects to a branch when the heuristics deemed it profitable. It aggressively sinks eligible instructions to the newly created true/false blocks to prevent their execution on the common path and interleaves dependence slices to maximize ILP. 
Depends on D120232 Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D120233 --- llvm/lib/CodeGen/SelectOptimize.cpp | 154 +++++++++++++++++++---- llvm/test/CodeGen/X86/select-optimize.ll | 44 ++++--- 2 files changed, 155 insertions(+), 43 deletions(-) diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 2194da77e0b4c0..9c786b9a5588e0 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -179,8 +179,8 @@ class SelectOptimize : public FunctionPass { // For a given source instruction, collect its backwards dependence slice // consisting of instructions exclusively computed for producing the operands // of the source instruction. - void getExclBackwardsSlice(Instruction *I, - SmallVector &Slice); + void getExclBackwardsSlice(Instruction *I, std::stack &Slice, + bool ForSinking = false); // Returns true if the condition of the select is highly predictable. bool isSelectHighlyPredictable(const SelectInst *SI); @@ -329,6 +329,10 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue, void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { for (SelectGroup &ASI : ProfSIGroups) { + // The code transformation here is a modified version of the sinking + // transformation in CodeGenPrepare::optimizeSelectInst with a more + // aggressive strategy of which instructions to sink. + // // TODO: eliminate the redundancy of logic transforming selects to branches // by removing CodeGenPrepare::optimizeSelectInst and optimizing here // selects for all cases (with and without profile information). 
@@ -342,13 +346,73 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // start: // %cmp = cmp uge i32 %a, %b // %cmp.frozen = freeze %cmp - // br i1 %cmp.frozen, label %select.end, label %select.false + // br i1 %cmp.frozen, label %select.true, label %select.false + // select.true: + // br label %select.end // select.false: // br label %select.end // select.end: - // %sel = phi i32 [ %c, %start ], [ %d, %select.false ] + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] // // %cmp should be frozen, otherwise it may introduce undefined behavior. + // In addition, we may sink instructions that produce %c or %d into the + // destination(s) of the new branch. + // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + + // Find all the instructions that can be soundly sunk to the true/false + // blocks. These are instructions that are computed solely for producing the + // operands of the select instructions in the group and can be sunk without + // breaking the semantics of the LLVM IR (e.g., cannot sink instructions + // with side effects). + SmallVector, 2> TrueSlices, FalseSlices; + typedef std::stack::size_type StackSizeType; + StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0; + for (SelectInst *SI : ASI) { + // For each select, compute the sinkable dependence chains of the true and + // false operands. 
+ if (auto *TI = dyn_cast(SI->getTrueValue())) { + std::stack TrueSlice; + getExclBackwardsSlice(TI, TrueSlice, true); + maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); + TrueSlices.push_back(TrueSlice); + } + if (auto *FI = dyn_cast(SI->getFalseValue())) { + std::stack FalseSlice; + getExclBackwardsSlice(FI, FalseSlice, true); + maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); + FalseSlices.push_back(FalseSlice); + } + } + // In the case of multiple select instructions in the same group, the order + // of non-dependent instructions (instructions of different dependence + // slices) in the true/false blocks appears to affect performance. + // Interleaving the slices seems to experimentally be the optimal approach. + // This interleaving scheduling allows for more ILP (with a natural downside + // of increasing a bit register pressure) compared to a simple ordering of + // one whole chain after another. One would expect that this ordering would + // not matter since the scheduling in the backend of the compiler would + // take care of it, but apparently the scheduler fails to deliver optimal + // ILP with a naive ordering here. + SmallVector TrueSlicesInterleaved, FalseSlicesInterleaved; + for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) { + for (auto &S : TrueSlices) { + if (!S.empty()) { + TrueSlicesInterleaved.push_back(S.top()); + S.pop(); + } + } + } + for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) { + for (auto &S : FalseSlices) { + if (!S.empty()) { + FalseSlicesInterleaved.push_back(S.top()); + S.pop(); + } + } + } // We split the block containing the select(s) into two blocks. SelectInst *SI = ASI.front(); @@ -374,24 +438,55 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { } // These are the new basic blocks for the conditional branch. - // For now, no instruction sinking to the true/false blocks. - // Thus both True and False blocks will be empty. 
+ // At least one will become an actual new basic block. BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; - - // Use the 'false' side for a new input value to the PHI. - FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", - EndBlock->getParent(), EndBlock); - auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); - FalseBranch->setDebugLoc(SI->getDebugLoc()); - - // For the 'true' side the path originates from the start block from the - // point view of the new PHI. - TrueBlock = StartBlock; + BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; + if (!TrueSlicesInterleaved.empty()) { + TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + TrueBranch->setDebugLoc(LastSI->getDebugLoc()); + for (Instruction *TrueInst : TrueSlicesInterleaved) + TrueInst->moveBefore(TrueBranch); + } + if (!FalseSlicesInterleaved.empty()) { + FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(LastSI->getDebugLoc()); + for (Instruction *FalseInst : FalseSlicesInterleaved) + FalseInst->moveBefore(FalseBranch); + } + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); + } // Insert the real conditional branch based on the original condition. 
+ // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. BasicBlock *TT, *FT; - TT = EndBlock; - FT = FalseBlock; + if (TrueBlock == nullptr) { + TT = EndBlock; + FT = FalseBlock; + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + TT = TrueBlock; + FT = EndBlock; + FalseBlock = StartBlock; + } else { + TT = TrueBlock; + FT = FalseBlock; + } IRBuilder<> IB(SI); auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); @@ -586,12 +681,13 @@ bool SelectOptimize::hasExpensiveColdOperand( HotWeight = TrueWeight; } if (ColdI) { - SmallVector ColdSlice; + std::stack ColdSlice; getExclBackwardsSlice(ColdI, ColdSlice); InstructionCost SliceCost = 0; - for (auto *ColdII : ColdSlice) { - SliceCost += - TTI->getInstructionCost(ColdII, TargetTransformInfo::TCK_Latency); + while (!ColdSlice.empty()) { + SliceCost += TTI->getInstructionCost(ColdSlice.top(), + TargetTransformInfo::TCK_Latency); + ColdSlice.pop(); } // The colder the cold value operand of the select is the more expensive // the cmov becomes for computing the cold value operand every time. Thus, @@ -613,8 +709,9 @@ bool SelectOptimize::hasExpensiveColdOperand( // (sufficiently-accurate in practice), we populate this set with the // instructions of the backwards dependence slice that only have one-use and // form an one-use chain that leads to the source instruction. -void SelectOptimize::getExclBackwardsSlice( - Instruction *I, SmallVector &Slice) { +void SelectOptimize::getExclBackwardsSlice(Instruction *I, + std::stack &Slice, + bool ForSinking) { SmallPtrSet Visited; std::queue Worklist; Worklist.push(I); @@ -630,13 +727,20 @@ void SelectOptimize::getExclBackwardsSlice( if (!II->hasOneUse()) continue; + // Cannot soundly sink instructions with side-effects. 
+ // Terminator or phi instructions cannot be sunk. + // Avoid sinking other select instructions (should be handled separately). + if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || + isa(II) || isa(II))) + continue; + // Avoid considering instructions with less frequency than the source // instruction (i.e., avoid colder code regions of the dependence slice). if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) continue; // Eligible one-use instruction added to the dependence slice. - Slice.push_back(II); + Slice.push(II); // Explore all the operands of the current instruction to expand the slice. for (unsigned k = 0; k < II->getNumOperands(); ++k) diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index ee267c80e7a5ec..f2dff8cf8c9f15 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -105,20 +105,28 @@ define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 { ; If select group predictable, turn it into a branch. 
define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { ; CHECK-LABEL: @weighted_select_group( +; CHECK-NEXT: [[A1:%.*]] = add i32 [[A:%.*]], 1 ; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] -; CHECK: select.false: +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[C1:%.*]] = add i32 [[C:%.*]], 1 +; CHECK-NEXT: br label [[SELECT_END:%.*]] +; CHECK: select.false.sink: +; CHECK-NEXT: [[B1:%.*]] = add i32 [[B:%.*]], 1 ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] -; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ] +; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ] ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[ADD]] ; - %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !15 + %a1 = add i32 %a, 1 + %b1 = add i32 %b, 1 + %c1 = add i32 %c, 1 + %sel1 = select i1 %cmp, i32 %a1, i32 %b1, !prof !15 call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23) - %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !15 + %sel2 = select i1 %cmp, i32 %c1, i32 %a1, !prof !15 %add = add i32 %sel1, %sel2 ret i32 %add } @@ -151,13 +159,13 @@ define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; sink load define i32 @expensive_val_operand1(i32* nocapture %a, i32 %y, i1 %cmp) { ; CHECK-LABEL: 
@expensive_val_operand1( -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] -; CHECK: select.false: +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; %load = load i32, i32* %a, align 8 @@ -181,14 +189,14 @@ define i32 @expensive_val_operand2(i32* nocapture %a, i32 %x, i1 %cmp) { ; into a branch with sinked dependence slice. define i32 @expensive_val_operand3(i32* nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand3( +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK: select.true.sink: ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] -; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; %load = load i32, i32* %a, align 8 @@ -242,14 +250,14 @@ define double @cmov_on_critical_path(i32 %n, double %x, double* nocapture %a) { ; 
CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[R:%.*]] = load double, double* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] ; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]] ; CHECK-NEXT: [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]] -; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]], !prof [[PROF27:![0-9]+]] -; CHECK: select.false: +; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: -; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[FOR_BODY]] ], [ [[X1]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] From 676eaa2ca967ca6ad4a84d31d6f0ebabdcf3e44b Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Tue, 24 May 2022 11:11:31 -0700 Subject: [PATCH 385/908] [compiler-rt][scudo] Add missing preprocessor token This should fix build errors seen on bots like https://lab.llvm.org/buildbot/#/builders/57/builds/18263. 
--- compiler-rt/lib/scudo/standalone/memtag.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index 13eb4bb8d22775..d65e76ad4ffcdf 100644 --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -23,8 +23,8 @@ namespace scudo { // support for memory tagging if the operating system enables TBI. // HWASan uses the top byte for its own purpose and Scudo should not touch it. #if (__clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__) && \ - SCUDO_LINUX && \ - !defined(SCUDO_DISABLE_TBI) !__has_feature(hwaddress_sanitizer)) || \ + SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) && \ + !__has_feature(hwaddress_sanitizer)) || \ defined(SCUDO_FUZZ) inline constexpr bool archSupportsMemoryTagging() { return true; } From 6edbdf80cac119f8f30d2ae6fa2972d9e778510b Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Tue, 24 May 2022 11:29:44 -0700 Subject: [PATCH 386/908] Revert "[compiler-rt][scudo] Add missing preprocessor token" and "[compiler-rt][scudo] Simplify TBI checks" This reverts commit 676eaa2ca967ca6ad4a84d31d6f0ebabdcf3e44b and f6038cdca03115da22b9e6ada5c25de4df5f42d2 since builders are still broken. --- compiler-rt/lib/scudo/standalone/memtag.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index d65e76ad4ffcdf..7f14a30fee12b3 100644 --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -18,16 +18,19 @@ namespace scudo { +#if (__clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__)) || \ + defined(SCUDO_FUZZ) + // We assume that Top-Byte Ignore is enabled if the architecture supports memory // tagging. Not all operating systems enable TBI, so we only claim architectural // support for memory tagging if the operating system enables TBI. 
// HWASan uses the top byte for its own purpose and Scudo should not touch it. -#if (__clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__) && \ - SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) && \ - !__has_feature(hwaddress_sanitizer)) || \ - defined(SCUDO_FUZZ) - +#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) && \ + !__has_feature(hwaddress_sanitizer) inline constexpr bool archSupportsMemoryTagging() { return true; } +#else +inline constexpr bool archSupportsMemoryTagging() { return false; } +#endif inline constexpr uptr archMemoryTagGranuleSize() { return 16; } From 05527b68a0be19fe49b884f3b22195701dc4b46f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 24 May 2022 15:07:24 -0400 Subject: [PATCH 387/908] [InstCombine] fold more shuffles with FP<->Int cast operands shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask) This extends the transform added with 0353c2c996c5. If the shuffle reduces vector length, the transform reduces the width of the cast, so that should be a win for most codegen (if not, it can be inverted). --- .../Transforms/InstCombine/InstCombineVectorOps.cpp | 5 +++-- llvm/test/Transforms/InstCombine/vec_shuffle.ll | 13 ++++--------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index c5bdfa43f74913..7d616278e104a3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2302,8 +2302,9 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf, VectorType *ShufOpTy = cast(Shuf.getOperand(0)->getType()); VectorType *CastSrcTy = cast(Cast0->getSrcTy()); - // TODO: Allow length-changing shuffles? - if (ShufTy != ShufOpTy) + // TODO: Allow length-increasing shuffles? 
+ if (ShufTy->getElementCount().getKnownMinValue() > + ShufOpTy->getElementCount().getKnownMinValue()) return nullptr; // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)? diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 20dab18266d0ed..0084e77787a91d 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -2075,8 +2075,6 @@ define <3 x i16> @fptoui_shuf_different_source_types(<3 x float> %x, <3 x half> ret <3 x i16> %r } -; negative test - must have same size elements - define <4 x i32> @fptoui_shuf_widen_elts(<4 x half> %x, <4 x half> %y) { ; CHECK-LABEL: @fptoui_shuf_widen_elts( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[X:%.*]], <4 x half> [[Y:%.*]], <4 x i32> @@ -2089,7 +2087,7 @@ define <4 x i32> @fptoui_shuf_widen_elts(<4 x half> %x, <4 x half> %y) { ret <4 x i32> %r } -; negative test - must have same size elements +; negative test - must have same or smaller size source elements define <4 x float> @sitofp_shuf_narrow_elts(<4 x i64> %x, <4 x i64> %y) { ; CHECK-LABEL: @sitofp_shuf_narrow_elts( @@ -2172,7 +2170,7 @@ define <4 x i32> @fptoi_shuf(<4 x float> %x, <4 x float> %y) { ret <4 x i32> %r } -; negative test - length-changing shuffle +; negative test - length-increasing shuffle define <4 x float> @sitofp_shuf_widen(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @sitofp_shuf_widen( @@ -2187,13 +2185,10 @@ define <4 x float> @sitofp_shuf_widen(<2 x i32> %x, <2 x i32> %y) { ret <4 x float> %r } -; negative test - length-changing shuffle - define <2 x float> @uitofp_shuf_narrow(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @uitofp_shuf_narrow( -; CHECK-NEXT: [[NX:%.*]] = uitofp <4 x i32> [[X:%.*]] to <4 x float> -; CHECK-NEXT: [[NY:%.*]] = uitofp <4 x i32> [[Y:%.*]] to <4 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[R]] ; %nx = uitofp <4 x i32> %x to <4 x float> From 9df0568b073307dd9390e7d670873e40b65c9f95 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Tue, 24 May 2022 09:16:30 -0700 Subject: [PATCH 388/908] [SLP] Fix crash caused by reorderBottomToTop(). The crash is caused by incorrect order set by reorderBottomToTop(), which happens when it is reordering a TreeEntry which has a user that has already been reordered earlier. Please see the detailed description in the lit test. Differential Revision: https://reviews.llvm.org/D126099 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 10 +- .../X86/reorder_with_reordered_users.ll | 133 ++++++++++++++++++ 2 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a8de0bffe8d527..d74153c4c6368c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3858,7 +3858,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { while (!OrderedEntries.empty()) { // 1. Filter out only reordered nodes. // 2. If the entry has multiple uses - skip it and jump to the next node. - MapVector>> Users; + DenseMap>> Users; SmallVector Filtered; for (TreeEntry *TE : OrderedEntries) { if (!(TE->State == TreeEntry::Vectorize || @@ -3886,7 +3886,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { // Erase filtered entries. 
for_each(Filtered, [&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); }); - for (auto &Data : Users) { + SmallVector< + std::pair>>> + UsersVec(Users.begin(), Users.end()); + sort(UsersVec, [](const auto &Data1, const auto &Data2) { + return Data1.first->Idx > Data2.first->Idx; + }); + for (auto &Data : UsersVec) { // Check that operands are used only in the User node. SmallVector GatherOps; if (!canReorderOperands(Data.first, Data.second, NonVectorized, diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll new file mode 100644 index 00000000000000..822a300be721e8 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll @@ -0,0 +1,133 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-grtev4-linux-gnu -S | FileCheck %s + +; This checks that reorderBottomToTop() can handle reordering of a TreeEntry +; which has a user TreeEntry that has already been reordered. +; Here is when the crash occurs: +; +; (N4)OrderB +; | +; (N1)OrderA (N2)OrderA (N3)NoOrder +; \ | / +; (Phi)NoOrder +; +; 1. Phi is visited along with its operands (N1,N2,N3). BestOrder is "OrderA". +; 2. Phi along with all its operands (N1,N2,N3) are reordered. The result is: +; +; (N4)OrderB +; | +; (N1)NoOrder (N2)NoOrder (N3)OrderA +; \ | / +; (Phi)OrderA +; +; 3. N3 is now visited along with its operand N4. BestOrder is "OrderB". +; 4. N3 and N4 are reordered. The result is this: +; +; (N4)NoOrder +; | +; (N1)NoOrder (N2)NoOrder (N3)OrderB +; \ | / +; (Phi)OrderA +; +; At this point there is a discrepancy between Phi's Operand 2 which are +; reordered based on OrderA and N3's OrderB. This results in a crash in +; vectorizeTree() on its way from N3 back to the Phi. The reason is that +; N3->isSame(Phi's operand 2) returns false and vectorizeTree() skips N3. 
+; +; This patch changes the order in which the nodes are visited to bottom-up, +; which fixes the issue. +; +; NOTE: The crash shows up when reorderTopToBottom() does not reorder the tree, +; so to simulate this we add external store users. Alternatively one can +; comment out reorderTopToBottom() and remove the stores. + + +define void @reorder_crash(float* %ptr) { +; CHECK-LABEL: @reorder_crash( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 +; CHECK-NEXT: br i1 undef, label [[BB0:%.*]], label [[BB12:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb12: +; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ [[TMP1]], [[BB0]] ], [ [[TMP4]], [[BB1]] ], [ [[SHUFFLE]], [[BB2]] ] +; CHECK-NEXT: ret void +; +entry: + %gep0 = getelementptr inbounds float, float* %ptr, i64 0 + %gep1 = getelementptr inbounds float, float* %ptr, i64 1 + 
%gep2 = getelementptr inbounds float, float* %ptr, i64 2 + %gep3 = getelementptr inbounds float, float* %ptr, i64 3 + br i1 undef, label %bb0, label %bb12 + +bb0: + ; Used by phi in this order: 1, 0, 2, 3 + %ld00 = load float, float* %gep0 + %ld01 = load float, float* %gep1 + %ld02 = load float, float* %gep2 + %ld03 = load float, float* %gep3 + + ; External store users in natural order 0, 1, 2, 3 + store float %ld00, float *%gep0 + store float %ld01, float *%gep1 + store float %ld02, float *%gep2 + store float %ld03, float *%gep3 + br label %bb3 + +bb12: + br i1 undef, label %bb1, label %bb2 + +bb1: + ; Used by phi in this order: 1, 0, 2, 3 + %ld10 = load float, float* %gep0 + %ld11 = load float, float* %gep1 + %ld12 = load float, float* %gep2 + %ld13 = load float, float* %gep3 + + ; External store users in natural order 0, 1, 2, 3 + store float %ld10, float *%gep0 + store float %ld11, float *%gep1 + store float %ld12, float *%gep2 + store float %ld13, float *%gep3 + + br label %bb3 + +bb2: + ; Used by fadd in this order: 2, 3, 0, 1 + %ld20 = load float, float* %gep0 + %ld21 = load float, float* %gep1 + %ld22 = load float, float* %gep2 + %ld23 = load float, float* %gep3 + + ; Used by phi in this order: 0, 1, 2, 3 + %add20 = fadd float %ld22, 0.0 + %add21 = fadd float %ld23, 0.0 + %add22 = fadd float %ld20, 0.0 + %add23 = fadd float %ld21, 0.0 + br label %bb3 + +bb3: + %phi0 = phi float [ %ld01, %bb0 ], [ %ld11, %bb1 ], [ %add20, %bb2 ] + %phi1 = phi float [ %ld00, %bb0 ], [ %ld10, %bb1 ], [ %add21, %bb2 ] + %phi2 = phi float [ %ld02, %bb0 ], [ %ld12, %bb1 ], [ %add22, %bb2 ] + %phi3 = phi float [ %ld03, %bb0 ], [ %ld13, %bb1 ], [ %add23, %bb2 ] + ret void +} From c428620913ba318a7d6d2a2164b56ff116f90c56 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 18 May 2022 15:44:01 -0700 Subject: [PATCH 389/908] [flang] Catch calls to assumed-length character functions Semantics was allowing calls to CHARACTER(*) functions, which are odd things -- they can be 
declared, and passed around, but can never actually be called as such. They must be redeclared with an explicit length that ends up being passed as a hidden argument. So check for these calls and diagnose them, add tests, and clean up some existing tests that were in error and now get caught. Possible TODO for lowering: there were some test cases that used bad calls to assumed-length CHARACTER*(*) functions and validated their implementations. I've removed some, and adjusted another, but the code that somehow implemented these calls may need to be removed and replaced with an assert about bad semantics. Differential Revision: https://reviews.llvm.org/D126148 --- flang/lib/Semantics/expression.cpp | 19 ++-- flang/test/Evaluate/rewrite01.f90 | 14 ++- .../test/Lower/dummy-procedure-character.f90 | 43 -------- flang/test/Lower/dummy-procedure-in-entry.f90 | 40 -------- flang/test/Lower/host-associated.f90 | 99 +++++++++---------- flang/test/Semantics/call01.f90 | 27 +++++ 6 files changed, 90 insertions(+), 152 deletions(-) diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index da45ef4f163dcf..c1417b65372016 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2266,6 +2266,7 @@ void ExpressionAnalyzer::CheckForBadRecursion( msg = Say("NON_RECURSIVE procedure '%s' cannot call itself"_err_en_US, callSite); } else if (IsAssumedLengthCharacter(proc) && IsExternal(proc)) { + // TODO: Also catch assumed PDT type parameters msg = Say( // 15.6.2.1(3) "Assumed-length CHARACTER(*) function '%s' cannot call itself"_err_en_US, callSite); @@ -2516,17 +2517,19 @@ std::optional ExpressionAnalyzer::CheckCall( DEREF(proc.GetSymbol()).name()); } // Checks for ASSOCIATED() are done in intrinsic table processing - bool procIsAssociated{false}; - if (const SpecificIntrinsic * - specificIntrinsic{proc.GetSpecificIntrinsic()}) { - if (specificIntrinsic->name == "associated") { - procIsAssociated = true; - } - } + 
const SpecificIntrinsic *specificIntrinsic{proc.GetSpecificIntrinsic()}; + bool procIsAssociated{ + specificIntrinsic && specificIntrinsic->name == "associated"}; if (!procIsAssociated) { + if (chars->functionResult && + chars->functionResult->IsAssumedLengthCharacter() && + !specificIntrinsic) { + Say(callSite, + "Assumed-length character function must be defined with a length to be called"_err_en_US); + } semantics::CheckArguments(*chars, arguments, GetFoldingContext(), context_.FindScope(callSite), treatExternalAsImplicit, - proc.GetSpecificIntrinsic()); + specificIntrinsic); const Symbol *procSymbol{proc.GetSymbol()}; if (procSymbol && !IsPureProcedure(*procSymbol)) { if (const semantics::Scope * diff --git a/flang/test/Evaluate/rewrite01.f90 b/flang/test/Evaluate/rewrite01.f90 index 94c43d95ab27a6..b95c1b2de85489 100644 --- a/flang/test/Evaluate/rewrite01.f90 +++ b/flang/test/Evaluate/rewrite01.f90 @@ -105,11 +105,9 @@ subroutine lbound_test(x, n, m) !CHECK: len_test subroutine len_test(a,b, c, d, e, n, m) character(*), intent(in) :: a - character(*) :: b + character(10) :: b external b character(10), intent(in) :: c - character(10) :: d - external d integer, intent(in) :: n, m character(n), intent(in) :: e @@ -117,9 +115,9 @@ subroutine len_test(a,b, c, d, e, n, m) print *, len(a, kind=8) !CHECK: PRINT *, 5_4 print *, len(a(1:5)) - !CHECK: PRINT *, len(b(a)) + !CHECK: PRINT *, 10_4 print *, len(b(a)) - !CHECK: PRINT *, len(b(a)//a) + !CHECK: PRINT *, int(10_8+int(a%len,kind=8),kind=4) print *, len(b(a) // a) !CHECK: PRINT *, 10_4 print *, len(c) @@ -128,14 +126,14 @@ subroutine len_test(a,b, c, d, e, n, m) !CHECK: PRINT *, 5_4 print *, len(c(1:5)) !CHECK: PRINT *, 10_4 - print *, len(d(c)) + print *, len(b(c)) !CHECK: PRINT *, 20_4 - print *, len(d(c) // c) + print *, len(b(c) // c) !CHECK: PRINT *, 0_4 print *, len(a(10:4)) !CHECK: PRINT *, int(max(0_8,int(m,kind=8)-int(n,kind=8)+1_8),kind=4) print *, len(a(n:m)) - !CHECK: PRINT *, 
len(b(a(int(n,kind=8):int(m,kind=8)))) + !CHECK: PRINT *, 10_4 print *, len(b(a(n:m))) !CHECK: PRINT *, int(max(0_8,max(0_8,int(n,kind=8))-4_8+1_8),kind=4) print *, len(e(4:)) diff --git a/flang/test/Lower/dummy-procedure-character.f90 b/flang/test/Lower/dummy-procedure-character.f90 index 8eabf6df418bca..e2874cb018b81c 100644 --- a/flang/test/Lower/dummy-procedure-character.f90 +++ b/flang/test/Lower/dummy-procedure-character.f90 @@ -143,21 +143,6 @@ subroutine override_incoming_length(bar7) ! Test calling character dummy function ! ----------------------------------------------------------------------------- -! CHECK-LABEL: func @_QPcall_assumed_length -! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) { -subroutine call_assumed_length(bar8) - character(*) :: bar8 - external :: bar8 -! CHECK: %[[VAL_3:.*]] = fir.extract_value %[[VAL_0]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> -! CHECK: %[[WAL_2:.*]] = fir.box_addr %[[VAL_3]] : (!fir.boxproc<() -> ()>) -> (() -> ()) -! CHECK: %[[VAL_4:.*]] = fir.extract_value %[[VAL_0]], [1 : index] : (tuple ()>, i64>) -> i64 -! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_4]] : i64) {bindc_name = ".result"} -! CHECK: %[[VAL_7:.*]] = fir.convert %[[WAL_2]] : (() -> ()) -> ((!fir.ref>, index, !fir.ref) -> !fir.boxchar<1>) -! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i64) -> index -! CHECK: fir.call %[[VAL_7]](%[[VAL_6]], %[[VAL_8]], %{{.*}}) : (!fir.ref>, index, !fir.ref) -> !fir.boxchar<1> - call test(bar8(42)) -end subroutine - ! CHECK-LABEL: func @_QPcall_explicit_length ! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) { subroutine call_explicit_length(bar9) @@ -196,34 +181,6 @@ function bar10(n) call test(bar10(42_8)) end subroutine - -! CHECK-LABEL: func @_QPhost( -! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> -subroutine host(f) - character*(*) :: f - external :: f - ! 
CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1:.*]], %{{.*}} : (!fir.ref ()>, i64>>>, i32) -> !fir.ref ()>, i64>> - ! CHECK: fir.store %[[VAL_0]] to %[[VAL_3]] : !fir.ref ()>, i64>> - ! CHECK: fir.call @_QFhostPintern(%[[VAL_1]]) - call intern() -contains -! CHECK-LABEL: func @_QFhostPintern( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ()>, i64>>> {fir.host_assoc}) - subroutine intern() -! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 -! CHECK: %[[VAL_2:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_1]] : (!fir.ref ()>, i64>>>, i32) -> !fir.ref ()>, i64>> -! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]] : !fir.ref ()>, i64>> -! CHECK: %[[VAL_4:.*]] = fir.extract_value %[[VAL_3]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> -! CHECK: %[[WAL_1:.*]] = fir.box_addr %[[VAL_4]] : (!fir.boxproc<() -> ()>) -> (() -> ()) -! CHECK: %[[VAL_5:.*]] = fir.extract_value %[[VAL_3]], [1 : index] : (tuple ()>, i64>) -> i64 -! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_5]] : i64) {bindc_name = ".result"} -! CHECK: %[[VAL_8:.*]] = fir.convert %[[WAL_1]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) -! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_5]] : (i64) -> index -! CHECK: fir.call %[[VAL_8]](%[[VAL_7]], %[[VAL_9]]) : (!fir.ref>, index) -> !fir.boxchar<1> - call test(f()) - end subroutine -end subroutine - ! CHECK-LABEL: func @_QPhost2( ! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) subroutine host2(f) diff --git a/flang/test/Lower/dummy-procedure-in-entry.f90 b/flang/test/Lower/dummy-procedure-in-entry.f90 index 7e9cacc5228632..07051d56e06b77 100644 --- a/flang/test/Lower/dummy-procedure-in-entry.f90 +++ b/flang/test/Lower/dummy-procedure-in-entry.f90 @@ -48,43 +48,3 @@ subroutine subroutine_dummy() ! CHECK: ^bb1: ! CHECK: %[[VAL_1:.*]] = fir.box_addr %[[VAL_0]] : (!fir.boxproc<() -> ()>) -> (() -> ()) ! 
CHECK: fir.call %[[VAL_1]]() : () -> () - -subroutine character_dummy() - external :: c - character(*) :: c - entry character_dummy_entry(c) - call takes_char(c()) -end subroutine -! CHECK-LABEL: func @_QPcharacter_dummy() { -! CHECK: %[[VAL_0:.*]] = fir.undefined tuple ()>, i64> -! CHECK: br ^bb1 -! CHECK: ^bb1: -! CHECK: %[[VAL_1:.*]] = fir.extract_value %[[VAL_0]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> -! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.boxproc<() -> ()>) -> (() -> ()) -! CHECK: %[[VAL_3:.*]] = fir.extract_value %[[VAL_0]], [1 : index] : (tuple ()>, i64>) -> i64 -! CHECK: %[[VAL_4:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref -! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_3]] : i64) {bindc_name = ".result"} -! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) -! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_3]] : (i64) -> index -! CHECK: %[[VAL_8:.*]] = fir.call %[[VAL_6]](%[[VAL_5]], %[[VAL_7]]) : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_3]] : (i64) -> index -! CHECK: %[[VAL_10:.*]] = fir.emboxchar %[[VAL_5]], %[[VAL_9]] : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: fir.call @_QPtakes_char(%[[VAL_10]]) : (!fir.boxchar<1>) -> () -! CHECK: fir.call @llvm.stackrestore(%[[VAL_4]]) : (!fir.ref) -> () - -! CHECK-LABEL: func @_QPcharacter_dummy_entry( -! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) { -! CHECK: br ^bb1 -! CHECK: ^bb1: -! CHECK: %[[VAL_1:.*]] = fir.extract_value %[[VAL_0]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> -! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.boxproc<() -> ()>) -> (() -> ()) -! CHECK: %[[VAL_3:.*]] = fir.extract_value %[[VAL_0]], [1 : index] : (tuple ()>, i64>) -> i64 -! CHECK: %[[VAL_4:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref -! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_3]] : i64) {bindc_name = ".result"} -! 
CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) -! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_3]] : (i64) -> index -! CHECK: %[[VAL_8:.*]] = fir.call %[[VAL_6]](%[[VAL_5]], %[[VAL_7]]) : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_3]] : (i64) -> index -! CHECK: %[[VAL_10:.*]] = fir.emboxchar %[[VAL_5]], %[[VAL_9]] : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: fir.call @_QPtakes_char(%[[VAL_10]]) : (!fir.boxchar<1>) -> () -! CHECK: fir.call @llvm.stackrestore(%[[VAL_4]]) : (!fir.ref) -> () diff --git a/flang/test/Lower/host-associated.f90 b/flang/test/Lower/host-associated.f90 index a2c7ef10ed58a7..9654ddbb49a8a9 100644 --- a/flang/test/Lower/host-associated.f90 +++ b/flang/test/Lower/host-associated.f90 @@ -579,57 +579,50 @@ end subroutine test_proc_dummy_other ! CHECK: %[[VAL_10:.*]] = fir.address_of(@_QQcl.{{.*}}) : !fir.ref> ! CHECK: %[[VAL_11:.*]] = fir.extract_value %[[VAL_2]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> ! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]] : (!fir.boxproc<() -> ()>) -> (() -> ()) -! CHECK: %[[VAL_13:.*]] = fir.extract_value %[[VAL_2]], [1 : index] : (tuple ()>, i64>) -> i64 -! CHECK: %[[VAL_14:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref -! CHECK: %[[VAL_15:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_13]] : i64) {bindc_name = ".result"} -! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_12]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) -! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_13]] : (i64) -> index -! CHECK: %[[VAL_18:.*]] = fir.call %[[VAL_16]](%[[VAL_15]], %[[VAL_17]]) : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_17]], %[[VAL_4]] : index -! CHECK: %[[VAL_20:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_19]] : index) {bindc_name = ".chrtmp"} -! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_4]] : (index) -> i64 -! 
CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_20]] : (!fir.ref>) -> !fir.ref -! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> !fir.ref -! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_22]], %[[VAL_23]], %[[VAL_21]], %[[VAL_5]]) : (!fir.ref, !fir.ref, i64, i1) -> () -! CHECK: br ^bb1(%[[VAL_4]], %[[VAL_17]] : index, index) -! CHECK: ^bb1(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index): -! CHECK: %[[VAL_26:.*]] = arith.cmpi sgt, %[[VAL_25]], %[[VAL_8]] : index -! CHECK: cond_br %[[VAL_26]], ^bb2, ^bb3 -! CHECK: ^bb2: -! CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_24]], %[[VAL_4]] : index -! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_15]] : (!fir.ref>) -> !fir.ref>> -! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], %[[VAL_27]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref> -! CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_20]] : (!fir.ref>) -> !fir.ref>> -! CHECK: %[[VAL_32:.*]] = fir.coordinate_of %[[VAL_31]], %[[VAL_24]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: fir.store %[[VAL_30]] to %[[VAL_32]] : !fir.ref> -! CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_24]], %[[VAL_6]] : index -! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_25]], %[[VAL_6]] : index -! CHECK: br ^bb1(%[[VAL_33]], %[[VAL_34]] : index, index) -! CHECK: ^bb3: -! CHECK: %[[VAL_35:.*]] = arith.cmpi slt, %[[VAL_3]], %[[VAL_19]] : index -! CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_35]], %[[VAL_3]], %[[VAL_19]] : index -! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (index) -> i64 -! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_9]] : (!fir.ref>) -> !fir.ref -! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_38]], %[[VAL_22]], %[[VAL_37]], %[[VAL_5]]) : (!fir.ref, !fir.ref, i64, i1) -> () -! CHECK: %[[VAL_39:.*]] = fir.undefined !fir.char<1> -! CHECK: %[[VAL_40:.*]] = fir.insert_value %[[VAL_39]], %[[VAL_7]], [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> -! CHECK: %[[VAL_41:.*]] = arith.subi %[[VAL_3]], %[[VAL_36]] : index -! 
CHECK: br ^bb4(%[[VAL_36]], %[[VAL_41]] : index, index) -! CHECK: ^bb4(%[[VAL_42:.*]]: index, %[[VAL_43:.*]]: index): -! CHECK: %[[VAL_44:.*]] = arith.cmpi sgt, %[[VAL_43]], %[[VAL_8]] : index -! CHECK: cond_br %[[VAL_44]], ^bb5, ^bb6 -! CHECK: ^bb5: -! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_9]] : (!fir.ref>) -> !fir.ref>> -! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_45]], %[[VAL_42]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: fir.store %[[VAL_40]] to %[[VAL_46]] : !fir.ref> -! CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_42]], %[[VAL_6]] : index -! CHECK: %[[VAL_48:.*]] = arith.subi %[[VAL_43]], %[[VAL_6]] : index -! CHECK: br ^bb4(%[[VAL_47]], %[[VAL_48]] : index, index) -! CHECK: ^bb6: -! CHECK: fir.call @llvm.stackrestore(%[[VAL_14]]) : (!fir.ref) -> () -! CHECK: %[[VAL_49:.*]] = fir.emboxchar %[[VAL_9]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: return %[[VAL_49]] : !fir.boxchar<1> +! CHECK: %[[VAL_13:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) +! CHECK: %[[VAL_15:.*]] = fir.call %[[VAL_14]](%0, %c10) : (!fir.ref>, index) -> !fir.boxchar<1> +! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.char<1,?>(%c22 : index) {bindc_name = ".chrtmp"} +! CHECK: %[[VAL_17:.*]] = fir.convert %c12 : (index) -> i64 +! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.convert %2 : (!fir.ref>) -> !fir.ref +! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_18]], %[[VAL_19]], %[[VAL_17]], %false) : (!fir.ref, !fir.ref, i64, i1) -> () +! CHECK: cf.br ^bb1(%c12, %c10 : index, index) +! CHECK: ^bb1(%[[VAL_20:.*]]: index, %[[VAL_21:.*]]: index): // 2 preds: ^bb0, ^bb2 +! CHECK: %[[VAL_22:.*]] = arith.cmpi sgt, %[[VAL_21]], %c0 : index +! CHECK: cf.cond_br %[[VAL_22]], ^bb2, ^bb3 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_20]], %c12 : index +! 
CHECK: %[[VAL_24:.*]] = fir.convert %0 : (!fir.ref>) -> !fir.ref>> +! CHECK: %[[VAL_25:.*]] = fir.coordinate_of %[[VAL_24]], %[[VAL_23]] : (!fir.ref>>, index) -> !fir.ref> +! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_25]] : !fir.ref> +! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_16]] : (!fir.ref>) -> !fir.ref>> +! CHECK: %[[VAL_28:.*]] = fir.coordinate_of %[[VAL_27]], %[[VAL_20]] : (!fir.ref>>, index) -> !fir.ref> +! CHECK: fir.store %[[VAL_26]] to %[[VAL_28]] : !fir.ref> +! CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_20]], %c1 : index +! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_21]], %c1 : index +! CHECK: cf.br ^bb1(%[[VAL_29]], %[[VAL_30]] : index, index) +! CHECK: ^bb3: // pred: ^bb1 +! CHECK: %[[VAL_31:.*]] = fir.convert %c22 : (index) -> i64 +! CHECK: %[[VAL_32:.*]] = fir.convert %1 : (!fir.ref>) -> !fir.ref +! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_32]], %[[VAL_18]], %[[VAL_31]], %false) : (!fir.ref, !fir.ref, i64, i1) -> () +! CHECK: %[[VAL_33:.*]] = fir.undefined !fir.char<1> +! CHECK: %[[VAL_34:.*]] = fir.insert_value %[[VAL_33]], %c32_i8, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> +! CHECK: cf.br ^bb4(%c22, %c18 : index, index) +! CHECK: ^bb4(%[[VAL_35:.*]]: index, %[[VAL_36:.*]]: index): // 2 preds: ^bb3, ^bb5 +! CHECK: %[[VAL_37:.*]] = arith.cmpi sgt, %[[VAL_36]], %c0 : index +! CHECK: cf.cond_br %[[VAL_37]], ^bb5, ^bb6 +! CHECK: ^bb5: // pred: ^bb4 +! CHECK: %[[VAL_38:.*]] = fir.convert %1 : (!fir.ref>) -> !fir.ref>> +! CHECK: %[[VAL_39:.*]] = fir.coordinate_of %[[VAL_38]], %[[VAL_35]] : (!fir.ref>>, index) -> !fir.ref> +! CHECK: fir.store %[[VAL_34]] to %[[VAL_39]] : !fir.ref> +! CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_35]], %c1 : index +! CHECK: %[[VAL_41:.*]] = arith.subi %[[VAL_36]], %c1 : index +! CHECK: cf.br ^bb4(%[[VAL_40]], %[[VAL_41]] : index, index) +! CHECK: ^bb6: // pred: ^bb4 +! CHECK: fir.call @llvm.stackrestore(%[[VAL_13]]) : (!fir.ref) -> () +! 
CHECK: %[[VAL_42:.*]] = fir.emboxchar %1, %c40 : (!fir.ref>, index) -> !fir.boxchar<1> +! CHECK: return %[[VAL_42]] : !fir.boxchar<1> ! CHECK: } subroutine test_proc_dummy_char @@ -647,8 +640,8 @@ end subroutine test_proc_dummy_char function get_message(a) character(40) :: get_message - character(*) :: a - get_message = "message is: " // a() + character(10) :: a + get_message = "message is: " // a() end function get_message ! CHECK-LABEL: func @_QPtest_11a() { diff --git a/flang/test/Semantics/call01.f90 b/flang/test/Semantics/call01.f90 index d75b72a4e32ab2..f25fe45736bcae 100644 --- a/flang/test/Semantics/call01.f90 +++ b/flang/test/Semantics/call01.f90 @@ -97,6 +97,7 @@ function f13(n) result(res) res = '' else !ERROR: Assumed-length CHARACTER(*) function 'f13' cannot call itself + !ERROR: Assumed-length character function must be defined with a length to be called res = f13(n-1) ! 15.6.2.1(3) end if end function @@ -112,6 +113,32 @@ function f14(n) result(res) contains character(1) function nested !ERROR: Assumed-length CHARACTER(*) function 'f14' cannot call itself + !ERROR: Assumed-length character function must be defined with a length to be called nested = f14(n-1) ! 
15.6.2.1(3) end function nested end function + +subroutine s01(f1, f2, fp1, fp2) + character*(*) :: f1, f3, fp1 + external :: f1, f3 + pointer :: fp1 + procedure(character*(*)), pointer :: fp2 + interface + character*(*) function f2() + end function + character*(*) function f4() + end function + end interface + !ERROR: Assumed-length character function must be defined with a length to be called + print *, f1() + !ERROR: Assumed-length character function must be defined with a length to be called + print *, f2() + !ERROR: Assumed-length character function must be defined with a length to be called + print *, f3() + !ERROR: Assumed-length character function must be defined with a length to be called + print *, f4() + !ERROR: Assumed-length character function must be defined with a length to be called + print *, fp1() + !ERROR: Assumed-length character function must be defined with a length to be called + print *, fp2() +end subroutine From 8246b2e156568c31e71e16cbaf4c14d316e7c06e Mon Sep 17 00:00:00 2001 From: Mariusz Borsa Date: Mon, 23 May 2022 14:35:42 -0700 Subject: [PATCH 390/908] [Sanitizers][Darwin] Replace SANITIZER_MAC with SANITIZER_APPLE in source files This is a follow up to [Sanitizers][Darwin] Rename Apple macro SANITIZER_MAC -> SANITIZER_APPLE (D125816) Performed a global search/replace as in title against LLVM sources Differential Revision: https://reviews.llvm.org/D126263 --- compiler-rt/lib/asan/asan_flags.cpp | 2 +- compiler-rt/lib/asan/asan_flags.inc | 2 +- compiler-rt/lib/asan/asan_interceptors.cpp | 12 +-- compiler-rt/lib/asan/asan_interceptors.h | 4 +- compiler-rt/lib/asan/asan_mac.cpp | 4 +- compiler-rt/lib/asan/asan_malloc_mac.cpp | 2 +- compiler-rt/lib/asan/asan_mapping.h | 4 +- compiler-rt/lib/asan/asan_new_delete.cpp | 12 +-- .../lib/asan/tests/asan_noinst_test.cpp | 2 +- compiler-rt/lib/asan/tests/asan_test_main.cpp | 4 +- compiler-rt/lib/interception/interception.h | 18 ++--- .../lib/interception/interception_mac.cpp | 4 +- 
.../lib/interception/interception_mac.h | 4 +- .../interception/interception_type_test.cpp | 4 +- .../lib/interception/interception_win.cpp | 2 +- compiler-rt/lib/lsan/lsan_common.cpp | 2 +- compiler-rt/lib/lsan/lsan_common.h | 4 +- compiler-rt/lib/lsan/lsan_common_mac.cpp | 4 +- compiler-rt/lib/lsan/lsan_interceptors.cpp | 10 +-- compiler-rt/lib/lsan/lsan_mac.cpp | 4 +- compiler-rt/lib/lsan/lsan_malloc_mac.cpp | 4 +- .../lib/sanitizer_common/sanitizer_common.h | 4 +- .../sanitizer_common_interceptors.inc | 14 ++-- .../sanitizer_common_nolibc.cpp | 2 +- .../lib/sanitizer_common/sanitizer_errno.h | 2 +- .../lib/sanitizer_common/sanitizer_flags.inc | 6 +- .../sanitizer_internal_defs.h | 8 +- .../sanitizer_common/sanitizer_libignore.cpp | 4 +- .../lib/sanitizer_common/sanitizer_mac.cpp | 4 +- .../lib/sanitizer_common/sanitizer_mac.h | 10 +-- .../sanitizer_mac_libcdep.cpp | 4 +- .../sanitizer_common/sanitizer_malloc_mac.inc | 2 +- .../lib/sanitizer_common/sanitizer_mutex.h | 2 +- .../sanitizer_platform_interceptors.h | 6 +- .../sanitizer_platform_limits_posix.cpp | 28 +++---- .../sanitizer_platform_limits_posix.h | 26 +++---- .../lib/sanitizer_common/sanitizer_posix.cpp | 4 +- .../sanitizer_posix_libcdep.cpp | 2 +- .../lib/sanitizer_common/sanitizer_procmaps.h | 2 +- .../sanitizer_procmaps_mac.cpp | 4 +- .../sanitizer_common/sanitizer_stacktrace.h | 2 +- .../sanitizer_stoptheworld_mac.cpp | 4 +- .../sanitizer_symbolizer_libcdep.cpp | 2 +- .../sanitizer_symbolizer_mac.cpp | 4 +- .../sanitizer_symbolizer_mac.h | 4 +- .../sanitizer_symbolizer_posix_libcdep.cpp | 20 ++--- .../sanitizer_symbolizer_report.cpp | 2 +- .../sanitizer_syscall_generic.inc | 4 +- .../sanitizer_unwind_linux_libcdep.cpp | 2 +- .../tests/sanitizer_common_test.cpp | 2 +- .../tests/sanitizer_libc_test.cpp | 4 +- .../tests/sanitizer_mac_test.cpp | 4 +- .../tests/sanitizer_procmaps_test.cpp | 2 +- .../lib/tsan/rtl-old/tsan_dispatch_defs.h | 2 +- compiler-rt/lib/tsan/rtl-old/tsan_flags.inc | 4 +- 
.../rtl-old/tsan_interceptors_libdispatch.cpp | 4 +- .../tsan/rtl-old/tsan_interceptors_mac.cpp | 4 +- .../tsan/rtl-old/tsan_interceptors_posix.cpp | 78 +++++++++---------- .../lib/tsan/rtl-old/tsan_malloc_mac.cpp | 2 +- compiler-rt/lib/tsan/rtl-old/tsan_platform.h | 2 +- .../lib/tsan/rtl-old/tsan_platform_mac.cpp | 4 +- compiler-rt/lib/tsan/rtl-old/tsan_report.cpp | 2 +- compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp | 2 +- compiler-rt/lib/tsan/rtl-old/tsan_rtl.h | 6 +- compiler-rt/lib/tsan/rtl/tsan_dispatch_defs.h | 2 +- compiler-rt/lib/tsan/rtl/tsan_flags.inc | 4 +- .../rtl/tsan_interceptors_libdispatch.cpp | 4 +- .../lib/tsan/rtl/tsan_interceptors_mac.cpp | 4 +- .../lib/tsan/rtl/tsan_interceptors_posix.cpp | 76 +++++++++--------- compiler-rt/lib/tsan/rtl/tsan_malloc_mac.cpp | 2 +- compiler-rt/lib/tsan/rtl/tsan_platform.h | 2 +- .../lib/tsan/rtl/tsan_platform_mac.cpp | 4 +- compiler-rt/lib/tsan/rtl/tsan_report.cpp | 2 +- compiler-rt/lib/tsan/rtl/tsan_rtl.cpp | 2 +- compiler-rt/lib/tsan/rtl/tsan_rtl.h | 4 +- compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp | 2 +- compiler-rt/lib/ubsan/ubsan_value.cpp | 2 +- compiler-rt/lib/xray/xray_basic_logging.cpp | 2 +- compiler-rt/lib/xray/xray_init.cpp | 2 +- compiler-rt/lib/xray/xray_x86_64.cpp | 6 +- 80 files changed, 264 insertions(+), 264 deletions(-) diff --git a/compiler-rt/lib/asan/asan_flags.cpp b/compiler-rt/lib/asan/asan_flags.cpp index 9ea899f84b4b7a..23989843323211 100644 --- a/compiler-rt/lib/asan/asan_flags.cpp +++ b/compiler-rt/lib/asan/asan_flags.cpp @@ -87,7 +87,7 @@ void InitializeFlags() { RegisterCommonFlags(&ubsan_parser); #endif - if (SANITIZER_MAC) { + if (SANITIZER_APPLE) { // Support macOS MallocScribble and MallocPreScribble: // diff --git a/compiler-rt/lib/asan/asan_flags.inc b/compiler-rt/lib/asan/asan_flags.inc index 682666615dbc41..fad1577d912a5e 100644 --- a/compiler-rt/lib/asan/asan_flags.inc +++ b/compiler-rt/lib/asan/asan_flags.inc @@ -122,7 +122,7 @@ ASAN_FLAG(bool, poison_array_cookie, true, // 
https://github.com/google/sanitizers/issues/309 // TODO(glider,timurrrr): Fix known issues and enable this back. ASAN_FLAG(bool, alloc_dealloc_mismatch, - !SANITIZER_MAC && !SANITIZER_WINDOWS && !SANITIZER_ANDROID, + !SANITIZER_APPLE && !SANITIZER_WINDOWS && !SANITIZER_ANDROID, "Report errors on malloc/delete, new/free, new/delete[], etc.") ASAN_FLAG(bool, new_delete_type_mismatch, true, diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index 2ff314a5a9cbdd..37d0fc67cf75bf 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -103,7 +103,7 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *) do { \ if (asan_init_is_running) \ return REAL(func)(__VA_ARGS__); \ - if (SANITIZER_MAC && UNLIKELY(!asan_inited)) \ + if (SANITIZER_APPLE && UNLIKELY(!asan_inited)) \ return REAL(func)(__VA_ARGS__); \ ENSURE_ASAN_INITED(); \ } while (false) @@ -355,7 +355,7 @@ INTERCEPTOR(_Unwind_Reason_Code, _Unwind_SjLj_RaiseException, INTERCEPTOR(char*, index, const char *string, int c) ALIAS(WRAPPER_NAME(strchr)); # else -# if SANITIZER_MAC +# if SANITIZER_APPLE DECLARE_REAL(char*, index, const char *string, int c) OVERRIDE_FUNCTION(index, strchr); # else @@ -409,7 +409,7 @@ INTERCEPTOR(char*, strncat, char *to, const char *from, uptr size) { INTERCEPTOR(char *, strcpy, char *to, const char *from) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, strcpy); -#if SANITIZER_MAC +#if SANITIZER_APPLE if (UNLIKELY(!asan_inited)) return REAL(strcpy)(to, from); #endif @@ -489,7 +489,7 @@ INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) { INTERCEPTOR(int, atoi, const char *nptr) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, atoi); -#if SANITIZER_MAC +#if SANITIZER_APPLE if (UNLIKELY(!asan_inited)) return REAL(atoi)(nptr); #endif ENSURE_ASAN_INITED(); @@ -510,7 +510,7 @@ INTERCEPTOR(int, atoi, const char *nptr) { INTERCEPTOR(long, atol, const char *nptr) { void *ctx; 
ASAN_INTERCEPTOR_ENTER(ctx, atol); -#if SANITIZER_MAC +#if SANITIZER_APPLE if (UNLIKELY(!asan_inited)) return REAL(atol)(nptr); #endif ENSURE_ASAN_INITED(); @@ -563,7 +563,7 @@ static void AtCxaAtexit(void *unused) { #if ASAN_INTERCEPT___CXA_ATEXIT INTERCEPTOR(int, __cxa_atexit, void (*func)(void *), void *arg, void *dso_handle) { -#if SANITIZER_MAC +#if SANITIZER_APPLE if (UNLIKELY(!asan_inited)) return REAL(__cxa_atexit)(func, arg, dso_handle); #endif ENSURE_ASAN_INITED(); diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h index 047b044c8bf47d..35727a96497dcb 100644 --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -133,7 +133,7 @@ DECLARE_REAL(char*, strncpy, char *to, const char *from, uptr size) DECLARE_REAL(uptr, strnlen, const char *s, uptr maxlen) DECLARE_REAL(char*, strstr, const char *s1, const char *s2) -# if !SANITIZER_MAC +# if !SANITIZER_APPLE # define ASAN_INTERCEPT_FUNC(name) \ do { \ if (!INTERCEPT_FUNCTION(name)) \ @@ -156,7 +156,7 @@ DECLARE_REAL(char*, strstr, const char *s1, const char *s2) # else // OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION. 
# define ASAN_INTERCEPT_FUNC(name) -# endif // SANITIZER_MAC +# endif // SANITIZER_APPLE #endif // !SANITIZER_FUCHSIA diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp index 9161f728d44c8a..4f4ce92cc6a156 100644 --- a/compiler-rt/lib/asan/asan_mac.cpp +++ b/compiler-rt/lib/asan/asan_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "asan_interceptors.h" #include "asan_internal.h" @@ -296,4 +296,4 @@ INTERCEPTOR(void, dispatch_source_set_event_handler, } #endif -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/asan/asan_malloc_mac.cpp b/compiler-rt/lib/asan/asan_malloc_mac.cpp index e8484685daed43..924d1f12640a7a 100644 --- a/compiler-rt/lib/asan/asan_malloc_mac.cpp +++ b/compiler-rt/lib/asan/asan_malloc_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "asan_interceptors.h" #include "asan_report.h" diff --git a/compiler-rt/lib/asan/asan_mapping.h b/compiler-rt/lib/asan/asan_mapping.h index 4ff09b103d5f29..e448d9798a1fc2 100644 --- a/compiler-rt/lib/asan/asan_mapping.h +++ b/compiler-rt/lib/asan/asan_mapping.h @@ -174,7 +174,7 @@ #else # if SANITIZER_IOS # define ASAN_SHADOW_OFFSET_DYNAMIC -# elif SANITIZER_MAC && defined(__aarch64__) +# elif SANITIZER_APPLE && defined(__aarch64__) # define ASAN_SHADOW_OFFSET_DYNAMIC # elif SANITIZER_RISCV64 # define ASAN_SHADOW_OFFSET_CONST 0x0000000d55550000 @@ -188,7 +188,7 @@ # define ASAN_SHADOW_OFFSET_CONST 0x0000400000000000 # elif SANITIZER_NETBSD # define ASAN_SHADOW_OFFSET_CONST 0x0000400000000000 -# elif SANITIZER_MAC +# elif SANITIZER_APPLE # define ASAN_SHADOW_OFFSET_CONST 0x0000100000000000 # elif defined(__mips64) # define ASAN_SHADOW_OFFSET_CONST 
0x0000002000000000 diff --git a/compiler-rt/lib/asan/asan_new_delete.cpp b/compiler-rt/lib/asan/asan_new_delete.cpp index da446072de1852..17280129c758bc 100644 --- a/compiler-rt/lib/asan/asan_new_delete.cpp +++ b/compiler-rt/lib/asan/asan_new_delete.cpp @@ -89,7 +89,7 @@ enum class align_val_t: size_t {}; // delete. // To make sure that C++ allocation/deallocation operators are overridden on // OS X we need to intercept them using their mangled names. -#if !SANITIZER_MAC +#if !SANITIZER_APPLE CXX_OPERATOR_ATTRIBUTE void *operator new(size_t size) { OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/); } @@ -115,7 +115,7 @@ CXX_OPERATOR_ATTRIBUTE void *operator new[](size_t size, std::align_val_t align, std::nothrow_t const&) { OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, true /*nothrow*/); } -#else // SANITIZER_MAC +#else // SANITIZER_APPLE INTERCEPTOR(void *, _Znwm, size_t size) { OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/); } @@ -128,7 +128,7 @@ INTERCEPTOR(void *, _ZnwmRKSt9nothrow_t, size_t size, std::nothrow_t const&) { INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY(FROM_NEW_BR, true /*nothrow*/); } -#endif // !SANITIZER_MAC +#endif // !SANITIZER_APPLE #define OPERATOR_DELETE_BODY(type) \ GET_STACK_TRACE_FREE; \ @@ -146,7 +146,7 @@ INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&) { GET_STACK_TRACE_FREE; \ asan_delete(ptr, size, static_cast(align), &stack, type); -#if !SANITIZER_MAC +#if !SANITIZER_APPLE CXX_OPERATOR_ATTRIBUTE void operator delete(void *ptr) NOEXCEPT { OPERATOR_DELETE_BODY(FROM_NEW); } @@ -184,7 +184,7 @@ CXX_OPERATOR_ATTRIBUTE void operator delete[](void *ptr, size_t size, std::align_val_t align) NOEXCEPT { OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW_BR); } -#else // SANITIZER_MAC +#else // SANITIZER_APPLE INTERCEPTOR(void, _ZdlPv, void *ptr) { OPERATOR_DELETE_BODY(FROM_NEW); } INTERCEPTOR(void, _ZdaPv, void *ptr) @@ -193,4 +193,4 @@ INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void 
*ptr, std::nothrow_t const&) { OPERATOR_DELETE_BODY(FROM_NEW); } INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&) { OPERATOR_DELETE_BODY(FROM_NEW_BR); } -#endif // !SANITIZER_MAC +#endif // !SANITIZER_APPLE diff --git a/compiler-rt/lib/asan/tests/asan_noinst_test.cpp b/compiler-rt/lib/asan/tests/asan_noinst_test.cpp index 97a60a104cd52e..4c103609c83bfd 100644 --- a/compiler-rt/lib/asan/tests/asan_noinst_test.cpp +++ b/compiler-rt/lib/asan/tests/asan_noinst_test.cpp @@ -282,7 +282,7 @@ TEST(AddressSanitizer, LoadStoreCallbacks) { } #if defined(__x86_64__) && \ - !(defined(SANITIZER_MAC) || defined(SANITIZER_WINDOWS)) + !(defined(SANITIZER_APPLE) || defined(SANITIZER_WINDOWS)) // clang-format off #define CALL_ASAN_MEMORY_ACCESS_CALLBACK_ADD(s, reg, op) \ diff --git a/compiler-rt/lib/asan/tests/asan_test_main.cpp b/compiler-rt/lib/asan/tests/asan_test_main.cpp index 245d07f87b517b..64ac1b288e53c1 100644 --- a/compiler-rt/lib/asan/tests/asan_test_main.cpp +++ b/compiler-rt/lib/asan/tests/asan_test_main.cpp @@ -14,7 +14,7 @@ // Default ASAN_OPTIONS for the unit tests. extern "C" const char* __asan_default_options() { -#if SANITIZER_MAC +#if SANITIZER_APPLE // On Darwin, we default to `abort_on_error=1`, which would make tests run // much slower. Let's override this and run lit tests with 'abort_on_error=0' // and make sure we do not overwhelm the syslog while testing. Also, let's @@ -35,7 +35,7 @@ extern "C" const char* __asan_default_options() { namespace __sanitizer { bool ReexecDisabled() { -#if __has_feature(address_sanitizer) && SANITIZER_MAC +#if __has_feature(address_sanitizer) && SANITIZER_APPLE // Allow re-exec in instrumented unit tests on Darwin. 
Technically, we only // need this for 10.10 and below, where re-exec is required for the // interceptors to work, but to avoid duplicating the version detection logic, diff --git a/compiler-rt/lib/interception/interception.h b/compiler-rt/lib/interception/interception.h index d8dc092c45f56e..d97974ee9074e9 100644 --- a/compiler-rt/lib/interception/interception.h +++ b/compiler-rt/lib/interception/interception.h @@ -16,7 +16,7 @@ #include "sanitizer_common/sanitizer_internal_defs.h" -#if !SANITIZER_LINUX && !SANITIZER_FREEBSD && !SANITIZER_MAC && \ +#if !SANITIZER_LINUX && !SANITIZER_FREEBSD && !SANITIZER_APPLE && \ !SANITIZER_NETBSD && !SANITIZER_WINDOWS && !SANITIZER_FUCHSIA && \ !SANITIZER_SOLARIS # error "Interception doesn't work on this operating system." @@ -88,7 +88,7 @@ typedef __sanitizer::OFF64_T OFF64_T; // As it's decided at compile time which functions are to be intercepted on Mac, // INTERCEPT_FUNCTION() is effectively a no-op on this system. -#if SANITIZER_MAC +#if SANITIZER_APPLE #include // For __DARWIN_ALIAS_C(). // Just a pair of pointers. @@ -157,7 +157,7 @@ const interpose_substitution substitution_##func_name[] \ # define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default"))) # define REAL(x) __unsanitized_##x # define DECLARE_REAL(ret_type, func, ...) -#elif !SANITIZER_MAC +#elif !SANITIZER_APPLE # define PTR_TO_REAL(x) real_##x # define REAL(x) __interception::PTR_TO_REAL(x) # define FUNC_TYPE(x) x##_type @@ -168,12 +168,12 @@ const interpose_substitution substitution_##func_name[] \ extern FUNC_TYPE(func) PTR_TO_REAL(func); \ } # define ASSIGN_REAL(dst, src) REAL(dst) = REAL(src) -#else // SANITIZER_MAC +#else // SANITIZER_APPLE # define REAL(x) x # define DECLARE_REAL(ret_type, func, ...) \ extern "C" ret_type func(__VA_ARGS__); # define ASSIGN_REAL(x, y) -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE #if !SANITIZER_FUCHSIA # define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) 
\ @@ -193,7 +193,7 @@ const interpose_substitution substitution_##func_name[] \ // macros does its job. In exceptional cases you may need to call REAL(foo) // without defining INTERCEPTOR(..., foo, ...). For example, if you override // foo with an interceptor for other function. -#if !SANITIZER_MAC && !SANITIZER_FUCHSIA +#if !SANITIZER_APPLE && !SANITIZER_FUCHSIA # define DEFINE_REAL(ret_type, func, ...) \ typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \ namespace __interception { \ @@ -213,7 +213,7 @@ const interpose_substitution substitution_##func_name[] \ __interceptor_##func(__VA_ARGS__); \ extern "C" INTERCEPTOR_ATTRIBUTE ret_type func(__VA_ARGS__) -#elif !SANITIZER_MAC +#elif !SANITIZER_APPLE #define INTERCEPTOR(ret_type, func, ...) \ DEFINE_REAL(ret_type, func, __VA_ARGS__) \ @@ -226,7 +226,7 @@ const interpose_substitution substitution_##func_name[] \ #define INTERCEPTOR_WITH_SUFFIX(ret_type, func, ...) \ INTERCEPTOR(ret_type, func, __VA_ARGS__) -#else // SANITIZER_MAC +#else // SANITIZER_APPLE #define INTERCEPTOR_ZZZ(suffix, ret_type, func, ...) 
\ extern "C" ret_type func(__VA_ARGS__) suffix; \ @@ -278,7 +278,7 @@ typedef unsigned long uptr; # define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) # define INTERCEPT_FUNCTION_VER(func, symver) \ INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) -#elif SANITIZER_MAC +#elif SANITIZER_APPLE # include "interception_mac.h" # define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_MAC(func) # define INTERCEPT_FUNCTION_VER(func, symver) \ diff --git a/compiler-rt/lib/interception/interception_mac.cpp b/compiler-rt/lib/interception/interception_mac.cpp index fb6eadcff597e0..03eae0fdca0d80 100644 --- a/compiler-rt/lib/interception/interception_mac.cpp +++ b/compiler-rt/lib/interception/interception_mac.cpp @@ -13,6 +13,6 @@ #include "interception.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/interception/interception_mac.h b/compiler-rt/lib/interception/interception_mac.h index eddedb8959c459..26079518c649b3 100644 --- a/compiler-rt/lib/interception/interception_mac.h +++ b/compiler-rt/lib/interception/interception_mac.h @@ -11,7 +11,7 @@ // Mac-specific interception methods. 
//===----------------------------------------------------------------------===// -#if SANITIZER_MAC +#if SANITIZER_APPLE #if !defined(INCLUDED_FROM_INTERCEPTION_LIB) # error "interception_mac.h should be included from interception.h only" @@ -24,4 +24,4 @@ #define INTERCEPT_FUNCTION_VER_MAC(func, symver) #endif // INTERCEPTION_MAC_H -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/interception/interception_type_test.cpp b/compiler-rt/lib/interception/interception_type_test.cpp index a611604a700cf7..2a118fb214fffe 100644 --- a/compiler-rt/lib/interception/interception_type_test.cpp +++ b/compiler-rt/lib/interception/interception_type_test.cpp @@ -13,7 +13,7 @@ #include "interception.h" -#if SANITIZER_LINUX || SANITIZER_MAC +#if SANITIZER_LINUX || SANITIZER_APPLE #include #include @@ -24,7 +24,7 @@ COMPILER_CHECK(sizeof(::SSIZE_T) == sizeof(ssize_t)); COMPILER_CHECK(sizeof(::PTRDIFF_T) == sizeof(ptrdiff_t)); COMPILER_CHECK(sizeof(::INTMAX_T) == sizeof(intmax_t)); -#if !SANITIZER_MAC +#if !SANITIZER_APPLE COMPILER_CHECK(sizeof(::OFF64_T) == sizeof(off64_t)); #endif diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index 10b893391f47a1..d0db981d519cbb 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -1068,4 +1068,4 @@ bool OverrideImportedFunction(const char *module_to_patch, } // namespace __interception -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index a23b2b4e1d29aa..2e38b1ce8eec31 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -105,7 +105,7 @@ static const char kStdSuppressions[] = // definition. 
"leak:*pthread_exit*\n" # endif // SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT -# if SANITIZER_MAC +# if SANITIZER_APPLE // For Darwin and os_log/os_trace: https://reviews.llvm.org/D35173 "leak:*_os_trace*\n" # endif diff --git a/compiler-rt/lib/lsan/lsan_common.h b/compiler-rt/lib/lsan/lsan_common.h index 6b06c4517cd5de..d7153751faee0f 100644 --- a/compiler-rt/lib/lsan/lsan_common.h +++ b/compiler-rt/lib/lsan/lsan_common.h @@ -34,11 +34,11 @@ // is missing. This caused a link error. #if SANITIZER_ANDROID && (__ANDROID_API__ < 28 || defined(__arm__)) # define CAN_SANITIZE_LEAKS 0 -#elif (SANITIZER_LINUX || SANITIZER_MAC) && (SANITIZER_WORDSIZE == 64) && \ +#elif (SANITIZER_LINUX || SANITIZER_APPLE) && (SANITIZER_WORDSIZE == 64) && \ (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__) || \ defined(__powerpc64__) || defined(__s390x__)) # define CAN_SANITIZE_LEAKS 1 -#elif defined(__i386__) && (SANITIZER_LINUX || SANITIZER_MAC) +#elif defined(__i386__) && (SANITIZER_LINUX || SANITIZER_APPLE) # define CAN_SANITIZE_LEAKS 1 #elif defined(__arm__) && SANITIZER_LINUX # define CAN_SANITIZE_LEAKS 1 diff --git a/compiler-rt/lib/lsan/lsan_common_mac.cpp b/compiler-rt/lib/lsan/lsan_common_mac.cpp index a4204740c7fab1..26b623fb1d49ad 100644 --- a/compiler-rt/lib/lsan/lsan_common_mac.cpp +++ b/compiler-rt/lib/lsan/lsan_common_mac.cpp @@ -15,7 +15,7 @@ #include "sanitizer_common/sanitizer_libc.h" #include "lsan_common.h" -#if CAN_SANITIZE_LEAKS && SANITIZER_MAC +#if CAN_SANITIZE_LEAKS && SANITIZER_APPLE #include "sanitizer_common/sanitizer_allocator_internal.h" #include "lsan_allocator.h" @@ -201,4 +201,4 @@ void LockStuffAndStopTheWorld(StopTheWorldCallback callback, } // namespace __lsan -#endif // CAN_SANITIZE_LEAKS && SANITIZER_MAC +#endif // CAN_SANITIZE_LEAKS && SANITIZER_APPLE diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp index 205e85685a7faf..3a1b2afdbb74eb 100644 --- 
a/compiler-rt/lib/lsan/lsan_interceptors.cpp +++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp @@ -67,7 +67,7 @@ namespace std { enum class align_val_t: size_t; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE INTERCEPTOR(void*, malloc, uptr size) { if (DlsymAlloc::Use()) return DlsymAlloc::Allocate(size); @@ -116,7 +116,7 @@ INTERCEPTOR(void*, valloc, uptr size) { GET_STACK_TRACE_MALLOC; return lsan_valloc(size, stack); } -#endif // !SANITIZER_MAC +#endif // !SANITIZER_APPLE #if SANITIZER_INTERCEPT_MEMALIGN INTERCEPTOR(void*, memalign, uptr alignment, uptr size) { @@ -242,7 +242,7 @@ INTERCEPTOR(int, mprobe, void *ptr) { // libstdc++, each of has its implementation of new and delete. // To make sure that C++ allocation/deallocation operators are overridden on // OS X we need to intercept them using their mangled names. -#if !SANITIZER_MAC +#if !SANITIZER_APPLE INTERCEPTOR_ATTRIBUTE void *operator new(size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); } @@ -301,7 +301,7 @@ INTERCEPTOR_ATTRIBUTE void operator delete[](void *ptr, size_t size, std::align_val_t) NOEXCEPT { OPERATOR_DELETE_BODY; } -#else // SANITIZER_MAC +#else // SANITIZER_APPLE INTERCEPTOR(void *, _Znwm, size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); } @@ -321,7 +321,7 @@ INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&) INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&) { OPERATOR_DELETE_BODY; } -#endif // !SANITIZER_MAC +#endif // !SANITIZER_APPLE ///// Thread initialization and finalization. 
///// diff --git a/compiler-rt/lib/lsan/lsan_mac.cpp b/compiler-rt/lib/lsan/lsan_mac.cpp index 10a73f8fa93dff..6964a9ba28df52 100644 --- a/compiler-rt/lib/lsan/lsan_mac.cpp +++ b/compiler-rt/lib/lsan/lsan_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "interception/interception.h" #include "lsan.h" @@ -188,4 +188,4 @@ INTERCEPTOR(void, dispatch_source_set_event_handler, dispatch_source_t ds, } #endif -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/lsan/lsan_malloc_mac.cpp b/compiler-rt/lib/lsan/lsan_malloc_mac.cpp index d03eb2e915c0bc..525c30272ccca6 100644 --- a/compiler-rt/lib/lsan/lsan_malloc_mac.cpp +++ b/compiler-rt/lib/lsan/lsan_malloc_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "lsan.h" #include "lsan_allocator.h" @@ -56,4 +56,4 @@ using namespace __lsan; #include "sanitizer_common/sanitizer_malloc_mac.inc" -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 8648a5b174def7..9614883a96057b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -891,13 +891,13 @@ void WriteToSyslog(const char *buffer); #define SANITIZER_WIN_TRACE 0 #endif -#if SANITIZER_MAC || SANITIZER_WIN_TRACE +#if SANITIZER_APPLE || SANITIZER_WIN_TRACE void LogFullErrorReport(const char *buffer); #else inline void LogFullErrorReport(const char *buffer) {} #endif -#if SANITIZER_LINUX || SANITIZER_MAC +#if SANITIZER_LINUX || SANITIZER_APPLE void WriteOneLineToSyslog(const char *s); void LogMessageOnPrintf(const char *str); #else diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 43296e6c1f6a72..99bd59abb9ee2a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -203,13 +203,13 @@ extern const short *_tolower_tab_; #endif // Platform-specific options. -#if SANITIZER_MAC +#if SANITIZER_APPLE #define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 0 #elif SANITIZER_WINDOWS64 #define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 0 #else #define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 1 -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE #ifndef COMMON_INTERCEPTOR_INITIALIZE_RANGE #define COMMON_INTERCEPTOR_INITIALIZE_RANGE(p, size) {} @@ -1334,7 +1334,7 @@ INTERCEPTOR_WITH_SUFFIX(int, fputs, char *s, void *file) { // libc file streams can call user-supplied functions, see fopencookie. void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, fputs, s, file); - if (!SANITIZER_MAC || s) { // `fputs(NULL, file)` is supported on Darwin. + if (!SANITIZER_APPLE || s) { // `fputs(NULL, file)` is supported on Darwin. COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1); } return REAL(fputs)(s, file); @@ -1349,7 +1349,7 @@ INTERCEPTOR(int, puts, char *s) { // libc file streams can call user-supplied functions, see fopencookie. void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, puts, s); - if (!SANITIZER_MAC || s) { // `puts(NULL)` is supported on Darwin. + if (!SANITIZER_APPLE || s) { // `puts(NULL)` is supported on Darwin. 
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1); } return REAL(puts)(s); @@ -1952,7 +1952,7 @@ UNUSED static void unpoison_passwd(void *ctx, __sanitizer_passwd *pwd) { COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_gecos, internal_strlen(pwd->pw_gecos) + 1); #endif -#if SANITIZER_MAC || SANITIZER_FREEBSD || SANITIZER_NETBSD +#if SANITIZER_APPLE || SANITIZER_FREEBSD || SANITIZER_NETBSD if (pwd->pw_class) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_class, internal_strlen(pwd->pw_class) + 1); @@ -3941,7 +3941,7 @@ INTERCEPTOR(char *, strerror, int errnum) { // * GNU version returns message pointer, which points to either buf or some // static storage. #if ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE) || \ - SANITIZER_MAC || SANITIZER_ANDROID || SANITIZER_NETBSD || \ + SANITIZER_APPLE || SANITIZER_ANDROID || SANITIZER_NETBSD || \ SANITIZER_FREEBSD // POSIX version. Spec is not clear on whether buf is NULL-terminated. // At least on OSX, buf contents are valid even when the call fails. 
@@ -3974,7 +3974,7 @@ INTERCEPTOR(char *, strerror_r, int errnum, char *buf, SIZE_T buflen) { return res; } #endif //(_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE || - //SANITIZER_MAC + //SANITIZER_APPLE #define INIT_STRERROR_R COMMON_INTERCEPT_FUNCTION(strerror_r); #else #define INIT_STRERROR_R diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp index a20602d8b95a73..67e77a8777818d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp @@ -28,7 +28,7 @@ void Abort() { internal__exit(1); } bool CreateDir(const char *pathname) { return false; } #endif // !SANITIZER_WINDOWS -#if !SANITIZER_WINDOWS && !SANITIZER_MAC +#if !SANITIZER_WINDOWS && !SANITIZER_APPLE void ListOfModules::init() {} void InitializePlatformCommonFlags(CommonFlags *cf) {} #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_errno.h b/compiler-rt/lib/sanitizer_common/sanitizer_errno.h index 70a6e88dbaad45..46c85364cef560 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_errno.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_errno.h @@ -21,7 +21,7 @@ #include "sanitizer_errno_codes.h" #include "sanitizer_platform.h" -#if SANITIZER_FREEBSD || SANITIZER_MAC +#if SANITIZER_FREEBSD || SANITIZER_APPLE # define __errno_location __error #elif SANITIZER_ANDROID || SANITIZER_NETBSD # define __errno_location __errno diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index 0ca91aff8dd4b4..8627ffd0d01c21 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -62,7 +62,7 @@ COMMON_FLAG( COMMON_FLAG(const char *, log_suffix, nullptr, "String to append to log file name, e.g. 
\".txt\".") COMMON_FLAG( - bool, log_to_syslog, (bool)SANITIZER_ANDROID || (bool)SANITIZER_MAC, + bool, log_to_syslog, (bool)SANITIZER_ANDROID || (bool)SANITIZER_APPLE, "Write all sanitizer output to syslog in addition to other means of " "logging.") COMMON_FLAG( @@ -71,7 +71,7 @@ COMMON_FLAG( COMMON_FLAG(bool, strip_env, 1, "Whether to remove the sanitizer from DYLD_INSERT_LIBRARIES to " "avoid passing it to children. Default is true.") -COMMON_FLAG(bool, detect_leaks, !SANITIZER_MAC, "Enable memory leak detection.") +COMMON_FLAG(bool, detect_leaks, !SANITIZER_APPLE, "Enable memory leak detection.") COMMON_FLAG( bool, leak_check_at_exit, true, "Invoke leak checking in an atexit handler. Has no effect if " @@ -245,7 +245,7 @@ COMMON_FLAG(bool, decorate_proc_maps, (bool)SANITIZER_ANDROID, COMMON_FLAG(int, exitcode, 1, "Override the program exit status if the tool " "found an error") COMMON_FLAG( - bool, abort_on_error, (bool)SANITIZER_ANDROID || (bool)SANITIZER_MAC, + bool, abort_on_error, (bool)SANITIZER_ANDROID || (bool)SANITIZER_APPLE, "If set, the tool calls abort() instead of _exit() after printing the " "error report.") COMMON_FLAG(bool, suppress_equal_pcs, true, diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h index ff65069de8db5e..15738ff1e1bcbd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h @@ -73,7 +73,7 @@ // Before Xcode 4.5, the Darwin linker doesn't reliably support undefined // weak symbols. Mac OS X 10.9/Darwin 13 is the first release only supported // by Xcode >= 4.5. 
-#elif SANITIZER_MAC && \ +#elif SANITIZER_APPLE && \ __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1090 && !SANITIZER_GO # define SANITIZER_SUPPORTS_WEAK_HOOKS 1 #else @@ -139,7 +139,7 @@ namespace __sanitizer { typedef unsigned long long uptr; typedef signed long long sptr; #else -# if (SANITIZER_WORDSIZE == 64) || SANITIZER_MAC || SANITIZER_WINDOWS +# if (SANITIZER_WORDSIZE == 64) || SANITIZER_APPLE || SANITIZER_WINDOWS typedef unsigned long uptr; typedef signed long sptr; # else @@ -177,7 +177,7 @@ typedef long pid_t; typedef int pid_t; #endif -#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC || \ +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE || \ (SANITIZER_SOLARIS && (defined(_LP64) || _FILE_OFFSET_BITS == 64)) || \ (SANITIZER_LINUX && !SANITIZER_GLIBC && !SANITIZER_ANDROID) || \ (SANITIZER_LINUX && (defined(__x86_64__) || defined(__hexagon__))) @@ -187,7 +187,7 @@ typedef uptr OFF_T; #endif typedef u64 OFF64_T; -#if (SANITIZER_WORDSIZE == 64) || SANITIZER_MAC +#if (SANITIZER_WORDSIZE == 64) || SANITIZER_APPLE typedef uptr operator_new_size_type; #else # if defined(__s390__) && !defined(__s390x__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp index caaba3155a7bec..b7fc9444cc66b3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp @@ -8,7 +8,7 @@ #include "sanitizer_platform.h" -#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC || \ +#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_APPLE || \ SANITIZER_NETBSD #include "sanitizer_libignore.h" @@ -125,5 +125,5 @@ void LibIgnore::OnLibraryUnloaded() { } // namespace __sanitizer -#endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC || +#endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_APPLE || // SANITIZER_NETBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index bc3903d6f6207e..327c7167dcf5ea 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_mac.h" #include "interception/interception.h" @@ -1429,4 +1429,4 @@ void InitializePlatformCommonFlags(CommonFlags *cf) {} } // namespace __sanitizer -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h index 0b6af5a3c0edc6..a8b274e8c82c02 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h @@ -9,12 +9,12 @@ // This file is shared between various sanitizers' runtime libraries and // provides definitions for OSX-specific functions. 
//===----------------------------------------------------------------------===// -#ifndef SANITIZER_MAC_H -#define SANITIZER_MAC_H +#ifndef SANITIZER_APPLE_H +#define SANITIZER_APPLE_H #include "sanitizer_common.h" #include "sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_posix.h" namespace __sanitizer { @@ -64,5 +64,5 @@ void RestrictMemoryToMaxAddress(uptr max_address); } // namespace __sanitizer -#endif // SANITIZER_MAC -#endif // SANITIZER_MAC_H +#endif // SANITIZER_APPLE +#endif // SANITIZER_APPLE_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac_libcdep.cpp index ac7e328946bfbe..b452dc4a49e235 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac_libcdep.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_mac.h" #include @@ -26,4 +26,4 @@ void RestrictMemoryToMaxAddress(uptr max_address) { } // namespace __sanitizer -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc b/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc index 764e2cef5e74df..fe76b3f8aa0514 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if !SANITIZER_MAC +#if !SANITIZER_APPLE #error "This file should only be compiled on Darwin." 
#endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h index d2188a9e6d6239..b1a58e421d81a5 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h @@ -101,7 +101,7 @@ enum { // THREADLOCAL variables they are not usable early on during process init when // `__sanitizer::Mutex` is used. #define SANITIZER_CHECK_DEADLOCKS \ - (SANITIZER_DEBUG && !SANITIZER_GO && SANITIZER_SUPPORTS_THREADLOCAL && !SANITIZER_MAC) + (SANITIZER_DEBUG && !SANITIZER_GO && SANITIZER_SUPPORTS_THREADLOCAL && !SANITIZER_APPLE) #if SANITIZER_CHECK_DEADLOCKS struct MutexMeta { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 3cbbead4e98f7f..8d31f5aeca9e0d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -76,7 +76,7 @@ #define SI_LINUX 0 #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE #define SI_MAC 1 #define SI_NOT_MAC 0 #else @@ -126,7 +126,7 @@ #define SI_SOLARIS32 0 #endif -#if SANITIZER_POSIX && !SANITIZER_MAC +#if SANITIZER_POSIX && !SANITIZER_APPLE #define SI_POSIX_NOT_MAC 1 #else #define SI_POSIX_NOT_MAC 0 @@ -587,7 +587,7 @@ // sigaltstack on i386 macOS cannot be intercepted due to setjmp() // calling it and assuming that it does not clobber registers. 
#define SANITIZER_INTERCEPT_SIGALTSTACK \ - (SI_POSIX && !(SANITIZER_MAC && SANITIZER_I386)) + (SI_POSIX && !(SANITIZER_APPLE && SANITIZER_I386)) #define SANITIZER_INTERCEPT_UNAME (SI_POSIX && !SI_FREEBSD) #define SANITIZER_INTERCEPT___XUNAME SI_FREEBSD #define SANITIZER_INTERCEPT_FLOPEN SI_FREEBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 8ed3e92d27047a..ecfcffb34a5a2f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -23,7 +23,7 @@ // Must go after undef _FILE_OFFSET_BITS. #include "sanitizer_platform.h" -#if SANITIZER_LINUX || SANITIZER_MAC +#if SANITIZER_LINUX || SANITIZER_APPLE // Must go after undef _FILE_OFFSET_BITS. #include "sanitizer_glibc_version.h" @@ -51,7 +51,7 @@ #include #include #include -#if !SANITIZER_MAC +#if !SANITIZER_APPLE #include #endif @@ -163,7 +163,7 @@ typedef struct user_fpregs elf_fpregset_t; #include #endif // SANITIZER_LINUX -#if SANITIZER_MAC +#if SANITIZER_APPLE #include #include #include @@ -177,9 +177,9 @@ typedef struct user_fpregs elf_fpregset_t; namespace __sanitizer { unsigned struct_utsname_sz = sizeof(struct utsname); unsigned struct_stat_sz = sizeof(struct stat); -#if !SANITIZER_IOS && !(SANITIZER_MAC && TARGET_CPU_ARM64) +#if !SANITIZER_IOS && !(SANITIZER_APPLE && TARGET_CPU_ARM64) unsigned struct_stat64_sz = sizeof(struct stat64); -#endif // !SANITIZER_IOS && !(SANITIZER_MAC && TARGET_CPU_ARM64) +#endif // !SANITIZER_IOS && !(SANITIZER_APPLE && TARGET_CPU_ARM64) unsigned struct_rusage_sz = sizeof(struct rusage); unsigned struct_tm_sz = sizeof(struct tm); unsigned struct_passwd_sz = sizeof(struct passwd); @@ -204,14 +204,14 @@ namespace __sanitizer { unsigned struct_regex_sz = sizeof(regex_t); unsigned struct_regmatch_sz = sizeof(regmatch_t); -#if (SANITIZER_MAC && !TARGET_CPU_ARM64) && 
!SANITIZER_IOS +#if (SANITIZER_APPLE && !TARGET_CPU_ARM64) && !SANITIZER_IOS unsigned struct_statfs64_sz = sizeof(struct statfs64); -#endif // (SANITIZER_MAC && !TARGET_CPU_ARM64) && !SANITIZER_IOS +#endif // (SANITIZER_APPLE && !TARGET_CPU_ARM64) && !SANITIZER_IOS -#if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC +#if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE unsigned struct_fstab_sz = sizeof(struct fstab); #endif // SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD || - // SANITIZER_MAC + // SANITIZER_APPLE #if !SANITIZER_ANDROID unsigned struct_statfs_sz = sizeof(struct statfs); unsigned struct_sockaddr_sz = sizeof(struct sockaddr); @@ -300,7 +300,7 @@ namespace __sanitizer { int shmctl_shm_stat = (int)SHM_STAT; #endif -#if !SANITIZER_MAC && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_FREEBSD unsigned struct_utmp_sz = sizeof(struct utmp); #endif #if !SANITIZER_ANDROID @@ -508,7 +508,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats); #endif // SANITIZER_GLIBC -#if !SANITIZER_ANDROID && !SANITIZER_MAC +#if !SANITIZER_ANDROID && !SANITIZER_APPLE unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req); unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req); #endif @@ -1067,7 +1067,7 @@ CHECK_SIZE_AND_OFFSET(mmsghdr, msg_len); COMPILER_CHECK(sizeof(__sanitizer_dirent) <= sizeof(dirent)); CHECK_SIZE_AND_OFFSET(dirent, d_ino); -#if SANITIZER_MAC +#if SANITIZER_APPLE CHECK_SIZE_AND_OFFSET(dirent, d_seekoff); #elif SANITIZER_FREEBSD // There is no 'd_off' field on FreeBSD. 
@@ -1249,7 +1249,7 @@ CHECK_SIZE_AND_OFFSET(passwd, pw_shell); CHECK_SIZE_AND_OFFSET(passwd, pw_gecos); #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE CHECK_SIZE_AND_OFFSET(passwd, pw_change); CHECK_SIZE_AND_OFFSET(passwd, pw_expire); CHECK_SIZE_AND_OFFSET(passwd, pw_class); @@ -1317,4 +1317,4 @@ CHECK_TYPE_SIZE(sem_t); COMPILER_CHECK(ARM_VFPREGS_SIZE == ARM_VFPREGS_SIZE_ASAN); #endif -#endif // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_MAC +#endif // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 62a99035db31b5..03fa2f3414b5dd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -14,7 +14,7 @@ #ifndef SANITIZER_PLATFORM_LIMITS_POSIX_H #define SANITIZER_PLATFORM_LIMITS_POSIX_H -#if SANITIZER_LINUX || SANITIZER_MAC +#if SANITIZER_LINUX || SANITIZER_APPLE #include "sanitizer_internal_defs.h" #include "sanitizer_platform.h" @@ -322,7 +322,7 @@ struct __sanitizer_ifaddrs { }; #endif // !SANITIZER_ANDROID -#if SANITIZER_MAC +#if SANITIZER_APPLE typedef unsigned long __sanitizer_pthread_key_t; #else typedef unsigned __sanitizer_pthread_key_t; @@ -349,7 +349,7 @@ struct __sanitizer_passwd { char *pw_passwd; int pw_uid; int pw_gid; -#if SANITIZER_MAC +#if SANITIZER_APPLE long pw_change; char *pw_class; #endif @@ -358,7 +358,7 @@ struct __sanitizer_passwd { #endif char *pw_dir; char *pw_shell; -#if SANITIZER_MAC +#if SANITIZER_APPLE long pw_expire; #endif }; @@ -431,7 +431,7 @@ struct __sanitizer_file_handle { }; #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE struct __sanitizer_msghdr { void *msg_name; unsigned msg_namelen; @@ -472,7 +472,7 @@ struct __sanitizer_mmsghdr { }; #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE struct __sanitizer_dirent { unsigned long long d_ino; unsigned long long d_seekoff; 
@@ -557,7 +557,7 @@ typedef unsigned long __sanitizer_sigset_t[16 / sizeof(unsigned long)]; # else typedef unsigned long __sanitizer_sigset_t; # endif -#elif SANITIZER_MAC +#elif SANITIZER_APPLE typedef unsigned __sanitizer_sigset_t; #elif SANITIZER_LINUX struct __sanitizer_sigset_t { @@ -729,7 +729,7 @@ struct __sanitizer_addrinfo { int ai_family; int ai_socktype; int ai_protocol; -#if SANITIZER_ANDROID || SANITIZER_MAC +#if SANITIZER_ANDROID || SANITIZER_APPLE unsigned ai_addrlen; char *ai_canonname; void *ai_addr; @@ -755,7 +755,7 @@ struct __sanitizer_pollfd { short revents; }; -#if SANITIZER_ANDROID || SANITIZER_MAC +#if SANITIZER_ANDROID || SANITIZER_APPLE typedef unsigned __sanitizer_nfds_t; #else typedef unsigned long __sanitizer_nfds_t; @@ -855,7 +855,7 @@ extern int shmctl_shm_info; extern int shmctl_shm_stat; #endif -#if !SANITIZER_MAC && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_FREEBSD extern unsigned struct_utmp_sz; #endif #if !SANITIZER_ANDROID @@ -870,7 +870,7 @@ struct __sanitizer_ifconf { union { void *ifcu_req; } ifc_ifcu; -#if SANITIZER_MAC +#if SANITIZER_APPLE } __attribute__((packed)); #else }; @@ -1023,7 +1023,7 @@ extern unsigned struct_audio_buf_info_sz; extern unsigned struct_ppp_stats_sz; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID -#if !SANITIZER_ANDROID && !SANITIZER_MAC +#if !SANITIZER_ANDROID && !SANITIZER_APPLE extern unsigned struct_sioc_sg_req_sz; extern unsigned struct_sioc_vif_req_sz; #endif @@ -1464,6 +1464,6 @@ extern const int si_SEGV_ACCERR; #define SIGACTION_SYMNAME sigaction -#endif // SANITIZER_LINUX || SANITIZER_MAC +#endif // SANITIZER_LINUX || SANITIZER_APPLE #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp index 3b330a3705e220..19dfaa9e4e0179 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp @@ -147,7 +147,7 @@ bool 
MprotectReadOnly(uptr addr, uptr size) { return 0 == internal_mprotect((void *)addr, size, PROT_READ); } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE void MprotectMallocZones(void *addr, int prot) {} #endif @@ -240,7 +240,7 @@ bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) { return true; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE void DumpProcessMap() { MemoryMappingLayout proc_maps(/*cache_enabled*/true); const sptr kBufSize = 4095; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp index b6d8c7281bd43f..46e41c669738cd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp @@ -384,7 +384,7 @@ real_pthread_attr_getstack(void *attr, void **addr, size_t *size); } // extern "C" int my_pthread_attr_getstack(void *attr, void **addr, uptr *size) { -#if !SANITIZER_GO && !SANITIZER_MAC +#if !SANITIZER_GO && !SANITIZER_APPLE if (&real_pthread_attr_getstack) return real_pthread_attr_getstack((pthread_attr_t *)attr, addr, (size_t *)size); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h index 055af366ef06e2..19bad158387c54 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h @@ -16,7 +16,7 @@ #include "sanitizer_platform.h" #if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD || \ - SANITIZER_MAC || SANITIZER_SOLARIS || \ + SANITIZER_APPLE || SANITIZER_SOLARIS || \ SANITIZER_FUCHSIA #include "sanitizer_common.h" diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp index 62b2e5e032166d..01d7957763d2d7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp @@ -10,7 +10,7 @@ 
//===----------------------------------------------------------------------===// #include "sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_common.h" #include "sanitizer_placement_new.h" #include "sanitizer_procmaps.h" @@ -376,4 +376,4 @@ void MemoryMappingLayout::DumpListOfModules( } // namespace __sanitizer -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h index 9a5f8fb13a29d2..ee996c3e07eae8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h @@ -33,7 +33,7 @@ static const u32 kStackTraceMax = 255; // Fast unwind is the only option on Mac for now; we will need to // revisit this macro when slow unwind works on Mac, see // https://github.com/google/sanitizers/issues/137 -#if SANITIZER_MAC +#if SANITIZER_APPLE # define SANITIZER_CAN_SLOW_UNWIND 0 #else # define SANITIZER_CAN_SLOW_UNWIND 1 diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp index 5ec30803b7ade3..c87674ff7b76b6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp @@ -12,7 +12,7 @@ #include "sanitizer_platform.h" -#if SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__) || \ +#if SANITIZER_APPLE && (defined(__x86_64__) || defined(__aarch64__) || \ defined(__i386)) #include @@ -176,5 +176,5 @@ PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP( } // namespace __sanitizer -#endif // SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__)) || +#endif // SANITIZER_APPLE && (defined(__x86_64__) || defined(__aarch64__)) || // defined(__i386)) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 8bbd4af0c7c275..9177990418566c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -237,7 +237,7 @@ const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) { class LLVMSymbolizerProcess final : public SymbolizerProcess { public: explicit LLVMSymbolizerProcess(const char *path) - : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_MAC) {} + : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {} private: bool ReachedEndOfOutput(const char *buffer, uptr length) const override { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp index ac811c8a9136d0..f4f2a036a1e71d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_allocator_internal.h" #include "sanitizer_mac.h" @@ -202,4 +202,4 @@ bool AtosSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { } // namespace __sanitizer -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.h index d5abe9d98c1f9c..cea244182907a2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.h @@ -15,7 +15,7 @@ #define SANITIZER_SYMBOLIZER_MAC_H #include "sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_symbolizer_internal.h" @@ -42,6 +42,6 @@ class AtosSymbolizer final : public SymbolizerTool { } // namespace __sanitizer -#endif // 
SANITIZER_MAC +#endif // SANITIZER_APPLE #endif // SANITIZER_SYMBOLIZER_MAC_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index 5f6e4cc3180e9c..6f18d7cbd74700 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -155,7 +155,7 @@ bool SymbolizerProcess::StartSymbolizerSubprocess() { } if (use_posix_spawn_) { -#if SANITIZER_MAC +#if SANITIZER_APPLE fd_t fd = internal_spawn(argv, const_cast(GetEnvP()), &pid); if (fd == kInvalidFd) { Report("WARNING: failed to spawn external symbolizer (errno: %d)\n", @@ -165,9 +165,9 @@ bool SymbolizerProcess::StartSymbolizerSubprocess() { input_fd_ = fd; output_fd_ = fd; -#else // SANITIZER_MAC +#else // SANITIZER_APPLE UNIMPLEMENTED(); -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE } else { fd_t infd[2] = {}, outfd[2] = {}; if (!CreateTwoHighNumberedPipes(infd, outfd)) { @@ -427,13 +427,13 @@ static SymbolizerTool *ChooseExternalSymbolizer(LowLevelAllocator *allocator) { VReport(2, "Using llvm-symbolizer at user-specified path: %s\n", path); return new(*allocator) LLVMSymbolizer(path, allocator); } else if (!internal_strcmp(binary_name, "atos")) { -#if SANITIZER_MAC +#if SANITIZER_APPLE VReport(2, "Using atos at user-specified path: %s\n", path); return new(*allocator) AtosSymbolizer(path, allocator); -#else // SANITIZER_MAC +#else // SANITIZER_APPLE Report("ERROR: Using `atos` is only supported on Darwin.\n"); Die(); -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE } else if (!internal_strcmp(binary_name, "addr2line")) { VReport(2, "Using addr2line at user-specified path: %s\n", path); return new(*allocator) Addr2LinePool(path, allocator); @@ -446,12 +446,12 @@ static SymbolizerTool *ChooseExternalSymbolizer(LowLevelAllocator *allocator) { // Otherwise symbolizer program is unknown, let's search 
$PATH CHECK(path == nullptr); -#if SANITIZER_MAC +#if SANITIZER_APPLE if (const char *found_path = FindPathToBinary("atos")) { VReport(2, "Using atos found at: %s\n", found_path); return new(*allocator) AtosSymbolizer(found_path, allocator); } -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE if (const char *found_path = FindPathToBinary("llvm-symbolizer")) { VReport(2, "Using llvm-symbolizer found at: %s\n", found_path); return new(*allocator) LLVMSymbolizer(found_path, allocator); @@ -488,10 +488,10 @@ static void ChooseSymbolizerTools(IntrusiveList *list, list->push_back(tool); } -#if SANITIZER_MAC +#if SANITIZER_APPLE VReport(2, "Using dladdr symbolizer.\n"); list->push_back(new(*allocator) DlAddrSymbolizer()); -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE } Symbolizer *Symbolizer::PlatformInit() { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp index ac855c8be1c869..d5c028e3640d22 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp @@ -94,7 +94,7 @@ void ReportMmapWriteExec(int prot, int flags) { if ((prot & pflags) != pflags) return; -# if SANITIZER_MAC && defined(MAP_JIT) +# if SANITIZER_APPLE && defined(MAP_JIT) if ((flags & MAP_JIT) == MAP_JIT) return; # endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_syscall_generic.inc b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_generic.inc index 8829985b5b07fd..eb41ff9ccbed50 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_syscall_generic.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_generic.inc @@ -13,13 +13,13 @@ // NetBSD uses libc calls directly #if !SANITIZER_NETBSD -#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_SOLARIS +#if SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_SOLARIS # define SYSCALL(name) SYS_ ## name #else # define SYSCALL(name) __NR_ ## name #endif 
-#if defined(__x86_64__) && (SANITIZER_FREEBSD || SANITIZER_MAC) +#if defined(__x86_64__) && (SANITIZER_FREEBSD || SANITIZER_APPLE) # define internal_syscall __syscall # else # define internal_syscall syscall diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp index b2628dcc4dc1f6..72f025a7d307ad 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp @@ -58,7 +58,7 @@ unwind_backtrace_signal_arch_func unwind_backtrace_signal_arch; #endif uptr Unwind_GetIP(struct _Unwind_Context *ctx) { -#if defined(__arm__) && !SANITIZER_MAC +#if defined(__arm__) && !SANITIZER_APPLE uptr val; _Unwind_VRS_Result res = _Unwind_VRS_Get(ctx, _UVRSC_CORE, 15 /* r15 = PC */, _UVRSD_UINT32, &val); diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp index 579fd9904df4ad..8fbeb96fcb04bf 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp @@ -421,7 +421,7 @@ TEST(SanitizerCommon, InternalScopedStringLargeFormat) { } } -#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_IOS +#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_IOS TEST(SanitizerCommon, GetRandom) { u8 buffer_1[32], buffer_2[32]; for (bool blocking : { false, true }) { diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_libc_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_libc_test.cpp index 5bdf726cdc0291..7afa5e43795bcf 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_libc_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_libc_test.cpp @@ -123,7 +123,7 @@ TEST(SanitizerCommon, FileOps) { fd = OpenFile(tmpfile, WrOnly); ASSERT_NE(fd, kInvalidFd); -#if 
SANITIZER_POSIX && !SANITIZER_MAC +#if SANITIZER_POSIX && !SANITIZER_APPLE EXPECT_EQ(internal_lseek(fd, 0, SEEK_END), 0u); #endif uptr bytes_written = 0; @@ -311,7 +311,7 @@ TEST(SanitizerCommon, InternalWideStringFunctions) { } // FIXME: File manipulations are not yet supported on Windows -#if SANITIZER_POSIX && !SANITIZER_MAC +#if SANITIZER_POSIX && !SANITIZER_APPLE TEST(SanitizerCommon, InternalMmapWithOffset) { char tmpfile[128]; temp_file_name(tmpfile, sizeof(tmpfile), diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_mac_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_mac_test.cpp index b149567949b521..fa96f32026ef25 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_mac_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_mac_test.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_common/sanitizer_mac.h" @@ -89,4 +89,4 @@ TEST(SanitizerMac, GetDarwinKernelVersion) { } // namespace __sanitizer -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cpp index 5cb799f949e6ef..3201867508e705 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_test.cpp @@ -57,7 +57,7 @@ TEST(MemoryMappingLayout, DumpListOfModules) { } TEST(MemoryMapping, LoadedModuleArchAndUUID) { - if (SANITIZER_MAC) { + if (SANITIZER_APPLE) { MemoryMappingLayout memory_mapping(false); const uptr kMaxModules = 100; InternalMmapVector modules; diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h b/compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h index 94e0b50fed3607..54c0b0ba4b409a 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h +++ 
b/compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h @@ -56,7 +56,7 @@ extern const dispatch_block_t _dispatch_data_destructor_munmap; # define DISPATCH_NOESCAPE #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE # define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak_import)) #else # define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak)) diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_flags.inc b/compiler-rt/lib/tsan/rtl-old/tsan_flags.inc index 7954a4307fa1e0..4233702851872b 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_flags.inc +++ b/compiler-rt/lib/tsan/rtl-old/tsan_flags.inc @@ -75,9 +75,9 @@ TSAN_FLAG(int, io_sync, 1, TSAN_FLAG(bool, die_after_fork, true, "Die after multi-threaded fork if the child creates new threads.") TSAN_FLAG(const char *, suppressions, "", "Suppressions file name.") -TSAN_FLAG(bool, ignore_interceptors_accesses, SANITIZER_MAC ? true : false, +TSAN_FLAG(bool, ignore_interceptors_accesses, SANITIZER_APPLE ? true : false, "Ignore reads and writes from all interceptors.") -TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_MAC ? true : false, +TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_APPLE ? true : false, "Interceptors should only detect races when called from instrumented " "modules.") TSAN_FLAG(bool, shared_ptr_interceptor, true, diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp index cbbb7ecb2397e4..88d5f0a481196f 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp +++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp @@ -19,7 +19,7 @@ #include "BlocksRuntime/Block.h" #include "tsan_dispatch_defs.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE # include #endif @@ -225,7 +225,7 @@ DISPATCH_INTERCEPT(dispatch_barrier, true) // dispatch_async_and_wait() and friends were introduced in macOS 10.14. // Linking of these interceptors fails when using an older SDK. 
-#if !SANITIZER_MAC || defined(__MAC_10_14) +#if !SANITIZER_APPLE || defined(__MAC_10_14) // macOS 10.14 is greater than our minimal deployment target. To ensure we // generate a weak reference so the TSan dylib continues to work on older // systems, we need to forward declare the intercepted functions as "weak diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp index ed064150d005cd..1ee47bcd1237e3 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp +++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "interception/interception.h" #include "tsan_interceptors.h" @@ -518,4 +518,4 @@ STDCXX_INTERCEPTOR(void, _ZNSt3__111__call_onceERVmPvPFvS2_E, void *flag, } // namespace __tsan -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp index cf3dc90d96a123..61204f982df885 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp @@ -35,7 +35,7 @@ using namespace __tsan; -#if SANITIZER_FREEBSD || SANITIZER_MAC +#if SANITIZER_FREEBSD || SANITIZER_APPLE #define stdout __stdoutp #define stderr __stderrp #endif @@ -102,14 +102,14 @@ extern __sanitizer_FILE __sF[]; #else extern __sanitizer_FILE *stdout, *stderr; #endif -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_FREEBSD && !SANITIZER_APPLE && !SANITIZER_NETBSD const int PTHREAD_MUTEX_RECURSIVE = 1; const int PTHREAD_MUTEX_RECURSIVE_NP = 1; #else const int PTHREAD_MUTEX_RECURSIVE = 2; const int PTHREAD_MUTEX_RECURSIVE_NP = 2; #endif -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if 
!SANITIZER_FREEBSD && !SANITIZER_APPLE && !SANITIZER_NETBSD const int EPOLL_CTL_ADD = 1; #endif const int SIGILL = 4; @@ -119,7 +119,7 @@ const int SIGFPE = 8; const int SIGSEGV = 11; const int SIGPIPE = 13; const int SIGTERM = 15; -#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD +#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_NETBSD const int SIGBUS = 10; const int SIGSYS = 12; #else @@ -129,7 +129,7 @@ const int SIGSYS = 31; void *const MAP_FAILED = (void*)-1; #if SANITIZER_NETBSD const int PTHREAD_BARRIER_SERIAL_THREAD = 1234567; -#elif !SANITIZER_MAC +#elif !SANITIZER_APPLE const int PTHREAD_BARRIER_SERIAL_THREAD = -1; #endif const int MAP_FIXED = 0x10; @@ -142,7 +142,7 @@ typedef __sanitizer::u16 mode_t; # define F_TLOCK 2 /* Test and lock a region for exclusive use. */ # define F_TEST 3 /* Test a region for other processes locks. */ -#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD +#if SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_NETBSD const int SA_SIGINFO = 0x40; const int SIG_SETMASK = 3; #elif defined(__mips__) @@ -189,7 +189,7 @@ struct InterceptorContext { // in a single cache line if possible (it's accessed in every interceptor). ALIGNED(64) LibIgnore libignore; __sanitizer_sigaction sigactions[kSigCount]; -#if !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD unsigned finalize_key; #endif @@ -456,7 +456,7 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), return res; } -#if !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD static void on_exit_callback_installed_at(int status, void *arg) { ThreadState *thr = cur_thread(); AtExitCtx *ctx = (AtExitCtx*)arg; @@ -548,11 +548,11 @@ static void LongJmp(ThreadState *thr, uptr *env) { // FIXME: put everything below into a common extern "C" block? 
extern "C" void __tsan_setjmp(uptr sp) { SetJmp(cur_thread_init(), sp); } -#if SANITIZER_MAC +#if SANITIZER_APPLE TSAN_INTERCEPTOR(int, setjmp, void *env); TSAN_INTERCEPTOR(int, _setjmp, void *env); TSAN_INTERCEPTOR(int, sigsetjmp, void *env); -#else // SANITIZER_MAC +#else // SANITIZER_APPLE #if SANITIZER_NETBSD #define setjmp_symname __setjmp14 @@ -614,7 +614,7 @@ DEFINE_REAL(int, sigsetjmp_symname, void *env) #if !SANITIZER_NETBSD DEFINE_REAL(int, __sigsetjmp, void *env) #endif -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE #if SANITIZER_NETBSD #define longjmp_symname __longjmp14 @@ -653,7 +653,7 @@ TSAN_INTERCEPTOR(void, _longjmp, uptr *env, int val) { } #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(void*, malloc, uptr size) { if (in_symbolizer()) return InternalAlloc(size); @@ -811,7 +811,7 @@ TSAN_INTERCEPTOR(void*, memalign, uptr align, uptr sz) { #define TSAN_MAYBE_INTERCEPT_MEMALIGN #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(void*, aligned_alloc, uptr align, uptr sz) { if (in_symbolizer()) return InternalAlloc(sz, nullptr, align); @@ -842,7 +842,7 @@ TSAN_INTERCEPTOR(void*, pvalloc, uptr sz) { #define TSAN_MAYBE_INTERCEPT_PVALLOC #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) { if (in_symbolizer()) { void *p = InternalAlloc(sz, nullptr, align); @@ -914,7 +914,7 @@ static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g, // these interceptors with INTERFACE_ATTRIBUTE. // On OS X, we don't support statically linking, so we just use a regular // interceptor. -#if SANITIZER_MAC +#if SANITIZER_APPLE #define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR #else #define STDCXX_INTERCEPTOR(rettype, name, ...) 
\ @@ -957,7 +957,7 @@ void PlatformCleanUpThreadState(ThreadState *thr) { } } // namespace __tsan -#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD && !SANITIZER_FREEBSD static void thread_finalize(void *v) { uptr iter = (uptr)v; if (iter > 1) { @@ -989,7 +989,7 @@ extern "C" void *__tsan_thread_start_func(void *arg) { ThreadState *thr = cur_thread_init(); // Thread-local state is not initialized yet. ScopedIgnoreInterceptors ignore; -#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD && !SANITIZER_FREEBSD ThreadIgnoreBegin(thr, 0); if (pthread_setspecific(interceptor_ctx()->finalize_key, (void *)GetPthreadDestructorIterations())) { @@ -1097,7 +1097,7 @@ TSAN_INTERCEPTOR(int, pthread_detach, void *th) { TSAN_INTERCEPTOR(void, pthread_exit, void *retval) { { SCOPED_INTERCEPTOR_RAW(pthread_exit, retval); -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID CHECK_EQ(thr, &cur_thread_placeholder); #endif } @@ -1266,7 +1266,7 @@ INTERCEPTOR(int, pthread_cond_clockwait, void *c, void *m, #define TSAN_MAYBE_PTHREAD_COND_CLOCKWAIT #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE INTERCEPTOR(int, pthread_cond_timedwait_relative_np, void *c, void *m, void *reltime) { void *cond = init_cond(c); @@ -1343,7 +1343,7 @@ TSAN_INTERCEPTOR(int, pthread_mutex_trylock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_mutex_timedlock, void *m, void *abstime) { SCOPED_TSAN_INTERCEPTOR(pthread_mutex_timedlock, m, abstime); int res = REAL(pthread_mutex_timedlock)(m, abstime); @@ -1354,7 +1354,7 @@ TSAN_INTERCEPTOR(int, pthread_mutex_timedlock, void *m, void *abstime) { } #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_spin_init, void *m, int pshared) { SCOPED_TSAN_INTERCEPTOR(pthread_spin_init, m, pshared); int res = REAL(pthread_spin_init)(m, pshared); @@ -1437,7 +1437,7 
@@ TSAN_INTERCEPTOR(int, pthread_rwlock_tryrdlock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_rwlock_timedrdlock, void *m, void *abstime) { SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedrdlock, m, abstime); int res = REAL(pthread_rwlock_timedrdlock)(m, abstime); @@ -1467,7 +1467,7 @@ TSAN_INTERCEPTOR(int, pthread_rwlock_trywrlock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_rwlock_timedwrlock, void *m, void *abstime) { SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedwrlock, m, abstime); int res = REAL(pthread_rwlock_timedwrlock)(m, abstime); @@ -1485,7 +1485,7 @@ TSAN_INTERCEPTOR(int, pthread_rwlock_unlock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_barrier_init, void *b, void *a, unsigned count) { SCOPED_TSAN_INTERCEPTOR(pthread_barrier_init, b, a, count); MemoryAccess(thr, pc, (uptr)b, 1, kAccessWrite); @@ -1519,7 +1519,7 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) { return errno_EINVAL; atomic_uint32_t *a; - if (SANITIZER_MAC) + if (SANITIZER_APPLE) a = static_cast((void *)((char *)o + sizeof(long_t))); else if (SANITIZER_NETBSD) a = static_cast @@ -1529,7 +1529,7 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) { // Mac OS X appears to use pthread_once() where calling BlockingRegion hooks // result in crashes due to too little stack space. 
- if (guard_acquire(thr, pc, a, !SANITIZER_MAC)) { + if (guard_acquire(thr, pc, a, !SANITIZER_APPLE)) { (*f)(); guard_release(thr, pc, a, kGuardDone); } @@ -1549,7 +1549,7 @@ TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) { #endif TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) { -#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID || SANITIZER_NETBSD +#if SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_ANDROID || SANITIZER_NETBSD SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf); if (fd > 0) FdAccess(thr, pc, fd); @@ -1656,7 +1656,7 @@ TSAN_INTERCEPTOR(int, dup2, int oldfd, int newfd) { return newfd2; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, dup3, int oldfd, int newfd, int flags) { SCOPED_TSAN_INTERCEPTOR(dup3, oldfd, newfd, flags); int newfd2 = REAL(dup3)(oldfd, newfd, flags); @@ -1805,7 +1805,7 @@ TSAN_INTERCEPTOR(int, pipe, int *pipefd) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pipe2, int *pipefd, int flags) { SCOPED_TSAN_INTERCEPTOR(pipe2, pipefd, flags); int res = REAL(pipe2)(pipefd, flags); @@ -2257,7 +2257,7 @@ TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags, } #endif -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID typedef int (*dl_iterate_phdr_cb_t)(__sanitizer_dl_phdr_info *info, SIZE_T size, void *data); struct dl_iterate_phdr_data { @@ -2314,7 +2314,7 @@ struct TsanInterceptorContext { const uptr pc; }; -#if !SANITIZER_MAC +#if !SANITIZER_APPLE static void HandleRecvmsg(ThreadState *thr, uptr pc, __sanitizer_msghdr *msg) { int fds[64]; @@ -2454,7 +2454,7 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc, off); \ } while (false) -#if !SANITIZER_MAC +#if !SANITIZER_APPLE #define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg) \ HandleRecvmsg(((TsanInterceptorContext *)ctx)->thr, \ ((TsanInterceptorContext *)ctx)->pc, msg) @@ -2515,7 +2515,7 @@ int sigaction_impl(int sig, const __sanitizer_sigaction *act, 
sigactions[sig].sa_flags = *(volatile int const *)&act->sa_flags; internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask, sizeof(sigactions[sig].sa_mask)); -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_FREEBSD && !SANITIZER_APPLE && !SANITIZER_NETBSD sigactions[sig].sa_restorer = act->sa_restorer; #endif internal_memcpy(&newact, act, sizeof(newact)); @@ -2562,7 +2562,7 @@ struct ScopedSyscall { } }; -#if !SANITIZER_FREEBSD && !SANITIZER_MAC +#if !SANITIZER_FREEBSD && !SANITIZER_APPLE static void syscall_access_range(uptr pc, uptr p, uptr s, bool write) { TSAN_SYSCALL(); MemoryAccessRange(thr, pc, p, s, write); @@ -2746,7 +2746,7 @@ static void finalize(void *arg) { Die(); } -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID static void unreachable() { Report("FATAL: ThreadSanitizer: unreachable called\n"); Die(); @@ -2757,7 +2757,7 @@ static void unreachable() { SANITIZER_WEAK_ATTRIBUTE void InitializeLibdispatchInterceptors() {} void InitializeInterceptors() { -#if !SANITIZER_MAC +#if !SANITIZER_APPLE // We need to setup it early, because functions like dlsym() can call it. REAL(memset) = internal_memset; REAL(memcpy) = internal_memcpy; @@ -2769,7 +2769,7 @@ void InitializeInterceptors() { InitializeSignalInterceptors(); InitializeLibdispatchInterceptors(); -#if !SANITIZER_MAC +#if !SANITIZER_APPLE // We can not use TSAN_INTERCEPT to get setjmp addr, // because it does &setjmp and setjmp is not present in some versions of libc. using __interception::InterceptFunction; @@ -2922,7 +2922,7 @@ void InitializeInterceptors() { TSAN_MAYBE_INTERCEPT__LWP_EXIT; TSAN_MAYBE_INTERCEPT_THR_EXIT; -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID // Need to setup it, because interceptors check that the function is resolved. // But atexit is emitted directly into the module, so can't be resolved. 
REAL(atexit) = (int(*)(void(*)()))unreachable; @@ -2937,7 +2937,7 @@ void InitializeInterceptors() { Die(); } -#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD && !SANITIZER_FREEBSD if (pthread_key_create(&interceptor_ctx()->finalize_key, &thread_finalize)) { Printf("ThreadSanitizer: failed to create thread key\n"); Die(); diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp index 0e861bf1f96255..ac844ae8a44a87 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp +++ b/compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_common/sanitizer_errno.h" #include "tsan_interceptors.h" diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_platform.h b/compiler-rt/lib/tsan/rtl-old/tsan_platform.h index 7ff0acace8f6d2..4b4812af731896 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl-old/tsan_platform.h @@ -648,7 +648,7 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { return Func::template Apply(arg); # endif #else // SANITIZER_GO -# if defined(__x86_64__) || SANITIZER_IOSSIM || SANITIZER_MAC && !SANITIZER_IOS +# if defined(__x86_64__) || SANITIZER_IOSSIM || SANITIZER_APPLE && !SANITIZER_IOS return Func::template Apply(arg); # elif defined(__aarch64__) && defined(__APPLE__) return Func::template Apply(arg); diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp index 1465f9953c1935..e9c0f3b9192496 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp +++ b/compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" 
-#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" @@ -323,4 +323,4 @@ int call_pthread_cancel_with_cleanup(int (*fn)(void *arg), } // namespace __tsan -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_report.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_report.cpp index a926c3761ccf91..91f4d0cfd3553f 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_report.cpp +++ b/compiler-rt/lib/tsan/rtl-old/tsan_report.cpp @@ -98,7 +98,7 @@ static const char *ReportTypeString(ReportType typ, uptr tag) { UNREACHABLE("missing case"); } -#if SANITIZER_MAC +#if SANITIZER_APPLE static const char *const kInterposedFunctionPrefix = "wrap_"; #else static const char *const kInterposedFunctionPrefix = "__interceptor_"; diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp index 5b46d5f5e2bc75..5a8d877af724fa 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp @@ -44,7 +44,7 @@ void (*on_initialize)(void); int (*on_finalize)(int); #endif -#if !SANITIZER_GO && !SANITIZER_MAC +#if !SANITIZER_GO && !SANITIZER_APPLE __attribute__((tls_model("initial-exec"))) THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED( SANITIZER_CACHE_LINE_SIZE); diff --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl.h b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.h index c71b27e1cbf58f..8958f9dd554123 100644 --- a/compiler-rt/lib/tsan/rtl-old/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.h @@ -222,7 +222,7 @@ struct ThreadState { } ALIGNED(SANITIZER_CACHE_LINE_SIZE); #if !SANITIZER_GO -#if SANITIZER_MAC || SANITIZER_ANDROID +#if SANITIZER_APPLE || SANITIZER_ANDROID ThreadState *cur_thread(); void set_cur_thread(ThreadState *thr); void cur_thread_finalize(); @@ -243,7 +243,7 @@ inline void set_cur_thread(ThreadState *thr) { reinterpret_cast(cur_thread_placeholder)->current = thr; } 
inline void cur_thread_finalize() { } -# endif // SANITIZER_MAC || SANITIZER_ANDROID +# endif // SANITIZER_APPLE || SANITIZER_ANDROID #endif // SANITIZER_GO class ThreadContext final : public ThreadContextBase { @@ -586,7 +586,7 @@ void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c); // The trick is that the call preserves all registers and the compiler // does not treat it as a call. // If it does not work for you, use normal call. -#if !SANITIZER_DEBUG && defined(__x86_64__) && !SANITIZER_MAC +#if !SANITIZER_DEBUG && defined(__x86_64__) && !SANITIZER_APPLE // The caller may not create the stack frame for itself at all, // so we create a reserve stack frame for it (1024b must be enough). #define HACKY_CALL(f) \ diff --git a/compiler-rt/lib/tsan/rtl/tsan_dispatch_defs.h b/compiler-rt/lib/tsan/rtl/tsan_dispatch_defs.h index 94e0b50fed3607..54c0b0ba4b409a 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_dispatch_defs.h +++ b/compiler-rt/lib/tsan/rtl/tsan_dispatch_defs.h @@ -56,7 +56,7 @@ extern const dispatch_block_t _dispatch_data_destructor_munmap; # define DISPATCH_NOESCAPE #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE # define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak_import)) #else # define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak)) diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.inc b/compiler-rt/lib/tsan/rtl/tsan_flags.inc index 32cf3bbf152dde..c6a5fb8bc98404 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_flags.inc +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.inc @@ -74,9 +74,9 @@ TSAN_FLAG(int, io_sync, 1, TSAN_FLAG(bool, die_after_fork, true, "Die after multi-threaded fork if the child creates new threads.") TSAN_FLAG(const char *, suppressions, "", "Suppressions file name.") -TSAN_FLAG(bool, ignore_interceptors_accesses, SANITIZER_MAC ? true : false, +TSAN_FLAG(bool, ignore_interceptors_accesses, SANITIZER_APPLE ? 
true : false, "Ignore reads and writes from all interceptors.") -TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_MAC ? true : false, +TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_APPLE ? true : false, "Interceptors should only detect races when called from instrumented " "modules.") TSAN_FLAG(bool, shared_ptr_interceptor, true, diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp index cbbb7ecb2397e4..88d5f0a481196f 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp @@ -19,7 +19,7 @@ #include "BlocksRuntime/Block.h" #include "tsan_dispatch_defs.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE # include #endif @@ -225,7 +225,7 @@ DISPATCH_INTERCEPT(dispatch_barrier, true) // dispatch_async_and_wait() and friends were introduced in macOS 10.14. // Linking of these interceptors fails when using an older SDK. -#if !SANITIZER_MAC || defined(__MAC_10_14) +#if !SANITIZER_APPLE || defined(__MAC_10_14) // macOS 10.14 is greater than our minimal deployment target. 
To ensure we // generate a weak reference so the TSan dylib continues to work on older // systems, we need to forward declare the intercepted functions as "weak diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp index ed064150d005cd..1ee47bcd1237e3 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "interception/interception.h" #include "tsan_interceptors.h" @@ -518,4 +518,4 @@ STDCXX_INTERCEPTOR(void, _ZNSt3__111__call_onceERVmPvPFvS2_E, void *flag, } // namespace __tsan -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 60ca9633868eab..17f6b1f472d8fc 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -35,7 +35,7 @@ using namespace __tsan; -#if SANITIZER_FREEBSD || SANITIZER_MAC +#if SANITIZER_FREEBSD || SANITIZER_APPLE #define stdout __stdoutp #define stderr __stderrp #endif @@ -102,14 +102,14 @@ extern __sanitizer_FILE __sF[]; #else extern __sanitizer_FILE *stdout, *stderr; #endif -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_FREEBSD && !SANITIZER_APPLE && !SANITIZER_NETBSD const int PTHREAD_MUTEX_RECURSIVE = 1; const int PTHREAD_MUTEX_RECURSIVE_NP = 1; #else const int PTHREAD_MUTEX_RECURSIVE = 2; const int PTHREAD_MUTEX_RECURSIVE_NP = 2; #endif -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_FREEBSD && !SANITIZER_APPLE && !SANITIZER_NETBSD const int EPOLL_CTL_ADD = 1; #endif const int SIGILL = 4; @@ -119,7 +119,7 @@ const int SIGFPE = 8; const int SIGSEGV = 11; const int 
SIGPIPE = 13; const int SIGTERM = 15; -#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD +#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_NETBSD const int SIGBUS = 10; const int SIGSYS = 12; #else @@ -129,7 +129,7 @@ const int SIGSYS = 31; void *const MAP_FAILED = (void*)-1; #if SANITIZER_NETBSD const int PTHREAD_BARRIER_SERIAL_THREAD = 1234567; -#elif !SANITIZER_MAC +#elif !SANITIZER_APPLE const int PTHREAD_BARRIER_SERIAL_THREAD = -1; #endif const int MAP_FIXED = 0x10; @@ -142,7 +142,7 @@ typedef __sanitizer::u16 mode_t; # define F_TLOCK 2 /* Test and lock a region for exclusive use. */ # define F_TEST 3 /* Test a region for other processes locks. */ -#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD +#if SANITIZER_FREEBSD || SANITIZER_APPLE || SANITIZER_NETBSD const int SA_SIGINFO = 0x40; const int SIG_SETMASK = 3; #elif defined(__mips__) @@ -189,7 +189,7 @@ struct InterceptorContext { // in a single cache line if possible (it's accessed in every interceptor). ALIGNED(64) LibIgnore libignore; __sanitizer_sigaction sigactions[kSigCount]; -#if !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD unsigned finalize_key; #endif @@ -461,7 +461,7 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), return res; } -#if !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD static void on_exit_callback_installed_at(int status, void *arg) { ThreadState *thr = cur_thread(); AtExitCtx *ctx = (AtExitCtx*)arg; @@ -553,11 +553,11 @@ static void LongJmp(ThreadState *thr, uptr *env) { // FIXME: put everything below into a common extern "C" block? 
extern "C" void __tsan_setjmp(uptr sp) { SetJmp(cur_thread_init(), sp); } -#if SANITIZER_MAC +#if SANITIZER_APPLE TSAN_INTERCEPTOR(int, setjmp, void *env); TSAN_INTERCEPTOR(int, _setjmp, void *env); TSAN_INTERCEPTOR(int, sigsetjmp, void *env); -#else // SANITIZER_MAC +#else // SANITIZER_APPLE #if SANITIZER_NETBSD #define setjmp_symname __setjmp14 @@ -619,7 +619,7 @@ DEFINE_REAL(int, sigsetjmp_symname, void *env) #if !SANITIZER_NETBSD DEFINE_REAL(int, __sigsetjmp, void *env) #endif -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE #if SANITIZER_NETBSD #define longjmp_symname __longjmp14 @@ -658,7 +658,7 @@ TSAN_INTERCEPTOR(void, _longjmp, uptr *env, int val) { } #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(void*, malloc, uptr size) { if (in_symbolizer()) return InternalAlloc(size); @@ -816,7 +816,7 @@ TSAN_INTERCEPTOR(void*, memalign, uptr align, uptr sz) { #define TSAN_MAYBE_INTERCEPT_MEMALIGN #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(void*, aligned_alloc, uptr align, uptr sz) { if (in_symbolizer()) return InternalAlloc(sz, nullptr, align); @@ -847,7 +847,7 @@ TSAN_INTERCEPTOR(void*, pvalloc, uptr sz) { #define TSAN_MAYBE_INTERCEPT_PVALLOC #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) { if (in_symbolizer()) { void *p = InternalAlloc(sz, nullptr, align); @@ -919,7 +919,7 @@ static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g, // these interceptors with INTERFACE_ATTRIBUTE. // On OS X, we don't support statically linking, so we just use a regular // interceptor. -#if SANITIZER_MAC +#if SANITIZER_APPLE #define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR #else #define STDCXX_INTERCEPTOR(rettype, name, ...) 
\ @@ -962,7 +962,7 @@ void PlatformCleanUpThreadState(ThreadState *thr) { } } // namespace __tsan -#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD && !SANITIZER_FREEBSD static void thread_finalize(void *v) { uptr iter = (uptr)v; if (iter > 1) { @@ -994,7 +994,7 @@ extern "C" void *__tsan_thread_start_func(void *arg) { ThreadState *thr = cur_thread_init(); // Thread-local state is not initialized yet. ScopedIgnoreInterceptors ignore; -#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD && !SANITIZER_FREEBSD ThreadIgnoreBegin(thr, 0); if (pthread_setspecific(interceptor_ctx()->finalize_key, (void *)GetPthreadDestructorIterations())) { @@ -1102,7 +1102,7 @@ TSAN_INTERCEPTOR(int, pthread_detach, void *th) { TSAN_INTERCEPTOR(void, pthread_exit, void *retval) { { SCOPED_INTERCEPTOR_RAW(pthread_exit, retval); -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID CHECK_EQ(thr, &cur_thread_placeholder); #endif } @@ -1271,7 +1271,7 @@ INTERCEPTOR(int, pthread_cond_clockwait, void *c, void *m, #define TSAN_MAYBE_PTHREAD_COND_CLOCKWAIT #endif -#if SANITIZER_MAC +#if SANITIZER_APPLE INTERCEPTOR(int, pthread_cond_timedwait_relative_np, void *c, void *m, void *reltime) { void *cond = init_cond(c); @@ -1348,7 +1348,7 @@ TSAN_INTERCEPTOR(int, pthread_mutex_trylock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_mutex_timedlock, void *m, void *abstime) { SCOPED_TSAN_INTERCEPTOR(pthread_mutex_timedlock, m, abstime); int res = REAL(pthread_mutex_timedlock)(m, abstime); @@ -1359,7 +1359,7 @@ TSAN_INTERCEPTOR(int, pthread_mutex_timedlock, void *m, void *abstime) { } #endif -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_spin_init, void *m, int pshared) { SCOPED_TSAN_INTERCEPTOR(pthread_spin_init, m, pshared); int res = REAL(pthread_spin_init)(m, pshared); @@ -1442,7 +1442,7 
@@ TSAN_INTERCEPTOR(int, pthread_rwlock_tryrdlock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_rwlock_timedrdlock, void *m, void *abstime) { SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedrdlock, m, abstime); int res = REAL(pthread_rwlock_timedrdlock)(m, abstime); @@ -1472,7 +1472,7 @@ TSAN_INTERCEPTOR(int, pthread_rwlock_trywrlock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_rwlock_timedwrlock, void *m, void *abstime) { SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedwrlock, m, abstime); int res = REAL(pthread_rwlock_timedwrlock)(m, abstime); @@ -1490,7 +1490,7 @@ TSAN_INTERCEPTOR(int, pthread_rwlock_unlock, void *m) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pthread_barrier_init, void *b, void *a, unsigned count) { SCOPED_TSAN_INTERCEPTOR(pthread_barrier_init, b, a, count); MemoryAccess(thr, pc, (uptr)b, 1, kAccessWrite); @@ -1524,7 +1524,7 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) { return errno_EINVAL; atomic_uint32_t *a; - if (SANITIZER_MAC) + if (SANITIZER_APPLE) a = static_cast((void *)((char *)o + sizeof(long_t))); else if (SANITIZER_NETBSD) a = static_cast @@ -1534,7 +1534,7 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) { // Mac OS X appears to use pthread_once() where calling BlockingRegion hooks // result in crashes due to too little stack space. 
- if (guard_acquire(thr, pc, a, !SANITIZER_MAC)) { + if (guard_acquire(thr, pc, a, !SANITIZER_APPLE)) { (*f)(); guard_release(thr, pc, a, kGuardDone); } @@ -1661,7 +1661,7 @@ TSAN_INTERCEPTOR(int, dup2, int oldfd, int newfd) { return newfd2; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, dup3, int oldfd, int newfd, int flags) { SCOPED_TSAN_INTERCEPTOR(dup3, oldfd, newfd, flags); int newfd2 = REAL(dup3)(oldfd, newfd, flags); @@ -1805,7 +1805,7 @@ TSAN_INTERCEPTOR(int, pipe, int *pipefd) { return res; } -#if !SANITIZER_MAC +#if !SANITIZER_APPLE TSAN_INTERCEPTOR(int, pipe2, int *pipefd, int flags) { SCOPED_TSAN_INTERCEPTOR(pipe2, pipefd, flags); int res = REAL(pipe2)(pipefd, flags); @@ -2263,7 +2263,7 @@ TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags, } #endif -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID typedef int (*dl_iterate_phdr_cb_t)(__sanitizer_dl_phdr_info *info, SIZE_T size, void *data); struct dl_iterate_phdr_data { @@ -2320,7 +2320,7 @@ struct TsanInterceptorContext { const uptr pc; }; -#if !SANITIZER_MAC +#if !SANITIZER_APPLE static void HandleRecvmsg(ThreadState *thr, uptr pc, __sanitizer_msghdr *msg) { int fds[64]; @@ -2460,7 +2460,7 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc, off); \ } while (false) -#if !SANITIZER_MAC +#if !SANITIZER_APPLE #define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg) \ HandleRecvmsg(((TsanInterceptorContext *)ctx)->thr, \ ((TsanInterceptorContext *)ctx)->pc, msg) @@ -2521,7 +2521,7 @@ int sigaction_impl(int sig, const __sanitizer_sigaction *act, sigactions[sig].sa_flags = *(volatile int const *)&act->sa_flags; internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask, sizeof(sigactions[sig].sa_mask)); -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_FREEBSD && !SANITIZER_APPLE && !SANITIZER_NETBSD sigactions[sig].sa_restorer = act->sa_restorer; #endif internal_memcpy(&newact, act, sizeof(newact)); @@ 
-2568,7 +2568,7 @@ struct ScopedSyscall { } }; -#if !SANITIZER_FREEBSD && !SANITIZER_MAC +#if !SANITIZER_FREEBSD && !SANITIZER_APPLE static void syscall_access_range(uptr pc, uptr p, uptr s, bool write) { TSAN_SYSCALL(); MemoryAccessRange(thr, pc, p, s, write); @@ -2772,7 +2772,7 @@ static void finalize(void *arg) { Die(); } -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID static void unreachable() { Report("FATAL: ThreadSanitizer: unreachable called\n"); Die(); @@ -2783,7 +2783,7 @@ static void unreachable() { SANITIZER_WEAK_ATTRIBUTE void InitializeLibdispatchInterceptors() {} void InitializeInterceptors() { -#if !SANITIZER_MAC +#if !SANITIZER_APPLE // We need to setup it early, because functions like dlsym() can call it. REAL(memset) = internal_memset; REAL(memcpy) = internal_memcpy; @@ -2795,7 +2795,7 @@ void InitializeInterceptors() { InitializeSignalInterceptors(); InitializeLibdispatchInterceptors(); -#if !SANITIZER_MAC +#if !SANITIZER_APPLE // We can not use TSAN_INTERCEPT to get setjmp addr, // because it does &setjmp and setjmp is not present in some versions of libc. using __interception::InterceptFunction; @@ -2948,7 +2948,7 @@ void InitializeInterceptors() { TSAN_MAYBE_INTERCEPT__LWP_EXIT; TSAN_MAYBE_INTERCEPT_THR_EXIT; -#if !SANITIZER_MAC && !SANITIZER_ANDROID +#if !SANITIZER_APPLE && !SANITIZER_ANDROID // Need to setup it, because interceptors check that the function is resolved. // But atexit is emitted directly into the module, so can't be resolved. 
REAL(atexit) = (int(*)(void(*)()))unreachable; @@ -2963,7 +2963,7 @@ void InitializeInterceptors() { Die(); } -#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD +#if !SANITIZER_APPLE && !SANITIZER_NETBSD && !SANITIZER_FREEBSD if (pthread_key_create(&interceptor_ctx()->finalize_key, &thread_finalize)) { Printf("ThreadSanitizer: failed to create thread key\n"); Die(); diff --git a/compiler-rt/lib/tsan/rtl/tsan_malloc_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_malloc_mac.cpp index 0e861bf1f96255..ac844ae8a44a87 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_malloc_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_malloc_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_common/sanitizer_errno.h" #include "tsan_interceptors.h" diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index 233bf0a39df078..12643f7f512ca3 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -587,7 +587,7 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { #else // SANITIZER_GO # if SANITIZER_IOS && !SANITIZER_IOSSIM return Func::template Apply(arg); -# elif defined(__x86_64__) || SANITIZER_MAC +# elif defined(__x86_64__) || SANITIZER_APPLE return Func::template Apply(arg); # elif defined(__aarch64__) switch (vmaSize) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp index 44b98d46cfbcd2..68ce5f83bdbd2e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_MAC +#if SANITIZER_APPLE #include "sanitizer_common/sanitizer_atomic.h" #include 
"sanitizer_common/sanitizer_common.h" @@ -316,4 +316,4 @@ int call_pthread_cancel_with_cleanup(int (*fn)(void *arg), } // namespace __tsan -#endif // SANITIZER_MAC +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_report.cpp index 9f151279b601dd..3b0bddc33a4348 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_report.cpp @@ -98,7 +98,7 @@ static const char *ReportTypeString(ReportType typ, uptr tag) { UNREACHABLE("missing case"); } -#if SANITIZER_MAC +#if SANITIZER_APPLE static const char *const kInterposedFunctionPrefix = "wrap_"; #else static const char *const kInterposedFunctionPrefix = "__interceptor_"; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index 1d6fc725266266..b1556149033be3 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -45,7 +45,7 @@ void (*on_initialize)(void); int (*on_finalize)(int); #endif -#if !SANITIZER_GO && !SANITIZER_MAC +#if !SANITIZER_GO && !SANITIZER_APPLE __attribute__((tls_model("initial-exec"))) THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED( SANITIZER_CACHE_LINE_SIZE); diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index b472c0f77df18f..c8d3c48a0c0ced 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -235,7 +235,7 @@ struct ThreadState { } ALIGNED(SANITIZER_CACHE_LINE_SIZE); #if !SANITIZER_GO -#if SANITIZER_MAC || SANITIZER_ANDROID +#if SANITIZER_APPLE || SANITIZER_ANDROID ThreadState *cur_thread(); void set_cur_thread(ThreadState *thr); void cur_thread_finalize(); @@ -256,7 +256,7 @@ inline void set_cur_thread(ThreadState *thr) { reinterpret_cast(cur_thread_placeholder)->current = thr; } inline void cur_thread_finalize() { } -# endif // SANITIZER_MAC || SANITIZER_ANDROID +# endif // SANITIZER_APPLE || SANITIZER_ANDROID #endif // 
SANITIZER_GO class ThreadContext final : public ThreadContextBase { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp index 86c8b3764cc719..77488f84328542 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp @@ -53,7 +53,7 @@ static void CollectThreadLeaks(ThreadContextBase *tctx_base, void *arg) { // Disabled on Mac because lldb test TestTsanBasic fails: // https://reviews.llvm.org/D112603#3163158 -#if !SANITIZER_GO && !SANITIZER_MAC +#if !SANITIZER_GO && !SANITIZER_APPLE static void ReportIgnoresEnabled(ThreadContext *tctx, IgnoreSet *set) { if (tctx->tid == kMainTid) { Printf("ThreadSanitizer: main thread finished with ignores enabled\n"); diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp index 40042bf3a9030b..5a93a0d7fc2d9a 100644 --- a/compiler-rt/lib/ubsan/ubsan_value.cpp +++ b/compiler-rt/lib/ubsan/ubsan_value.cpp @@ -18,7 +18,7 @@ #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_mutex.h" -// TODO(dliew): Prefer '__APPLE__' here over 'SANITIZER_MAC', as the latter is +// TODO(dliew): Prefer '__APPLE__' here over 'SANITIZER_APPLE', as the latter is // unclear. rdar://58124919 tracks using a more obviously portable guard. 
#if defined(__APPLE__) #include diff --git a/compiler-rt/lib/xray/xray_basic_logging.cpp b/compiler-rt/lib/xray/xray_basic_logging.cpp index 6e83252a05160d..6ac5417bef7542 100644 --- a/compiler-rt/lib/xray/xray_basic_logging.cpp +++ b/compiler-rt/lib/xray/xray_basic_logging.cpp @@ -18,7 +18,7 @@ #include #include #include -#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE #include #endif #include diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp index 00ba5fe4a52b8a..f22a31b95686d0 100644 --- a/compiler-rt/lib/xray/xray_init.cpp +++ b/compiler-rt/lib/xray/xray_init.cpp @@ -27,7 +27,7 @@ extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak)); extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak)); extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak)); -#if SANITIZER_MAC +#if SANITIZER_APPLE // HACK: This is a temporary workaround to make XRay build on // Darwin, but it will probably not work at runtime. const XRaySledEntry __start_xray_instr_map[] = {}; diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp index 669d2e85bede5a..1bf241c1223fcd 100644 --- a/compiler-rt/lib/xray/xray_x86_64.cpp +++ b/compiler-rt/lib/xray/xray_x86_64.cpp @@ -6,7 +6,7 @@ #include "xray_defs.h" #include "xray_interface_internal.h" -#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE #include #include #elif SANITIZER_FUCHSIA @@ -82,11 +82,11 @@ uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { } return TSCFrequency == -1 ? 
0 : static_cast(TSCFrequency); } -#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC +#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { long long TSCFrequency = -1; size_t tscfreqsz = sizeof(TSCFrequency); -#if SANITIZER_MAC +#if SANITIZER_APPLE if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, &tscfreqsz, NULL, 0) != -1) { From 6eb9e0f5ebb94cccf52ea27aa342001b84c1c8b1 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Tue, 24 May 2022 19:04:54 +0000 Subject: [PATCH 391/908] [clang][dataflow] Make limit on fixpoint-algorithm iterations proportional to size of CFG. Currently, the maximum number of iterations of the loop for finding the fixpoint of the dataflow analysis is set at 2^16. When things go wrong in an analysis, this can be far too large. This patch changes the limit to be proportional to the size of the CFG, which will generally be far smaller than 2^16 (while still maintaining 2^16 as the absolute limit). Differential Revision: https://reviews.llvm.org/D126316 --- .../FlowSensitive/TypeErasedDataflowAnalysis.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index aebc9dd6f6faca..ee1723dcd8cc85 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include #include #include #include @@ -330,10 +331,17 @@ runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx, // converging. To limit the damage (infinite loops) that these bugs can cause, // limit the number of iterations. // FIXME: Consider making the maximum number of iterations configurable. 
+ // FIXME: Consider restricting the number of backedges followed, rather than + // iterations. // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number // of iterations, number of functions that time out, etc. + static constexpr uint32_t MaxAverageVisitsPerBlock = 4; + static constexpr uint32_t AbsoluteMaxIterations = 1 << 16; + const uint32_t RelativeMaxIterations = + MaxAverageVisitsPerBlock * BlockStates.size(); + const uint32_t MaxIterations = + std::min(RelativeMaxIterations, AbsoluteMaxIterations); uint32_t Iterations = 0; - static constexpr uint32_t MaxIterations = 1 << 16; while (const CFGBlock *Block = Worklist.dequeue()) { if (++Iterations > MaxIterations) { return llvm::createStringError(std::errc::timed_out, From cd2a8df8916505de9df68b9a6c1700f71e5ebf44 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 19 May 2022 14:17:17 -0700 Subject: [PATCH 392/908] [flang] Don't prematurely resolve subprogram names Name resolution for subprograms checks whether the name is already present in the enclosing scope as a generic interface, so that the case of a generic with the same name as one of its specifics can be handled. The particular means by which the enclosing scope is searched for the name would resolve the name (bind a symbol to it) as a side effect. This turns out to be the wrong thing to do when the subprogram is going to have its symbol created in another scope to cope with its BIND(C,NAME="name") name, and its Fortran name is already present in the enclosing scope for a subprogram of the same name but without BIND(C,NAME="name"). A very long explanation for a one-line fix, sorry. In short, change the code to look up the name but not resolve it at that point. 
Differential Revision: https://reviews.llvm.org/D126149 --- flang/lib/Semantics/resolve-names.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index cbb657040783e6..d7eef39eb09a7c 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3706,7 +3706,8 @@ void SubprogramVisitor::PushBlockDataScope(const parser::Name &name) { // If name is a generic, return specific subprogram with the same name. Symbol *SubprogramVisitor::GetSpecificFromGeneric(const parser::Name &name) { - if (auto *symbol{FindSymbol(name)}) { + // Search for the name but don't resolve it + if (auto *symbol{currScope().FindSymbol(name.source)}) { if (auto *details{symbol->detailsIf()}) { // found generic, want subprogram auto *specific{details->specific()}; From 1b34f1e996565bc5e4f2be14b89f881f8fe0f3b9 Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Tue, 24 May 2022 13:38:04 -0700 Subject: [PATCH 393/908] [clang][test] mark tests added in ee8524087c78 as unsupported on AIX These tests are failing on the PPC64 AIX CI bot, but it's unclear why, as they pass on other CI jobs. I marked them as unsupported on AIX for now while investigating the failure. 
--- clang/test/Index/index-concept-kind.cpp | 2 +- clang/test/Index/index-concepts.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Index/index-concept-kind.cpp b/clang/test/Index/index-concept-kind.cpp index 7aaf814f5f989a..f33fc6bc5cc0d2 100644 --- a/clang/test/Index/index-concept-kind.cpp +++ b/clang/test/Index/index-concept-kind.cpp @@ -1,5 +1,5 @@ // RUN: c-index-test -index-file %s -std=gnu++20 | FileCheck %s - +// UNSUPPORTED: aix template concept LargeType = sizeof(T) > 8; // CHECK: [indexDeclaration]: kind: concept | name: LargeType | USR: c:@CT@LargeType | lang: C | cursor: ConceptDecl=LargeType:[[@LINE-1]]:9 (Definition) | loc: [[@LINE-1]]:9 | semantic-container: [TU] | lexical-container: [TU] | isRedecl: 0 | isDef: 1 | isContainer: 0 | isImplicit: 0 diff --git a/clang/test/Index/index-concepts.cpp b/clang/test/Index/index-concepts.cpp index 9887e9fdc6541c..ba4b87111ff487 100644 --- a/clang/test/Index/index-concepts.cpp +++ b/clang/test/Index/index-concepts.cpp @@ -1,5 +1,5 @@ // RUN: c-index-test -test-load-source all %s -std=gnu++20 -fno-delayed-template-parsing | FileCheck %s - +// UNSUPPORTED: aix template struct type_trait { const static bool value = false; From bd92bca53517bfbce3396d9f87f42fa438e8d1fd Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 19 May 2022 15:32:06 -0700 Subject: [PATCH 394/908] [flang] Fix purity testing for generic calls The purity or impurity of a call to a generic interface depends on the attributes of the specific procedure or specific binding. Change expression analysis of calls to generic interfaces to replace the symbol in the parse tree with the specific procedure or binding; this ensures that later checking for purity in DO CONCURRENT and other contexts will be accurate. Remove an "XFAIL" from a test that now passes again with this fix. 
Differential Revision: https://reviews.llvm.org/D126150 --- flang/lib/Semantics/expression.cpp | 9 ++++- flang/test/Semantics/doconcurrent09.f90 | 47 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 flang/test/Semantics/doconcurrent09.f90 diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index c1417b65372016..98816017dbfe33 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1921,7 +1921,10 @@ auto ExpressionAnalyzer::AnalyzeProcedureComponentRef( }}; auto pair{ResolveGeneric(*sym, arguments, adjustment)}; sym = pair.first; - if (!sym) { + if (sym) { + // re-resolve the name to the specific binding + sc.component.symbol = const_cast(sym); + } else { EmitGenericResolutionError(*sc.component.symbol, pair.second); return std::nullopt; } @@ -2184,6 +2187,10 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name, *symbol, arguments, noAdjustment, mightBeStructureConstructor)}; resolution = pair.first; dueToNullActual = pair.second; + if (resolution) { + // re-resolve name to the specific procedure + name.symbol = const_cast(resolution); + } } if (!resolution) { // Not generic, or no resolution; may be intrinsic diff --git a/flang/test/Semantics/doconcurrent09.f90 b/flang/test/Semantics/doconcurrent09.f90 new file mode 100644 index 00000000000000..d783da0e144c47 --- /dev/null +++ b/flang/test/Semantics/doconcurrent09.f90 @@ -0,0 +1,47 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! Ensure that DO CONCURRENT purity checks apply to specific procedures +! in the case of calls to generic interfaces. 
+module m + interface purity + module procedure :: ps, ips + end interface + type t + contains + procedure :: pb, ipb + generic :: purity => pb, ipb + end type + contains + pure subroutine ps(n) + integer, intent(in) :: n + end subroutine + impure subroutine ips(a) + real, intent(in) :: a + end subroutine + pure subroutine pb(x,n) + class(t), intent(in) :: x + integer, intent(in) :: n + end subroutine + impure subroutine ipb(x,n) + class(t), intent(in) :: x + real, intent(in) :: n + end subroutine +end module + +program test + use m + type(t) :: x + do concurrent (j=1:1) + call ps(1) ! ok + call purity(1) ! ok + !ERROR: Call to an impure procedure is not allowed in DO CONCURRENT + call purity(1.) + !ERROR: Call to an impure procedure is not allowed in DO CONCURRENT + call ips(1.) + call x%pb(1) ! ok + call x%purity(1) ! ok + !ERROR: Call to an impure procedure component is not allowed in DO CONCURRENT + call x%purity(1.) + !ERROR: Call to an impure procedure component is not allowed in DO CONCURRENT + call x%ipb(1.) + end do +end program From 48a8a3eb2f5646c9ac847a07f42316a81e025c23 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 19 May 2022 16:30:04 -0700 Subject: [PATCH 395/908] [flang] Accept defined assignment with CLASS(*) RHS A utility predicate in semantics was incorrectly determining that an INTERFACE ASSIGNMENT(=) (or other form of generic) could not have a specific procedure with an unlimited polymorphic second argument. This led to a crash later in expression analysis. Fix, and extend tests. 
Differential Revision: https://reviews.llvm.org/D126151 --- flang/lib/Semantics/tools.cpp | 5 ++++- flang/test/Semantics/defined-ops.f90 | 11 +++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 7e6d1d0aecb31d..807c0152eb9a10 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -103,9 +103,12 @@ Tristate IsDefinedAssignment( if (!lhsType || !rhsType) { return Tristate::No; // error or rhs is untyped } - if (lhsType->IsUnlimitedPolymorphic() || rhsType->IsUnlimitedPolymorphic()) { + if (lhsType->IsUnlimitedPolymorphic()) { return Tristate::No; } + if (rhsType->IsUnlimitedPolymorphic()) { + return Tristate::Maybe; + } TypeCategory lhsCat{lhsType->category()}; TypeCategory rhsCat{rhsType->category()}; if (rhsRank > 0 && lhsRank != rhsRank) { diff --git a/flang/test/Semantics/defined-ops.f90 b/flang/test/Semantics/defined-ops.f90 index e46caeb82b9424..18ed40364246fe 100644 --- a/flang/test/Semantics/defined-ops.f90 +++ b/flang/test/Semantics/defined-ops.f90 @@ -69,6 +69,10 @@ subroutine s1(x, y) class(t), intent(out) :: x integer, intent(in) :: y end + subroutine s2(x, y) + real, intent(out) :: x + class(*), intent(in) :: y + end end interface interface operator(+) integer function f(x, y) @@ -77,12 +81,15 @@ integer function f(x, y) end end interface contains - subroutine test(x, y) + subroutine test(x, y, z) class(t) :: x, y + class(*), intent(in) :: z + real :: a !CHECK: CALL s1(x,2_4) x = 2 !CHECK: i=f(x,y) i = x + y + !CHECK: CALL s2(a,z) + a = z end end - From dcf9ba82d99c2b4625b2e0c00c44a469048f2827 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Fri, 20 May 2022 08:45:46 -0700 Subject: [PATCH 396/908] [flang] Fix false error for multiple defined I/O subroutines User-defined derived type I/O subroutines need to be unique for a given type and operation in any scope, but it is acceptable to have more than one defined I/O subroutine so long as 
only one of them is visible. Differential Revision: https://reviews.llvm.org/D126152 --- flang/lib/Semantics/check-declarations.cpp | 46 +++++++++++++--------- flang/test/Semantics/io11.f90 | 45 +++++++++++++++------ 2 files changed, 60 insertions(+), 31 deletions(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 99c6e27e714a40..25f4b29b45cbca 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -109,12 +109,13 @@ class CheckHelper { void CheckDefinedIoProc( const Symbol &, const GenericDetails &, GenericKind::DefinedIo); bool CheckDioDummyIsData(const Symbol &, const Symbol *, std::size_t); - void CheckDioDummyIsDerived( - const Symbol &, const Symbol &, GenericKind::DefinedIo ioKind); + void CheckDioDummyIsDerived(const Symbol &, const Symbol &, + GenericKind::DefinedIo ioKind, const Symbol &); void CheckDioDummyIsDefaultInteger(const Symbol &, const Symbol &); void CheckDioDummyIsScalar(const Symbol &, const Symbol &); void CheckDioDummyAttrs(const Symbol &, const Symbol &, Attr); - void CheckDioDtvArg(const Symbol &, const Symbol *, GenericKind::DefinedIo); + void CheckDioDtvArg( + const Symbol &, const Symbol *, GenericKind::DefinedIo, const Symbol &); void CheckGenericVsIntrinsic(const Symbol &, const GenericDetails &); void CheckDefaultIntegerArg(const Symbol &, const Symbol *, Attr); void CheckDioAssumedLenCharacterArg( @@ -123,12 +124,13 @@ class CheckHelper { void CheckDioArgCount( const Symbol &, GenericKind::DefinedIo ioKind, std::size_t); struct TypeWithDefinedIo { - const DerivedTypeSpec *type; + const DerivedTypeSpec &type; GenericKind::DefinedIo ioKind; const Symbol &proc; + const Symbol &generic; }; - void CheckAlreadySeenDefinedIo( - const DerivedTypeSpec *, GenericKind::DefinedIo, const Symbol &); + void CheckAlreadySeenDefinedIo(const DerivedTypeSpec &, + GenericKind::DefinedIo, const Symbol &, const Symbol &generic); 
SemanticsContext &context_; evaluate::FoldingContext &foldingContext_{context_.foldingContext()}; @@ -1903,28 +1905,34 @@ bool CheckHelper::CheckDioDummyIsData( } } -void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec *derivedType, - GenericKind::DefinedIo ioKind, const Symbol &proc) { +void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec &derivedType, + GenericKind::DefinedIo ioKind, const Symbol &proc, const Symbol &generic) { for (TypeWithDefinedIo definedIoType : seenDefinedIoTypes_) { - if (*derivedType == *definedIoType.type && ioKind == definedIoType.ioKind && - proc != definedIoType.proc) { + // It's okay to have two or more distinct derived type I/O procedures + // for the same type if they're coming from distinct non-type-bound + // interfaces. (The non-type-bound interfaces would have been merged into + // a single generic if both were visible in the same scope.) + if (derivedType == definedIoType.type && ioKind == definedIoType.ioKind && + proc != definedIoType.proc && + (generic.owner().IsDerivedType() || + definedIoType.generic.owner().IsDerivedType())) { SayWithDeclaration(proc, definedIoType.proc.name(), "Derived type '%s' already has defined input/output procedure" " '%s'"_err_en_US, - derivedType->name(), + derivedType.name(), parser::ToUpperCaseLetters(GenericKind::EnumToString(ioKind))); return; } } seenDefinedIoTypes_.emplace_back( - TypeWithDefinedIo{derivedType, ioKind, proc}); + TypeWithDefinedIo{derivedType, ioKind, proc, generic}); } -void CheckHelper::CheckDioDummyIsDerived( - const Symbol &subp, const Symbol &arg, GenericKind::DefinedIo ioKind) { +void CheckHelper::CheckDioDummyIsDerived(const Symbol &subp, const Symbol &arg, + GenericKind::DefinedIo ioKind, const Symbol &generic) { if (const DeclTypeSpec * type{arg.GetType()}) { if (const DerivedTypeSpec * derivedType{type->AsDerived()}) { - CheckAlreadySeenDefinedIo(derivedType, ioKind, subp); + CheckAlreadySeenDefinedIo(*derivedType, ioKind, subp, generic); 
bool isPolymorphic{type->IsPolymorphic()}; if (isPolymorphic != IsExtensibleType(derivedType)) { messages_.Say(arg.name(), @@ -1965,11 +1973,11 @@ void CheckHelper::CheckDioDummyIsScalar(const Symbol &subp, const Symbol &arg) { } } -void CheckHelper::CheckDioDtvArg( - const Symbol &subp, const Symbol *arg, GenericKind::DefinedIo ioKind) { +void CheckHelper::CheckDioDtvArg(const Symbol &subp, const Symbol *arg, + GenericKind::DefinedIo ioKind, const Symbol &generic) { // Dtv argument looks like: dtv-type-spec, INTENT(INOUT) :: dtv if (CheckDioDummyIsData(subp, arg, 0)) { - CheckDioDummyIsDerived(subp, *arg, ioKind); + CheckDioDummyIsDerived(subp, *arg, ioKind, generic); CheckDioDummyAttrs(subp, *arg, ioKind == GenericKind::DefinedIo::ReadFormatted || ioKind == GenericKind::DefinedIo::ReadUnformatted @@ -2107,7 +2115,7 @@ void CheckHelper::CheckDefinedIoProc(const Symbol &symbol, switch (argCount++) { case 0: // dtv-type-spec, INTENT(INOUT) :: dtv - CheckDioDtvArg(specific, arg, ioKind); + CheckDioDtvArg(specific, arg, ioKind, symbol); break; case 1: // INTEGER, INTENT(IN) :: unit diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90 index 35ea874237649c..07e93773ea3a86 100644 --- a/flang/test/Semantics/io11.f90 +++ b/flang/test/Semantics/io11.f90 @@ -434,7 +434,6 @@ subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine !ERROR: Derived type 't' already has defined input/output procedure 'READUNFORMATTED' subroutine unformattedReadProc(dtv,unit,iostat,iomsg) @@ -443,7 +442,6 @@ subroutine unformattedReadProc(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine end module @@ -469,7 +467,6 @@ subroutine unformattedReadProc(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat 
character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine subroutine unformattedWriteProc(dtv,unit,iostat,iomsg) class(t),intent(in) :: dtv @@ -477,7 +474,6 @@ subroutine unformattedWriteProc(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg write(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine end module @@ -502,7 +498,6 @@ subroutine unformattedReadProc(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine !ERROR: Derived type 't' already has defined input/output procedure 'READUNFORMATTED' subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) @@ -511,7 +506,6 @@ subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine end module @@ -536,7 +530,6 @@ subroutine unformattedReadProc(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) class(t(3)),intent(inout) :: dtv @@ -544,7 +537,6 @@ subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine end module @@ -569,7 +561,6 @@ subroutine unformattedReadProc(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) class(t(3)),intent(inout) :: dtv @@ -577,7 +568,6 @@ subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat 
character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine end module @@ -602,7 +592,6 @@ subroutine unformattedReadProc(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine !ERROR: Derived type 't' already has defined input/output procedure 'READUNFORMATTED' subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) @@ -611,6 +600,38 @@ subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) integer,intent(out) :: iostat character(*),intent(inout) :: iomsg read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c - print *,v_list end subroutine end module + +module m25a + ! Test against false error when two defined I/O procedures exist + ! for the same type but are not both visible in the same scope. + type t + integer c + end type + interface read(unformatted) + module procedure unformattedReadProc1 + end interface + contains + subroutine unformattedReadProc1(dtv,unit,iostat,iomsg) + class(t),intent(inout) :: dtv + integer,intent(in) :: unit + integer,intent(out) :: iostat + character(*),intent(inout) :: iomsg + read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c + end subroutine +end module +subroutine m25b + use m25a, only: t + interface read(unformatted) + procedure unformattedReadProc2 + end interface + contains + subroutine unformattedReadProc2(dtv,unit,iostat,iomsg) + class(t),intent(inout) :: dtv + integer,intent(in) :: unit + integer,intent(out) :: iostat + character(*),intent(inout) :: iomsg + read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c + end subroutine +end subroutine From 2f93bbb9cd7c20ea1a273cf652d852d4b641f94a Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Tue, 24 May 2022 18:45:59 +0000 Subject: [PATCH 397/908] [clang][dataflow] Relax `Environment` comparison operation. Ignore `MemberLocToStruct` in environment comparison. 
Because it is an ancillary data structure, including it in the comparison is redundant. We can also generate environments that differ in their `MemberLocToStruct` but are otherwise equivalent. Differential Revision: https://reviews.llvm.org/D126314 --- .../FlowSensitive/DataflowEnvironment.cpp | 3 - .../Analysis/FlowSensitive/TransferTest.cpp | 55 +++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 51aea12cb27147..e555bee0e8bf28 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -239,9 +239,6 @@ bool Environment::equivalentTo(const Environment &Other, if (ExprToLoc != Other.ExprToLoc) return false; - if (MemberLocToStruct != Other.MemberLocToStruct) - return false; - // Compare the contents for the intersection of their domains. for (auto &Entry : LocToVal) { const StorageLocation *Loc = Entry.first; diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 9819409291040a..b6d2940a98daad 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3120,4 +3120,59 @@ TEST_F(TransferTest, LoopWithReferenceAssignmentConverges) { }); } +TEST_F(TransferTest, LoopWithStructReferenceAssignmentConverges) { + std::string Code = R"( + struct Lookup { + int x; + }; + + void target(Lookup val, bool b) { + const Lookup* l = nullptr; + while (b) { + l = &val; + /*[[p-inner]]*/ + } + (void)0; + /*[[p-outer]]*/ + } + )"; + // The key property that we are verifying is implicit in `runDataflow` -- + // namely, that the analysis succeeds, rather than hitting the maximum number + // of iterations. 
+ runDataflow( Code, [](llvm::ArrayRef< std::pair>> Results, ASTContext &ASTCtx) { ASSERT_THAT(Results, ElementsAre(Pair("p-outer", _), Pair("p-inner", _))); const Environment &OuterEnv = Results[0].second.Env; const Environment &InnerEnv = Results[1].second.Env; const ValueDecl *ValDecl = findValueDecl(ASTCtx, "val"); ASSERT_THAT(ValDecl, NotNull()); const ValueDecl *LDecl = findValueDecl(ASTCtx, "l"); ASSERT_THAT(LDecl, NotNull()); // Inner. auto *LVal = dyn_cast( InnerEnv.getValue(*LDecl, SkipPast::None)); ASSERT_THAT(LVal, NotNull()); EXPECT_EQ(&LVal->getPointeeLoc(), InnerEnv.getStorageLocation(*ValDecl, SkipPast::Reference)); // Outer. LVal = dyn_cast( OuterEnv.getValue(*LDecl, SkipPast::None)); ASSERT_THAT(LVal, NotNull()); // The loop body may not have been executed, so we should not conclude // that `l` points to `val`. EXPECT_NE(&LVal->getPointeeLoc(), OuterEnv.getStorageLocation(*ValDecl, SkipPast::Reference)); +}); +} + } // namespace From d52a6e75b0c402c7f3b42a2b1b2873f151220947 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Fri, 20 May 2022 13:31:14 -0700 Subject: [PATCH 398/908] [flang] Process subprogram BIND(C,NAME=...) locally The scalar-default-character-expression that defines the interoperable name of a function or subroutine (or interface) must have its names resolved within the context of the subprogram, despite its appearance on a function-stmt or a subroutine-stmt. Failure to do so can lead to bogus errors or to incorrect results. The solution is to defer name resolution for function-stmt suffixes (but not entry-stmt suffixes) and for subroutine-stmt language binding specifications to EndSubprogram(). (Their resolution only needs to be deferred to the end of the specification part, but it's cleanest to deal with it in EndSubprogram().) 
Differential Revision: https://reviews.llvm.org/D126153 --- flang/lib/Semantics/resolve-names.cpp | 89 +++++++++++++------ .../test/Lower/program-units-fir-mangling.f90 | 37 ++++++++ 2 files changed, 100 insertions(+), 26 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index d7eef39eb09a7c..1fe21ce9bc9c48 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -809,7 +809,6 @@ class SubprogramVisitor : public virtual ScopeHandler, public InterfaceVisitor { public: bool HandleStmtFunction(const parser::StmtFunctionStmt &); bool Pre(const parser::SubroutineStmt &); - void Post(const parser::SubroutineStmt &); bool Pre(const parser::FunctionStmt &); void Post(const parser::FunctionStmt &); bool Pre(const parser::EntryStmt &); @@ -827,7 +826,8 @@ class SubprogramVisitor : public virtual ScopeHandler, public InterfaceVisitor { const ProgramTree::EntryStmtList * = nullptr); bool BeginMpSubprogram(const parser::Name &); void PushBlockDataScope(const parser::Name &); - void EndSubprogram(); + void EndSubprogram( + const std::optional * = nullptr); protected: // Set when we see a stmt function that is really an array element assignment @@ -3208,7 +3208,9 @@ bool SubprogramVisitor::Pre(const parser::Suffix &suffix) { } } } - return true; + // LanguageBindingSpec deferred to Post(EntryStmt) or, for FunctionStmt, + // all the way to EndSubprogram(). 
+ return false; } bool SubprogramVisitor::Pre(const parser::PrefixSpec &x) { @@ -3234,30 +3236,27 @@ bool SubprogramVisitor::Pre(const parser::InterfaceBody::Subroutine &x) { std::get>(x.t).statement.t)}; return BeginSubprogram(name, Symbol::Flag::Subroutine); } -void SubprogramVisitor::Post(const parser::InterfaceBody::Subroutine &) { - EndSubprogram(); +void SubprogramVisitor::Post(const parser::InterfaceBody::Subroutine &x) { + EndSubprogram(&std::get>( + std::get>(x.t).statement.t)); } bool SubprogramVisitor::Pre(const parser::InterfaceBody::Function &x) { const auto &name{std::get( std::get>(x.t).statement.t)}; return BeginSubprogram(name, Symbol::Flag::Function); } -void SubprogramVisitor::Post(const parser::InterfaceBody::Function &) { - EndSubprogram(); -} - -bool SubprogramVisitor::Pre(const parser::SubroutineStmt &) { - return BeginAttrs(); -} -bool SubprogramVisitor::Pre(const parser::FunctionStmt &) { - FuncResultStack::FuncInfo &info{DEREF(funcResultStack().Top())}; - CHECK(!info.inFunctionStmt); - info.inFunctionStmt = true; - return BeginAttrs(); +void SubprogramVisitor::Post(const parser::InterfaceBody::Function &x) { + const auto &maybeSuffix{std::get>( + std::get>(x.t).statement.t)}; + EndSubprogram(maybeSuffix ? &maybeSuffix->binding : nullptr); } -bool SubprogramVisitor::Pre(const parser::EntryStmt &) { return BeginAttrs(); } -void SubprogramVisitor::Post(const parser::SubroutineStmt &stmt) { +bool SubprogramVisitor::Pre(const parser::SubroutineStmt &stmt) { + BeginAttrs(); + Walk(std::get>(stmt.t)); + Walk(std::get(stmt.t)); + Walk(std::get>(stmt.t)); + // Don't traverse the LanguageBindingSpec now; it's deferred to EndSubprogram. 
const auto &name{std::get(stmt.t)}; auto &details{PostSubprogramStmt(name)}; for (const auto &dummyArg : std::get>(stmt.t)) { @@ -3268,7 +3267,15 @@ void SubprogramVisitor::Post(const parser::SubroutineStmt &stmt) { details.add_alternateReturn(); } } + return false; } +bool SubprogramVisitor::Pre(const parser::FunctionStmt &) { + FuncResultStack::FuncInfo &info{DEREF(funcResultStack().Top())}; + CHECK(!info.inFunctionStmt); + info.inFunctionStmt = true; + return BeginAttrs(); +} +bool SubprogramVisitor::Pre(const parser::EntryStmt &) { return BeginAttrs(); } void SubprogramVisitor::Post(const parser::FunctionStmt &stmt) { const auto &name{std::get(stmt.t)}; @@ -3340,11 +3347,6 @@ void SubprogramVisitor::Post(const parser::FunctionStmt &stmt) { SubprogramDetails &SubprogramVisitor::PostSubprogramStmt( const parser::Name &name) { Symbol &symbol{*currScope().symbol()}; - auto &subp{symbol.get()}; - SetBindNameOn(symbol); - CHECK(name.source == symbol.name() || - (subp.bindName() && symbol.owner().IsGlobal() && - context().IsTempName(symbol.name().ToString()))); symbol.attrs() |= EndAttrs(); if (symbol.attrs().test(Attr::MODULE)) { symbol.attrs().set(Attr::EXTERNAL, false); @@ -3353,6 +3355,9 @@ SubprogramDetails &SubprogramVisitor::PostSubprogramStmt( } void SubprogramVisitor::Post(const parser::EntryStmt &stmt) { + if (const auto &suffix{std::get>(stmt.t)}) { + Walk(suffix->binding); + } PostEntryStmt(stmt); EndAttrs(); } @@ -3592,7 +3597,19 @@ bool SubprogramVisitor::BeginSubprogram(const parser::Name &name, return true; } -void SubprogramVisitor::EndSubprogram() { PopScope(); } +void SubprogramVisitor::EndSubprogram( + const std::optional *binding) { + if (binding && *binding && currScope().symbol()) { + // Finally process the BIND(C,NAME=name) now that symbols in the name + // expression will resolve local names. 
+ auto flagRestorer{common::ScopedSet(inSpecificationPart_, false)}; + BeginAttrs(); + Walk(**binding); + SetBindNameOn(*currScope().symbol()); + currScope().symbol()->attrs() |= EndAttrs(); + } + PopScope(); +} bool SubprogramVisitor::HandlePreviousCalls( const parser::Name &name, Symbol &symbol, Symbol::Flag subpFlag) { @@ -7421,7 +7438,27 @@ bool ResolveNamesVisitor::BeginScopeForNode(const ProgramTree &node) { } void ResolveNamesVisitor::EndScopeForNode(const ProgramTree &node) { - EndSubprogram(); + using BindingPtr = const std::optional *; + EndSubprogram(common::visit( + common::visitors{ + [](const parser::Statement *stmt) { + if (stmt) { + if (const auto &maybeSuffix{ + std::get>( + stmt->statement.t)}) { + return &maybeSuffix->binding; + } + } + return BindingPtr{}; + }, + [](const parser::Statement *stmt) { + return stmt ? &std::get>( + stmt->statement.t) + : BindingPtr{}; + }, + [](const auto *) { return BindingPtr{}; }, + }, + node.stmt())); } // Some analyses and checks, such as the processing of initializers of diff --git a/flang/test/Lower/program-units-fir-mangling.f90 b/flang/test/Lower/program-units-fir-mangling.f90 index bf360768ae8233..e9311f2e2ab743 100644 --- a/flang/test/Lower/program-units-fir-mangling.f90 +++ b/flang/test/Lower/program-units-fir-mangling.f90 @@ -185,4 +185,41 @@ subroutine sub_with_entries entry some_other_entry() bind(c) end subroutine +! Test that semantics constructs binding labels with local name resolution +module testMod3 + character*(*), parameter :: foo = "bad!!" + character*(*), parameter :: ok = "ok" + interface + real function f1() bind(c,name=ok//'1') + import ok + end function + subroutine s1() bind(c,name=ok//'2') + import ok + end subroutine + end interface + contains +! CHECK-LABEL: func @ok3() -> f32 attributes {fir.sym_name = "_QMtestmod3Pf2"} { + real function f2() bind(c,name=foo//'3') + character*(*), parameter :: foo = ok +! CHECK: fir.call @ok1() : () -> f32 +! 
CHECK-LABEL: func @ok4() -> f32 attributes {fir.sym_name = "_QMtestmod3Pf3"} { + entry f3() bind(c,name=foo//'4') +! CHECK: fir.call @ok1() : () -> f32 + f2 = f1() + end function +! CHECK-LABEL: func @ok5() attributes {fir.sym_name = "_QMtestmod3Ps2"} { + subroutine s2() bind(c,name=foo//'5') + character*(*), parameter :: foo = ok +! CHECK: fir.call @ok2() : () -> () +! CHECK-LABEL: func @ok6() attributes {fir.sym_name = "_QMtestmod3Ps3"} { + entry s3() bind(c,name=foo//'6') +! CHECK: fir.call @ok2() : () -> () + continue ! force end of specification part +! CHECK-LABEL: func @ok7() attributes {fir.sym_name = "_QMtestmod3Ps4"} { + entry s4() bind(c,name=foo//'7') +! CHECK: fir.call @ok2() : () -> () + call s1 + end subroutine +end module + ! CHECK-LABEL: fir.global internal @_QFfooEpi : f32 { From 0a79113b9e064e50f660ebe6e257bf69ba028026 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Fri, 20 May 2022 14:21:59 -0700 Subject: [PATCH 399/908] [flang] Replace crash and improve a semantics TODO message The derived type information table construction code had a crash whose root cause was replacing an expression with one of its operands -- the deletion of the LHS of that assignment led to the RHS being invalidated before it could be read. Fix by cloning the RHS. Also update a TODO message to the new "_todo_en_US" message class and add a comment about how it should be resolved. 
Differential Revision: https://reviews.llvm.org/D126154 --- flang/lib/Semantics/runtime-type-info.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp index d24b844bd7b585..eae8c0c977a29f 100644 --- a/flang/lib/Semantics/runtime-type-info.cpp +++ b/flang/lib/Semantics/runtime-type-info.cpp @@ -103,9 +103,13 @@ class RuntimeTableBuilder { lenParameterEnum_, FindLenParameterIndex(*parameters, *lenParam)); } } + // TODO: Replace a specification expression requiring actual operations + // with a reference to a new anonymous LEN type parameter whose default + // value captures the expression. This replacement must take place when + // the type is declared so that the new LEN type parameters appear in + // all instantiations and structure constructors. context_.Say(location_, - "Specification expression '%s' is neither constant nor a length " - "type parameter"_err_en_US, + "derived type specification expression '%s' that is neither constant nor a length type parameter"_todo_en_US, expr->AsFortran()); } return PackageIntValue(deferredEnum_); @@ -734,7 +738,7 @@ evaluate::StructureConstructor RuntimeTableBuilder::DescribeComponent( evaluate::Extremum>(*len)}) { if (clamped->ordering == evaluate::Ordering::Greater && clamped->left() == evaluate::Expr{0}) { - len = clamped->right(); + len = common::Clone(clamped->right()); } } AddValue(values, componentSchema_, "characterlen"s, From cdd54cbdd937f7473e0ba065ecafc70742f3f1fd Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Fri, 20 May 2022 16:16:09 -0700 Subject: [PATCH 400/908] [flang][runtime] Catch decimal integer input overflow B/O/Z input overflow is already caught, and real input overflow is signalled as an IEEE arithmetic exception, but regular decimal integer overflow was silent. 
Differential Revision: https://reviews.llvm.org/D126155 --- flang/include/flang/Runtime/iostat.h | 2 ++ flang/runtime/edit-input.cpp | 16 +++++++++++++++- flang/runtime/iostat.cpp | 4 ++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/flang/include/flang/Runtime/iostat.h b/flang/include/flang/Runtime/iostat.h index 33952074682374..562efb73f96c97 100644 --- a/flang/include/flang/Runtime/iostat.h +++ b/flang/include/flang/Runtime/iostat.h @@ -72,6 +72,8 @@ enum Iostat { IostatBadScaleFactor, IostatBadAsynchronous, IostatBadWaitUnit, + IostatBOZInputOverflow, + IostatIntegerInputOverflow, }; const char *IostatErrorString(int); diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp index 2e8ea3faa9e3e0..7e53b54f59949f 100644 --- a/flang/runtime/edit-input.cpp +++ b/flang/runtime/edit-input.cpp @@ -47,7 +47,7 @@ static bool EditBOZInput( } auto significantBytes{static_cast(digits * LOG2_BASE + 7) / 8}; if (significantBytes > bytes) { - io.GetIoErrorHandler().SignalError( + io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow, "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes); return false; } @@ -140,6 +140,7 @@ bool EditIntegerInput( bool negate{ScanNumericPrefix(io, edit, next, remaining)}; common::UnsignedInt128 value{0}; bool any{negate}; + bool overflow{false}; for (; next; next = io.NextInField(remaining, edit)) { char32_t ch{*next}; if (ch == ' ' || ch == '\t') { @@ -157,10 +158,23 @@ bool EditIntegerInput( "Bad character '%lc' in INTEGER input field", ch); return false; } + static constexpr auto maxu128{~common::UnsignedInt128{0}}; + static constexpr auto maxu128OverTen{maxu128 / 10}; + static constexpr int maxLastDigit{ + static_cast(maxu128 - (maxu128OverTen * 10))}; + overflow |= value >= maxu128OverTen && + (value > maxu128OverTen || digit > maxLastDigit); value *= 10; value += digit; any = true; } + auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)}; + overflow |= value >= maxForKind && 
(value > maxForKind || !negate); + if (overflow) { + io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow, + "Decimal input overflows INTEGER(%d) variable", kind); + return false; + } if (negate) { value = -value; } diff --git a/flang/runtime/iostat.cpp b/flang/runtime/iostat.cpp index de9658fd23d2bb..5ab1ee435ef221 100644 --- a/flang/runtime/iostat.cpp +++ b/flang/runtime/iostat.cpp @@ -88,6 +88,10 @@ const char *IostatErrorString(int iostat) { "OPEN(ASYNCHRONOUS='YES')"; case IostatBadWaitUnit: return "WAIT(ID=nonzero) for a bad unit number"; + case IostatBOZInputOverflow: + return "B/O/Z input value overflows variable"; + case IostatIntegerInputOverflow: + return "Integer input value overflows variable"; default: return nullptr; } From 0bf3c38b0be3f9b45b589b780ed53f2de4d364d4 Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Tue, 24 May 2022 16:11:23 -0500 Subject: [PATCH 401/908] Fix build failure revealed by c35ca3a1c78f693b749ad11742350b7fc6c5cd89 This commit resolves a Linux kernel build failure that was revealed by c35ca3a1c78f693b749ad11742350b7fc6c5cd89. The patch introduces two new intrinsics, which ultimately changes the intrinsic numbering of other PPC intrinsics. This causes an issue introduced by ff40fb07ad6309131c2448ca00572a078c7a2d59, as the patch checks for intrinsics with particular values, but the addition of the fnabs/fnabss intrinsics updates the original sqrt/sdiv intrinsic values. 
--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index fc1143e3d78e04..948ed51352d3b1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4646,7 +4646,8 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, static bool isSWTestOp(SDValue N) { if (N.getOpcode() == PPCISD::FTSQRT) return true; - if (N.getNumOperands() < 1 || !isa(N.getOperand(0))) + if (N.getNumOperands() < 1 || !isa(N.getOperand(0)) || + N.getOpcode() != ISD::INTRINSIC_WO_CHAIN) return false; switch (N.getConstantOperandVal(0)) { case Intrinsic::ppc_vsx_xvtdivdp: From 29f167abcf7d871d17dd3f38f361916de1a12470 Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Tue, 24 May 2022 21:31:57 +0000 Subject: [PATCH 402/908] [Flang][OpenMP] Fixes for unstructured OpenMP code Since the FIR operations are mostly structured, it is only the functions that could contain multiple blocks inside an operation. This changes with OpenMP since OpenMP regions can contain multiple blocks. For unstructured code, the blocks are created in advance and belong to the top-level function. This caused code in OpenMP region to be placed under the function level. In this fix, if the OpenMP region is unstructured then new blocks are created inside it. Note1: This is part of upstreaming from the fir-dev branch of https://github.com/flang-compiler/f18-llvm-project. The code in this patch is a subset of the changes in https://github.com/flang-compiler/f18-llvm-project/pull/1178. 
Reviewed By: vdonaldson Differential Revision: https://reviews.llvm.org/D126293 Co-authored-by: Val Donaldson Co-authored-by: Eric Schweitz Co-authored-by: Valentin Clement --- flang/lib/Lower/OpenMP.cpp | 63 ++++++++--- flang/test/Lower/OpenMP/omp-unstructured.f90 | 113 +++++++++++++++++++ 2 files changed, 159 insertions(+), 17 deletions(-) create mode 100644 flang/test/Lower/OpenMP/omp-unstructured.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index e59d80c3436380..b42fb95d50ffee 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -138,11 +138,35 @@ static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); } +/// Create empty blocks for the current region. +/// These blocks replace blocks parented to an enclosing region. +void createEmptyRegionBlocks( + fir::FirOpBuilder &firOpBuilder, + std::list &evaluationList) { + auto *region = &firOpBuilder.getRegion(); + for (auto &eval : evaluationList) { + if (eval.block) { + if (eval.block->empty()) { + eval.block->erase(); + eval.block = firOpBuilder.createBlock(region); + } else { + [[maybe_unused]] auto &terminatorOp = eval.block->back(); + assert((mlir::isa(terminatorOp) || + mlir::isa(terminatorOp)) && + "expected terminator op"); + } + } + if (eval.hasNestedEvaluations()) + createEmptyRegionBlocks(firOpBuilder, eval.getNestedEvaluations()); + } +} + /// Create the body (block) for an OpenMP Operation. /// /// \param [in] op - the operation the body belongs to. /// \param [inout] converter - converter to use for the clauses. /// \param [in] loc - location in source code. +/// \param [in] eval - current PFT node/evaluation. /// \oaran [in] clauses - list of clauses to process. /// \param [in] args - block arguments (induction variable[s]) for the //// region. 
@@ -151,14 +175,14 @@ static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, template static void createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, + mlir::Location &loc, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OmpClauseList *clauses = nullptr, const SmallVector &args = {}, bool outerCombined = false) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - // If arguments for the region are provided then create the block with those - // arguments. Also update the symbol's address with the mlir argument values. - // e.g. For loops the arguments are the induction variable. And all further + auto &firOpBuilder = converter.getFirOpBuilder(); + // If an argument for the region is provided then create the block with that + // argument. Also update the symbol's address with the mlir argument value. + // e.g. For loops the argument is the induction variable. And all further // uses of the induction variable should use this mlir value. if (args.size()) { std::size_t loopVarTypeSize = 0; @@ -184,7 +208,10 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, auto &block = op.getRegion().back(); firOpBuilder.setInsertionPointToStart(&block); - // Insert the terminator. + if (eval.lowerAsUnstructured()) + createEmptyRegionBlocks(firOpBuilder, eval.getNestedEvaluations()); + + // Ensure the block is well-formed by inserting terminators. 
if constexpr (std::is_same_v) { mlir::ValueRange results; firOpBuilder.create(loc, results); @@ -369,7 +396,7 @@ createCombinedParallelOp(Fortran::lower::AbstractConverter &converter, allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(), /*reductions=*/nullptr, procBindKindAttr); - createBodyOfOp(parallelOp, converter, currentLocation, + createBodyOfOp(parallelOp, converter, currentLocation, eval, &opClauseList, /*iv=*/{}, /*isCombined=*/true); } @@ -461,26 +488,27 @@ genOMP(Fortran::lower::AbstractConverter &converter, allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(), /*reductions=*/nullptr, procBindKindAttr); createBodyOfOp(parallelOp, converter, currentLocation, - &opClauseList); + eval, &opClauseList); } else if (blockDirective.v == llvm::omp::OMPD_master) { auto masterOp = firOpBuilder.create(currentLocation, argTy); - createBodyOfOp(masterOp, converter, currentLocation); + createBodyOfOp(masterOp, converter, currentLocation, eval); } else if (blockDirective.v == llvm::omp::OMPD_single) { auto singleOp = firOpBuilder.create( currentLocation, allocateOperands, allocatorOperands, nowaitAttr); - createBodyOfOp(singleOp, converter, currentLocation); + createBodyOfOp(singleOp, converter, currentLocation, eval); } else if (blockDirective.v == llvm::omp::OMPD_ordered) { auto orderedOp = firOpBuilder.create( currentLocation, /*simd=*/nullptr); - createBodyOfOp(orderedOp, converter, currentLocation); + createBodyOfOp(orderedOp, converter, currentLocation, + eval); } else if (blockDirective.v == llvm::omp::OMPD_task) { auto taskOp = firOpBuilder.create( currentLocation, ifClauseOperand, finalClauseOperand, untiedAttr, mergeableAttr, /*in_reduction_vars=*/ValueRange(), /*in_reductions=*/nullptr, priorityClauseOperand, allocateOperands, allocatorOperands); - createBodyOfOp(taskOp, converter, currentLocation, &opClauseList); + createBodyOfOp(taskOp, converter, currentLocation, eval, &opClauseList); } else { 
TODO(converter.getCurrentLocation(), "Unhandled block directive"); } @@ -644,7 +672,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, wsLoopOp.nowaitAttr(firOpBuilder.getUnitAttr()); } - createBodyOfOp(wsLoopOp, converter, currentLocation, + createBodyOfOp(wsLoopOp, converter, currentLocation, eval, &wsLoopOpClauseList, iv); } @@ -688,7 +716,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, firOpBuilder.getContext(), global.sym_name())); } }(); - createBodyOfOp(criticalOp, converter, currentLocation); + createBodyOfOp(criticalOp, converter, currentLocation, eval); } static void @@ -700,7 +728,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, auto currentLocation = converter.getCurrentLocation(); mlir::omp::SectionOp sectionOp = firOpBuilder.create(currentLocation); - createBodyOfOp(sectionOp, converter, currentLocation); + createBodyOfOp(sectionOp, converter, currentLocation, eval); } // TODO: Add support for reduction @@ -757,14 +785,15 @@ genOMP(Fortran::lower::AbstractConverter &converter, currentLocation, /*reduction_vars*/ ValueRange(), /*reductions=*/nullptr, allocateOperands, allocatorOperands, /*nowait=*/nullptr); - createBodyOfOp(sectionsOp, converter, currentLocation); + createBodyOfOp(sectionsOp, converter, currentLocation, eval); // Sections Construct } else if (dir == llvm::omp::Directive::OMPD_sections) { auto sectionsOp = firOpBuilder.create( currentLocation, reductionVars, /*reductions = */ nullptr, allocateOperands, allocatorOperands, noWaitClauseOperand); - createBodyOfOp(sectionsOp, converter, currentLocation); + createBodyOfOp(sectionsOp, converter, currentLocation, + eval); } } diff --git a/flang/test/Lower/OpenMP/omp-unstructured.f90 b/flang/test/Lower/OpenMP/omp-unstructured.f90 new file mode 100644 index 00000000000000..68f944b91abea7 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-unstructured.f90 @@ -0,0 +1,113 @@ +! Test unstructured code adjacent to and inside OpenMP constructs. + +! 
RUN: bbc %s -fopenmp -o "-" | FileCheck %s + +! CHECK-LABEL: func @_QPss1{{.*}} { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb3 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb4: // 2 preds: ^bb1, ^bb2 +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss1(n) ! unstructured code followed by a structured OpenMP construct + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss1-A', i + enddo + !$omp master + print*, 'ss1-B', i + !$omp end master + print* +end + +! CHECK-LABEL: func @_QPss2{{.*}} { +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb3 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb4: // 2 preds: ^bb1, ^bb2 +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct + !$omp master + print*, 'ss2-A', n + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss2-B', i + enddo + !$omp end master + print*, 'ss2-C', i + print* +end + +! CHECK-LABEL: func @_QPss3{{.*}} { +! CHECK: omp.parallel { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb3 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: omp.wsloop {{.*}} { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! 
CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb3 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb4: // 2 preds: ^bb1, ^bb2 +! CHECK: omp.yield +! CHECK: } +! CHECK: br ^bb1 +! CHECK: ^bb3: // pred: ^bb1 +! CHECK: omp.terminator +! CHECK: } +! CHECK: } +subroutine ss3(n) ! nested unstructured OpenMP constructs + !$omp parallel + do i = 1, 3 + !$omp do + do k = 1, 3 + print*, 'ss3-A', k + enddo + !$omp end do + !$omp do + do j = 1, 3 + do k = 1, 3 + if (k .eq. n) exit + print*, 'ss3-B', k + enddo + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QQmain +program p + call ss1(2) + call ss2(2) + call ss3(2) +end From 6f215ca680fdda6eff0114bbd9d26a3efd8b21be Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Sun, 22 May 2022 13:49:01 +0100 Subject: [PATCH 403/908] [SelectionDAG] Add support to widen ISD::STEP_VECTOR operations. Fixes: #55165 Differential Revision: https://reviews.llvm.org/D126168 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 1 + llvm/test/CodeGen/AArch64/sve-stepvector.ll | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ffb36ee155ecea..69974df8d121a3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3615,6 +3615,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_ScalarOp(N); diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll index 61aafb67499904..4b3cb9c038613f 
100644 --- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll @@ -45,6 +45,20 @@ entry: ; ILLEGAL INTEGER TYPES +define @stepvector_nxv6i64() { +; CHECK-LABEL: stepvector_nxv6i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: index z0.d, #0, #1 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: incd z1.d +; CHECK-NEXT: incd z2.d, all, mul #2 +; CHECK-NEXT: ret +entry: + %0 = call @llvm.experimental.stepvector.nxv6i64() + ret %0 +} + define @stepvector_nxv4i64() { ; CHECK-LABEL: stepvector_nxv4i64: ; CHECK: // %bb.0: // %entry @@ -73,6 +87,16 @@ entry: ret %0 } +define @stepvector_nxv3i32() { +; CHECK-LABEL: stepvector_nxv3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: ret +entry: + %0 = call @llvm.experimental.stepvector.nxv3i32() + ret %0 +} + define @stepvector_nxv2i32() { ; CHECK-LABEL: stepvector_nxv2i32: ; CHECK: // %bb.0: // %entry @@ -422,8 +446,10 @@ declare @llvm.experimental.stepvector.nxv4i32() declare @llvm.experimental.stepvector.nxv8i16() declare @llvm.experimental.stepvector.nxv16i8() +declare @llvm.experimental.stepvector.nxv6i64() declare @llvm.experimental.stepvector.nxv4i64() declare @llvm.experimental.stepvector.nxv16i32() +declare @llvm.experimental.stepvector.nxv3i32() declare @llvm.experimental.stepvector.nxv2i32() declare @llvm.experimental.stepvector.nxv8i8() declare @llvm.experimental.stepvector.nxv4i16() From a95ecb20bca894cc1373a85360ad5939dd2c3ff9 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 24 May 2022 14:17:43 -0700 Subject: [PATCH 404/908] [RISCV] Hoist VSETVLI out of idiomatic fixed length vector loops This patch teaches the VSETVLI insertion pass to perform a very limited form of partial redundancy elimination. 
The motivating example comes from the fixed length vectorization of a simple loop such as: for (unsigned i = 0; i < a_len; i++) a[i] += b; Without this change, the core vector loop and preheader are as follows: .LBB0_3: # %vector.ph andi a1, a6, -8 addi a4, a0, 16 mv a5, a1 .LBB0_4: # %vector.body # =>This Inner Loop Header: Depth=1 addi a3, a4, -16 vsetivli zero, 4, e32, m1, ta, mu vle32.v v8, (a3) vle32.v v9, (a4) vadd.vx v8, v8, a2 vadd.vx v9, v9, a2 vse32.v v8, (a3) vse32.v v9, (a4) addi a5, a5, -8 addi a4, a4, 32 bnez a5, .LBB0_4 The key thing to note here is that the execution of the vsetivli only needs to happen once. Since there's no tail folding happening here, the values of the vector configuration registers are invariant through the loop. After this patch, we hoist the configuration into the preheader and perform it once. .LBB0_3: # %vector.ph andi a1, a6, -8 vsetivli zero, 4, e32, m1, ta, mu addi a4, a0, 16 mv a5, a1 .LBB0_4: # %vector.body # =>This Inner Loop Header: Depth=1 addi a3, a4, -16 vle32.v v8, (a3) vle32.v v9, (a4) vadd.vx v8, v8, a2 vadd.vx v9, v9, a2 vse32.v v8, (a3) vse32.v v9, (a4) addi a5, a5, -8 addi a4, a4, 32 bnez a5, .LBB0_4 Differential Revision: https://reviews.llvm.org/D124869 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 101 ++++++++++++++++++ .../rvv/fixed-vector-strided-load-store.ll | 8 +- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 82 +++++++------- 3 files changed, 146 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 58ab7f456a9fdc..7dd8fc40432da3 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -101,6 +101,10 @@ class VSETVLIInfo { assert(hasAVLImm()); return AVLImm; } + + unsigned getSEW() const { return SEW; } + RISCVII::VLMUL getVLMUL() const { return VLMul; } + bool hasZeroAVL() const { if (hasAVLImm()) return getAVLImm() == 0; @@ -458,6 +462,7 @@ class 
RISCVInsertVSETVLI : public MachineFunctionPass { void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); void doLocalPrepass(MachineBasicBlock &MBB); + void doPRE(MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -1278,6 +1283,98 @@ void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { } } +/// Return true if the VL value configured must be equal to the requested one. +static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { + if (!Info.hasAVLImm()) + // TODO: Could allow VLMAX (e.g. X0), and possibly other registers + // by looking at the associated vreg def placement. + return false; + + if (RISCVII::LMUL_1 != Info.getVLMUL()) + // TODO: Generalize the code below to account for LMUL + return false; + + unsigned AVL = Info.getAVLImm(); + unsigned SEW = Info.getSEW(); + unsigned AVLInBits = AVL * SEW; + return ST.getRealMinVLen() >= AVLInBits; +} + +/// Perform simple partial redundancy elimination of the VSETVLI instructions +/// we're about to insert by looking for cases where we can PRE from the +/// beginning of one block to the end of one of its predecessors. Specifically, +/// this is geared to catch the common case of a fixed length vsetvl in a single +/// block loop when it could execute once in the preheader instead. 
+void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const RISCVSubtarget &ST = MF.getSubtarget(); + + if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) + return; + + MachineBasicBlock *UnavailablePred = nullptr; + VSETVLIInfo AvailableInfo; + for (MachineBasicBlock *P : MBB.predecessors()) { + const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit; + if (PredInfo.isUnknown()) { + if (UnavailablePred) + return; + UnavailablePred = P; + } else if (!AvailableInfo.isValid()) { + AvailableInfo = PredInfo; + } else if (AvailableInfo != PredInfo) { + return; + } + } + + // unreachable, single pred, or full redundancy. Note that FRE + // is handled by phase 3. + if (!UnavailablePred || !AvailableInfo.isValid()) + return; + + // critical edge - TODO: consider splitting? + if (UnavailablePred->succ_size() != 1) + return; + + // If VL can be less than AVL, then we can't reduce the frequency of exec. + if (!hasFixedResult(AvailableInfo, ST)) + return; + + // Does it actually let us remove an implicit transition in MBB? + bool Found = false; + for (auto &MI : MBB) { + if (isVectorConfigInstr(MI)) + return; + + const uint64_t TSFlags = MI.getDesc().TSFlags; + if (RISCVII::hasSEWOp(TSFlags)) { + if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI)) + return; + Found = true; + break; + } + } + if (!Found) + return; + + // Finally, update both data flow state and insert the actual vsetvli. + // Doing both keeps the code in sync with the dataflow results, which + // is critical for correctness of phase 3. 
+ auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit; + LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " + << UnavailablePred->getName() << " with state " + << AvailableInfo << "\n"); + BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; + BlockInfo[MBB.getNumber()].Pred = AvailableInfo; + + // Note there's an implicit assumption here that terminators never use + // or modify VL or VTYPE. Also, fallthrough will return end(). + auto InsertPt = UnavailablePred->getFirstInstrTerminator(); + insertVSETVLI(*UnavailablePred, InsertPt, + UnavailablePred->findDebugLoc(InsertPt), + AvailableInfo, OldInfo); +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. const RISCVSubtarget &ST = MF.getSubtarget(); @@ -1332,6 +1429,10 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { computeIncomingVLVTYPE(MBB); } + // Perform partial redundancy elimination of vsetvli transitions. + for (MachineBasicBlock &MBB : MF) + doPRE(MBB); + // Phase 3 - add any vsetvli instructions needed in the block. 
Use the // Phase 2 information to avoid adding vsetvlis before the first vector // instruction in the block if the VL/VTYPE is satisfied by its diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll index c832b8ecade467..22eef61cafbe21 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll @@ -590,10 +590,10 @@ define void @struct_gather(i32* noalias nocapture %A, %struct.foo* noalias nocap ; CHECK-ASM-NEXT: addi a1, a1, 132 ; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: li a3, 16 +; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: .LBB8_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: addi a4, a1, -128 -; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: vlse32.v v8, (a4), a3 ; CHECK-ASM-NEXT: vlse32.v v9, (a1), a3 ; CHECK-ASM-NEXT: vle32.v v10, (a0) @@ -706,9 +706,9 @@ define void @gather_unroll(i32* noalias nocapture %A, i32* noalias nocapture rea ; CHECK-ASM-NEXT: li a2, 256 ; CHECK-ASM-NEXT: li a3, 64 ; CHECK-ASM-NEXT: li a4, 16 +; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: .LBB9_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-ASM-NEXT: vlse32.v v8, (a1), a3 ; CHECK-ASM-NEXT: vlse32.v v9, (a0), a4 ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 @@ -818,8 +818,8 @@ define void @gather_of_pointers(i32** noalias nocapture %0, i32** noalias nocapt ; CHECK-ASM: # %bb.0: ; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: li a3, 40 -; CHECK-ASM-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-ASM-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vlse64.v v8, (a1), a3 ; CHECK-ASM-NEXT: addi a4, a1, 80 ; CHECK-ASM-NEXT: vlse64.v v9, (a4), a3 
@@ -891,8 +891,8 @@ define void @scatter_of_pointers(i32** noalias nocapture %0, i32** noalias nocap ; CHECK-ASM: # %bb.0: ; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: li a3, 40 -; CHECK-ASM-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-ASM-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: vle64.v v8, (a1) ; CHECK-ASM-NEXT: addi a4, a1, 16 ; CHECK-ASM-NEXT: vle64.v v9, (a4) diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index c6585d01ee4d4b..a8063d98bf7a5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -6,9 +6,9 @@ define void @sink_splat_mul(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_mul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -42,9 +42,9 @@ define void @sink_splat_add(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_add: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB1_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vadd.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -78,9 +78,9 @@ define void @sink_splat_sub(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB2_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; 
CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -114,9 +114,9 @@ define void @sink_splat_rsub(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_rsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB3_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -150,9 +150,9 @@ define void @sink_splat_and(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_and: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB4_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -186,9 +186,9 @@ define void @sink_splat_or(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_or: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB5_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -222,9 +222,9 @@ define void @sink_splat_xor(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_xor: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB6_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -902,9 +902,9 @@ define void @sink_splat_shl(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: 
sink_splat_shl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB14_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsll.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -938,9 +938,9 @@ define void @sink_splat_lshr(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_lshr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB15_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsrl.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -974,9 +974,9 @@ define void @sink_splat_ashr(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_ashr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB16_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsra.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1286,9 +1286,9 @@ define void @sink_splat_fmul(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB20_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1322,9 +1322,9 @@ define void @sink_splat_fdiv(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB21_1: # %vector.body ; CHECK-NEXT: # =>This Inner 
Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1358,9 +1358,9 @@ define void @sink_splat_frdiv(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB22_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1394,9 +1394,9 @@ define void @sink_splat_fadd(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB23_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1430,9 +1430,9 @@ define void @sink_splat_fsub(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB24_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -1466,9 +1466,9 @@ define void @sink_splat_frsub(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB25_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2048,9 
+2048,9 @@ define void @sink_splat_fma(float* noalias nocapture %a, float* nocapture readon ; CHECK-LABEL: sink_splat_fma: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB32_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 @@ -2089,9 +2089,9 @@ define void @sink_splat_fma_commute(float* noalias nocapture %a, float* nocaptur ; CHECK-LABEL: sink_splat_fma_commute: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB33_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 @@ -2415,9 +2415,9 @@ define void @sink_splat_udiv(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_udiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB38_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vdivu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2451,9 +2451,9 @@ define void @sink_splat_sdiv(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB39_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vdiv.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2487,9 +2487,9 @@ define void @sink_splat_urem(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_urem: ; CHECK: # %bb.0: # %entry 
; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB40_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vremu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2523,9 +2523,9 @@ define void @sink_splat_srem(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_srem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB41_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrem.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) @@ -2929,9 +2929,9 @@ define void @sink_splat_vp_mul(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i ; CHECK-LABEL: sink_splat_vp_mul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB46_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t @@ -2969,9 +2969,9 @@ define void @sink_splat_vp_add(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i ; CHECK-LABEL: sink_splat_vp_add: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB47_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t @@ -3051,9 +3051,9 @@ define void @sink_splat_vp_sub(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i ; CHECK-LABEL: sink_splat_vp_sub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; 
CHECK-NEXT: .LBB49_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t @@ -3089,9 +3089,9 @@ define void @sink_splat_vp_rsub(i32* nocapture %a, i32 signext %x, <4 x i1> %m, ; CHECK-LABEL: sink_splat_vp_rsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB50_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t @@ -3129,9 +3129,9 @@ define void @sink_splat_vp_shl(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i ; CHECK-LABEL: sink_splat_vp_shl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB51_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t @@ -3169,9 +3169,9 @@ define void @sink_splat_vp_lshr(i32* nocapture %a, i32 signext %x, <4 x i1> %m, ; CHECK-LABEL: sink_splat_vp_lshr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB52_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t @@ -3209,9 +3209,9 @@ define void @sink_splat_vp_ashr(i32* nocapture %a, i32 signext %x, <4 x i1> %m, ; CHECK-LABEL: sink_splat_vp_ashr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: 
.LBB53_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t @@ -3249,9 +3249,9 @@ define void @sink_splat_vp_fmul(float* nocapture %a, float %x, <4 x i1> %m, i32 ; CHECK-LABEL: sink_splat_vp_fmul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB54_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t @@ -3289,9 +3289,9 @@ define void @sink_splat_vp_fdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 ; CHECK-LABEL: sink_splat_vp_fdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB55_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t @@ -3327,9 +3327,9 @@ define void @sink_splat_vp_frdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 ; CHECK-LABEL: sink_splat_vp_frdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB56_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t @@ -3367,9 +3367,9 @@ define void @sink_splat_vp_fadd(float* nocapture %a, float %x, <4 x i1> %m, i32 ; CHECK-LABEL: sink_splat_vp_fadd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB57_1: 
# %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t @@ -3407,9 +3407,9 @@ define void @sink_splat_vp_fsub(float* nocapture %a, float %x, <4 x i1> %m, i32 ; CHECK-LABEL: sink_splat_vp_fsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB58_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t @@ -3447,9 +3447,9 @@ define void @sink_splat_vp_frsub(float* nocapture %a, float %x, <4 x i1> %m, i32 ; CHECK-LABEL: sink_splat_vp_frsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB59_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t @@ -3487,9 +3487,9 @@ define void @sink_splat_vp_udiv(i32* nocapture %a, i32 signext %x, <4 x i1> %m, ; CHECK-LABEL: sink_splat_vp_udiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB60_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t @@ -3527,9 +3527,9 @@ define void @sink_splat_vp_sdiv(i32* nocapture %a, i32 signext %x, <4 x i1> %m, ; CHECK-LABEL: sink_splat_vp_sdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB61_1: # 
%vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t @@ -3567,9 +3567,9 @@ define void @sink_splat_vp_urem(i32* nocapture %a, i32 signext %x, <4 x i1> %m, ; CHECK-LABEL: sink_splat_vp_urem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB62_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t @@ -3607,9 +3607,9 @@ define void @sink_splat_vp_srem(i32* nocapture %a, i32 signext %x, <4 x i1> %m, ; CHECK-LABEL: sink_splat_vp_srem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB63_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t @@ -3688,9 +3688,9 @@ define void @sink_splat_vp_fma(float* noalias nocapture %a, float* nocapture rea ; CHECK-LABEL: sink_splat_vp_fma: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB65_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu @@ -3731,9 +3731,9 @@ define void @sink_splat_vp_fma_commute(float* noalias nocapture %a, float* nocap ; CHECK-LABEL: sink_splat_vp_fma_commute: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: .LBB66_1: # %vector.body ; 
CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu From fb948572e03321b1470a2e47fbf685226f8f5f18 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 24 May 2022 14:55:42 -0700 Subject: [PATCH 405/908] [riscv] Use getFirstInstrTerminator [nfc] --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 7dd8fc40432da3..330fc8e1bca0a1 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1167,7 +1167,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { CurInfo != ExitInfo) { // Note there's an implicit assumption here that terminators never use // or modify VL or VTYPE. Also, fallthrough will return end(). - auto InsertPt = MBB.getFirstTerminator().getInstrIterator(); + auto InsertPt = MBB.getFirstInstrTerminator(); insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo, CurInfo); CurInfo = ExitInfo; } From d2ee2c9c8d34bc915a3d7c00951831eb24708f6a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 May 2022 14:54:57 -0700 Subject: [PATCH 406/908] [RISCV] Add an operand kind to the opcode/imm returned from RISCVMatInt. Instead of matching opcodes to know the format to emit, use an enum value that we can get from the RISCVMatInt::Inst class. Change the consumers to use fully covered switches so that we get a compiler warning if a new kind is added. With the opcode checks it was easier to forget to update one of the 3 consumers. 
Reviewed By: reames Differential Revision: https://reviews.llvm.org/D126317 --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 23 +++++++++-------- .../Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 25 +++++++++++++++++++ .../Target/RISCV/MCTargetDesc/RISCVMatInt.h | 10 ++++++++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 19 ++++++++------ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 19 ++++++++------ 5 files changed, 72 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index ffa30b27756f94..08f0d062d215ae 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2318,23 +2318,26 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value, MCRegister SrcReg = RISCV::X0; for (RISCVMatInt::Inst &Inst : Seq) { - if (Inst.Opc == RISCV::LUI) { + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + emitToStreamer(Out, + MCInstBuilder(Inst.Opc).addReg(DestReg).addImm(Inst.Imm)); + break; + case RISCVMatInt::RegX0: emitToStreamer( - Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm)); - } else if (Inst.Opc == RISCV::ADD_UW) { - emitToStreamer(Out, MCInstBuilder(RISCV::ADD_UW) - .addReg(DestReg) - .addReg(SrcReg) - .addReg(RISCV::X0)); - } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || - Inst.Opc == RISCV::SH3ADD) { + Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg( + RISCV::X0)); + break; + case RISCVMatInt::RegReg: emitToStreamer( Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg( SrcReg)); - } else { + break; + case RISCVMatInt::RegImm: emitToStreamer( Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm( Inst.Imm)); + break; } // Only the first instruction has X0 as its source. 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index ea74dde6f715a0..d19da6bd36646a 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -394,5 +394,30 @@ int getIntMatCost(const APInt &Val, unsigned Size, } return std::max(1, Cost); } + +OpndKind Inst::getOpndKind() const { + switch (Opc) { + default: + llvm_unreachable("Unexpected opcode!"); + case RISCV::LUI: + return RISCVMatInt::Imm; + case RISCV::ADD_UW: + return RISCVMatInt::RegX0; + case RISCV::SH1ADD: + case RISCV::SH2ADD: + case RISCV::SH3ADD: + return RISCVMatInt::RegReg; + case RISCV::ADDI: + case RISCV::ADDIW: + case RISCV::SLLI: + case RISCV::SRLI: + case RISCV::SLLI_UW: + case RISCV::RORI: + case RISCV::BSETI: + case RISCV::BCLRI: + return RISCVMatInt::RegImm; + } +} + } // namespace RISCVMatInt } // namespace llvm diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h index 6a8e0c64000124..90c29f01c43d84 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -17,11 +17,21 @@ namespace llvm { class APInt; namespace RISCVMatInt { + +enum OpndKind { + RegImm, // ADDI/ADDIW/SLLI/SRLI/BSETI/BCLRI + Imm, // LUI + RegReg, // SH1ADD/SH2ADD/SH3ADD + RegX0, // ADD_UW +}; + struct Inst { unsigned Opc; int64_t Imm; Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {} + + OpndKind getOpndKind() const; }; using InstSeq = SmallVector; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index b45de2ff6f818d..7d6e27f8e7808a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -188,16 +188,21 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); for 
(RISCVMatInt::Inst &Inst : Seq) { SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); - if (Inst.Opc == RISCV::LUI) - Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); - else if (Inst.Opc == RISCV::ADD_UW) - Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SDImm); + break; + case RISCVMatInt::RegX0: + Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, CurDAG->getRegister(RISCV::X0, XLenVT)); - else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || - Inst.Opc == RISCV::SH3ADD) + break; + case RISCVMatInt::RegReg: Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); - else + break; + case RISCVMatInt::RegImm: Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); + break; + } // Only the first instruction has X0 as its source. SrcReg = SDValue(Result, 0); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b3e6cd8a915a70..3e5948f8ff169e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -651,27 +651,32 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, assert(!Seq.empty()); for (RISCVMatInt::Inst &Inst : Seq) { - if (Inst.Opc == RISCV::LUI) { - BuildMI(MBB, MBBI, DL, get(RISCV::LUI), DstReg) + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) .addImm(Inst.Imm) .setMIFlag(Flag); - } else if (Inst.Opc == RISCV::ADD_UW) { - BuildMI(MBB, MBBI, DL, get(RISCV::ADD_UW), DstReg) + break; + case RISCVMatInt::RegX0: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) .addReg(SrcReg, RegState::Kill) .addReg(RISCV::X0) .setMIFlag(Flag); - } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || - Inst.Opc == RISCV::SH3ADD) { + break; + case RISCVMatInt::RegReg: BuildMI(MBB, MBBI, DL, get(Inst.Opc), 
DstReg) .addReg(SrcReg, RegState::Kill) .addReg(SrcReg, RegState::Kill) .setMIFlag(Flag); - } else { + break; + case RISCVMatInt::RegImm: BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) .addReg(SrcReg, RegState::Kill) .addImm(Inst.Imm) .setMIFlag(Flag); + break; } + // Only the first instruction has X0 as its source. SrcReg = DstReg; } From 5799f843a22029bd51d45edcd773e3c8662a0a08 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Tue, 24 May 2022 11:49:47 -0700 Subject: [PATCH 407/908] [mlir][sparse] add new complex ops to reduction recognition Reviewed By: bixia Differential Revision: https://reviews.llvm.org/D126318 --- mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 1a13eecb846eab..6453cf99e09fe2 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -378,11 +378,14 @@ static vector::CombiningKind getCombiningKind(Reduction kind) { static Reduction getReduction(Kind kind) { switch (kind) { case Kind::kAddF: + case Kind::kAddC: case Kind::kAddI: case Kind::kSubF: + case Kind::kSubC: case Kind::kSubI: return kSum; case Kind::kMulF: + case Kind::kMulC: case Kind::kMulI: return kProduct; case Kind::kAndI: From 948d931323a13dfc68430814a44b9075a59e2310 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 24 May 2022 14:58:09 -0700 Subject: [PATCH 408/908] [RISCV] Ensure the forwarded AVL register is alive When the AVL value does not fit in 5 bits, the register in which this value is stored may be dead when we want to forward it. This patch ensure the kill flags on the register are cleared before forwarding. 
Patch by: loralb Differential Revision: https://reviews.llvm.org/D125971 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 4 +++- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 330fc8e1bca0a1..2a1cdeca4af090 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1239,8 +1239,10 @@ void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (CurInfo.hasAVLImm()) VLOp.ChangeToImmediate(CurInfo.getAVLImm()); - else + else { + MRI->clearKillFlags(CurInfo.getAVLReg()); VLOp.ChangeToRegister(CurInfo.getAVLReg(), /*IsDef*/ false); + } CurInfo = computeInfoForInstr(MI, TSFlags, MRI); continue; } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 4e0364e41eb74b..e625e446eea040 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -521,6 +521,20 @@ declare { , i64 } @llvm.riscv.vleff.nxv1i64.i64( declare @llvm.riscv.vmseq.nxv1i64.i64.i64( , i64, i64) +; Ensure AVL register is alive when forwarding an AVL immediate that does not fit in 5 bits +define @avl_forward5(* %addr) { +; CHECK-LABEL: avl_forward5: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: ret + %gvl = tail call i64 @llvm.riscv.vsetvli.i64(i64 32, i64 0, i64 2) + %ret = tail call @llvm.riscv.vle.nxv2i32.i64( undef, * %addr, i64 %gvl) + ret %ret +} + declare @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( , , From 239094cdee8e3f5de4a6757f43b94cc21f9a808f Mon Sep 17 00:00:00 2001 From: Mike Rice Date: Tue, 24 May 2022 11:48:42 -0700 Subject: [PATCH 409/908] [OpenMP] Add 
codegen for 'omp_all_memory' reserved locator. This creates an entry with address=nullptr and flag=0x80. When an 'omp_all_memory' entry is specified any other 'out' or 'inout' entries are not needed and are not passed to the runtime. Differential Revision: https://reviews.llvm.org/D126321 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 21 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 53 +- clang/test/OpenMP/depobj_codegen.cpp | 4 +- clang/test/OpenMP/interop_irbuilder.cpp | 48 +- .../target_enter_data_depend_codegen.cpp | 21 +- .../target_exit_data_depend_codegen.cpp | 21 +- .../OpenMP/target_update_depend_codegen.cpp | 21 +- clang/test/OpenMP/task_codegen.c | 6 +- clang/test/OpenMP/task_codegen.cpp | 1717 +++++++++++++---- clang/test/OpenMP/task_if_codegen.cpp | 144 +- 10 files changed, 1502 insertions(+), 554 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c1982859c06099..89e26b10dfae5e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4507,7 +4507,8 @@ enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4, - DepInOutSet = 0x8 + DepInOutSet = 0x8, + DepOmpAllMem = 0x80, }; /// Fields ids in kmp_depend_info record. 
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; @@ -4531,10 +4532,12 @@ static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { case OMPC_DEPEND_inoutset: DepKind = DepInOutSet; break; + case OMPC_DEPEND_outallmemory: + DepKind = DepOmpAllMem; + break; case OMPC_DEPEND_source: case OMPC_DEPEND_sink: case OMPC_DEPEND_depobj: - case OMPC_DEPEND_outallmemory: case OMPC_DEPEND_inoutallmemory: case OMPC_DEPEND_unknown: llvm_unreachable("Unknown task dependence type"); @@ -4602,12 +4605,21 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, for (const Expr *E : Data.DepExprs) { llvm::Value *Addr; llvm::Value *Size; - std::tie(Addr, Size) = getPointerAndSize(CGF, E); + + // The expression will be a nullptr in the 'omp_all_memory' case. + if (E) { + std::tie(Addr, Size) = getPointerAndSize(CGF, E); + Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); + } else { + Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); + Size = llvm::ConstantInt::get(CGF.SizeTy, 0); + } LValue Base; if (unsigned *P = Pos.dyn_cast()) { Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); } else { + assert(E && "Expected a non-null expression"); LValue &PosLVal = *Pos.get(); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); Base = CGF.MakeAddrLValue( @@ -4616,8 +4628,7 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, // deps[i].base_addr = &; LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); - CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), - BaseAddrLVal); + CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); // deps[i].len = sizeof(); LValue LenLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Len)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 5e75e8884bb493..61e3661e59be0d 100644 --- 
a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4481,6 +4481,40 @@ class CheckVarsEscapingUntiedTaskDeclContext final }; } // anonymous namespace +static void buildDependences(const OMPExecutableDirective &S, + OMPTaskDataTy &Data) { + + // First look for 'omp_all_memory' and add this first. + bool OmpAllMemory = false; + if (llvm::any_of( + S.getClausesOfKind(), [](const OMPDependClause *C) { + return C->getDependencyKind() == OMPC_DEPEND_outallmemory || + C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; + })) { + OmpAllMemory = true; + // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are + // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to + // simplify. + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory, + /*IteratorExpr=*/nullptr); + // Add a nullptr Expr to simplify the codegen in emitDependData. + DD.DepExprs.push_back(nullptr); + } + // Add remaining dependences skipping any 'out' or 'inout' if they are + // overridden by 'omp_all_memory'. + for (const auto *C : S.getClausesOfKind()) { + OpenMPDependClauseKind Kind = C->getDependencyKind(); + if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) + continue; + if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) + continue; + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); + DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); + } +} + void CodeGenFunction::EmitOMPTaskBasedDirective( const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, @@ -4576,11 +4610,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( *this, S.getBeginLoc(), LHSs, RHSs, Data); // Build list of dependences. 
- for (const auto *C : S.getClausesOfKind()) { - OMPTaskDataTy::DependData &DD = - Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); - DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); - } + buildDependences(S, Data); // Get list of local vars for untied tasks. if (!Data.Tied) { CheckVarsEscapingUntiedTaskDeclContext Checker; @@ -4950,12 +4980,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } } (void)TargetScope.Privatize(); - // Build list of dependences. - for (const auto *C : S.getClausesOfKind()) { - OMPTaskDataTy::DependData &DD = - Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); - DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); - } + buildDependences(S, Data); auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. @@ -5070,11 +5095,7 @@ void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { OMPTaskDataTy Data; // Build list of dependences - for (const auto *C : S.getClausesOfKind()) { - OMPTaskDataTy::DependData &DD = - Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); - DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); - } + buildDependences(S, Data); CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); } diff --git a/clang/test/OpenMP/depobj_codegen.cpp b/clang/test/OpenMP/depobj_codegen.cpp index cace0db361c936..18e832f14adb52 100644 --- a/clang/test/OpenMP/depobj_codegen.cpp +++ b/clang/test/OpenMP/depobj_codegen.cpp @@ -160,12 +160,12 @@ int main(int argc, char **argv) { // &p[0] // CHECK: [[P:%.+]] = load i8*, i8** [[P_ADDR]], align 8 // CHECK: [[P0:%.+]] = getelementptr inbounds i8, i8* [[P]], i64 0 +// CHECK: [[P0_ADDR:%.+]] = ptrtoint i8* [[P0]] to i64 // dep[ITERATOR_COUNTER].base_addr = &p[0]; // 
CHECK: [[ITERATOR_COUNTER:%.+]] = load i64, i64* [[ITERATOR_COUNTER_ADDR]], align 8 // CHECK: [[DEP_IC:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_ADDR]], i64 [[ITERATOR_COUNTER]] // CHECK: [[DEP_IC_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_IC]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[P0_ADDR:%.+]] = ptrtoint i8* [[P0]] to i64 // CHECK: store i64 [[P0_ADDR]], i64* [[DEP_IC_BASE_ADDR]], align 8 // dep[ITERATOR_COUNTER].size = sizeof(p[0]); @@ -210,9 +210,9 @@ int main(int argc, char **argv) { // CHECK: [[SHAPE_ADDR:%.+]] = load i32*, i32** [[ARGV_ADDR:%.+]], align 8 // CHECK: [[SZ1:%.+]] = mul nuw i64 12, %{{.+}} // CHECK: [[SZ:%.+]] = mul nuw i64 [[SZ1]], 4 +// CHECK: [[SHAPE:%.+]] = ptrtoint i32* [[SHAPE_ADDR]] to i64 // CHECK: [[BASE_ADDR:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_ADDR]], i{{.+}} 2 // CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[SHAPE:%.+]] = ptrtoint i32* [[SHAPE_ADDR]] to i64 // CHECK: store i64 [[SHAPE]], i64* [[ADDR]], align 8 // CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1 // CHECK: store i64 [[SZ]], i64* [[SZ_ADDR]], align 8 diff --git a/clang/test/OpenMP/interop_irbuilder.cpp b/clang/test/OpenMP/interop_irbuilder.cpp index 30eef6f4090971..bd18caa21423cd 100644 --- a/clang/test/OpenMP/interop_irbuilder.cpp +++ b/clang/test/OpenMP/interop_irbuilder.cpp @@ -51,42 +51,42 @@ void test1() { // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: call void @__tgt_interop_init(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i8** [[INTEROP]], i64 2, i32 [[TMP1]], i32 0, i8* null, i32 0) // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x 
%struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP3]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[D0]] to i64 -// CHECK-NEXT: store i64 [[TMP5]], i64* [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[D0]] to i64 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP5]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP4]], i32 0, i32 1 // CHECK-NEXT: store i64 4, i64* [[TMP6]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP4]], i32 0, i32 2 // CHECK-NEXT: store i8 1, i8* [[TMP7]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[D1]] to i64 -// CHECK-NEXT: store i64 [[TMP10]], i64* [[TMP9]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 1 +// CHECK-NEXT: [[TMP8:%.*]] = ptrtoint i32* [[D1]] to i64 
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP2]], i64 1 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP8]], i64* [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP9]], i32 0, i32 1 // CHECK-NEXT: store i64 4, i64* [[TMP11]], align 8 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 2 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP9]], i32 0, i32 2 // CHECK-NEXT: store i8 1, i8* [[TMP12]], align 8 // CHECK-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK-NEXT: [[TMP13:%.*]] = bitcast %struct.kmp_depend_info* [[TMP2]] to i8* // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: call void @__tgt_interop_use(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4]], i8** [[INTEROP]], i32 -1, i32 2, i8* [[TMP13]], i32 1) // CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP14]], i64 0 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 0, i32 0 -// CHECK-NEXT: [[TMP17:%.*]] = ptrtoint i32* [[D0]] to i64 -// CHECK-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint i32* [[D0]] to i64 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP14]], i64 0 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP15]], i64* [[TMP17]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 1 // CHECK-NEXT: store i64 4, i64* [[TMP18]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 2 // CHECK-NEXT: store i8 1, i8* [[TMP19]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP14]], i64 1 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 0 -// CHECK-NEXT: [[TMP22:%.*]] = ptrtoint i32* [[D1]] to i64 -// CHECK-NEXT: store i64 [[TMP22]], i64* [[TMP21]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 1 +// CHECK-NEXT: [[TMP20:%.*]] = ptrtoint i32* [[D1]] to i64 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP14]], i64 1 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP20]], i64* [[TMP22]], align 8 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 1 // CHECK-NEXT: store i64 4, i64* [[TMP23]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 2 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 2 // CHECK-NEXT: store i8 1, i8* [[TMP24]], align 8 // CHECK-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR6]], align 8 // CHECK-NEXT: [[TMP25:%.*]] = bitcast %struct.kmp_depend_info* [[TMP14]] to i8* diff --git a/clang/test/OpenMP/target_enter_data_depend_codegen.cpp b/clang/test/OpenMP/target_enter_data_depend_codegen.cpp index 3111ce01495e7a..f5975402dd45ae 100644 --- a/clang/test/OpenMP/target_enter_data_depend_codegen.cpp +++ b/clang/test/OpenMP/target_enter_data_depend_codegen.cpp @@ -94,9 +94,9 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -160,17 +160,17 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = 
getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -241,25 +241,25 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store 
i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 2 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -329,25 +329,26 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false) + // CK1: %{{.+}} = sub nuw + // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, 
%struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 2 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], @@ -360,9 +361,9 @@ void foo(int arg) { // 
CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 4 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], diff --git a/clang/test/OpenMP/target_exit_data_depend_codegen.cpp b/clang/test/OpenMP/target_exit_data_depend_codegen.cpp index 8b686fc6495422..d87fdd7f781ef4 100644 --- a/clang/test/OpenMP/target_exit_data_depend_codegen.cpp +++ b/clang/test/OpenMP/target_exit_data_depend_codegen.cpp @@ -94,9 +94,9 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -160,17 +160,17 @@ void 
foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -241,25 +241,25 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to 
i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 2 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* 
[[DEP_SIZE]], @@ -329,25 +329,26 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false) + // CK1: %{{.+}} = sub nuw + // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, 
%struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 2 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], @@ -360,9 +361,9 @@ void foo(int arg) { // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 4 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], diff --git a/clang/test/OpenMP/target_update_depend_codegen.cpp b/clang/test/OpenMP/target_update_depend_codegen.cpp index cb16e663c68069..1e62fd9189949c 100644 --- a/clang/test/OpenMP/target_update_depend_codegen.cpp +++ b/clang/test/OpenMP/target_update_depend_codegen.cpp @@ -94,9 +94,9 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr 
%struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -160,17 +160,17 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr 
inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -241,25 +241,25 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to 
i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 2 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], @@ -329,25 +329,26 @@ void foo(int arg) { // CK1-32: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* // CK1-32: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false) + // CK1: %{{.+}} = sub nuw + // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP:%.+]], i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 1 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store 
i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 2 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], @@ -360,9 +361,9 @@ void foo(int arg) { // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: [[DEP:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[MAIN_DEP]], i[[sz]] 4 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 - // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], diff --git a/clang/test/OpenMP/task_codegen.c b/clang/test/OpenMP/task_codegen.c index 8d9f97c08cdce8..0302e4351e8c6c 100644 --- a/clang/test/OpenMP/task_codegen.c +++ b/clang/test/OpenMP/task_codegen.c @@ -61,9 
+61,9 @@ int main(void) { // CHECK: store i8* [[SV]], i8** [[SV_ADDR:%.+]], align 8 // CHECK: [[VLA:%.+]] = alloca %struct.kmp_depend_info, i64 [[SIZE]], // CHECK: [[SIZE32:%.+]] = trunc i64 [[SIZE]] to i32 + // CHECK: [[A_ADDR_CAST:%.+]] = ptrtoint i32* [[A_ADDR]] to i64 // CHECK: [[VLA0:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA]], i64 0 // CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA0]], i{{.+}} 0, i{{.+}} 0 - // CHECK: [[A_ADDR_CAST:%.+]] = ptrtoint i32* [[A_ADDR]] to i64 // CHECK: store i64 [[A_ADDR_CAST]], i64* [[BASE_ADDR]], align 16 // CHECK: [[SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA0]], i{{.+}} 0, i{{.+}} 1 // CHECK: store i64 4, i64* [[SIZE_ADDR]], align 8 @@ -75,9 +75,9 @@ int main(void) { // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], align 4 // CHECK: [[A_CAST:%.+]] = sext i32 [[A]] to i64 // CHECK: [[SZ:%.+]] = mul nuw i64 [[SZ1]], [[A_CAST]] + // CHECK: [[B_ADDR_CAST:%.+]] = ptrtoint i32** %{{.+}} to i64 // CHECK: [[VLA1:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA]], i64 1 // CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA1]], i{{.+}} 0, i{{.+}} 0 - // CHECK: [[B_ADDR_CAST:%.+]] = ptrtoint i32** %{{.+}} to i64 // CHECK: store i64 [[B_ADDR_CAST]], i64* [[BASE_ADDR]], align 8 // CHECK: [[SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[VLA1]], i{{.+}} 0, i{{.+}} 1 // CHECK: store i64 [[SZ]], i64* [[SIZE_ADDR]], align 8 @@ -198,12 +198,12 @@ for (int i = 0; i < 10; ++i) // CHECK: [[I:%.+]] = load i32, i32* [[I_ADDR]], // CHECK: [[IDX:%.+]] = sext i32 [[I]] to i64 // CHECK: [[AKI_ADDR:%.+]] = getelementptr inbounds i32, i32* [[AK]], i64 [[IDX]] + // CHECK: [[AKI_INT:%.+]] = ptrtoint i32* [[AKI_ADDR]] to i64 // DEPS[DEP_COUNTER].base_addr = &a[k][i]; // CHECK: 
[[DEP_COUNTER:%.+]] = load i64, i64* [[DEP_COUNTER_ADDR]], // CHECK: [[DEPS_DC:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEPS]], i64 [[DEP_COUNTER]] // CHECK: [[DEPS_DC_BASE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEPS_DC]], i{{.+}} 0, i{{.+}} 0 - // CHECK: [[AKI_INT:%.+]] = ptrtoint i32* [[AKI_ADDR]] to i64 // CHECK: store i64 [[AKI_INT]], i64* [[DEPS_DC_BASE_ADDR]], // DEPS[DEP_COUNTER].size = sizeof(a[k][i]); diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index a7c525add0a00b..b2a122c71b88e2 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -187,6 +187,32 @@ void xxxx() { // Copy firstprivate value of `b`. #endif // UNTIEDRT +#ifdef OMP51 +void test_omp_all_memory() +{ + int a,b,c,d,e; +#pragma omp task depend(in:a) depend(out:omp_all_memory) + { + a = 13; + } +#pragma omp task depend(in:a) depend(inout:omp_all_memory) + { + a = 14; + } +#pragma omp task depend(in:a) depend(inout:b) depend(inout:omp_all_memory) + { + a = 15; + } +#pragma omp task depend(in:a) depend(inout:b) depend(inout:c,omp_all_memory,d) + { + a = 16; + } +#pragma omp task depend(out:omp_all_memory) + { + a = 17; + } +} +#endif // OMP51 #endif // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { @@ -269,21 +295,21 @@ void xxxx() { // CHECK1-NEXT: store i64 4, i64* [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i32 0, i32 2 // CHECK1-NEXT: store i8 1, i8* [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint i8* [[B]] to i64 -// CHECK1-NEXT: store i64 [[TMP31]], i64* 
[[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint i8* [[B]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 1 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP29]], i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 1 // CHECK1-NEXT: store i64 1, i64* [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 2 // CHECK1-NEXT: store i8 1, i8* [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 2 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP36:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 -// CHECK1-NEXT: store i64 [[TMP36]], i64* [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP34]], i64* [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 1 // CHECK1-NEXT: store i64 8, i64* [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 2 // CHECK1-NEXT: store i8 1, i8* [[TMP38]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP39]] @@ -293,13 +319,13 @@ void xxxx() { // CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 // CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i32* [[TMP41]] to i64 // CHECK1-NEXT: [[TMP44:%.*]] = sub nuw i64 [[TMP43]], [[TMP42]] -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 3 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: store i64 [[TMP47]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 1 // CHECK1-NEXT: store i64 [[TMP44]], i64* [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 2 // CHECK1-NEXT: store i8 1, i8* [[TMP49]], align 8 // CHECK1-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.kmp_depend_info* [[TMP24]] to i8* @@ -315,13 +341,13 @@ void xxxx() { // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP58]], i32 0, i32 0 // CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 0 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP63:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 -// CHECK1-NEXT: store i64 [[TMP63]], i64* [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP61:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 0 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP61]], i64* [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 1 // CHECK1-NEXT: store i64 4, i64* [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, i8* [[TMP65]], align 8 // CHECK1-NEXT: [[TMP66:%.*]] = load i8, i8* [[B]], align 1 // CHECK1-NEXT: [[TMP67:%.*]] = sext i8 [[TMP66]] to i64 @@ -337,13 +363,13 @@ void xxxx() { // CHECK1-NEXT: [[TMP73:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 // CHECK1-NEXT: [[TMP74:%.*]] = ptrtoint i32* [[TMP72]] to i64 // CHECK1-NEXT: [[TMP75:%.*]] = sub nuw i64 [[TMP74]], [[TMP73]] -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 1 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP78:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: store i64 [[TMP78]], i64* [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP76:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 1 +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP76]], i64* [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 1 // CHECK1-NEXT: store i64 [[TMP75]], i64* [[TMP79]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, i8* 
[[TMP80]], align 8 // CHECK1-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR11]], align 8 // CHECK1-NEXT: [[TMP81:%.*]] = bitcast %struct.kmp_depend_info* [[TMP60]] to i8* @@ -355,13 +381,13 @@ void xxxx() { // CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP85]], i32 0, i32 0 // CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR13]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 0 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP90:%.*]] = ptrtoint %struct.S* [[ARRAYIDX14]] to i64 -// CHECK1-NEXT: store i64 [[TMP90]], i64* [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP88:%.*]] = ptrtoint %struct.S* [[ARRAYIDX14]] to i64 +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 0 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP88]], i64* [[TMP90]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 1 // CHECK1-NEXT: store i64 4, i64* [[TMP91]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 2 // 
CHECK1-NEXT: store i8 4, i8* [[TMP92]], align 8 // CHECK1-NEXT: [[TMP93:%.*]] = load i8, i8* [[B]], align 1 // CHECK1-NEXT: [[TMP94:%.*]] = sext i8 [[TMP93]] to i64 @@ -377,13 +403,13 @@ void xxxx() { // CHECK1-NEXT: [[TMP100:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 // CHECK1-NEXT: [[TMP101:%.*]] = ptrtoint i32* [[TMP99]] to i64 // CHECK1-NEXT: [[TMP102:%.*]] = sub nuw i64 [[TMP101]], [[TMP100]] -// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 1 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP105:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 -// CHECK1-NEXT: store i64 [[TMP105]], i64* [[TMP104]], align 8 -// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP103:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 +// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 1 +// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP103]], i64* [[TMP105]], align 8 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 1 // CHECK1-NEXT: store i64 [[TMP102]], i64* [[TMP106]], align 8 -// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 2 // CHECK1-NEXT: store i8 4, i8* [[TMP107]], align 8 // CHECK1-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR19]], align 8 // CHECK1-NEXT: [[TMP108:%.*]] = bitcast %struct.kmp_depend_info* [[TMP87]] to i8* @@ -402,13 +428,13 
@@ void xxxx() { // CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP115]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, i8* [[TMP118]], align 8 // CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 1 -// CHECK1-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP121:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 -// CHECK1-NEXT: store i64 [[TMP121]], i64* [[TMP120]], align 8 -// CHECK1-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP119:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 +// CHECK1-NEXT: [[TMP120:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 1 +// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP119]], i64* [[TMP121]], align 8 +// CHECK1-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 1 // CHECK1-NEXT: store i64 4, i64* [[TMP122]], align 8 -// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, i8* [[TMP123]], align 8 // CHECK1-NEXT: [[TMP124:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP124]] @@ -426,13 +452,13 @@ void xxxx() { // CHECK1-NEXT: [[TMP131:%.*]] = ptrtoint i32* [[ARRAYIDX24]] 
to i64 // CHECK1-NEXT: [[TMP132:%.*]] = ptrtoint i32* [[TMP130]] to i64 // CHECK1-NEXT: [[TMP133:%.*]] = sub nuw i64 [[TMP132]], [[TMP131]] -// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 2 -// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP136:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 -// CHECK1-NEXT: store i64 [[TMP136]], i64* [[TMP135]], align 8 -// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP134:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 +// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 2 +// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP134]], i64* [[TMP136]], align 8 +// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 1 // CHECK1-NEXT: store i64 [[TMP133]], i64* [[TMP137]], align 8 -// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, i8* [[TMP138]], align 8 // CHECK1-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR27]], align 8 // CHECK1-NEXT: [[TMP139:%.*]] = bitcast %struct.kmp_depend_info* [[TMP114]] to i8* @@ -1461,21 +1487,21 @@ void xxxx() { // CHECK2-NEXT: store i64 4, i64* [[TMP27]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i32 0, i32 2 // CHECK2-NEXT: store i8 1, i8* [[TMP28]], align 8 -// 
CHECK2-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 1 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP31:%.*]] = ptrtoint i8* [[B]] to i64 -// CHECK2-NEXT: store i64 [[TMP31]], i64* [[TMP30]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = ptrtoint i8* [[B]] to i64 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 1 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP29]], i64* [[TMP31]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 1 // CHECK2-NEXT: store i64 1, i64* [[TMP32]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 2 // CHECK2-NEXT: store i8 1, i8* [[TMP33]], align 8 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 2 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP36:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 -// CHECK2-NEXT: store i64 [[TMP36]], i64* [[TMP35]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP34:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 +// CHECK2-NEXT: [[TMP35:%.*]] = 
getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 2 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP34]], i64* [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 1 // CHECK2-NEXT: store i64 8, i64* [[TMP37]], align 8 -// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 2 // CHECK2-NEXT: store i8 1, i8* [[TMP38]], align 8 // CHECK2-NEXT: [[TMP39:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP39]] @@ -1485,13 +1511,13 @@ void xxxx() { // CHECK2-NEXT: [[TMP42:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 // CHECK2-NEXT: [[TMP43:%.*]] = ptrtoint i32* [[TMP41]] to i64 // CHECK2-NEXT: [[TMP44:%.*]] = sub nuw i64 [[TMP43]], [[TMP42]] -// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 3 -// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-NEXT: store i64 [[TMP47]], i64* [[TMP46]], align 8 -// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 3 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 0 +// 
CHECK2-NEXT: store i64 [[TMP45]], i64* [[TMP47]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 1 // CHECK2-NEXT: store i64 [[TMP44]], i64* [[TMP48]], align 8 -// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 2 // CHECK2-NEXT: store i8 1, i8* [[TMP49]], align 8 // CHECK2-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK2-NEXT: [[TMP50:%.*]] = bitcast %struct.kmp_depend_info* [[TMP24]] to i8* @@ -1507,13 +1533,13 @@ void xxxx() { // CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP58]], i32 0, i32 0 // CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 // CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 0 -// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP63:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 -// CHECK2-NEXT: store i64 [[TMP63]], i64* [[TMP62]], align 8 -// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP61:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 0 +// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* 
[[TMP62]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP61]], i64* [[TMP63]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 1 // CHECK2-NEXT: store i64 4, i64* [[TMP64]], align 8 -// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, i8* [[TMP65]], align 8 // CHECK2-NEXT: [[TMP66:%.*]] = load i8, i8* [[B]], align 1 // CHECK2-NEXT: [[TMP67:%.*]] = sext i8 [[TMP66]] to i64 @@ -1529,13 +1555,13 @@ void xxxx() { // CHECK2-NEXT: [[TMP73:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 // CHECK2-NEXT: [[TMP74:%.*]] = ptrtoint i32* [[TMP72]] to i64 // CHECK2-NEXT: [[TMP75:%.*]] = sub nuw i64 [[TMP74]], [[TMP73]] -// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 1 -// CHECK2-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP78:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 -// CHECK2-NEXT: store i64 [[TMP78]], i64* [[TMP77]], align 8 -// CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP76:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 +// CHECK2-NEXT: [[TMP77:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 1 +// CHECK2-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP76]], i64* [[TMP78]], align 8 +// CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 1 // CHECK2-NEXT: store i64 
[[TMP75]], i64* [[TMP79]], align 8 -// CHECK2-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, i8* [[TMP80]], align 8 // CHECK2-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR11]], align 8 // CHECK2-NEXT: [[TMP81:%.*]] = bitcast %struct.kmp_depend_info* [[TMP60]] to i8* @@ -1547,13 +1573,13 @@ void xxxx() { // CHECK2-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP85]], i32 0, i32 0 // CHECK2-NEXT: [[TMP87:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR13]], i64 0, i64 0 // CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 0 -// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP90:%.*]] = ptrtoint %struct.S* [[ARRAYIDX14]] to i64 -// CHECK2-NEXT: store i64 [[TMP90]], i64* [[TMP89]], align 8 -// CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP88:%.*]] = ptrtoint %struct.S* [[ARRAYIDX14]] to i64 +// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 0 +// CHECK2-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP88]], i64* [[TMP90]], align 8 +// CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, 
i32 1 // CHECK2-NEXT: store i64 4, i64* [[TMP91]], align 8 -// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 2 // CHECK2-NEXT: store i8 4, i8* [[TMP92]], align 8 // CHECK2-NEXT: [[TMP93:%.*]] = load i8, i8* [[B]], align 1 // CHECK2-NEXT: [[TMP94:%.*]] = sext i8 [[TMP93]] to i64 @@ -1569,13 +1595,13 @@ void xxxx() { // CHECK2-NEXT: [[TMP100:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 // CHECK2-NEXT: [[TMP101:%.*]] = ptrtoint i32* [[TMP99]] to i64 // CHECK2-NEXT: [[TMP102:%.*]] = sub nuw i64 [[TMP101]], [[TMP100]] -// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 1 -// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP105:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 -// CHECK2-NEXT: store i64 [[TMP105]], i64* [[TMP104]], align 8 -// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP103:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 +// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 1 +// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP103]], i64* [[TMP105]], align 8 +// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 1 // CHECK2-NEXT: store i64 [[TMP102]], i64* [[TMP106]], align 8 -// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 2 +// CHECK2-NEXT: 
[[TMP107:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 2 // CHECK2-NEXT: store i8 4, i8* [[TMP107]], align 8 // CHECK2-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR19]], align 8 // CHECK2-NEXT: [[TMP108:%.*]] = bitcast %struct.kmp_depend_info* [[TMP87]] to i8* @@ -1594,13 +1620,13 @@ void xxxx() { // CHECK2-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP115]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, i8* [[TMP118]], align 8 // CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 1 -// CHECK2-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP121:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 -// CHECK2-NEXT: store i64 [[TMP121]], i64* [[TMP120]], align 8 -// CHECK2-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP119:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 +// CHECK2-NEXT: [[TMP120:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 1 +// CHECK2-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP119]], i64* [[TMP121]], align 8 +// CHECK2-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 1 // CHECK2-NEXT: store i64 4, i64* [[TMP122]], align 8 -// CHECK2-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP120]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, i8* [[TMP123]], align 8 // CHECK2-NEXT: [[TMP124:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK2-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP124]] @@ -1618,13 +1644,13 @@ void xxxx() { // CHECK2-NEXT: [[TMP131:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 // CHECK2-NEXT: [[TMP132:%.*]] = ptrtoint i32* [[TMP130]] to i64 // CHECK2-NEXT: [[TMP133:%.*]] = sub nuw i64 [[TMP132]], [[TMP131]] -// CHECK2-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 2 -// CHECK2-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP136:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 -// CHECK2-NEXT: store i64 [[TMP136]], i64* [[TMP135]], align 8 -// CHECK2-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP134:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 +// CHECK2-NEXT: [[TMP135:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 2 +// CHECK2-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP134]], i64* [[TMP136]], align 8 +// CHECK2-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 1 // CHECK2-NEXT: store i64 [[TMP133]], i64* [[TMP137]], align 8 -// CHECK2-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, i8* [[TMP138]], align 8 // CHECK2-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR27]], align 8 // 
CHECK2-NEXT: [[TMP139:%.*]] = bitcast %struct.kmp_depend_info* [[TMP114]] to i8* @@ -2656,21 +2682,21 @@ void xxxx() { // CHECK2-51-NEXT: store i64 4, i64* [[TMP27]], align 8 // CHECK2-51-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 1, i8* [[TMP28]], align 8 -// CHECK2-51-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 1 -// CHECK2-51-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP31:%.*]] = ptrtoint i8* [[B]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP31]], i64* [[TMP30]], align 8 -// CHECK2-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP29:%.*]] = ptrtoint i8* [[B]] to i64 +// CHECK2-51-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 1 +// CHECK2-51-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP29]], i64* [[TMP31]], align 8 +// CHECK2-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 1, i64* [[TMP32]], align 8 -// CHECK2-51-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 1, i8* [[TMP33]], align 8 -// CHECK2-51-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 2 -// CHECK2-51-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP34]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP36:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP36]], i64* [[TMP35]], align 8 -// CHECK2-51-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP34:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 +// CHECK2-51-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 2 +// CHECK2-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP34]], i64* [[TMP36]], align 8 +// CHECK2-51-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 8, i64* [[TMP37]], align 8 -// CHECK2-51-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 1, i8* [[TMP38]], align 8 // CHECK2-51-NEXT: [[TMP39:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK2-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP39]] @@ -2680,13 +2706,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP42:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 // CHECK2-51-NEXT: [[TMP43:%.*]] = ptrtoint i32* [[TMP41]] to i64 // CHECK2-51-NEXT: [[TMP44:%.*]] = sub nuw i64 [[TMP43]], [[TMP42]] -// CHECK2-51-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 3 -// CHECK2-51-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP47]], 
i64* [[TMP46]], align 8 -// CHECK2-51-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-51-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 3 +// CHECK2-51-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP45]], i64* [[TMP47]], align 8 +// CHECK2-51-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 [[TMP44]], i64* [[TMP48]], align 8 -// CHECK2-51-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 1, i8* [[TMP49]], align 8 // CHECK2-51-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK2-51-NEXT: [[TMP50:%.*]] = bitcast %struct.kmp_depend_info* [[TMP24]] to i8* @@ -2702,13 +2728,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP58]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 // CHECK2-51-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK2-51-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 0 -// CHECK2-51-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP63:%.*]] = ptrtoint 
%struct.S* [[ARRAYIDX6]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP63]], i64* [[TMP62]], align 8 -// CHECK2-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP61:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK2-51-NEXT: [[TMP62:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 0 +// CHECK2-51-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP61]], i64* [[TMP63]], align 8 +// CHECK2-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 4, i64* [[TMP64]], align 8 -// CHECK2-51-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 3, i8* [[TMP65]], align 8 // CHECK2-51-NEXT: [[TMP66:%.*]] = load i8, i8* [[B]], align 1 // CHECK2-51-NEXT: [[TMP67:%.*]] = sext i8 [[TMP66]] to i64 @@ -2724,13 +2750,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP73:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 // CHECK2-51-NEXT: [[TMP74:%.*]] = ptrtoint i32* [[TMP72]] to i64 // CHECK2-51-NEXT: [[TMP75:%.*]] = sub nuw i64 [[TMP74]], [[TMP73]] -// CHECK2-51-NEXT: [[TMP76:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 1 -// CHECK2-51-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP78:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP78]], i64* [[TMP77]], align 8 -// CHECK2-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP76]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP76:%.*]] = ptrtoint i32* [[ARRAYIDX8]] to i64 +// CHECK2-51-NEXT: [[TMP77:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 1 +// CHECK2-51-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP76]], i64* [[TMP78]], align 8 +// CHECK2-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 [[TMP75]], i64* [[TMP79]], align 8 -// CHECK2-51-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP77]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 3, i8* [[TMP80]], align 8 // CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR11]], align 8 // CHECK2-51-NEXT: [[TMP81:%.*]] = bitcast %struct.kmp_depend_info* [[TMP60]] to i8* @@ -2742,13 +2768,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP85]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP87:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR13]], i64 0, i64 0 // CHECK2-51-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK2-51-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 0 -// CHECK2-51-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP90:%.*]] = ptrtoint %struct.S* [[ARRAYIDX14]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP90]], i64* [[TMP89]], align 8 -// 
CHECK2-51-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP88:%.*]] = ptrtoint %struct.S* [[ARRAYIDX14]] to i64 +// CHECK2-51-NEXT: [[TMP89:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 0 +// CHECK2-51-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP88]], i64* [[TMP90]], align 8 +// CHECK2-51-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 4, i64* [[TMP91]], align 8 -// CHECK2-51-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 4, i8* [[TMP92]], align 8 // CHECK2-51-NEXT: [[TMP93:%.*]] = load i8, i8* [[B]], align 1 // CHECK2-51-NEXT: [[TMP94:%.*]] = sext i8 [[TMP93]] to i64 @@ -2764,13 +2790,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP100:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 // CHECK2-51-NEXT: [[TMP101:%.*]] = ptrtoint i32* [[TMP99]] to i64 // CHECK2-51-NEXT: [[TMP102:%.*]] = sub nuw i64 [[TMP101]], [[TMP100]] -// CHECK2-51-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 1 -// CHECK2-51-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP105:%.*]] = ptrtoint i32* [[ARRAYIDX16]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP105]], i64* [[TMP104]], align 8 -// CHECK2-51-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP103:%.*]] = 
ptrtoint i32* [[ARRAYIDX16]] to i64 +// CHECK2-51-NEXT: [[TMP104:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i64 1 +// CHECK2-51-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP103]], i64* [[TMP105]], align 8 +// CHECK2-51-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 [[TMP102]], i64* [[TMP106]], align 8 -// CHECK2-51-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP104]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 4, i8* [[TMP107]], align 8 // CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR19]], align 8 // CHECK2-51-NEXT: [[TMP108:%.*]] = bitcast %struct.kmp_depend_info* [[TMP87]] to i8* @@ -2789,13 +2815,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP115]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 3, i8* [[TMP118]], align 8 // CHECK2-51-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK2-51-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 1 -// CHECK2-51-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP121:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP121]], i64* [[TMP120]], align 8 -// CHECK2-51-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP119:%.*]] = ptrtoint %struct.S* 
[[ARRAYIDX22]] to i64 +// CHECK2-51-NEXT: [[TMP120:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 1 +// CHECK2-51-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP119]], i64* [[TMP121]], align 8 +// CHECK2-51-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 4, i64* [[TMP122]], align 8 -// CHECK2-51-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 3, i8* [[TMP123]], align 8 // CHECK2-51-NEXT: [[TMP124:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK2-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP124]] @@ -2813,13 +2839,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP131:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 // CHECK2-51-NEXT: [[TMP132:%.*]] = ptrtoint i32* [[TMP130]] to i64 // CHECK2-51-NEXT: [[TMP133:%.*]] = sub nuw i64 [[TMP132]], [[TMP131]] -// CHECK2-51-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i64 2 -// CHECK2-51-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP136:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP136]], i64* [[TMP135]], align 8 -// CHECK2-51-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP134:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 +// CHECK2-51-NEXT: [[TMP135:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* 
[[TMP114]], i64 2 +// CHECK2-51-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP134]], i64* [[TMP136]], align 8 +// CHECK2-51-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 [[TMP133]], i64* [[TMP137]], align 8 -// CHECK2-51-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP135]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 3, i8* [[TMP138]], align 8 // CHECK2-51-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR27]], align 8 // CHECK2-51-NEXT: [[TMP139:%.*]] = bitcast %struct.kmp_depend_info* [[TMP114]] to i8* @@ -2836,13 +2862,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP145]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 8, i8* [[TMP148]], align 8 // CHECK2-51-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK2-51-NEXT: [[TMP149:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP144]], i64 1 -// CHECK2-51-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP149]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP151:%.*]] = ptrtoint %struct.S* [[ARRAYIDX30]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP151]], i64* [[TMP150]], align 8 -// CHECK2-51-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP149]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP149:%.*]] = ptrtoint %struct.S* [[ARRAYIDX30]] to i64 +// CHECK2-51-NEXT: [[TMP150:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP144]], 
i64 1 +// CHECK2-51-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP150]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP149]], i64* [[TMP151]], align 8 +// CHECK2-51-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP150]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 4, i64* [[TMP152]], align 8 -// CHECK2-51-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP149]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP150]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 8, i8* [[TMP153]], align 8 // CHECK2-51-NEXT: [[TMP154:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK2-51-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP154]] @@ -2860,13 +2886,13 @@ void xxxx() { // CHECK2-51-NEXT: [[TMP161:%.*]] = ptrtoint i32* [[ARRAYIDX32]] to i64 // CHECK2-51-NEXT: [[TMP162:%.*]] = ptrtoint i32* [[TMP160]] to i64 // CHECK2-51-NEXT: [[TMP163:%.*]] = sub nuw i64 [[TMP162]], [[TMP161]] -// CHECK2-51-NEXT: [[TMP164:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP144]], i64 2 -// CHECK2-51-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP164]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP166:%.*]] = ptrtoint i32* [[ARRAYIDX32]] to i64 -// CHECK2-51-NEXT: store i64 [[TMP166]], i64* [[TMP165]], align 8 -// CHECK2-51-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP164]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP164:%.*]] = ptrtoint i32* [[ARRAYIDX32]] to i64 +// CHECK2-51-NEXT: [[TMP165:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP144]], i64 2 +// CHECK2-51-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* 
[[TMP165]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP164]], i64* [[TMP166]], align 8 +// CHECK2-51-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP165]], i32 0, i32 1 // CHECK2-51-NEXT: store i64 [[TMP163]], i64* [[TMP167]], align 8 -// CHECK2-51-NEXT: [[TMP168:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP164]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP168:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP165]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 8, i8* [[TMP168]], align 8 // CHECK2-51-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR37]], align 8 // CHECK2-51-NEXT: [[TMP169:%.*]] = bitcast %struct.kmp_depend_info* [[TMP144]] to i8* @@ -3843,6 +3869,445 @@ void xxxx() { // CHECK2-51-NEXT: ret i32 0 // // +// CHECK2-51-LABEL: define {{[^@]+}}@_Z19test_omp_all_memoryv +// CHECK2-51-SAME: () #[[ATTR8]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[D:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[E:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 1 +// CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK2-51-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-51-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 1 +// CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR2:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK2-51-NEXT: [[DEP_COUNTER_ADDR3:%.*]] = alloca i64, align 8 +// CHECK2-51-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 1 +// CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK2-51-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8 +// CHECK2-51-NEXT: 
[[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 1 +// CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK2-51-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i64, align 8 +// CHECK2-51-NEXT: [[AGG_CAPTURED10:%.*]] = alloca [[STRUCT_ANON_40:%.*]], align 1 +// CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR11:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 +// CHECK2-51-NEXT: [[DEP_COUNTER_ADDR12:%.*]] = alloca i64, align 8 +// CHECK2-51-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-51-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.29*)* @.omp_task_entry..32 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates.29* +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], %struct.kmp_task_t_with_privates.29* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], %struct.kmp_task_t_with_privates.29* [[TMP2]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], %struct..kmp_privates.t.30* [[TMP4]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-51-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK2-51-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP7]], i64 0 +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP9]], align 8 +// CHECK2-51-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP10]], align 8 +// CHECK2-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 -128, i8* [[TMP11]], align 8 +// CHECK2-51-NEXT: [[TMP12:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK2-51-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP7]], i64 1 +// CHECK2-51-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP13]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP12]], i64* [[TMP14]], align 8 +// CHECK2-51-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP13]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 4, i64* [[TMP15]], align 8 +// CHECK2-51-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP13]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 1, i8* [[TMP16]], align 8 +// CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP17:%.*]] = bitcast %struct.kmp_depend_info* [[TMP7]] to i8* +// CHECK2-51-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP1]], i32 2, i8* [[TMP17]], i32 0, i8* null) +// CHECK2-51-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.32*)* @.omp_task_entry..35 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.kmp_task_t_with_privates.32* +// CHECK2-51-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], %struct.kmp_task_t_with_privates.32* [[TMP20]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP22:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], %struct.kmp_task_t_with_privates.32* [[TMP20]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], %struct..kmp_privates.t.33* [[TMP22]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP24:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-51-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 8 +// CHECK2-51-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR2]], i64 0, i64 0 +// CHECK2-51-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i64 0 +// CHECK2-51-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP27]], align 8 +// CHECK2-51-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP28]], align 8 +// CHECK2-51-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 -128, i8* [[TMP29]], align 8 +// CHECK2-51-NEXT: [[TMP30:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK2-51-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i64 1 +// CHECK2-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP30]], i64* [[TMP32]], align 8 +// CHECK2-51-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 4, i64* [[TMP33]], align 8 +// CHECK2-51-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 
1, i8* [[TMP34]], align 8 +// CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR3]], align 8 +// CHECK2-51-NEXT: [[TMP35:%.*]] = bitcast %struct.kmp_depend_info* [[TMP25]] to i8* +// CHECK2-51-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP19]], i32 2, i8* [[TMP35]], i32 0, i8* null) +// CHECK2-51-NEXT: [[TMP37:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.35*)* @.omp_task_entry..38 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP38:%.*]] = bitcast i8* [[TMP37]] to %struct.kmp_task_t_with_privates.35* +// CHECK2-51-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], %struct.kmp_task_t_with_privates.35* [[TMP38]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], %struct.kmp_task_t_with_privates.35* [[TMP38]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], %struct..kmp_privates.t.36* [[TMP40]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP42:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-51-NEXT: store i32 [[TMP42]], i32* [[TMP41]], align 8 +// CHECK2-51-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 +// CHECK2-51-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP43]], i64 0 +// CHECK2-51-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP45]], align 8 +// CHECK2-51-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP46]], align 8 +// CHECK2-51-NEXT: [[TMP47:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 -128, i8* [[TMP47]], align 8 +// CHECK2-51-NEXT: [[TMP48:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK2-51-NEXT: [[TMP49:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP43]], i64 1 +// CHECK2-51-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP49]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP48]], i64* [[TMP50]], align 8 +// CHECK2-51-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP49]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 4, i64* [[TMP51]], align 8 +// CHECK2-51-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP49]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 1, i8* [[TMP52]], align 8 +// CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR6]], align 8 +// CHECK2-51-NEXT: [[TMP53:%.*]] = bitcast %struct.kmp_depend_info* [[TMP43]] to i8* +// CHECK2-51-NEXT: [[TMP54:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP37]], i32 2, i8* [[TMP53]], i32 0, i8* null) +// CHECK2-51-NEXT: [[TMP55:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.38*)* @.omp_task_entry..41 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP55]] to %struct.kmp_task_t_with_privates.38* +// CHECK2-51-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], %struct.kmp_task_t_with_privates.38* [[TMP56]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], %struct.kmp_task_t_with_privates.38* [[TMP56]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP59:%.*]] = getelementptr inbounds 
[[STRUCT__KMP_PRIVATES_T_39:%.*]], %struct..kmp_privates.t.39* [[TMP58]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP60:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-51-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 8 +// CHECK2-51-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR8]], i64 0, i64 0 +// CHECK2-51-NEXT: [[TMP62:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i64 0 +// CHECK2-51-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP63]], align 8 +// CHECK2-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP64]], align 8 +// CHECK2-51-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP62]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 -128, i8* [[TMP65]], align 8 +// CHECK2-51-NEXT: [[TMP66:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK2-51-NEXT: [[TMP67:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i64 1 +// CHECK2-51-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP67]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 [[TMP66]], i64* [[TMP68]], align 8 +// CHECK2-51-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP67]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 4, i64* [[TMP69]], align 8 +// CHECK2-51-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP67]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 1, i8* [[TMP70]], align 8 +// CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR9]], align 8 +// CHECK2-51-NEXT: [[TMP71:%.*]] = bitcast %struct.kmp_depend_info* [[TMP61]] to i8* 
+// CHECK2-51-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP55]], i32 2, i8* [[TMP71]], i32 0, i8* null) +// CHECK2-51-NEXT: [[TMP73:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.41*)* @.omp_task_entry..44 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP74:%.*]] = bitcast i8* [[TMP73]] to %struct.kmp_task_t_with_privates.41* +// CHECK2-51-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], %struct.kmp_task_t_with_privates.41* [[TMP74]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], %struct.kmp_task_t_with_privates.41* [[TMP74]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], %struct..kmp_privates.t.42* [[TMP76]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-51-NEXT: store i32 [[TMP78]], i32* [[TMP77]], align 8 +// CHECK2-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 +// CHECK2-51-NEXT: [[TMP80:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP79]], i64 0 +// CHECK2-51-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP80]], i32 0, i32 0 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP81]], align 8 +// CHECK2-51-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP80]], i32 0, i32 1 +// CHECK2-51-NEXT: store i64 0, i64* [[TMP82]], align 8 +// CHECK2-51-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP80]], i32 0, i32 2 +// CHECK2-51-NEXT: store i8 -128, i8* [[TMP83]], align 8 +// CHECK2-51-NEXT: store 
i64 1, i64* [[DEP_COUNTER_ADDR12]], align 8 +// CHECK2-51-NEXT: [[TMP84:%.*]] = bitcast %struct.kmp_depend_info* [[TMP79]] to i8* +// CHECK2-51-NEXT: [[TMP85:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP73]], i32 1, i8* [[TMP84]], i32 0, i8* null) +// CHECK2-51-NEXT: ret void +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..31 +// CHECK2-51-SAME: (%struct..kmp_privates.t.30* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.30*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.30* [[TMP0]], %struct..kmp_privates.t.30** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.30*, %struct..kmp_privates.t.30** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], %struct..kmp_privates.t.30* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK2-51-NEXT: ret void +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..32 +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.29* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.28*, align 8 +// CHECK2-51-NEXT: 
[[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.29*, align 8 +// CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.29* [[TMP1]], %struct.kmp_task_t_with_privates.29** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.29*, %struct.kmp_task_t_with_privates.29** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], %struct.kmp_task_t_with_privates.29* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.28* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], %struct.kmp_task_t_with_privates.29* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.30* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.29* [[TMP3]] to i8* +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META145:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META150:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META152:![0-9]+]]) +// CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias 
!154 +// CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.30*, i32**)* @.omp_task_privates_map..31 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: store %struct.anon.28* [[TMP8]], %struct.anon.28** [[__CONTEXT_ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK2-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK2-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: store i32 13, i32* [[TMP16]], align 4 +// CHECK2-51-NEXT: ret i32 0 +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..34 +// CHECK2-51-SAME: (%struct..kmp_privates.t.33* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.33*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.33* [[TMP0]], %struct..kmp_privates.t.33** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.33*, %struct..kmp_privates.t.33** 
[[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], %struct..kmp_privates.t.33* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK2-51-NEXT: ret void +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..35 +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.32* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.31*, align 8 +// CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.32*, align 8 +// CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.32* [[TMP1]], %struct.kmp_task_t_with_privates.32** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.32*, %struct.kmp_task_t_with_privates.32** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], %struct.kmp_task_t_with_privates.32* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.31* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], %struct.kmp_task_t_with_privates.32* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.33* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.32* [[TMP3]] to i8* +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META155:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META158:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META160:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META162:![0-9]+]]) +// CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !164 +// CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.33*, i32**)* @.omp_task_privates_map..34 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: store %struct.anon.31* [[TMP8]], %struct.anon.31** [[__CONTEXT_ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.31*, %struct.anon.31** [[__CONTEXT_ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !164 +// 
CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK2-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK2-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: store i32 14, i32* [[TMP16]], align 4 +// CHECK2-51-NEXT: ret i32 0 +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..37 +// CHECK2-51-SAME: (%struct..kmp_privates.t.36* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.36*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.36* [[TMP0]], %struct..kmp_privates.t.36** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.36*, %struct..kmp_privates.t.36** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], %struct..kmp_privates.t.36* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK2-51-NEXT: ret void +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..38 +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.35* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] 
= alloca %struct.anon.34*, align 8 +// CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.35*, align 8 +// CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.35* [[TMP1]], %struct.kmp_task_t_with_privates.35** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.35*, %struct.kmp_task_t_with_privates.35** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], %struct.kmp_task_t_with_privates.35* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.34* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], %struct.kmp_task_t_with_privates.35* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.36* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.35* [[TMP3]] to i8* +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META170:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META172:![0-9]+]]) +// CHECK2-51-NEXT: store i32 
[[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !174 +// CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.36*, i32**)* @.omp_task_privates_map..37 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: store %struct.anon.34* [[TMP8]], %struct.anon.34** [[__CONTEXT_ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.34*, %struct.anon.34** [[__CONTEXT_ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK2-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK2-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: store i32 15, i32* [[TMP16]], align 4 +// CHECK2-51-NEXT: ret i32 0 +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..40 +// CHECK2-51-SAME: (%struct..kmp_privates.t.39* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.39*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.39* [[TMP0]], %struct..kmp_privates.t.39** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load 
%struct..kmp_privates.t.39*, %struct..kmp_privates.t.39** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], %struct..kmp_privates.t.39* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK2-51-NEXT: ret void +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..41 +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.38* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.37*, align 8 +// CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.38*, align 8 +// CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.38* [[TMP1]], %struct.kmp_task_t_with_privates.38** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.38*, %struct.kmp_task_t_with_privates.38** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], %struct.kmp_task_t_with_privates.38* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK2-51-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.37* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], %struct.kmp_task_t_with_privates.38* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.39* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.38* [[TMP3]] to i8* +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META175:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META178:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META180:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META182:![0-9]+]]) +// CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !184 +// CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.39*, i32**)* @.omp_task_privates_map..40 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: store %struct.anon.37* [[TMP8]], %struct.anon.37** [[__CONTEXT_ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.37*, %struct.anon.37** [[__CONTEXT_ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK2-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK2-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: store i32 16, i32* [[TMP16]], align 4 +// CHECK2-51-NEXT: ret i32 0 +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..43 +// CHECK2-51-SAME: (%struct..kmp_privates.t.42* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.42*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.42* [[TMP0]], %struct..kmp_privates.t.42** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.42*, %struct..kmp_privates.t.42** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], %struct..kmp_privates.t.42* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK2-51-NEXT: ret void +// +// +// CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..44 +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.41* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-NEXT: entry: +// CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, 
align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.40*, align 8 +// CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.41*, align 8 +// CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.41* [[TMP1]], %struct.kmp_task_t_with_privates.41** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.41*, %struct.kmp_task_t_with_privates.41** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], %struct.kmp_task_t_with_privates.41* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.40* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], %struct.kmp_task_t_with_privates.41* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.42* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.41* [[TMP3]] to i8* +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META185:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META188:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META190:![0-9]+]]) +// CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META192:![0-9]+]]) +// CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !194 +// CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.42*, i32**)* @.omp_task_privates_map..43 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: store %struct.anon.40* [[TMP8]], %struct.anon.40** [[__CONTEXT_ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.40*, %struct.anon.40** [[__CONTEXT_ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK2-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK2-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: store i32 17, i32* [[TMP16]], align 4 +// CHECK2-51-NEXT: ret i32 0 +// +// // CHECK2-51-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_task_codegen.cpp // CHECK2-51-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" { // CHECK2-51-NEXT: entry: @@ -3933,21 +4398,21 @@ void xxxx() { // CHECK3-NEXT: store i64 4, i64* [[TMP26]], align 8 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i32 0, i32 2 // CHECK3-NEXT: store i8 1, i8* [[TMP27]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP23]], i64 1 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP30:%.*]] = ptrtoint i8* [[B]] to i64 -// CHECK3-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP28:%.*]] = ptrtoint i8* [[B]] to i64 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 1 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP28]], i64* [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 1 // CHECK3-NEXT: store i64 1, i64* [[TMP31]], align 8 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 2 // CHECK3-NEXT: store i8 1, i8* [[TMP32]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 2 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP35:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 -// CHECK3-NEXT: store i64 [[TMP35]], i64* [[TMP34]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP33:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* 
[[TMP23]], i64 2 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP33]], i64* [[TMP35]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 1 // CHECK3-NEXT: store i64 8, i64* [[TMP36]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 2 // CHECK3-NEXT: store i8 1, i8* [[TMP37]], align 8 // CHECK3-NEXT: [[TMP38:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP38]] @@ -3957,13 +4422,13 @@ void xxxx() { // CHECK3-NEXT: [[TMP41:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 // CHECK3-NEXT: [[TMP42:%.*]] = ptrtoint i32* [[TMP40]] to i64 // CHECK3-NEXT: [[TMP43:%.*]] = sub nuw i64 [[TMP42]], [[TMP41]] -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP46:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK3-NEXT: store i64 [[TMP46]], i64* [[TMP45]], align 8 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP44:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP44]], i64* [[TMP46]], align 8 +// 
CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 // CHECK3-NEXT: store i64 [[TMP43]], i64* [[TMP47]], align 8 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 // CHECK3-NEXT: store i8 1, i8* [[TMP48]], align 8 // CHECK3-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK3-NEXT: [[TMP49:%.*]] = bitcast %struct.kmp_depend_info* [[TMP23]] to i8* @@ -3983,13 +4448,13 @@ void xxxx() { // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP57]], i32 0, i32 0 // CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 // CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP62:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 -// CHECK3-NEXT: store i64 [[TMP62]], i64* [[TMP61]], align 8 -// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP60:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 +// CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP60]], 
i64* [[TMP62]], align 8 +// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 // CHECK3-NEXT: store i64 4, i64* [[TMP63]], align 8 -// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 // CHECK3-NEXT: store i8 3, i8* [[TMP64]], align 8 // CHECK3-NEXT: [[TMP65:%.*]] = load i8, i8* [[B]], align 1 // CHECK3-NEXT: [[TMP66:%.*]] = sext i8 [[TMP65]] to i64 @@ -4005,13 +4470,13 @@ void xxxx() { // CHECK3-NEXT: [[TMP72:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 // CHECK3-NEXT: [[TMP73:%.*]] = ptrtoint i32* [[TMP71]] to i64 // CHECK3-NEXT: [[TMP74:%.*]] = sub nuw i64 [[TMP73]], [[TMP72]] -// CHECK3-NEXT: [[TMP75:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 -// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP77:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 -// CHECK3-NEXT: store i64 [[TMP77]], i64* [[TMP76]], align 8 -// CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP75:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 +// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 +// CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP75]], i64* [[TMP77]], align 8 +// CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 1 // CHECK3-NEXT: store i64 [[TMP74]], i64* [[TMP78]], align 8 -// CHECK3-NEXT: [[TMP79:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 2 // CHECK3-NEXT: store i8 3, i8* [[TMP79]], align 8 // CHECK3-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK3-NEXT: [[TMP80:%.*]] = bitcast %struct.kmp_depend_info* [[TMP59]] to i8* @@ -4025,13 +4490,13 @@ void xxxx() { // CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP84]], i32 0, i32 0 // CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR21]], i64 0, i64 0 // CHECK3-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP87:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 0 -// CHECK3-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP89:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 -// CHECK3-NEXT: store i64 [[TMP89]], i64* [[TMP88]], align 8 -// CHECK3-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP87:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 +// CHECK3-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 0 +// CHECK3-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP87]], i64* [[TMP89]], align 8 +// CHECK3-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 1 // CHECK3-NEXT: store i64 4, i64* [[TMP90]], align 8 -// 
CHECK3-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 2 // CHECK3-NEXT: store i8 4, i8* [[TMP91]], align 8 // CHECK3-NEXT: [[TMP92:%.*]] = load i8, i8* [[B]], align 1 // CHECK3-NEXT: [[TMP93:%.*]] = sext i8 [[TMP92]] to i64 @@ -4047,13 +4512,13 @@ void xxxx() { // CHECK3-NEXT: [[TMP99:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 // CHECK3-NEXT: [[TMP100:%.*]] = ptrtoint i32* [[TMP98]] to i64 // CHECK3-NEXT: [[TMP101:%.*]] = sub nuw i64 [[TMP100]], [[TMP99]] -// CHECK3-NEXT: [[TMP102:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 -// CHECK3-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP104:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 -// CHECK3-NEXT: store i64 [[TMP104]], i64* [[TMP103]], align 8 -// CHECK3-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP102:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 +// CHECK3-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 +// CHECK3-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP102]], i64* [[TMP104]], align 8 +// CHECK3-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 1 // CHECK3-NEXT: store i64 [[TMP101]], i64* [[TMP105]], align 8 -// CHECK3-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP103]], i32 0, i32 2 // CHECK3-NEXT: store i8 4, i8* [[TMP106]], align 8 // CHECK3-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR27]], align 8 // CHECK3-NEXT: [[TMP107:%.*]] = bitcast %struct.kmp_depend_info* [[TMP86]] to i8* @@ -4074,13 +4539,13 @@ void xxxx() { // CHECK3-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i32 0, i32 2 // CHECK3-NEXT: store i8 3, i8* [[TMP117]], align 8 // CHECK3-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP118:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 -// CHECK3-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP120:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 -// CHECK3-NEXT: store i64 [[TMP120]], i64* [[TMP119]], align 8 -// CHECK3-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP118:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 +// CHECK3-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 +// CHECK3-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP118]], i64* [[TMP120]], align 8 +// CHECK3-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 1 // CHECK3-NEXT: store i64 4, i64* [[TMP121]], align 8 -// CHECK3-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 2 // CHECK3-NEXT: store i8 3, 
i8* [[TMP122]], align 8 // CHECK3-NEXT: [[TMP123:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK3-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP123]] @@ -4098,13 +4563,13 @@ void xxxx() { // CHECK3-NEXT: [[TMP130:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 // CHECK3-NEXT: [[TMP131:%.*]] = ptrtoint i32* [[TMP129]] to i64 // CHECK3-NEXT: [[TMP132:%.*]] = sub nuw i64 [[TMP131]], [[TMP130]] -// CHECK3-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 -// CHECK3-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP135:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 -// CHECK3-NEXT: store i64 [[TMP135]], i64* [[TMP134]], align 8 -// CHECK3-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP133:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 +// CHECK3-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 +// CHECK3-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 0 +// CHECK3-NEXT: store i64 [[TMP133]], i64* [[TMP135]], align 8 +// CHECK3-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 1 // CHECK3-NEXT: store i64 [[TMP132]], i64* [[TMP136]], align 8 -// CHECK3-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 2 // CHECK3-NEXT: store i8 3, i8* [[TMP137]], align 8 // CHECK3-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR37]], align 8 // CHECK3-NEXT: [[TMP138:%.*]] = bitcast %struct.kmp_depend_info* [[TMP113]] to i8* @@ 
-5012,21 +5477,21 @@ void xxxx() { // CHECK4-NEXT: store i64 4, i64* [[TMP26]], align 8 // CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i32 0, i32 2 // CHECK4-NEXT: store i8 1, i8* [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 1 -// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP30:%.*]] = ptrtoint i8* [[B]] to i64 -// CHECK4-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP28:%.*]] = ptrtoint i8* [[B]] to i64 +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 1 +// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP28]], i64* [[TMP30]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 1 // CHECK4-NEXT: store i64 1, i64* [[TMP31]], align 8 -// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 2 // CHECK4-NEXT: store i8 1, i8* [[TMP32]], align 8 -// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 2 -// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP35:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 -// CHECK4-NEXT: store i64 
[[TMP35]], i64* [[TMP34]], align 8 -// CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP33:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 +// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 2 +// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP33]], i64* [[TMP35]], align 8 +// CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 1 // CHECK4-NEXT: store i64 8, i64* [[TMP36]], align 8 -// CHECK4-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 2 // CHECK4-NEXT: store i8 1, i8* [[TMP37]], align 8 // CHECK4-NEXT: [[TMP38:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP38]] @@ -5036,13 +5501,13 @@ void xxxx() { // CHECK4-NEXT: [[TMP41:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 // CHECK4-NEXT: [[TMP42:%.*]] = ptrtoint i32* [[TMP40]] to i64 // CHECK4-NEXT: [[TMP43:%.*]] = sub nuw i64 [[TMP42]], [[TMP41]] -// CHECK4-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 -// CHECK4-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP46:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK4-NEXT: store i64 [[TMP46]], i64* [[TMP45]], align 8 -// CHECK4-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP44:%.*]] = ptrtoint i32* 
[[ARRAYIDX]] to i64 +// CHECK4-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 +// CHECK4-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP44]], i64* [[TMP46]], align 8 +// CHECK4-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 // CHECK4-NEXT: store i64 [[TMP43]], i64* [[TMP47]], align 8 -// CHECK4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 // CHECK4-NEXT: store i8 1, i8* [[TMP48]], align 8 // CHECK4-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK4-NEXT: [[TMP49:%.*]] = bitcast %struct.kmp_depend_info* [[TMP23]] to i8* @@ -5062,13 +5527,13 @@ void xxxx() { // CHECK4-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP57]], i32 0, i32 0 // CHECK4-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 // CHECK4-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP60:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 -// CHECK4-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP62:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 -// CHECK4-NEXT: store i64 [[TMP62]], i64* [[TMP61]], align 8 -// CHECK4-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 1 +// CHECK4-NEXT: 
[[TMP60:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 +// CHECK4-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 +// CHECK4-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP60]], i64* [[TMP62]], align 8 +// CHECK4-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 // CHECK4-NEXT: store i64 4, i64* [[TMP63]], align 8 -// CHECK4-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 // CHECK4-NEXT: store i8 3, i8* [[TMP64]], align 8 // CHECK4-NEXT: [[TMP65:%.*]] = load i8, i8* [[B]], align 1 // CHECK4-NEXT: [[TMP66:%.*]] = sext i8 [[TMP65]] to i64 @@ -5084,13 +5549,13 @@ void xxxx() { // CHECK4-NEXT: [[TMP72:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 // CHECK4-NEXT: [[TMP73:%.*]] = ptrtoint i32* [[TMP71]] to i64 // CHECK4-NEXT: [[TMP74:%.*]] = sub nuw i64 [[TMP73]], [[TMP72]] -// CHECK4-NEXT: [[TMP75:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 -// CHECK4-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP77:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 -// CHECK4-NEXT: store i64 [[TMP77]], i64* [[TMP76]], align 8 -// CHECK4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP75:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 +// CHECK4-NEXT: [[TMP76:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 +// CHECK4-NEXT: [[TMP77:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP75]], i64* [[TMP77]], align 8 +// CHECK4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 1 // CHECK4-NEXT: store i64 [[TMP74]], i64* [[TMP78]], align 8 -// CHECK4-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 2 // CHECK4-NEXT: store i8 3, i8* [[TMP79]], align 8 // CHECK4-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK4-NEXT: [[TMP80:%.*]] = bitcast %struct.kmp_depend_info* [[TMP59]] to i8* @@ -5104,13 +5569,13 @@ void xxxx() { // CHECK4-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP84]], i32 0, i32 0 // CHECK4-NEXT: [[TMP86:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR21]], i64 0, i64 0 // CHECK4-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP87:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 0 -// CHECK4-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP89:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 -// CHECK4-NEXT: store i64 [[TMP89]], i64* [[TMP88]], align 8 -// CHECK4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP87:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 +// CHECK4-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 0 +// CHECK4-NEXT: 
[[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP87]], i64* [[TMP89]], align 8 +// CHECK4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 1 // CHECK4-NEXT: store i64 4, i64* [[TMP90]], align 8 -// CHECK4-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 2 // CHECK4-NEXT: store i8 4, i8* [[TMP91]], align 8 // CHECK4-NEXT: [[TMP92:%.*]] = load i8, i8* [[B]], align 1 // CHECK4-NEXT: [[TMP93:%.*]] = sext i8 [[TMP92]] to i64 @@ -5126,13 +5591,13 @@ void xxxx() { // CHECK4-NEXT: [[TMP99:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 // CHECK4-NEXT: [[TMP100:%.*]] = ptrtoint i32* [[TMP98]] to i64 // CHECK4-NEXT: [[TMP101:%.*]] = sub nuw i64 [[TMP100]], [[TMP99]] -// CHECK4-NEXT: [[TMP102:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 -// CHECK4-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP104:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 -// CHECK4-NEXT: store i64 [[TMP104]], i64* [[TMP103]], align 8 -// CHECK4-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP102:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 +// CHECK4-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 +// CHECK4-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP102]], i64* [[TMP104]], align 8 +// CHECK4-NEXT: [[TMP105:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 1 // CHECK4-NEXT: store i64 [[TMP101]], i64* [[TMP105]], align 8 -// CHECK4-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 2 // CHECK4-NEXT: store i8 4, i8* [[TMP106]], align 8 // CHECK4-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR27]], align 8 // CHECK4-NEXT: [[TMP107:%.*]] = bitcast %struct.kmp_depend_info* [[TMP86]] to i8* @@ -5153,13 +5618,13 @@ void xxxx() { // CHECK4-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i32 0, i32 2 // CHECK4-NEXT: store i8 3, i8* [[TMP117]], align 8 // CHECK4-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP118:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 -// CHECK4-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP120:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 -// CHECK4-NEXT: store i64 [[TMP120]], i64* [[TMP119]], align 8 -// CHECK4-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP118:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 +// CHECK4-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 +// CHECK4-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP118]], i64* [[TMP120]], align 8 +// CHECK4-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* 
[[TMP119]], i32 0, i32 1 // CHECK4-NEXT: store i64 4, i64* [[TMP121]], align 8 -// CHECK4-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 2 // CHECK4-NEXT: store i8 3, i8* [[TMP122]], align 8 // CHECK4-NEXT: [[TMP123:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK4-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP123]] @@ -5177,13 +5642,13 @@ void xxxx() { // CHECK4-NEXT: [[TMP130:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 // CHECK4-NEXT: [[TMP131:%.*]] = ptrtoint i32* [[TMP129]] to i64 // CHECK4-NEXT: [[TMP132:%.*]] = sub nuw i64 [[TMP131]], [[TMP130]] -// CHECK4-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 -// CHECK4-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP135:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 -// CHECK4-NEXT: store i64 [[TMP135]], i64* [[TMP134]], align 8 -// CHECK4-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP133:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 +// CHECK4-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 +// CHECK4-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 0 +// CHECK4-NEXT: store i64 [[TMP133]], i64* [[TMP135]], align 8 +// CHECK4-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 1 // CHECK4-NEXT: store i64 [[TMP132]], i64* [[TMP136]], align 8 -// CHECK4-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP133]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 2 // CHECK4-NEXT: store i8 3, i8* [[TMP137]], align 8 // CHECK4-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR37]], align 8 // CHECK4-NEXT: [[TMP138:%.*]] = bitcast %struct.kmp_depend_info* [[TMP113]] to i8* @@ -6094,21 +6559,21 @@ void xxxx() { // CHECK3-51-NEXT: store i64 4, i64* [[TMP26]], align 8 // CHECK3-51-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 1, i8* [[TMP27]], align 8 -// CHECK3-51-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 1 -// CHECK3-51-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP30:%.*]] = ptrtoint i8* [[B]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 -// CHECK3-51-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP28:%.*]] = ptrtoint i8* [[B]] to i64 +// CHECK3-51-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 1 +// CHECK3-51-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP28]], i64* [[TMP30]], align 8 +// CHECK3-51-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 1, i64* [[TMP31]], align 8 -// CHECK3-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP29]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 1, i8* [[TMP32]], align 8 -// CHECK3-51-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 2 -// CHECK3-51-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP35:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP35]], i64* [[TMP34]], align 8 -// CHECK3-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP33:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 +// CHECK3-51-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 2 +// CHECK3-51-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP33]], i64* [[TMP35]], align 8 +// CHECK3-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 8, i64* [[TMP36]], align 8 -// CHECK3-51-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 1, i8* [[TMP37]], align 8 // CHECK3-51-NEXT: [[TMP38:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK3-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP38]] @@ -6118,13 +6583,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP41:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 // CHECK3-51-NEXT: [[TMP42:%.*]] = ptrtoint i32* [[TMP40]] to i64 // CHECK3-51-NEXT: [[TMP43:%.*]] = sub nuw i64 [[TMP42]], [[TMP41]] -// CHECK3-51-NEXT: [[TMP44:%.*]] = getelementptr 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 -// CHECK3-51-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP46:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP46]], i64* [[TMP45]], align 8 -// CHECK3-51-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP44:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK3-51-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 +// CHECK3-51-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP44]], i64* [[TMP46]], align 8 +// CHECK3-51-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 [[TMP43]], i64* [[TMP47]], align 8 -// CHECK3-51-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 1, i8* [[TMP48]], align 8 // CHECK3-51-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK3-51-NEXT: [[TMP49:%.*]] = bitcast %struct.kmp_depend_info* [[TMP23]] to i8* @@ -6144,13 +6609,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP57]], i32 0, i32 0 // CHECK3-51-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 // CHECK3-51-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* 
[[S]], i64 0, i64 0 -// CHECK3-51-NEXT: [[TMP60:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 -// CHECK3-51-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP62:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP62]], i64* [[TMP61]], align 8 -// CHECK3-51-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP60:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 +// CHECK3-51-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 +// CHECK3-51-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP60]], i64* [[TMP62]], align 8 +// CHECK3-51-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 4, i64* [[TMP63]], align 8 -// CHECK3-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 3, i8* [[TMP64]], align 8 // CHECK3-51-NEXT: [[TMP65:%.*]] = load i8, i8* [[B]], align 1 // CHECK3-51-NEXT: [[TMP66:%.*]] = sext i8 [[TMP65]] to i64 @@ -6166,13 +6631,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP72:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 // CHECK3-51-NEXT: [[TMP73:%.*]] = ptrtoint i32* [[TMP71]] to i64 // CHECK3-51-NEXT: [[TMP74:%.*]] = sub nuw i64 [[TMP73]], [[TMP72]] -// CHECK3-51-NEXT: [[TMP75:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 -// CHECK3-51-NEXT: [[TMP76:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP77:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP77]], i64* [[TMP76]], align 8 -// CHECK3-51-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP75:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 +// CHECK3-51-NEXT: [[TMP76:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 +// CHECK3-51-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP75]], i64* [[TMP77]], align 8 +// CHECK3-51-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 [[TMP74]], i64* [[TMP78]], align 8 -// CHECK3-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 3, i8* [[TMP79]], align 8 // CHECK3-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK3-51-NEXT: [[TMP80:%.*]] = bitcast %struct.kmp_depend_info* [[TMP59]] to i8* @@ -6186,13 +6651,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP84]], i32 0, i32 0 // CHECK3-51-NEXT: [[TMP86:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR21]], i64 0, i64 0 // CHECK3-51-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK3-51-NEXT: [[TMP87:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP86]], i64 0 -// CHECK3-51-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP89:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP89]], i64* [[TMP88]], align 8 -// CHECK3-51-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP87:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 +// CHECK3-51-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 0 +// CHECK3-51-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP87]], i64* [[TMP89]], align 8 +// CHECK3-51-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 4, i64* [[TMP90]], align 8 -// CHECK3-51-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 4, i8* [[TMP91]], align 8 // CHECK3-51-NEXT: [[TMP92:%.*]] = load i8, i8* [[B]], align 1 // CHECK3-51-NEXT: [[TMP93:%.*]] = sext i8 [[TMP92]] to i64 @@ -6208,13 +6673,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP99:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 // CHECK3-51-NEXT: [[TMP100:%.*]] = ptrtoint i32* [[TMP98]] to i64 // CHECK3-51-NEXT: [[TMP101:%.*]] = sub nuw i64 [[TMP100]], [[TMP99]] -// CHECK3-51-NEXT: [[TMP102:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 -// CHECK3-51-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 
0 -// CHECK3-51-NEXT: [[TMP104:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP104]], i64* [[TMP103]], align 8 -// CHECK3-51-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP102:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 +// CHECK3-51-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 +// CHECK3-51-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP102]], i64* [[TMP104]], align 8 +// CHECK3-51-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 [[TMP101]], i64* [[TMP105]], align 8 -// CHECK3-51-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 4, i8* [[TMP106]], align 8 // CHECK3-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR27]], align 8 // CHECK3-51-NEXT: [[TMP107:%.*]] = bitcast %struct.kmp_depend_info* [[TMP86]] to i8* @@ -6235,13 +6700,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 3, i8* [[TMP117]], align 8 // CHECK3-51-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK3-51-NEXT: [[TMP118:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 -// CHECK3-51-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 0 -// CHECK3-51-NEXT: 
[[TMP120:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP120]], i64* [[TMP119]], align 8 -// CHECK3-51-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP118:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 +// CHECK3-51-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 +// CHECK3-51-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP118]], i64* [[TMP120]], align 8 +// CHECK3-51-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 4, i64* [[TMP121]], align 8 -// CHECK3-51-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 3, i8* [[TMP122]], align 8 // CHECK3-51-NEXT: [[TMP123:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK3-51-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP123]] @@ -6259,13 +6724,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP130:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 // CHECK3-51-NEXT: [[TMP131:%.*]] = ptrtoint i32* [[TMP129]] to i64 // CHECK3-51-NEXT: [[TMP132:%.*]] = sub nuw i64 [[TMP131]], [[TMP130]] -// CHECK3-51-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 -// CHECK3-51-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP135:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP135]], i64* [[TMP134]], align 
8 -// CHECK3-51-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP133:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 +// CHECK3-51-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 +// CHECK3-51-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP133]], i64* [[TMP135]], align 8 +// CHECK3-51-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 [[TMP132]], i64* [[TMP136]], align 8 -// CHECK3-51-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 3, i8* [[TMP137]], align 8 // CHECK3-51-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR37]], align 8 // CHECK3-51-NEXT: [[TMP138:%.*]] = bitcast %struct.kmp_depend_info* [[TMP113]] to i8* @@ -6284,13 +6749,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP144]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 8, i8* [[TMP147]], align 8 // CHECK3-51-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK3-51-NEXT: [[TMP148:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP143]], i64 1 -// CHECK3-51-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP148]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP150:%.*]] = ptrtoint %struct.S* [[ARRAYIDX42]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP150]], i64* [[TMP149]], align 8 -// 
CHECK3-51-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP148]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP148:%.*]] = ptrtoint %struct.S* [[ARRAYIDX42]] to i64 +// CHECK3-51-NEXT: [[TMP149:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP143]], i64 1 +// CHECK3-51-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP149]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP148]], i64* [[TMP150]], align 8 +// CHECK3-51-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP149]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 4, i64* [[TMP151]], align 8 -// CHECK3-51-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP148]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP149]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 8, i8* [[TMP152]], align 8 // CHECK3-51-NEXT: [[TMP153:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK3-51-NEXT: [[ARRAYIDX43:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP153]] @@ -6308,13 +6773,13 @@ void xxxx() { // CHECK3-51-NEXT: [[TMP160:%.*]] = ptrtoint i32* [[ARRAYIDX44]] to i64 // CHECK3-51-NEXT: [[TMP161:%.*]] = ptrtoint i32* [[TMP159]] to i64 // CHECK3-51-NEXT: [[TMP162:%.*]] = sub nuw i64 [[TMP161]], [[TMP160]] -// CHECK3-51-NEXT: [[TMP163:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP143]], i64 2 -// CHECK3-51-NEXT: [[TMP164:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP163]], i32 0, i32 0 -// CHECK3-51-NEXT: [[TMP165:%.*]] = ptrtoint i32* [[ARRAYIDX44]] to i64 -// CHECK3-51-NEXT: store i64 [[TMP165]], i64* [[TMP164]], align 8 -// CHECK3-51-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP163]], 
i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP163:%.*]] = ptrtoint i32* [[ARRAYIDX44]] to i64 +// CHECK3-51-NEXT: [[TMP164:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP143]], i64 2 +// CHECK3-51-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP164]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP163]], i64* [[TMP165]], align 8 +// CHECK3-51-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP164]], i32 0, i32 1 // CHECK3-51-NEXT: store i64 [[TMP162]], i64* [[TMP166]], align 8 -// CHECK3-51-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP163]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP164]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 8, i8* [[TMP167]], align 8 // CHECK3-51-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR49]], align 8 // CHECK3-51-NEXT: [[TMP168:%.*]] = bitcast %struct.kmp_depend_info* [[TMP143]] to i8* @@ -7168,6 +7633,454 @@ void xxxx() { // CHECK3-51-NEXT: ret i32 0 // // +// CHECK3-51-LABEL: define {{[^@]+}}@_Z19test_omp_all_memoryv +// CHECK3-51-SAME: () #[[ATTR8]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[D:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[E:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 +// CHECK3-51-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK3-51-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-51-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 1 +// CHECK3-51-NEXT: [[DOTDEP_ARR_ADDR4:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK3-51-NEXT: 
[[DEP_COUNTER_ADDR5:%.*]] = alloca i64, align 8 +// CHECK3-51-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 1 +// CHECK3-51-NEXT: [[DOTDEP_ARR_ADDR9:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK3-51-NEXT: [[DEP_COUNTER_ADDR10:%.*]] = alloca i64, align 8 +// CHECK3-51-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 1 +// CHECK3-51-NEXT: [[DOTDEP_ARR_ADDR14:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK3-51-NEXT: [[DEP_COUNTER_ADDR15:%.*]] = alloca i64, align 8 +// CHECK3-51-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 1 +// CHECK3-51-NEXT: [[DOTDEP_ARR_ADDR19:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 +// CHECK3-51-NEXT: [[DEP_COUNTER_ADDR20:%.*]] = alloca i64, align 8 +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-51-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.26*)* @.omp_task_entry..28 to i32 (i32, i8*)*)) +// CHECK3-51-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to %struct.kmp_task_t_with_privates.26* +// CHECK3-51-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], %struct.kmp_task_t_with_privates.26* [[TMP1]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], %struct.kmp_task_t_with_privates.26* [[TMP1]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], %struct..kmp_privates.t.27* [[TMP3]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-51-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK3-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* 
[[DOTDEP_ARR_ADDR]], i64 0, i64 0 +// CHECK3-51-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP6]], i64 0 +// CHECK3-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP7]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP8]], align 8 +// CHECK3-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP7]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP9]], align 8 +// CHECK3-51-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP7]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 -128, i8* [[TMP10]], align 8 +// CHECK3-51-NEXT: [[TMP11:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK3-51-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP6]], i64 1 +// CHECK3-51-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP12]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP11]], i64* [[TMP13]], align 8 +// CHECK3-51-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP12]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 4, i64* [[TMP14]], align 8 +// CHECK3-51-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP12]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 1, i8* [[TMP15]], align 8 +// CHECK3-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR]], align 8 +// CHECK3-51-NEXT: [[TMP16:%.*]] = bitcast %struct.kmp_depend_info* [[TMP6]] to i8* +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-51-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i8* [[TMP0]], i32 2, i8* [[TMP16]], i32 0, i8* null) +// CHECK3-51-NEXT: 
[[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK3-51-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.29*)* @.omp_task_entry..31 to i32 (i32, i8*)*)) +// CHECK3-51-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates.29* +// CHECK3-51-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], %struct.kmp_task_t_with_privates.29* [[TMP19]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], %struct.kmp_task_t_with_privates.29* [[TMP19]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], %struct..kmp_privates.t.30* [[TMP21]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP23:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-51-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 8 +// CHECK3-51-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR4]], i64 0, i64 0 +// CHECK3-51-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 0 +// CHECK3-51-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP26]], align 8 +// CHECK3-51-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP27]], align 8 +// CHECK3-51-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP25]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 -128, i8* [[TMP28]], align 8 +// CHECK3-51-NEXT: [[TMP29:%.*]] = ptrtoint i32* [[A]] to i64 +// 
CHECK3-51-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i64 1 +// CHECK3-51-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP29]], i64* [[TMP31]], align 8 +// CHECK3-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 4, i64* [[TMP32]], align 8 +// CHECK3-51-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP30]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 1, i8* [[TMP33]], align 8 +// CHECK3-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR5]], align 8 +// CHECK3-51-NEXT: [[TMP34:%.*]] = bitcast %struct.kmp_depend_info* [[TMP24]] to i8* +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31]]) +// CHECK3-51-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM6]], i8* [[TMP18]], i32 2, i8* [[TMP34]], i32 0, i8* null) +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK3-51-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.32*)* @.omp_task_entry..34 to i32 (i32, i8*)*)) +// CHECK3-51-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates.32* +// CHECK3-51-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], %struct.kmp_task_t_with_privates.32* [[TMP37]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], %struct.kmp_task_t_with_privates.32* [[TMP37]], i32 0, i32 1 +// CHECK3-51-NEXT: 
[[TMP40:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], %struct..kmp_privates.t.33* [[TMP39]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP41:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-51-NEXT: store i32 [[TMP41]], i32* [[TMP40]], align 8 +// CHECK3-51-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 +// CHECK3-51-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP42]], i64 0 +// CHECK3-51-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP43]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP44]], align 8 +// CHECK3-51-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP43]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP45]], align 8 +// CHECK3-51-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP43]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 -128, i8* [[TMP46]], align 8 +// CHECK3-51-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK3-51-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP42]], i64 1 +// CHECK3-51-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP48]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP47]], i64* [[TMP49]], align 8 +// CHECK3-51-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP48]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 4, i64* [[TMP50]], align 8 +// CHECK3-51-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP48]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 1, i8* [[TMP51]], align 8 +// CHECK3-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR10]], align 8 +// CHECK3-51-NEXT: [[TMP52:%.*]] = bitcast 
%struct.kmp_depend_info* [[TMP42]] to i8* +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33]]) +// CHECK3-51-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i8* [[TMP36]], i32 2, i8* [[TMP52]], i32 0, i8* null) +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK3-51-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.35*)* @.omp_task_entry..37 to i32 (i32, i8*)*)) +// CHECK3-51-NEXT: [[TMP55:%.*]] = bitcast i8* [[TMP54]] to %struct.kmp_task_t_with_privates.35* +// CHECK3-51-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], %struct.kmp_task_t_with_privates.35* [[TMP55]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], %struct.kmp_task_t_with_privates.35* [[TMP55]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], %struct..kmp_privates.t.36* [[TMP57]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP59:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-51-NEXT: store i32 [[TMP59]], i32* [[TMP58]], align 8 +// CHECK3-51-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR14]], i64 0, i64 0 +// CHECK3-51-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 0 +// CHECK3-51-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP62]], align 8 +// CHECK3-51-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP63]], align 8 +// CHECK3-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 -128, i8* [[TMP64]], align 8 +// CHECK3-51-NEXT: [[TMP65:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK3-51-NEXT: [[TMP66:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i64 1 +// CHECK3-51-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP66]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 [[TMP65]], i64* [[TMP67]], align 8 +// CHECK3-51-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP66]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 4, i64* [[TMP68]], align 8 +// CHECK3-51-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP66]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 1, i8* [[TMP69]], align 8 +// CHECK3-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR15]], align 8 +// CHECK3-51-NEXT: [[TMP70:%.*]] = bitcast %struct.kmp_depend_info* [[TMP60]] to i8* +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM16:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK3-51-NEXT: [[TMP71:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM16]], i8* [[TMP54]], i32 2, i8* [[TMP70]], i32 0, i8* null) +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM18:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB37:[0-9]+]]) +// CHECK3-51-NEXT: [[TMP72:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM18]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.38*)* @.omp_task_entry..40 to i32 (i32, i8*)*)) +// CHECK3-51-NEXT: [[TMP73:%.*]] = bitcast i8* [[TMP72]] to 
%struct.kmp_task_t_with_privates.38* +// CHECK3-51-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], %struct.kmp_task_t_with_privates.38* [[TMP73]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], %struct.kmp_task_t_with_privates.38* [[TMP73]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], %struct..kmp_privates.t.39* [[TMP75]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP77:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-51-NEXT: store i32 [[TMP77]], i32* [[TMP76]], align 8 +// CHECK3-51-NEXT: [[TMP78:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR19]], i64 0, i64 0 +// CHECK3-51-NEXT: [[TMP79:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP78]], i64 0 +// CHECK3-51-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP79]], i32 0, i32 0 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP80]], align 8 +// CHECK3-51-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP79]], i32 0, i32 1 +// CHECK3-51-NEXT: store i64 0, i64* [[TMP81]], align 8 +// CHECK3-51-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP79]], i32 0, i32 2 +// CHECK3-51-NEXT: store i8 -128, i8* [[TMP82]], align 8 +// CHECK3-51-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR20]], align 8 +// CHECK3-51-NEXT: [[TMP83:%.*]] = bitcast %struct.kmp_depend_info* [[TMP78]] to i8* +// CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM21:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB37]]) +// CHECK3-51-NEXT: [[TMP84:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM21]], i8* [[TMP72]], i32 1, i8* [[TMP83]], i32 0, i8* null) +// CHECK3-51-NEXT: ret void +// +// +// 
CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..27 +// CHECK3-51-SAME: (%struct..kmp_privates.t.27* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.27*, align 8 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK3-51-NEXT: store %struct..kmp_privates.t.27* [[TMP0]], %struct..kmp_privates.t.27** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.27*, %struct..kmp_privates.t.27** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], %struct..kmp_privates.t.27* [[TMP2]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK3-51-NEXT: ret void +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_entry..28 +// CHECK3-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.26* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK3-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.25*, align 8 +// CHECK3-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.26*, align 8 +// CHECK3-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: store %struct.kmp_task_t_with_privates.26* 
[[TMP1]], %struct.kmp_task_t_with_privates.26** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.26*, %struct.kmp_task_t_with_privates.26** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], %struct.kmp_task_t_with_privates.26* [[TMP3]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK3-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.25* +// CHECK3-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], %struct.kmp_task_t_with_privates.26* [[TMP3]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.27* [[TMP9]] to i8* +// CHECK3-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.26* [[TMP3]] to i8* +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META136:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META140:![0-9]+]]) +// CHECK3-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !142 +// CHECK3-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.27*, i32**)* @.omp_task_privates_map..27 to void (i8*, ...)*), 
void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: store %struct.anon.25* [[TMP8]], %struct.anon.25** [[__CONTEXT_ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: [[TMP12:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK3-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK3-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !142 +// CHECK3-51-NEXT: store i32 13, i32* [[TMP16]], align 4 +// CHECK3-51-NEXT: ret i32 0 +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..30 +// CHECK3-51-SAME: (%struct..kmp_privates.t.30* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.30*, align 8 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK3-51-NEXT: store %struct..kmp_privates.t.30* [[TMP0]], %struct..kmp_privates.t.30** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.30*, %struct..kmp_privates.t.30** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], %struct..kmp_privates.t.30* [[TMP2]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK3-51-NEXT: ret void +// 
+// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_entry..31 +// CHECK3-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.29* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK3-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.28*, align 8 +// CHECK3-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.29*, align 8 +// CHECK3-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: store %struct.kmp_task_t_with_privates.29* [[TMP1]], %struct.kmp_task_t_with_privates.29** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.29*, %struct.kmp_task_t_with_privates.29** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], %struct.kmp_task_t_with_privates.29* [[TMP3]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK3-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.28* +// CHECK3-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], %struct.kmp_task_t_with_privates.29* [[TMP3]], i32 
0, i32 1 +// CHECK3-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.30* [[TMP9]] to i8* +// CHECK3-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.29* [[TMP3]] to i8* +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META143:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META150:![0-9]+]]) +// CHECK3-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !152 +// CHECK3-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.30*, i32**)* @.omp_task_privates_map..30 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: store %struct.anon.28* [[TMP8]], %struct.anon.28** [[__CONTEXT_ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: [[TMP12:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK3-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK3-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !152 +// CHECK3-51-NEXT: store i32 14, i32* [[TMP16]], align 4 +// 
CHECK3-51-NEXT: ret i32 0 +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..33 +// CHECK3-51-SAME: (%struct..kmp_privates.t.33* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.33*, align 8 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK3-51-NEXT: store %struct..kmp_privates.t.33* [[TMP0]], %struct..kmp_privates.t.33** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.33*, %struct..kmp_privates.t.33** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], %struct..kmp_privates.t.33* [[TMP2]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK3-51-NEXT: ret void +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_entry..34 +// CHECK3-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.32* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK3-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.31*, align 8 +// CHECK3-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.32*, align 8 +// CHECK3-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: store 
%struct.kmp_task_t_with_privates.32* [[TMP1]], %struct.kmp_task_t_with_privates.32** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.32*, %struct.kmp_task_t_with_privates.32** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], %struct.kmp_task_t_with_privates.32* [[TMP3]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK3-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.31* +// CHECK3-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], %struct.kmp_task_t_with_privates.32* [[TMP3]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.33* [[TMP9]] to i8* +// CHECK3-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.32* [[TMP3]] to i8* +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META153:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META156:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META158:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META160:![0-9]+]]) +// CHECK3-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !162 +// CHECK3-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.33*, i32**)* 
@.omp_task_privates_map..33 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: store %struct.anon.31* [[TMP8]], %struct.anon.31** [[__CONTEXT_ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: [[TMP12:%.*]] = load %struct.anon.31*, %struct.anon.31** [[__CONTEXT_ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK3-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK3-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !162 +// CHECK3-51-NEXT: store i32 15, i32* [[TMP16]], align 4 +// CHECK3-51-NEXT: ret i32 0 +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..36 +// CHECK3-51-SAME: (%struct..kmp_privates.t.36* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.36*, align 8 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK3-51-NEXT: store %struct..kmp_privates.t.36* [[TMP0]], %struct..kmp_privates.t.36** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.36*, %struct..kmp_privates.t.36** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], %struct..kmp_privates.t.36* [[TMP2]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: store i32* [[TMP3]], i32** 
[[TMP4]], align 8 +// CHECK3-51-NEXT: ret void +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_entry..37 +// CHECK3-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.35* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK3-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.34*, align 8 +// CHECK3-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.35*, align 8 +// CHECK3-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: store %struct.kmp_task_t_with_privates.35* [[TMP1]], %struct.kmp_task_t_with_privates.35** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.35*, %struct.kmp_task_t_with_privates.35** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], %struct.kmp_task_t_with_privates.35* [[TMP3]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK3-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.34* +// CHECK3-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], 
%struct.kmp_task_t_with_privates.35* [[TMP3]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.36* [[TMP9]] to i8* +// CHECK3-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.35* [[TMP3]] to i8* +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META163:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META166:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META170:![0-9]+]]) +// CHECK3-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !172 +// CHECK3-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.36*, i32**)* @.omp_task_privates_map..36 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: store %struct.anon.34* [[TMP8]], %struct.anon.34** [[__CONTEXT_ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: [[TMP12:%.*]] = load %struct.anon.34*, %struct.anon.34** [[__CONTEXT_ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !172 +// CHECK3-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK3-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK3-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !172 +// 
CHECK3-51-NEXT: store i32 16, i32* [[TMP16]], align 4 +// CHECK3-51-NEXT: ret i32 0 +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..39 +// CHECK3-51-SAME: (%struct..kmp_privates.t.39* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.39*, align 8 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK3-51-NEXT: store %struct..kmp_privates.t.39* [[TMP0]], %struct..kmp_privates.t.39** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.39*, %struct..kmp_privates.t.39** [[DOTADDR]], align 8 +// CHECK3-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], %struct..kmp_privates.t.39* [[TMP2]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK3-51-NEXT: ret void +// +// +// CHECK3-51-LABEL: define {{[^@]+}}@.omp_task_entry..40 +// CHECK3-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.38* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-51-NEXT: entry: +// CHECK3-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK3-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK3-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.37*, align 8 +// CHECK3-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK3-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK3-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.38*, align 8 +// CHECK3-51-NEXT: store i32 [[TMP0]], 
i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: store %struct.kmp_task_t_with_privates.38* [[TMP1]], %struct.kmp_task_t_with_privates.38** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK3-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.38*, %struct.kmp_task_t_with_privates.38** [[DOTADDR1]], align 8 +// CHECK3-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], %struct.kmp_task_t_with_privates.38* [[TMP3]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK3-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK3-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK3-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.37* +// CHECK3-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], %struct.kmp_task_t_with_privates.38* [[TMP3]], i32 0, i32 1 +// CHECK3-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.39* [[TMP9]] to i8* +// CHECK3-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.38* [[TMP3]] to i8* +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META173:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META178:![0-9]+]]) +// CHECK3-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META180:![0-9]+]]) +// CHECK3-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !182 +// CHECK3-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: store void (i8*, ...)* bitcast 
(void (%struct..kmp_privates.t.39*, i32**)* @.omp_task_privates_map..39 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: store %struct.anon.37* [[TMP8]], %struct.anon.37** [[__CONTEXT_ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: [[TMP12:%.*]] = load %struct.anon.37*, %struct.anon.37** [[__CONTEXT_ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK3-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] +// CHECK3-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !182 +// CHECK3-51-NEXT: store i32 17, i32* [[TMP16]], align 4 +// CHECK3-51-NEXT: ret i32 0 +// +// // CHECK3-51-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_task_codegen.cpp // CHECK3-51-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" { // CHECK3-51-NEXT: entry: @@ -7258,21 +8171,21 @@ void xxxx() { // CHECK4-51-NEXT: store i64 4, i64* [[TMP26]], align 8 // CHECK4-51-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP24]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 1, i8* [[TMP27]], align 8 -// CHECK4-51-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 1 -// CHECK4-51-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP30:%.*]] = ptrtoint i8* [[B]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 -// CHECK4-51-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP28:%.*]] = ptrtoint i8* [[B]] to i64 +// CHECK4-51-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 1 +// CHECK4-51-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP28]], i64* [[TMP30]], align 8 +// CHECK4-51-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 1, i64* [[TMP31]], align 8 -// CHECK4-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP29]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 1, i8* [[TMP32]], align 8 -// CHECK4-51-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 2 -// CHECK4-51-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP35:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP35]], i64* [[TMP34]], align 8 -// CHECK4-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP33:%.*]] = ptrtoint [2 x %struct.S]* [[S]] to i64 +// CHECK4-51-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 2 +// CHECK4-51-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP33]], i64* [[TMP35]], align 8 +// CHECK4-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 8, i64* [[TMP36]], align 8 -// CHECK4-51-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP34]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 1, i8* [[TMP37]], align 8 // CHECK4-51-NEXT: [[TMP38:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK4-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP38]] @@ -7282,13 +8195,13 @@ void xxxx() { // CHECK4-51-NEXT: [[TMP41:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 // CHECK4-51-NEXT: [[TMP42:%.*]] = ptrtoint i32* [[TMP40]] to i64 // CHECK4-51-NEXT: [[TMP43:%.*]] = sub nuw i64 [[TMP42]], [[TMP41]] -// CHECK4-51-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 -// CHECK4-51-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP46:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP46]], i64* [[TMP45]], align 8 -// CHECK4-51-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP44:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK4-51-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i64 3 +// CHECK4-51-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP44]], i64* [[TMP46]], align 8 +// CHECK4-51-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 [[TMP43]], i64* [[TMP47]], align 8 -// CHECK4-51-NEXT: 
[[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 1, i8* [[TMP48]], align 8 // CHECK4-51-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK4-51-NEXT: [[TMP49:%.*]] = bitcast %struct.kmp_depend_info* [[TMP23]] to i8* @@ -7308,13 +8221,13 @@ void xxxx() { // CHECK4-51-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP57]], i32 0, i32 0 // CHECK4-51-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 // CHECK4-51-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK4-51-NEXT: [[TMP60:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 -// CHECK4-51-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP62:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP62]], i64* [[TMP61]], align 8 -// CHECK4-51-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP60:%.*]] = ptrtoint %struct.S* [[ARRAYIDX12]] to i64 +// CHECK4-51-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 0 +// CHECK4-51-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP60]], i64* [[TMP62]], align 8 +// CHECK4-51-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 1 
// CHECK4-51-NEXT: store i64 4, i64* [[TMP63]], align 8 -// CHECK4-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP60]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP61]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 3, i8* [[TMP64]], align 8 // CHECK4-51-NEXT: [[TMP65:%.*]] = load i8, i8* [[B]], align 1 // CHECK4-51-NEXT: [[TMP66:%.*]] = sext i8 [[TMP65]] to i64 @@ -7330,13 +8243,13 @@ void xxxx() { // CHECK4-51-NEXT: [[TMP72:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 // CHECK4-51-NEXT: [[TMP73:%.*]] = ptrtoint i32* [[TMP71]] to i64 // CHECK4-51-NEXT: [[TMP74:%.*]] = sub nuw i64 [[TMP73]], [[TMP72]] -// CHECK4-51-NEXT: [[TMP75:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 -// CHECK4-51-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP77:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP77]], i64* [[TMP76]], align 8 -// CHECK4-51-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP75:%.*]] = ptrtoint i32* [[ARRAYIDX14]] to i64 +// CHECK4-51-NEXT: [[TMP76:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP59]], i64 1 +// CHECK4-51-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP75]], i64* [[TMP77]], align 8 +// CHECK4-51-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 [[TMP74]], i64* [[TMP78]], align 8 -// CHECK4-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP75]], i32 0, 
i32 2 +// CHECK4-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP76]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 3, i8* [[TMP79]], align 8 // CHECK4-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK4-51-NEXT: [[TMP80:%.*]] = bitcast %struct.kmp_depend_info* [[TMP59]] to i8* @@ -7350,13 +8263,13 @@ void xxxx() { // CHECK4-51-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP84]], i32 0, i32 0 // CHECK4-51-NEXT: [[TMP86:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR21]], i64 0, i64 0 // CHECK4-51-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 0 -// CHECK4-51-NEXT: [[TMP87:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 0 -// CHECK4-51-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP89:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP89]], i64* [[TMP88]], align 8 -// CHECK4-51-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP87:%.*]] = ptrtoint %struct.S* [[ARRAYIDX22]] to i64 +// CHECK4-51-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 0 +// CHECK4-51-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP87]], i64* [[TMP89]], align 8 +// CHECK4-51-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 4, i64* [[TMP90]], align 8 -// CHECK4-51-NEXT: [[TMP91:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP87]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP88]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 4, i8* [[TMP91]], align 8 // CHECK4-51-NEXT: [[TMP92:%.*]] = load i8, i8* [[B]], align 1 // CHECK4-51-NEXT: [[TMP93:%.*]] = sext i8 [[TMP92]] to i64 @@ -7372,13 +8285,13 @@ void xxxx() { // CHECK4-51-NEXT: [[TMP99:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 // CHECK4-51-NEXT: [[TMP100:%.*]] = ptrtoint i32* [[TMP98]] to i64 // CHECK4-51-NEXT: [[TMP101:%.*]] = sub nuw i64 [[TMP100]], [[TMP99]] -// CHECK4-51-NEXT: [[TMP102:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 -// CHECK4-51-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP104:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP104]], i64* [[TMP103]], align 8 -// CHECK4-51-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP102:%.*]] = ptrtoint i32* [[ARRAYIDX24]] to i64 +// CHECK4-51-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i64 1 +// CHECK4-51-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP102]], i64* [[TMP104]], align 8 +// CHECK4-51-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 [[TMP101]], i64* [[TMP105]], align 8 -// CHECK4-51-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP102]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP106:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP103]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 4, i8* [[TMP106]], align 8 // CHECK4-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR27]], align 8 // CHECK4-51-NEXT: [[TMP107:%.*]] = bitcast %struct.kmp_depend_info* [[TMP86]] to i8* @@ -7399,13 +8312,13 @@ void xxxx() { // CHECK4-51-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP114]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 3, i8* [[TMP117]], align 8 // CHECK4-51-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S]], i64 0, i64 1 -// CHECK4-51-NEXT: [[TMP118:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 -// CHECK4-51-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP120:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP120]], i64* [[TMP119]], align 8 -// CHECK4-51-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP118:%.*]] = ptrtoint %struct.S* [[ARRAYIDX32]] to i64 +// CHECK4-51-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 1 +// CHECK4-51-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP118]], i64* [[TMP120]], align 8 +// CHECK4-51-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 4, i64* [[TMP121]], align 8 -// CHECK4-51-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP118]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP122:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 3, i8* [[TMP122]], align 8 // CHECK4-51-NEXT: [[TMP123:%.*]] = mul nsw i64 0, [[TMP1]] // CHECK4-51-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP123]] @@ -7423,13 +8336,13 @@ void xxxx() { // CHECK4-51-NEXT: [[TMP130:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 // CHECK4-51-NEXT: [[TMP131:%.*]] = ptrtoint i32* [[TMP129]] to i64 // CHECK4-51-NEXT: [[TMP132:%.*]] = sub nuw i64 [[TMP131]], [[TMP130]] -// CHECK4-51-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 -// CHECK4-51-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 0 -// CHECK4-51-NEXT: [[TMP135:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 -// CHECK4-51-NEXT: store i64 [[TMP135]], i64* [[TMP134]], align 8 -// CHECK4-51-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 1 +// CHECK4-51-NEXT: [[TMP133:%.*]] = ptrtoint i32* [[ARRAYIDX34]] to i64 +// CHECK4-51-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP113]], i64 2 +// CHECK4-51-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 0 +// CHECK4-51-NEXT: store i64 [[TMP133]], i64* [[TMP135]], align 8 +// CHECK4-51-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 1 // CHECK4-51-NEXT: store i64 [[TMP132]], i64* [[TMP136]], align 8 -// CHECK4-51-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 2 +// CHECK4-51-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP134]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 3, i8* 
[[TMP137]], align 8 // CHECK4-51-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR37]], align 8 // CHECK4-51-NEXT: [[TMP138:%.*]] = bitcast %struct.kmp_depend_info* [[TMP113]] to i8* diff --git a/clang/test/OpenMP/task_if_codegen.cpp b/clang/test/OpenMP/task_if_codegen.cpp index 8a8c3eb6c3cf8c..bee4ae1c08faf2 100644 --- a/clang/test/OpenMP/task_if_codegen.cpp +++ b/clang/test/OpenMP/task_if_codegen.cpp @@ -408,13 +408,13 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.15* // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], %struct.kmp_task_t_with_privates.15* [[TMP16]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: store i64 4, i64* [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 2 // CHECK1-NEXT: store i8 1, i8* [[TMP23]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.kmp_depend_info* [[TMP18]] to i8* @@ -435,13 +435,13 @@ int main() { // CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %struct.kmp_task_t_with_privates.17* // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], %struct.kmp_task_t_with_privates.17* [[TMP29]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK1-NEXT: store i64 [[TMP34]], i64* [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP32:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP32]], i64* [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 // CHECK1-NEXT: store i64 4, i64* [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, i8* [[TMP36]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR10]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.kmp_depend_info* [[TMP31]] to i8* @@ -462,13 +462,13 @@ int main() { // CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to %struct.kmp_task_t_with_privates.19* // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], %struct.kmp_task_t_with_privates.19* [[TMP42]], i32 0, i32 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK1-NEXT: store i64 [[TMP47]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 1 // CHECK1-NEXT: store i64 4, i64* [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, i8* [[TMP49]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.kmp_depend_info* [[TMP44]] to i8* @@ -1025,13 +1025,13 @@ int main() { // CHECK2-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.15* // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], %struct.kmp_task_t_with_privates.15* [[TMP16]], i32 0, i32 0 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP21:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK2-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP19]], i64* [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: store i64 4, i64* [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 2 // CHECK2-NEXT: store i8 1, i8* [[TMP23]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK2-NEXT: [[TMP24:%.*]] = bitcast %struct.kmp_depend_info* [[TMP18]] to i8* @@ -1052,13 +1052,13 @@ int main() { // CHECK2-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %struct.kmp_task_t_with_privates.17* // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], %struct.kmp_task_t_with_privates.17* [[TMP29]], i32 0, i32 0 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP34:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK2-NEXT: store i64 [[TMP34]], i64* [[TMP33]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP32:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP32]], i64* [[TMP34]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 // CHECK2-NEXT: store i64 4, i64* [[TMP35]], align 8 -// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, i8* [[TMP36]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR10]], align 8 // CHECK2-NEXT: [[TMP37:%.*]] = bitcast %struct.kmp_depend_info* [[TMP31]] to i8* @@ -1079,13 +1079,13 @@ int main() { // CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to %struct.kmp_task_t_with_privates.19* // CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], %struct.kmp_task_t_with_privates.19* [[TMP42]], i32 0, i32 0 // CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 -// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK2-NEXT: store i64 [[TMP47]], i64* [[TMP46]], align 8 -// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP45]], i64* [[TMP47]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 1 // CHECK2-NEXT: store i64 4, i64* [[TMP48]], align 8 -// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, i8* [[TMP49]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK2-NEXT: [[TMP50:%.*]] = bitcast %struct.kmp_depend_info* [[TMP44]] to i8* @@ -1642,13 +1642,13 @@ int main() { // CHECK5-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.15* // CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], %struct.kmp_task_t_with_privates.15* [[TMP16]], i32 0, i32 0 // CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP21:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK5-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8 -// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP19]], i64* [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 1 // CHECK5-NEXT: store i64 4, i64* [[TMP22]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 2 // CHECK5-NEXT: store i8 1, i8* [[TMP23]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK5-NEXT: [[TMP24:%.*]] = bitcast %struct.kmp_depend_info* [[TMP18]] to i8* @@ -1669,13 +1669,13 @@ int main() { // CHECK5-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %struct.kmp_task_t_with_privates.17* // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], %struct.kmp_task_t_with_privates.17* [[TMP29]], i32 0, i32 0 // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP34:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK5-NEXT: store i64 [[TMP34]], i64* [[TMP33]], align 8 -// CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP32:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 +// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP32]], i64* [[TMP34]], align 8 +// CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 // CHECK5-NEXT: store i64 4, i64* [[TMP35]], align 8 -// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 // CHECK5-NEXT: store i8 3, i8* [[TMP36]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR10]], align 8 // CHECK5-NEXT: [[TMP37:%.*]] = bitcast %struct.kmp_depend_info* [[TMP31]] to i8* @@ -1696,13 +1696,13 @@ int main() { // CHECK5-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to %struct.kmp_task_t_with_privates.19* // CHECK5-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], %struct.kmp_task_t_with_privates.19* [[TMP42]], i32 0, i32 0 // CHECK5-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 -// CHECK5-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK5-NEXT: store i64 [[TMP47]], i64* [[TMP46]], align 8 -// CHECK5-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK5-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 +// CHECK5-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP45]], i64* [[TMP47]], align 8 +// CHECK5-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 1 // CHECK5-NEXT: store i64 4, i64* [[TMP48]], align 8 -// CHECK5-NEXT: [[TMP49:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 2 // CHECK5-NEXT: store i8 3, i8* [[TMP49]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK5-NEXT: [[TMP50:%.*]] = bitcast %struct.kmp_depend_info* [[TMP44]] to i8* @@ -2259,13 +2259,13 @@ int main() { // CHECK6-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.15* // CHECK6-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], %struct.kmp_task_t_with_privates.15* [[TMP16]], i32 0, i32 0 // CHECK6-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 -// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 -// CHECK6-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP21:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK6-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP19:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK6-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 +// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 0 +// CHECK6-NEXT: store i64 [[TMP19]], i64* [[TMP21]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 1 // CHECK6-NEXT: store i64 4, i64* [[TMP22]], align 8 -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 2 // CHECK6-NEXT: store i8 1, i8* [[TMP23]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK6-NEXT: [[TMP24:%.*]] = bitcast %struct.kmp_depend_info* [[TMP18]] to i8* @@ -2286,13 +2286,13 @@ int main() { // CHECK6-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %struct.kmp_task_t_with_privates.17* // CHECK6-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], %struct.kmp_task_t_with_privates.17* [[TMP29]], i32 0, i32 0 // CHECK6-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 -// CHECK6-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 -// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP34:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK6-NEXT: store i64 [[TMP34]], i64* [[TMP33]], align 8 -// CHECK6-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP32:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 +// CHECK6-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 +// CHECK6-NEXT: store i64 [[TMP32]], i64* [[TMP34]], align 8 +// CHECK6-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 // CHECK6-NEXT: store i64 4, i64* [[TMP35]], align 8 -// CHECK6-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP32]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 // CHECK6-NEXT: store i8 3, i8* [[TMP36]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR10]], align 8 // CHECK6-NEXT: [[TMP37:%.*]] = bitcast %struct.kmp_depend_info* [[TMP31]] to i8* @@ -2313,13 +2313,13 @@ int main() { // CHECK6-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to %struct.kmp_task_t_with_privates.19* // CHECK6-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], %struct.kmp_task_t_with_privates.19* [[TMP42]], i32 0, i32 0 // CHECK6-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 -// CHECK6-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 -// CHECK6-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP47:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 -// CHECK6-NEXT: store i64 [[TMP47]], i64* [[TMP46]], align 8 -// CHECK6-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 +// CHECK6-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 +// CHECK6-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 0 +// CHECK6-NEXT: store i64 [[TMP45]], i64* [[TMP47]], align 8 +// CHECK6-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 1 // CHECK6-NEXT: store i64 4, i64* [[TMP48]], align 8 -// CHECK6-NEXT: [[TMP49:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP45]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP46]], i32 0, i32 2 // CHECK6-NEXT: store i8 3, i8* [[TMP49]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR17]], align 8 // CHECK6-NEXT: [[TMP50:%.*]] = bitcast %struct.kmp_depend_info* [[TMP44]] to i8* From 2a5d5078d5dadb1aaa4487c89a92c91a98e2ed12 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 20 May 2022 06:44:35 +0000 Subject: [PATCH 410/908] [libc] Add the pthread_mutex_t type. Simple implementations of the functions pthread_mutex_init, pthread_mutex_destroy, pthread_mutex_lock and pthread_mutex_unlock have also been added. Future patches will extend these functions to add features required by the POSIX specification. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D126235 --- libc/config/linux/api.td | 1 + libc/config/linux/x86_64/entrypoints.txt | 4 + libc/include/CMakeLists.txt | 1 + libc/include/llvm-libc-types/CMakeLists.txt | 3 +- libc/include/llvm-libc-types/__mutex_type.h | 5 + .../include/llvm-libc-types/pthread_mutex_t.h | 16 ++ libc/spec/posix.td | 28 ++- libc/src/pthread/CMakeLists.txt | 46 +++++ libc/src/pthread/pthread_mutex_destroy.cpp | 26 +++ libc/src/pthread/pthread_mutex_destroy.h | 20 ++ libc/src/pthread/pthread_mutex_init.cpp | 35 ++++ libc/src/pthread/pthread_mutex_init.h | 21 ++ libc/src/pthread/pthread_mutex_lock.cpp | 26 +++ libc/src/pthread/pthread_mutex_lock.h | 20 ++ libc/src/pthread/pthread_mutex_unlock.cpp | 26 +++ libc/src/pthread/pthread_mutex_unlock.h | 20 ++ libc/src/pthread/pthread_mutexattr.h | 10 + libc/test/src/pthread/CMakeLists.txt | 17 ++ libc/test/src/pthread/pthread_mutex_test.cpp | 188 ++++++++++++++++++ 19 files changed, 511 insertions(+), 2 deletions(-) create mode 100644 libc/include/llvm-libc-types/pthread_mutex_t.h create mode 100644
libc/src/pthread/pthread_mutex_destroy.cpp create mode 100644 libc/src/pthread/pthread_mutex_destroy.h create mode 100644 libc/src/pthread/pthread_mutex_init.cpp create mode 100644 libc/src/pthread/pthread_mutex_init.h create mode 100644 libc/src/pthread/pthread_mutex_lock.cpp create mode 100644 libc/src/pthread/pthread_mutex_lock.h create mode 100644 libc/src/pthread/pthread_mutex_unlock.cpp create mode 100644 libc/src/pthread/pthread_mutex_unlock.h create mode 100644 libc/test/src/pthread/pthread_mutex_test.cpp diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index c99e35e5f2f288..6862139d66c31f 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -241,6 +241,7 @@ def ThreadsAPI : PublicAPI<"threads.h"> { def PThreadAPI : PublicAPI<"pthread.h"> { let Types = [ "pthread_attr_t", + "pthread_mutex_t", "pthread_mutexattr_t" ]; } diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index a0b7a6b5af225e..99937c7ebe5927 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -236,6 +236,10 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.pthread.pthread_attr_setstack libc.src.pthread.pthread_attr_setstacksize libc.src.pthread.pthread_attr_init + libc.src.pthread.pthread_mutex_destroy + libc.src.pthread.pthread_mutex_init + libc.src.pthread.pthread_mutex_lock + libc.src.pthread.pthread_mutex_unlock libc.src.pthread.pthread_mutexattr_destroy libc.src.pthread.pthread_mutexattr_init libc.src.pthread.pthread_mutexattr_getpshared diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index d011d6d6f59be0..5ffef44cbc62ee 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -163,6 +163,7 @@ add_gen_header( DEPENDS .llvm_libc_common_h .llvm-libc-types.pthread_attr_t + .llvm-libc-types.pthread_mutex_t .llvm-libc-types.pthread_mutexattr_t ) diff --git a/libc/include/llvm-libc-types/CMakeLists.txt 
b/libc/include/llvm-libc-types/CMakeLists.txt index 3a5c17f4e29c1c..681f7aa63fd28c 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -20,7 +20,8 @@ add_header(mtx_t HDR mtx_t.h DEPENDS .__futex_word .__mutex_type) add_header(off_t HDR off_t.h) add_header(off64_t HDR off64_t.h) add_header(once_flag HDR once_flag.h DEPENDS .__futex_word) -add_header(pthread_attr_t HDR pthread_attr_t.h) +add_header(pthread_attr_t HDR pthread_attr_t.h DEPENDS .size_t) +add_header(pthread_mutex_t HDR pthread_mutex_t.h DEPENDS .__futex_word .__mutex_type) add_header(pthread_mutexattr_t HDR pthread_mutexattr_t.h) add_header(size_t HDR size_t.h) add_header(ssize_t HDR ssize_t.h) diff --git a/libc/include/llvm-libc-types/__mutex_type.h b/libc/include/llvm-libc-types/__mutex_type.h index 37cb856fd58b8d..ffd67e7c7bc336 100644 --- a/libc/include/llvm-libc-types/__mutex_type.h +++ b/libc/include/llvm-libc-types/__mutex_type.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +#ifndef __LLVM_LIBC_TYPES___MUTEX_T_H +#define __LLVM_LIBC_TYPES___MUTEX_T_H + #include typedef struct { @@ -22,3 +25,5 @@ typedef struct { #error "Mutex type not defined for the target platform." #endif } __mutex_type; + +#endif // __LLVM_LIBC_TYPES___MUTEX_T_H diff --git a/libc/include/llvm-libc-types/pthread_mutex_t.h b/libc/include/llvm-libc-types/pthread_mutex_t.h new file mode 100644 index 00000000000000..65e43538cd27d4 --- /dev/null +++ b/libc/include/llvm-libc-types/pthread_mutex_t.h @@ -0,0 +1,16 @@ +//===-- Definition of pthread_mutex_t type --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H +#define __LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H + +#include + +typedef __mutex_type pthread_mutex_t; + +#endif // __LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H diff --git a/libc/spec/posix.td b/libc/spec/posix.td index fbb6a78225dd85..2436d8eea4b1cf 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -33,6 +33,12 @@ def POSIX : StandardSpec<"POSIX"> { ConstType ConstPThreadMutexAttrTPtr = ConstType; ConstType ConstRestrictedPThreadMutexAttrTPtr = ConstType; + NamedType PThreadMutexTType = NamedType<"pthread_mutex_t">; + PtrType PThreadMutexTPtr = PtrType; + RestrictedPtrType RestrictedPThreadMutexTPtr = RestrictedPtrType; + ConstType ConstPThreadMutexTPtr = ConstType; + ConstType ConstRestrictedPThreadMutexTPtr = ConstType; + HeaderSpec Errno = HeaderSpec< "errno.h", [ @@ -386,7 +392,7 @@ def POSIX : StandardSpec<"POSIX"> { HeaderSpec PThread = HeaderSpec< "pthread.h", [], // Macros - [PThreadAttrTType, PThreadMutexAttrTType], // Types + [PThreadAttrTType, PThreadMutexAttrTType, PThreadMutexTType], // Types [], // Enumerations [ FunctionSpec< @@ -499,6 +505,26 @@ def POSIX : StandardSpec<"POSIX"> { RetValSpec, [ArgSpec, ArgSpec] >, + FunctionSpec< + "pthread_mutex_init", + RetValSpec, + [ArgSpec, ArgSpec] + >, + FunctionSpec< + "pthread_mutex_destroy", + RetValSpec, + [ArgSpec] + >, + FunctionSpec< + "pthread_mutex_lock", + RetValSpec, + [ArgSpec] + >, + FunctionSpec< + "pthread_mutex_unlock", + RetValSpec, + [ArgSpec] + >, ] >; diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt index 5d447baef08a6e..ca5f0e0433cb57 100644 --- a/libc/src/pthread/CMakeLists.txt +++ b/libc/src/pthread/CMakeLists.txt @@ -196,3 +196,49 @@ add_entrypoint_object( libc.include.errno libc.include.pthread ) + +add_entrypoint_object( + pthread_mutex_init + SRCS + 
pthread_mutex_init.cpp + HDRS + pthread_mutex_init.h + DEPENDS + .pthread_mutexattr + libc.include.errno + libc.include.pthread + libc.src.__support.threads.mutex +) + +add_entrypoint_object( + pthread_mutex_destroy + SRCS + pthread_mutex_destroy.cpp + HDRS + pthread_mutex_destroy.h + DEPENDS + libc.include.pthread + libc.src.__support.threads.mutex +) + +add_entrypoint_object( + pthread_mutex_lock + SRCS + pthread_mutex_lock.cpp + HDRS + pthread_mutex_lock.h + DEPENDS + libc.include.pthread + libc.src.__support.threads.mutex +) + +add_entrypoint_object( + pthread_mutex_unlock + SRCS + pthread_mutex_unlock.cpp + HDRS + pthread_mutex_unlock.h + DEPENDS + libc.include.pthread + libc.src.__support.threads.mutex +) diff --git a/libc/src/pthread/pthread_mutex_destroy.cpp b/libc/src/pthread/pthread_mutex_destroy.cpp new file mode 100644 index 00000000000000..acb344bbea52f3 --- /dev/null +++ b/libc/src/pthread/pthread_mutex_destroy.cpp @@ -0,0 +1,26 @@ +//===-- Linux implementation of the pthread_mutex_destroy function --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_mutex_destroy.h" + +#include "src/__support/common.h" +#include "src/__support/threads/mutex.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, pthread_mutex_destroy, (pthread_mutex_t * mutex)) { + auto *m = reinterpret_cast(mutex); + Mutex::destroy(m); + // TODO: When the Mutex class supports all the possible error conditions + // return the appropriate error value here. 
+ return 0; +} + +} // namespace __llvm_libc diff --git a/libc/src/pthread/pthread_mutex_destroy.h b/libc/src/pthread/pthread_mutex_destroy.h new file mode 100644 index 00000000000000..bd6ea1932c1b68 --- /dev/null +++ b/libc/src/pthread/pthread_mutex_destroy.h @@ -0,0 +1,20 @@ +//===-- Implementation header for pthread_mutex_destroy ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX__DESTROY_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX__DESTROY_H + +#include + +namespace __llvm_libc { + +int pthread_mutex_destroy(pthread_mutex_t *mutex); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_DESTROY_H diff --git a/libc/src/pthread/pthread_mutex_init.cpp b/libc/src/pthread/pthread_mutex_init.cpp new file mode 100644 index 00000000000000..5038ee08875939 --- /dev/null +++ b/libc/src/pthread/pthread_mutex_init.cpp @@ -0,0 +1,35 @@ +//===-- Linux implementation of the pthread_mutex_init function -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_mutex_init.h" +#include "pthread_mutexattr.h" + +#include "src/__support/common.h" +#include "src/__support/threads/mutex.h" + +#include +#include + +namespace __llvm_libc { + +static_assert(sizeof(Mutex) <= sizeof(pthread_mutex_t), + "The public pthread_mutex_t type cannot accommodate the internal " + "mutex type."); + +LLVM_LIBC_FUNCTION(int, pthread_mutex_init, + (pthread_mutex_t * m, + const pthread_mutexattr_t *__restrict attr)) { + auto mutexattr = attr == nullptr ? DEFAULT_MUTEXATTR : *attr; + auto err = + Mutex::init(reinterpret_cast(m), false, + get_mutexattr_type(mutexattr) & PTHREAD_MUTEX_RECURSIVE, + get_mutexattr_robust(mutexattr) & PTHREAD_MUTEX_ROBUST); + return err == MutexError::NONE ? 0 : EAGAIN; +} + +} // namespace __llvm_libc diff --git a/libc/src/pthread/pthread_mutex_init.h b/libc/src/pthread/pthread_mutex_init.h new file mode 100644 index 00000000000000..d25fe8968cd63a --- /dev/null +++ b/libc/src/pthread/pthread_mutex_init.h @@ -0,0 +1,21 @@ +//===-- Implementation header for pthread_mutex_init function ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_INIT_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_INIT_H + +#include "include/pthread.h" + +namespace __llvm_libc { + +int pthread_mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *__restrict attr); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_PTHREAD_pthread_mutex_INIT_H diff --git a/libc/src/pthread/pthread_mutex_lock.cpp b/libc/src/pthread/pthread_mutex_lock.cpp new file mode 100644 index 00000000000000..9daffafff942fe --- /dev/null +++ b/libc/src/pthread/pthread_mutex_lock.cpp @@ -0,0 +1,26 @@ +//===-- Linux implementation of the pthread_mutex_lock function -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_mutex_lock.h" + +#include "src/__support/common.h" +#include "src/__support/threads/mutex.h" + +#include + +namespace __llvm_libc { + +// The implementation currently handles only plain mutexes. +LLVM_LIBC_FUNCTION(int, pthread_mutex_lock, (pthread_mutex_t * mutex)) { + reinterpret_cast(mutex)->lock(); + // TODO: When the Mutex class supports all the possible error conditions + // return the appropriate error value here. + return 0; +} + +} // namespace __llvm_libc diff --git a/libc/src/pthread/pthread_mutex_lock.h b/libc/src/pthread/pthread_mutex_lock.h new file mode 100644 index 00000000000000..bb47e598096982 --- /dev/null +++ b/libc/src/pthread/pthread_mutex_lock.h @@ -0,0 +1,20 @@ +//===-- Implementation header for pthread_mutex_lock function ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_LOCK_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_LOCK_H + +#include + +namespace __llvm_libc { + +int pthread_mutex_lock(pthread_mutex_t *mutex); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_LOCK_H diff --git a/libc/src/pthread/pthread_mutex_unlock.cpp b/libc/src/pthread/pthread_mutex_unlock.cpp new file mode 100644 index 00000000000000..3cc8c7cdac09b9 --- /dev/null +++ b/libc/src/pthread/pthread_mutex_unlock.cpp @@ -0,0 +1,26 @@ +//===-- Linux implementation of the pthread_mutex_unlock function ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_mutex_unlock.h" + +#include "src/__support/common.h" +#include "src/__support/threads/mutex.h" + +#include + +namespace __llvm_libc { + +// The implementation currently handles only plain mutexes. +LLVM_LIBC_FUNCTION(int, pthread_mutex_unlock, (pthread_mutex_t * mutex)) { + reinterpret_cast(mutex)->unlock(); + // TODO: When the Mutex class supports all the possible error conditions + // return the appropriate error value here. + return 0; +} + +} // namespace __llvm_libc diff --git a/libc/src/pthread/pthread_mutex_unlock.h b/libc/src/pthread/pthread_mutex_unlock.h new file mode 100644 index 00000000000000..4653841244eb79 --- /dev/null +++ b/libc/src/pthread/pthread_mutex_unlock.h @@ -0,0 +1,20 @@ +//===-- Implementation header for pthread_mutex_unlock function -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_UNLOCK_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_UNLOCK_H + +#include + +namespace __llvm_libc { + +int pthread_mutex_unlock(pthread_mutex_t *mutex); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEX_UNLOCK_H diff --git a/libc/src/pthread/pthread_mutexattr.h b/libc/src/pthread/pthread_mutexattr.h index 4f969e288c8817..98f9ff9067e443 100644 --- a/libc/src/pthread/pthread_mutexattr.h +++ b/libc/src/pthread/pthread_mutexattr.h @@ -31,6 +31,16 @@ constexpr pthread_mutexattr_t DEFAULT_MUTEXATTR = PTHREAD_MUTEX_STALLED << unsigned(PThreadMutexAttrPos::ROBUST_SHIFT) | PTHREAD_PROCESS_PRIVATE << unsigned(PThreadMutexAttrPos::PSHARED_SHIFT); +static inline int get_mutexattr_type(pthread_mutexattr_t attr) { + return (attr & unsigned(PThreadMutexAttrPos::TYPE_MASK)) >> + unsigned(PThreadMutexAttrPos::TYPE_SHIFT); +} + +static inline int get_mutexattr_robust(pthread_mutexattr_t attr) { + return (attr & unsigned(PThreadMutexAttrPos::ROBUST_MASK)) >> + unsigned(PThreadMutexAttrPos::ROBUST_SHIFT); +} + } // namespace __llvm_libc #endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_MUTEXATTR_H diff --git a/libc/test/src/pthread/CMakeLists.txt b/libc/test/src/pthread/CMakeLists.txt index db0516cecc4fdb..5f3bbcbdd9a360 100644 --- a/libc/test/src/pthread/CMakeLists.txt +++ b/libc/test/src/pthread/CMakeLists.txt @@ -39,3 +39,20 @@ add_libc_unittest( libc.src.pthread.pthread_mutexattr_setrobust libc.src.pthread.pthread_mutexattr_settype ) + +add_libc_unittest( + phtread_mutex_test + SUITE + libc_pthread_unittests + SRCS + pthread_mutex_test.cpp + DEPENDS + libc.include.pthread + libc.src.errno.errno + libc.src.pthread.pthread_mutex_destroy + libc.src.pthread.pthread_mutex_init + libc.src.pthread.pthread_mutex_lock 
+ libc.src.pthread.pthread_mutex_unlock + libc.src.threads.thrd_create + libc.src.threads.thrd_join +) diff --git a/libc/test/src/pthread/pthread_mutex_test.cpp b/libc/test/src/pthread/pthread_mutex_test.cpp new file mode 100644 index 00000000000000..8b06294169ed68 --- /dev/null +++ b/libc/test/src/pthread/pthread_mutex_test.cpp @@ -0,0 +1,188 @@ +//===-- Unittests for pthread_mutex_t -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/pthread/pthread_mutex_destroy.h" +#include "src/pthread/pthread_mutex_init.h" +#include "src/pthread/pthread_mutex_lock.h" +#include "src/pthread/pthread_mutex_unlock.h" + +// TODO: When pthread_t type is available, use it to spawn threads instead of +// thrd_t. +#include "src/threads/thrd_create.h" +#include "src/threads/thrd_join.h" + +#include "utils/UnitTest/Test.h" + +#include + +constexpr int START = 0; +constexpr int MAX = 10000; + +pthread_mutex_t mutex; +static int shared_int = START; + +int counter(void *arg) { + int last_count = START; + while (true) { + __llvm_libc::pthread_mutex_lock(&mutex); + if (shared_int == last_count + 1) { + shared_int++; + last_count = shared_int; + } + __llvm_libc::pthread_mutex_unlock(&mutex); + if (last_count >= MAX) + break; + } + return 0; +} + +TEST(LlvmLibcMutexTest, RelayCounter) { + ASSERT_EQ(__llvm_libc::pthread_mutex_init(&mutex, nullptr), 0); + + // The idea of this test is that two competing threads will update + // a counter only if the other thread has updated it. 
+ thrd_t thread; + __llvm_libc::thrd_create(&thread, counter, nullptr); + + int last_count = START; + while (true) { + ASSERT_EQ(__llvm_libc::pthread_mutex_lock(&mutex), 0); + if (shared_int == START) { + ++shared_int; + last_count = shared_int; + } else if (shared_int != last_count) { + ASSERT_EQ(shared_int, last_count + 1); + ++shared_int; + last_count = shared_int; + } + ASSERT_EQ(__llvm_libc::pthread_mutex_unlock(&mutex), 0); + if (last_count > MAX) + break; + } + + int retval = 123; + __llvm_libc::thrd_join(&thread, &retval); + ASSERT_EQ(retval, 0); + + __llvm_libc::pthread_mutex_destroy(&mutex); +} + +pthread_mutex_t start_lock, step_lock; +bool started, step; + +int stepper(void *arg) { + __llvm_libc::pthread_mutex_lock(&start_lock); + started = true; + __llvm_libc::pthread_mutex_unlock(&start_lock); + + __llvm_libc::pthread_mutex_lock(&step_lock); + step = true; + __llvm_libc::pthread_mutex_unlock(&step_lock); + return 0; +} + +TEST(LlvmLibcMutexTest, WaitAndStep) { + ASSERT_EQ(__llvm_libc::pthread_mutex_init(&start_lock, nullptr), 0); + ASSERT_EQ(__llvm_libc::pthread_mutex_init(&step_lock, nullptr), 0); + + // In this test, we start a new thread but block it before it can make a + // step. Once we ensure that the thread is blocked, we unblock it. + // After unblocking, we then verify that the thread was indeed unblocked. + step = false; + started = false; + ASSERT_EQ(__llvm_libc::pthread_mutex_lock(&step_lock), 0); + + thrd_t thread; + __llvm_libc::thrd_create(&thread, stepper, nullptr); + + while (true) { + // Make sure the thread actually started. + ASSERT_EQ(__llvm_libc::pthread_mutex_lock(&start_lock), 0); + bool s = started; + ASSERT_EQ(__llvm_libc::pthread_mutex_unlock(&start_lock), 0); + if (s) + break; + } + + // Since |step_lock| is still locked, |step| should be false. + ASSERT_FALSE(step); + + // Unlock the step lock and wait until the step is made. 
+ ASSERT_EQ(__llvm_libc::pthread_mutex_unlock(&step_lock), 0); + + while (true) { + ASSERT_EQ(__llvm_libc::pthread_mutex_lock(&step_lock), 0); + bool current_step_value = step; + ASSERT_EQ(__llvm_libc::pthread_mutex_unlock(&step_lock), 0); + if (current_step_value) + break; + } + + int retval = 123; + __llvm_libc::thrd_join(&thread, &retval); + ASSERT_EQ(retval, 0); + + __llvm_libc::pthread_mutex_destroy(&start_lock); + __llvm_libc::pthread_mutex_destroy(&step_lock); +} + +static constexpr int THREAD_COUNT = 10; +static pthread_mutex_t multiple_waiter_lock; +static pthread_mutex_t counter_lock; +static int wait_count = 0; + +int waiter_func(void *) { + __llvm_libc::pthread_mutex_lock(&counter_lock); + ++wait_count; + __llvm_libc::pthread_mutex_unlock(&counter_lock); + + // Block on the waiter lock until the main + // thread unblocks. + __llvm_libc::pthread_mutex_lock(&multiple_waiter_lock); + __llvm_libc::pthread_mutex_unlock(&multiple_waiter_lock); + + __llvm_libc::pthread_mutex_lock(&counter_lock); + --wait_count; + __llvm_libc::pthread_mutex_unlock(&counter_lock); + + return 0; +} + +TEST(LlvmLibcMutexTest, MultipleWaiters) { + __llvm_libc::pthread_mutex_init(&multiple_waiter_lock, nullptr); + __llvm_libc::pthread_mutex_init(&counter_lock, nullptr); + + __llvm_libc::pthread_mutex_lock(&multiple_waiter_lock); + thrd_t waiters[THREAD_COUNT]; + for (int i = 0; i < THREAD_COUNT; ++i) { + __llvm_libc::thrd_create(waiters + i, waiter_func, nullptr); + } + + // Spin until the counter is incremented to the desired + // value. 
+ while (true) { + __llvm_libc::pthread_mutex_lock(&counter_lock); + if (wait_count == THREAD_COUNT) { + __llvm_libc::pthread_mutex_unlock(&counter_lock); + break; + } + __llvm_libc::pthread_mutex_unlock(&counter_lock); + } + + __llvm_libc::pthread_mutex_unlock(&multiple_waiter_lock); + + int retval; + for (int i = 0; i < THREAD_COUNT; ++i) { + __llvm_libc::thrd_join(waiters + i, &retval); + } + + ASSERT_EQ(wait_count, 0); + + __llvm_libc::pthread_mutex_destroy(&multiple_waiter_lock); + __llvm_libc::pthread_mutex_destroy(&counter_lock); +} From 46c0ec9df46f5bb1d53ae5b98a337e23687bd3ff Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 24 May 2022 17:00:11 -0600 Subject: [PATCH 411/908] [InstCombine] Fold memrchr calls with sequences of identical bytes. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D123631 --- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 19 ++++++- llvm/test/Transforms/InstCombine/memrchr-4.ll | 53 ++++++++++++++++--- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index f33a63556c8282..58c4361a3dc43e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -974,7 +974,24 @@ Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) { } } - return nullptr; + // Truncate the string to search at most EndOff characters. + Str = Str.substr(0, EndOff); + if (Str.find_first_not_of(Str[0]) != StringRef::npos) + return nullptr; + + // If the source array consists of all equal characters, then for any + // C and N (whether in bounds or not), fold memrchr(S, C, N) to + // N != 0 && *S == C ? S + N - 1 : null + Type *SizeTy = Size->getType(); + Type *Int8Ty = B.getInt8Ty(); + Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0)); + // Slice off the sought character's high end bits. 
+ CharVal = B.CreateTrunc(CharVal, Int8Ty); + Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal); + Value *And = B.CreateLogicalAnd(NNeZ, CEqS0); + Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1)); + Value *SrcPlus = B.CreateGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus"); + return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel"); } Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { diff --git a/llvm/test/Transforms/InstCombine/memrchr-4.ll b/llvm/test/Transforms/InstCombine/memrchr-4.ll index d543d76f2099a1..e6ebb32a055e3d 100644 --- a/llvm/test/Transforms/InstCombine/memrchr-4.ll +++ b/llvm/test/Transforms/InstCombine/memrchr-4.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=instcombine -S | FileCheck %s ; ; Verify that memrchr calls with a string consisting of all the same -; characters are folded. +; characters are folded and those with mixed strings are not. declare i8* @memrchr(i8*, i32, i64) @@ -14,8 +14,10 @@ declare i8* @memrchr(i8*, i32, i64) define i8* @fold_memrchr_a11111_c_5(i32 %C) { ; CHECK-LABEL: @fold_memrchr_a11111_c_5( -; CHECK-NEXT: [[RET:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @a11111, i64 0, i64 0), i32 [[C:%.*]], i64 5) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 1 +; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[TMP2]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a11111, i64 0, i64 4), i8* null +; CHECK-NEXT: ret i8* [[MEMRCHR_SEL]] ; %ptr = getelementptr [5 x i8], [5 x i8]* @a11111, i64 0, i64 0 @@ -24,12 +26,35 @@ define i8* @fold_memrchr_a11111_c_5(i32 %C) { } +; Fold memrchr(a11111, C, N) to N && *a11111 == C ? a11111 + N - 1 : null, +; on the assumption that N is in bounds. 
+ +define i8* @fold_memrchr_a11111_c_n(i32 %C, i64 %N) { +; CHECK-LABEL: @fold_memrchr_a11111_c_n( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i1 [[TMP3]], i1 false +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[MEMRCHR_PTR_PLUS:%.*]] = getelementptr [5 x i8], [5 x i8]* @a11111, i64 0, i64 [[TMP5]] +; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[TMP4]], i8* [[MEMRCHR_PTR_PLUS]], i8* null +; CHECK-NEXT: ret i8* [[MEMRCHR_SEL]] +; + + %ptr = getelementptr [5 x i8], [5 x i8]* @a11111, i64 0, i64 0 + %ret = call i8* @memrchr(i8* %ptr, i32 %C, i64 %N) + ret i8* %ret +} + + ; Fold memrchr(a1110111, C, 3) to a1110111[2] == C ? a1110111 + 2 : null. define i8* @fold_memrchr_a1110111_c_3(i32 %C) { ; CHECK-LABEL: @fold_memrchr_a1110111_c_3( -; CHECK-NEXT: [[RET:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(3) getelementptr inbounds ([7 x i8], [7 x i8]* @a1110111, i64 0, i64 0), i32 [[C:%.*]], i64 3) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 1 +; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[TMP2]], i8* getelementptr inbounds ([7 x i8], [7 x i8]* @a1110111, i64 0, i64 2), i8* null +; CHECK-NEXT: ret i8* [[MEMRCHR_SEL]] ; %ptr = getelementptr [7 x i8], [7 x i8]* @a1110111, i64 0, i64 0 @@ -54,8 +79,8 @@ define i8* @call_memrchr_a1110111_c_4(i32 %C) { ; Don't fold memrchr(a1110111, C, 7). 
-define i8* @call_memrchr_a11111_c_7(i32 %C) { -; CHECK-LABEL: @call_memrchr_a11111_c_7( +define i8* @call_memrchr_a1110111_c_7(i32 %C) { +; CHECK-LABEL: @call_memrchr_a1110111_c_7( ; CHECK-NEXT: [[RET:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(7) getelementptr inbounds ([7 x i8], [7 x i8]* @a1110111, i64 0, i64 0), i32 [[C:%.*]], i64 7) ; CHECK-NEXT: ret i8* [[RET]] ; @@ -64,3 +89,17 @@ define i8* @call_memrchr_a11111_c_7(i32 %C) { %ret = call i8* @memrchr(i8* %ptr, i32 %C, i64 7) ret i8* %ret } + + +; Don't fold memrchr(a1110111, C, N). + +define i8* @call_memrchr_a1110111_c_n(i32 %C, i64 %N) { +; CHECK-LABEL: @call_memrchr_a1110111_c_n( +; CHECK-NEXT: [[RET:%.*]] = call i8* @memrchr(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @a1110111, i64 0, i64 0), i32 [[C:%.*]], i64 [[N:%.*]]) +; CHECK-NEXT: ret i8* [[RET]] +; + + %ptr = getelementptr [7 x i8], [7 x i8]* @a1110111, i64 0, i64 0 + %ret = call i8* @memrchr(i8* %ptr, i32 %C, i64 %N) + ret i8* %ret +} From a9e354c83b9e158c690d83c69ad6f8b5c8d1daa6 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Tue, 24 May 2022 15:17:39 -0700 Subject: [PATCH 412/908] [mlir][sparse] complex lowering Reviewed By: bixia Differential Revision: https://reviews.llvm.org/D126335 --- mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt | 1 + .../Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt index 4b20a31572d9b0..d8158a04ec2059 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt @@ -9,6 +9,7 @@ add_mlir_dialect_library(MLIRSparseTensorPipelines MLIRAffineToStandard MLIRBufferizationTransforms MLIRComplexToLLVM + MLIRComplexToLibm MLIRComplexToStandard MLIRFuncTransforms MLIRLinalgTransforms diff --git 
a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp index 1e817d1a68a49b..04898102433b8e 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -50,8 +50,9 @@ void mlir::sparse_tensor::buildSparseCompiler( pm.addPass(createMemRefToLLVMPass()); pm.addNestedPass(createConvertComplexToStandardPass()); pm.addNestedPass(createConvertMathToLLVMPass()); - pm.addPass(createConvertComplexToLLVMPass()); pm.addPass(createConvertMathToLibmPass()); + pm.addPass(createConvertComplexToLibmPass()); + pm.addPass(createConvertComplexToLLVMPass()); pm.addPass(createConvertFuncToLLVMPass()); pm.addPass(createReconcileUnrealizedCastsPass()); } From 9426df95b1b21ebbad6587ea95c80847fa550c47 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 24 May 2022 23:49:59 +0100 Subject: [PATCH 413/908] [LLVM][IR] Fix assert in ConstantExpr::getPtrToInt so all vector types are supported. 
Fixes: #55410 --- llvm/lib/IR/Constants.cpp | 4 ++-- llvm/unittests/IR/ConstantsTest.cpp | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index b62d6ec3b1ab6c..26da7fae391b66 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2235,8 +2235,8 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, "PtrToInt destination must be integer or integer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) - assert(cast(C->getType())->getNumElements() == - cast(DstTy)->getNumElements() && + assert(cast(C->getType())->getElementCount() == + cast(DstTy)->getElementCount() && "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::PtrToInt, C, DstTy, OnlyIfReduced); } diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp index ad43fbcb0531f5..b764af6cafae15 100644 --- a/llvm/unittests/IR/ConstantsTest.cpp +++ b/llvm/unittests/IR/ConstantsTest.cpp @@ -134,6 +134,9 @@ TEST(ConstantsTest, PointerCast) { VectorType *Int8PtrVecTy = FixedVectorType::get(Int8PtrTy, 4); VectorType *Int32PtrVecTy = FixedVectorType::get(Int32PtrTy, 4); VectorType *Int64VecTy = FixedVectorType::get(Int64Ty, 4); + VectorType *Int8PtrScalableVecTy = ScalableVectorType::get(Int8PtrTy, 4); + VectorType *Int32PtrScalableVecTy = ScalableVectorType::get(Int32PtrTy, 4); + VectorType *Int64ScalableVecTy = ScalableVectorType::get(Int64Ty, 4); // ptrtoint i8* to i64 EXPECT_EQ( @@ -150,11 +153,23 @@ TEST(ConstantsTest, PointerCast) { ConstantExpr::getPointerCast(Constant::getNullValue(Int8PtrVecTy), Int64VecTy)); + // ptrtoint to + EXPECT_EQ( + Constant::getNullValue(Int64ScalableVecTy), + ConstantExpr::getPointerCast(Constant::getNullValue(Int8PtrScalableVecTy), + Int64ScalableVecTy)); + // bitcast <4 x i8*> to <4 x i32*> EXPECT_EQ(Constant::getNullValue(Int32PtrVecTy), 
ConstantExpr::getPointerCast(Constant::getNullValue(Int8PtrVecTy), Int32PtrVecTy)); + // bitcast to + EXPECT_EQ( + Constant::getNullValue(Int32PtrScalableVecTy), + ConstantExpr::getPointerCast(Constant::getNullValue(Int8PtrScalableVecTy), + Int32PtrScalableVecTy)); + Type *Int32Ptr1Ty = Type::getInt32PtrTy(C, 1); ConstantInt *K = ConstantInt::get(Type::getInt64Ty(C), 1234); From b2b0322a8114af783d103435202b576167426c7f Mon Sep 17 00:00:00 2001 From: Andrew Browne Date: Tue, 24 May 2022 14:42:41 -0700 Subject: [PATCH 414/908] [DFSan] Add option to specify individual library files, and an option to exit with an error code if any library file was not found. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D126336 --- .../lib/dfsan/scripts/build-libc-list.py | 78 +++++++++++++------ 1 file changed, 54 insertions(+), 24 deletions(-) diff --git a/compiler-rt/lib/dfsan/scripts/build-libc-list.py b/compiler-rt/lib/dfsan/scripts/build-libc-list.py index 40805c0191602e..daa6248b82a953 100755 --- a/compiler-rt/lib/dfsan/scripts/build-libc-list.py +++ b/compiler-rt/lib/dfsan/scripts/build-libc-list.py @@ -54,40 +54,70 @@ def defined_function_list(object): help='path to libstdc++ DSO directory', default='/usr/lib/x86_64-linux-gnu') +p.add_option('--only-explicit-files', action='store_true', + dest='only_explicit_files', default=False, + help='Only process --lib-file, not the default libc libraries.') +p.add_option('--lib-file', action='append', metavar='PATH', + help='Specific library files to add.', + default=[]) + +p.add_option('--error-missing-lib', action='store_true', + help='Make this script exit with an error code if any library is missing.', + dest='error_missing_lib', default=False) + (options, args) = p.parse_args() -libs = [os.path.join(options.libc_dso_path, name) for name in - ['ld-linux-x86-64.so.2', - 'libanl.so.1', - 'libBrokenLocale.so.1', - 'libcidn.so.1', - 'libcrypt.so.1', - 'libc.so.6', - 'libdl.so.2', - 'libm.so.6', - 'libnsl.so.1', 
- 'libpthread.so.0', - 'libresolv.so.2', - 'librt.so.1', - 'libthread_db.so.1', - 'libutil.so.1']] -libs += [os.path.join(options.libc_archive_path, name) for name in - ['libc_nonshared.a', - 'libpthread_nonshared.a']] - -libs.append(os.path.join(options.libgcc_dso_path, 'libgcc_s.so.1')) -libs.append(os.path.join(options.libgcc_archive_path, 'libgcc.a')) - -if options.with_libstdcxx: - libs.append(os.path.join(options.libstdcxx_dso_path, 'libstdc++.so.6')) +def build_libs_list(): + libs = [os.path.join(options.libc_dso_path, name) for name in + ['ld-linux-x86-64.so.2', + 'libanl.so.1', + 'libBrokenLocale.so.1', + 'libcidn.so.1', + 'libcrypt.so.1', + 'libc.so.6', + 'libdl.so.2', + 'libm.so.6', + 'libnsl.so.1', + 'libpthread.so.0', + 'libresolv.so.2', + 'librt.so.1', + 'libthread_db.so.1', + 'libutil.so.1']] + libs += [os.path.join(options.libc_archive_path, name) for name in + ['libc_nonshared.a', + 'libpthread_nonshared.a']] + + libs.append(os.path.join(options.libgcc_dso_path, 'libgcc_s.so.1')) + libs.append(os.path.join(options.libgcc_archive_path, 'libgcc.a')) + + if options.with_libstdcxx: + libs.append(os.path.join(options.libstdcxx_dso_path, 'libstdc++.so.6')) + return libs + +libs = [] +if options.only_explicit_files: + libs = options.lib_file + if not libs: + print >> sys.stderr, 'No libraries provided.' + exit(1) +else: + libs = build_libs_list() + libs.extend(options.lib_file) + +missing_lib = False functions = [] for l in libs: if os.path.exists(l): functions += defined_function_list(l) else: + missing_lib = True print >> sys.stderr, 'warning: library %s not found' % l +if options.error_missing_lib and missing_lib: + print >> sys.stderr, 'Exiting with failure code due to missing library.' 
+ exit(1) + functions = list(set(functions)) functions.sort() From 9ff4f2dfea632d63e3a57a88a2faa634aae5c772 Mon Sep 17 00:00:00 2001 From: Salman Javed Date: Wed, 25 May 2022 10:15:12 +1200 Subject: [PATCH 415/908] [clang-tidy] Fix #55134 (regression introduced by 5da7c04) 5da7c04 introduced a regression in the NOLINT macro checking loop, replacing the call to `getImmediateExpansionRange().getBegin()` with `getImmediateMacroCallerLoc()`, which has similar but subtly different behaviour. The consequence is that NOLINTs cannot suppress diagnostics when they are attached to a token that came from a macro **argument**, rather than elsewhere in the macro expansion. Revert to pre-patch behaviour and add test cases to cover this issue. Differential Revision: https://reviews.llvm.org/D126138 --- .../clang-tidy/NoLintDirectiveHandler.cpp | 2 +- clang-tools-extra/docs/ReleaseNotes.rst | 4 ++++ .../test/clang-tidy/infrastructure/nolint.cpp | 21 +++++++++++++++++-- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp index 667ffdb5fab608..d482e49e0f6d96 100644 --- a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp +++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp @@ -266,7 +266,7 @@ bool NoLintDirectiveHandler::Impl::diagHasNoLintInMacro( return true; if (!DiagLoc.isMacroID()) return false; - DiagLoc = SrcMgr.getImmediateMacroCallerLoc(DiagLoc); + DiagLoc = SrcMgr.getImmediateExpansionRange(DiagLoc).getBegin(); } return false; } diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 74e77f4a7de11a..787f535dedb641 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -106,6 +106,10 @@ Improvements to clang-tidy means it is advised to use YAML's block style initiated by the pipe character `|` for the `Checks` section in order to benefit from the 
easier syntax that works without commas. +- Fixed a regression introduced in clang-tidy 14.0.0, which prevented NOLINTs + from suppressing diagnostics associated with macro arguments. This fixes + `Issue 55134 `_. + New checks ^^^^^^^^^^ diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp index 9ef194232b6b75..bc952ee03b5ada 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp +++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp @@ -1,5 +1,5 @@ // REQUIRES: static-analyzer -// RUN: %check_clang_tidy %s google-explicit-constructor,clang-diagnostic-unused-variable,clang-analyzer-core.UndefinedBinaryOperatorResult,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays %t -- -extra-arg=-Wunused-variable -- -I%S/Inputs/nolint +// RUN: %check_clang_tidy %s google-explicit-constructor,clang-diagnostic-unused-variable,clang-analyzer-core.UndefinedBinaryOperatorResult,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-pro-type-member-init %t -- -extra-arg=-Wunused-variable -- -I%S/Inputs/nolint #include "trigger_warning.h" void I(int& Out) { @@ -96,6 +96,23 @@ MACRO_NOARG // NOLINT #define MACRO_NOLINT class G { G(int i); }; // NOLINT MACRO_NOLINT +// Check that we can suppress diagnostics about macro arguments (as opposed to +// diagnostics about the macro contents itself). 
+#define MACRO_SUPPRESS_DIAG_FOR_ARG_1(X) \ + class X { \ + X(int i); /* NOLINT(google-explicit-constructor) */ \ + }; + +MACRO_SUPPRESS_DIAG_FOR_ARG_1(G1) + +#define MACRO_SUPPRESS_DIAG_FOR_ARG_2(X) \ + struct X { /* NOLINT(cppcoreguidelines-pro-type-member-init) */ \ + int a = 0; \ + int b; \ + }; + +MACRO_SUPPRESS_DIAG_FOR_ARG_2(G2) + #define DOUBLE_MACRO MACRO(H) // NOLINT DOUBLE_MACRO @@ -116,4 +133,4 @@ int array2[10]; // NOLINT(cppcoreguidelines-avoid-c-arrays) int array3[10]; // NOLINT(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) int array4[10]; // NOLINT(*-avoid-c-arrays) -// CHECK-MESSAGES: Suppressed 34 warnings (34 NOLINT) +// CHECK-MESSAGES: Suppressed 36 warnings (36 NOLINT) From f179f403c8579c939e09d27c383b6263cf2523aa Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 24 May 2022 17:07:36 -0700 Subject: [PATCH 416/908] [lldb] Disable modules in Apple-lldb-base The LLDB website recommends using the CMake caches to build on macOS. Although modules result in a faster build, this configuration tends to break occasionally because it's specific to our platform. I don't expect newcomers to be able to deal with those kind of breakages so don't enable them by default. 
--- lldb/cmake/caches/Apple-lldb-base.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/cmake/caches/Apple-lldb-base.cmake b/lldb/cmake/caches/Apple-lldb-base.cmake index 76ab843c4b6b7a..4d4f02bfae95bd 100644 --- a/lldb/cmake/caches/Apple-lldb-base.cmake +++ b/lldb/cmake/caches/Apple-lldb-base.cmake @@ -3,7 +3,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "") set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64 CACHE STRING "") set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "") -set(LLVM_ENABLE_MODULES ON CACHE BOOL "") set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "") set(LIBCXX_ENABLE_STATIC OFF CACHE BOOL "") From 436eaf8d32fa8d5db6782f4da30d0b5b7c2690e8 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 24 May 2022 17:17:26 -0700 Subject: [PATCH 417/908] [lldb] Improve TestAppleSimulatorOSType.py error message This was inspired by D109336 which got reverted because we didn't want the test to fail silently. This patch prints a more informative error message when we fail to parse the simctl output while still failing the test. 
Differential revision: https://reviews.llvm.org/D126217 --- .../API/tools/lldb-server/TestAppleSimulatorOSType.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py b/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py index 31fe282c7d9103..840e970410121f 100644 --- a/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py +++ b/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py @@ -17,9 +17,13 @@ class TestAppleSimulatorOSType(gdbremote_testcase.GdbRemoteTestCaseBase): def check_simulator_ostype(self, sdk, platform_name, arch=platform.machine()): cmd = ['xcrun', 'simctl', 'list', '-j', 'devices'] - self.trace(' '.join(cmd)) + cmd_str = ' '.join(cmd) + self.trace(cmd_str) sim_devices_str = subprocess.check_output(cmd).decode("utf-8") - sim_devices = json.loads(sim_devices_str)['devices'] + try: + sim_devices = json.loads(sim_devices_str)['devices'] + except json.decoder.JSONDecodeError: + self.fail("Could not parse '{}' output. Authorization denied?".format(cmd_str)) # Find an available simulator for the requested platform deviceUDID = None deviceRuntime = None From b976fac6eec4d7bc636830dafec3cd1db541ee89 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 24 May 2022 18:01:20 -0700 Subject: [PATCH 418/908] [BOLT][NFC] Remove unused BF::computeLocalUDChain method definition The function is only used inside AArch64MCPlusBuilder class, there are no uses of it as a BinaryFunction method. 
Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D126220 --- bolt/include/bolt/Core/BinaryFunction.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 0b58969806694c..2c3ed9d35636ee 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -667,9 +667,6 @@ class BinaryFunction { uint64_t Offset, uint64_t &TargetAddress); - DenseMap> - computeLocalUDChain(const MCInst *CurInstr); - BinaryFunction &operator=(const BinaryFunction &) = delete; BinaryFunction(const BinaryFunction &) = delete; From 5d8247d4c7c40d3f83fdffd1e337ee8cae7841d1 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 24 May 2022 18:04:42 -0700 Subject: [PATCH 419/908] [BOLT][NFC] Use for_each to simplify printLoopInfo Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D126242 --- bolt/lib/Core/BinaryFunction.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 528133304f5c21..5674fb9499246b 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -18,6 +18,7 @@ #include "bolt/Utils/NameResolver.h" #include "bolt/Utils/NameShortener.h" #include "bolt/Utils/Utils.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/edit_distance.h" @@ -4446,14 +4447,12 @@ void BinaryFunction::printLoopInfo(raw_ostream &OS) const { OS << "\n"; std::stack St; - for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) - St.push(*I); + for_each(*BLI, [&](BinaryLoop *L) { St.push(L); }); while (!St.empty()) { BinaryLoop *L = St.top(); St.pop(); - for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - St.push(*I); + for_each(*L, [&](BinaryLoop *Inner) { St.push(Inner); }); if (!hasValidProfile()) continue; From 
69f87b6c292ba24ba96f9b90bfb395fd0810995b Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 24 May 2022 18:25:40 -0700 Subject: [PATCH 420/908] [BOLT][NFC] Customize endline character for printInstruction(s) This would be used in `BF::dumpGraph` to dump left-justified text. Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D126232 --- bolt/include/bolt/Core/BinaryContext.h | 8 +++++--- bolt/lib/Core/BinaryContext.cpp | 11 ++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 9e1db3a78d0a27..ccd18336995486 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -1187,7 +1187,8 @@ class BinaryContext { uint64_t Offset = 0, const BinaryFunction *Function = nullptr, bool PrintMCInst = false, bool PrintMemData = false, - bool PrintRelocations = false) const; + bool PrintRelocations = false, + StringRef Endl = "\n") const; /// Print a range of instructions. 
template @@ -1195,10 +1196,11 @@ class BinaryContext { printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0, const BinaryFunction *Function = nullptr, bool PrintMCInst = false, bool PrintMemData = false, - bool PrintRelocations = false) const { + bool PrintRelocations = false, + StringRef Endl = "\n") const { while (Begin != End) { printInstruction(OS, *Begin, Offset, Function, PrintMCInst, PrintMemData, - PrintRelocations); + PrintRelocations, Endl); Offset += computeCodeSize(Begin, Begin + 1); ++Begin; } diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index dd2a51ceae6f71..e9ca8129453e18 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1643,9 +1643,10 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, uint64_t Offset, const BinaryFunction *Function, bool PrintMCInst, bool PrintMemData, - bool PrintRelocations) const { + bool PrintRelocations, + StringRef Endl) const { if (MIB->isEHLabel(Instruction)) { - OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; + OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; return; } OS << format(" %08" PRIx64 ": ", Offset); @@ -1654,7 +1655,7 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, OS << "\t!CFI\t$" << Offset << "\t; "; if (Function) printCFI(OS, *Function->getCFIFor(Instruction)); - OS << "\n"; + OS << Endl; return; } InstPrinter->printInst(&Instruction, 0, "", *STI, OS); @@ -1718,11 +1719,11 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, Function->printRelocations(OS, Offset, Size); } - OS << "\n"; + OS << Endl; if (PrintMCInst) { Instruction.dump_pretty(OS, InstPrinter.get()); - OS << "\n"; + OS << Endl; } } From f7581a3969c49653060abe29eb69076330d76290 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 24 May 2022 18:28:42 -0700 Subject: [PATCH 421/908] [BOLT][NFC] Use ListSeparator in 
BinaryFunction print methods Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D126243 --- bolt/lib/Core/BinaryFunction.cpp | 59 +++++++++++++------------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 5674fb9499246b..157d5d9c5d7793 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -20,6 +20,7 @@ #include "bolt/Utils/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/edit_distance.h" #include "llvm/Demangle/Demangle.h" @@ -454,31 +455,25 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, OS << "\n Parent : " << *ParentFragment; if (!Fragments.empty()) { OS << "\n Fragments : "; - const char *Sep = ""; - for (BinaryFunction *Frag : Fragments) { - OS << Sep << *Frag; - Sep = ", "; - } + ListSeparator LS; + for (BinaryFunction *Frag : Fragments) + OS << LS << *Frag; } if (hasCFG()) OS << "\n Hash : " << Twine::utohexstr(computeHash()); if (isMultiEntry()) { OS << "\n Secondary Entry Points : "; - const char *Sep = ""; - for (const auto &KV : SecondaryEntryPoints) { - OS << Sep << KV.second->getName(); - Sep = ", "; - } + ListSeparator LS; + for (const auto &KV : SecondaryEntryPoints) + OS << LS << KV.second->getName(); } if (FrameInstructions.size()) OS << "\n CFI Instrs : " << FrameInstructions.size(); if (BasicBlocksLayout.size()) { OS << "\n BB Layout : "; - const char *Sep = ""; - for (BinaryBasicBlock *BB : BasicBlocksLayout) { - OS << Sep << BB->getName(); - Sep = ", "; - } + ListSeparator LS; + for (BinaryBasicBlock *BB : BasicBlocksLayout) + OS << LS << BB->getName(); } if (ImageAddress) OS << "\n Image : 0x" << Twine::utohexstr(ImageAddress); @@ -553,20 +548,16 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, } if (!BB->pred_empty()) { OS << " Predecessors: "; 
- const char *Sep = ""; - for (BinaryBasicBlock *Pred : BB->predecessors()) { - OS << Sep << Pred->getName(); - Sep = ", "; - } + ListSeparator LS; + for (BinaryBasicBlock *Pred : BB->predecessors()) + OS << LS << Pred->getName(); OS << '\n'; } if (!BB->throw_empty()) { OS << " Throwers: "; - const char *Sep = ""; - for (BinaryBasicBlock *Throw : BB->throwers()) { - OS << Sep << Throw->getName(); - Sep = ", "; - } + ListSeparator LS; + for (BinaryBasicBlock *Throw : BB->throwers()) + OS << LS << Throw->getName(); OS << '\n'; } @@ -586,11 +577,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, return BB->BranchInfo[B] < BB->BranchInfo[A]; }); } - const char *Sep = ""; + ListSeparator LS; for (unsigned I = 0; I < Indices.size(); ++I) { BinaryBasicBlock *Succ = BB->Successors[Indices[I]]; BinaryBasicBlock::BinaryBranchInfo &BI = BB->BranchInfo[Indices[I]]; - OS << Sep << Succ->getName(); + OS << LS << Succ->getName(); if (ExecutionCount != COUNT_NO_PROFILE && BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { OS << " (mispreds: " << BI.MispredictedCount @@ -599,20 +590,18 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) { OS << " (inferred count: " << BI.Count << ")"; } - Sep = ", "; } OS << '\n'; } if (!BB->lp_empty()) { OS << " Landing Pads: "; - const char *Sep = ""; + ListSeparator LS; for (BinaryBasicBlock *LP : BB->landing_pads()) { - OS << Sep << LP->getName(); + OS << LS << LP->getName(); if (ExecutionCount != COUNT_NO_PROFILE) { OS << " (count: " << LP->getExecutionCount() << ")"; } - Sep = ", "; } OS << '\n'; } @@ -4461,11 +4450,9 @@ void BinaryFunction::printLoopInfo(raw_ostream &OS) const { << " loop header: " << L->getHeader()->getName(); OS << "\n"; OS << "Loop basic blocks: "; - const char *Sep = ""; - for (auto BI = L->block_begin(), BE = L->block_end(); BI != BE; ++BI) { - OS << Sep << (*BI)->getName(); - Sep = ", "; - } + ListSeparator LS; + for 
(BinaryBasicBlock *BB : L->blocks()) + OS << LS << BB->getName(); OS << "\n"; if (hasValidProfile()) { OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n"; From bebf7bdf9aad4e5d3ae09140d5af4d03bfda9094 Mon Sep 17 00:00:00 2001 From: owenca Date: Sat, 21 May 2022 21:51:19 -0700 Subject: [PATCH 422/908] [clang-format][NFC] Insert/remove braces in clang/lib/Format/ Differential Revision: https://reviews.llvm.org/D126157 --- clang/lib/Format/AffectedRangeManager.cpp | 6 +- clang/lib/Format/BreakableToken.cpp | 27 +- clang/lib/Format/ContinuationIndenter.cpp | 281 ++++-- clang/lib/Format/DefinitionBlockSeparator.cpp | 27 +- clang/lib/Format/Format.cpp | 139 ++- clang/lib/Format/FormatToken.cpp | 15 +- clang/lib/Format/FormatTokenLexer.cpp | 34 +- clang/lib/Format/QualifierAlignmentFixer.cpp | 31 +- clang/lib/Format/SortJavaScriptImports.cpp | 20 +- clang/lib/Format/TokenAnnotator.cpp | 853 ++++++++++++------ clang/lib/Format/UnwrappedLineFormatter.cpp | 144 ++- clang/lib/Format/UnwrappedLineParser.cpp | 159 ++-- clang/lib/Format/WhitespaceManager.cpp | 69 +- 13 files changed, 1187 insertions(+), 618 deletions(-) diff --git a/clang/lib/Format/AffectedRangeManager.cpp b/clang/lib/Format/AffectedRangeManager.cpp index f69f65c5ddf1f6..e65457437146e5 100644 --- a/clang/lib/Format/AffectedRangeManager.cpp +++ b/clang/lib/Format/AffectedRangeManager.cpp @@ -60,10 +60,12 @@ bool AffectedRangeManager::computeAffectedLines( bool AffectedRangeManager::affectsCharSourceRange( const CharSourceRange &Range) { - for (const CharSourceRange &R : Ranges) + for (const CharSourceRange &R : Ranges) { if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), R.getBegin()) && - !SourceMgr.isBeforeInTranslationUnit(R.getEnd(), Range.getBegin())) + !SourceMgr.isBeforeInTranslationUnit(R.getEnd(), Range.getBegin())) { return true; + } + } return false; } diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index 94bd9823e1f5ad..6ae112b86ebb6b 100644 
--- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -95,8 +95,9 @@ getCommentSplit(StringRef Text, unsigned ContentStartColumn, StringRef::size_type SpaceOffset = Text.find_first_of(Blanks, MaxSplitBytes); if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() && - Text[SpaceOffset + 1] == '{') + Text[SpaceOffset + 1] == '{') { MaxSplitBytes = SpaceOffset + 1; + } } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); @@ -141,9 +142,10 @@ getCommentSplit(StringRef Text, unsigned ContentStartColumn, // Make sure that we don't break at leading whitespace that // reaches past MaxSplit. StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); - if (FirstNonWhitespace == StringRef::npos) + if (FirstNonWhitespace == StringRef::npos) { // If the comment is only whitespace, we cannot split. return BreakableToken::Split(StringRef::npos, 0); + } SpaceOffset = Text.find_first_of( Blanks, std::max(MaxSplitBytes, FirstNonWhitespace)); } @@ -396,8 +398,9 @@ BreakableBlockComment::BreakableBlockComment( // ** blah blah blah // */ if (Lines.size() >= 2 && Content[1].startswith("**") && - static_cast(ContentColumn[1]) == StartColumn) + static_cast(ContentColumn[1]) == StartColumn) { DecorationColumn = StartColumn; + } Decoration = "* "; if (Lines.size() == 1 && !FirstInLine) { @@ -451,9 +454,10 @@ BreakableBlockComment::BreakableBlockComment( if (DecorationSize) ContentColumn[i] = DecorationColumn + DecorationSize; Content[i] = Content[i].substr(DecorationSize); - if (!Decoration.startswith(Content[i])) + if (!Decoration.startswith(Content[i])) { IndentAtLineBreak = std::min(IndentAtLineBreak, std::max(0, ContentColumn[i])); + } } IndentAtLineBreak = std::max(IndentAtLineBreak, Decoration.size()); @@ -478,10 +482,11 @@ BreakableBlockComment::BreakableBlockComment( LLVM_DEBUG({ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; llvm::dbgs() << "DelimitersOnNewline " << 
DelimitersOnNewline << "\n"; - for (size_t i = 0; i < Lines.size(); ++i) + for (size_t i = 0; i < Lines.size(); ++i) { llvm::dbgs() << i << " |" << Content[i] << "| " << "CC=" << ContentColumn[i] << "| " << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; + } }); } @@ -586,8 +591,9 @@ unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const { StringRef FirstWord = ContentWithNoDecoration.substr( 0, ContentWithNoDecoration.find_first_of(Blanks)); if (ContentIndentingJavadocAnnotations.find(FirstWord) != - ContentIndentingJavadocAnnotations.end()) + ContentIndentingJavadocAnnotations.end()) { return Style.ContinuationIndentWidth; + } return 0; } @@ -633,8 +639,9 @@ BreakableToken::Split BreakableBlockComment::getReflowSplit( if (LineIndex) { unsigned PreviousContentIndent = getContentIndent(LineIndex - 1); if (PreviousContentIndent && Trimmed != StringRef::npos && - Trimmed != PreviousContentIndent) + Trimmed != PreviousContentIndent) { return Split(StringRef::npos, 0); + } } return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); @@ -676,9 +683,10 @@ void BreakableBlockComment::adaptStartOfLine( // Note: this works because getCommentSplit is careful never to split at // the beginning of a line. size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); - if (BreakLength != StringRef::npos) + if (BreakLength != StringRef::npos) { insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0, Whitespaces); + } } return; } @@ -783,8 +791,9 @@ BreakableLineCommentSection::BreakableLineCommentSection( encoding::getCodePointNumBytes(FirstCommentChar, Encoding); if (encoding::columnWidth( Lines[i].substr(IndentPrefix.size(), FirstCharByteSize), - Encoding) != 1) + Encoding) != 1) { return false; + } // In C-like comments, add a space before #. For example this is useful // to preserve the relative indentation when commenting out code with // #includes. 
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index e3d54a0ba42a46..2cb985cdc4e548 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -123,8 +123,9 @@ static bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style) { const FormatToken &Previous = *Current.Previous; if (Current.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) { return true; + } if (Style.Language == FormatStyle::LK_Proto && Current.is(TT_SelectorName)) return true; return Previous.is(tok::comma) && !Current.isTrailingComment() && @@ -150,8 +151,9 @@ static bool opensProtoMessageField(const FormatToken &LessTok, // For example, the delimiter of R"deli(cont)deli" is deli. static llvm::Optional getRawStringDelimiter(StringRef TokenText) { if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. - || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) + || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) { return None; + } // A raw string starts with 'R"(' and delimiter is ascii and has // size at most 16 by the standard, so the first '(' must be among the first @@ -246,8 +248,9 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, // preprocessor indent. 
if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && (Line->Type == LT_PreprocessorDirective || - Line->Type == LT_ImportStatement)) + Line->Type == LT_ImportStatement)) { State.Column = 0; + } State.Line = Line; State.NextToken = Line->First; State.Stack.push_back(ParenState(/*Tok=*/nullptr, FirstIndent, FirstIndent, @@ -279,15 +282,17 @@ bool ContinuationIndenter::canBreak(const LineState &State) { const auto &CurrentState = State.Stack.back(); assert(&Previous == Current.Previous); if (!Current.CanBreakBefore && !(CurrentState.BreakBeforeClosingBrace && - Current.closesBlockOrBlockTypeList(Style))) + Current.closesBlockOrBlockTypeList(Style))) { return false; + } // The opening "{" of a braced list has to be on the same line as the first // element if it is nested in another braced init list or function call. if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && Previous.isNot(TT_DictLiteral) && Previous.is(BK_BracedInit) && Previous.Previous && - Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) + Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) { return false; + } // This prevents breaks like: // ... // SomeParameter, OtherParameter).DoSomething( @@ -295,8 +300,9 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // As they hide "DoSomething" and are generally bad for readability. if (Previous.opensScope() && Previous.isNot(tok::l_brace) && State.LowestLevelOnLine < State.StartOfLineLevel && - State.LowestLevelOnLine < Current.NestingLevel) + State.LowestLevelOnLine < Current.NestingLevel) { return false; + } if (Current.isMemberAccess() && CurrentState.ContainsUnwrappedBuilder) return false; @@ -304,8 +310,9 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // statement. 
if (Previous.is(tok::l_brace) && State.Stack.size() > 1 && State.Stack[State.Stack.size() - 2].NestedBlockInlined && - State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks) + State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks) { return false; + } // Don't break after very short return types (e.g. "void") as that is often // unexpected. @@ -317,8 +324,9 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // If binary operators are moved to the next line (including commas for some // styles of constructor initializers), that's always ok. if (!Current.isOneOf(TT_BinaryOperator, tok::comma) && - CurrentState.NoLineBreakInOperand) + CurrentState.NoLineBreakInOperand) { return false; + } if (Previous.is(tok::l_square) && Previous.is(TT_ObjCMethodExpr)) return false; @@ -338,20 +346,23 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Current.MustBreakBefore || Current.is(TT_InlineASMColon)) return true; if (CurrentState.BreakBeforeClosingBrace && - Current.closesBlockOrBlockTypeList(Style)) + Current.closesBlockOrBlockTypeList(Style)) { return true; + } if (CurrentState.BreakBeforeClosingParen && Current.is(tok::r_paren)) return true; if (Style.Language == FormatStyle::LK_ObjC && Style.ObjCBreakBeforeNestedBlockParam && Current.ObjCSelectorNameParts > 1 && - Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) + Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) { return true; + } // Avoid producing inconsistent states by requiring breaks where they are not // permitted for C# generic type constraints. 
if (CurrentState.IsCSharpGenericTypeConstraint && - Previous.isNot(TT_CSharpGenericTypeConstraintComma)) + Previous.isNot(TT_CSharpGenericTypeConstraintComma)) { return false; + } if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && Style.isCpp() && @@ -365,22 +376,25 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { (!Style.BreakBeforeTernaryOperators && Previous.is(TT_ConditionalExpr))) && CurrentState.BreakBeforeParameter && !Current.isTrailingComment() && - !Current.isOneOf(tok::r_paren, tok::r_brace)) + !Current.isOneOf(tok::r_paren, tok::r_brace)) { return true; + } if (CurrentState.IsChainedConditional && ((Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) && Current.is(tok::colon)) || (!Style.BreakBeforeTernaryOperators && Previous.is(TT_ConditionalExpr) && - Previous.is(tok::colon)))) + Previous.is(tok::colon)))) { return true; + } if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) || (Previous.is(TT_ArrayInitializerLSquare) && Previous.ParameterCount > 1) || opensProtoMessageField(Previous, Style)) && Style.ColumnLimit > 0 && getLengthToMatchingParen(Previous, State.Stack) + State.Column - 1 > - getColumnLimit(State)) + getColumnLimit(State)) { return true; + } const FormatToken &BreakConstructorInitializersToken = Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon @@ -392,24 +406,28 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { CurrentState.BreakBeforeParameter) && (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All || Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon || - Style.ColumnLimit != 0)) + Style.ColumnLimit != 0)) { return true; + } if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) && - State.Line->startsWith(TT_ObjCMethodSpecifier)) + State.Line->startsWith(TT_ObjCMethodSpecifier)) { return true; + } if (Current.is(TT_SelectorName) && 
!Previous.is(tok::at) && CurrentState.ObjCSelectorNameFound && CurrentState.BreakBeforeParameter && (Style.ObjCBreakBeforeNestedBlockParam || - !Current.startsSequence(TT_SelectorName, tok::colon, tok::caret))) + !Current.startsSequence(TT_SelectorName, tok::colon, tok::caret))) { return true; + } unsigned NewLineColumn = getNewLineColumn(State); if (Current.isMemberAccess() && Style.ColumnLimit != 0 && State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit && (State.Column > NewLineColumn || - Current.NestingLevel < State.StartOfLineLevel)) + Current.NestingLevel < State.StartOfLineLevel)) { return true; + } if (startsSegmentOfBuilderTypeCall(Current) && (CurrentState.CallContinuation != 0 || @@ -420,14 +438,16 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // }.bind(...)); // FIXME: We should find a more generic solution to this problem. !(State.Column <= NewLineColumn && Style.isJavaScript()) && - !(Previous.closesScopeAfterBlock() && State.Column <= NewLineColumn)) + !(Previous.closesScopeAfterBlock() && State.Column <= NewLineColumn)) { return true; + } // If the template declaration spans multiple lines, force wrap before the // function/class declaration if (Previous.ClosesTemplateDeclaration && CurrentState.BreakBeforeParameter && - Current.CanBreakBefore) + Current.CanBreakBefore) { return true; + } if (!State.Line->First->is(tok::kw_enum) && State.Column <= NewLineColumn) return false; @@ -438,8 +458,9 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at, Keywords.kw_dollar) && !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) && - nextIsMultilineString(State)) + nextIsMultilineString(State)) { return true; + } // Using CanBreakBefore here and below takes care of the decision whether the // current style uses wrapping before or after operators for the given @@ -477,8 +498,9 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // Same 
as above, but for the first "<<" operator. if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator) && - CurrentState.BreakBeforeParameter && CurrentState.FirstLessLess == 0) + CurrentState.BreakBeforeParameter && CurrentState.FirstLessLess == 0) { return true; + } if (Current.NestingLevel == 0 && !Current.isTrailingComment()) { // Always break after "template <...>"(*) and leading annotations. This is @@ -508,11 +530,13 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return Style.AlwaysBreakTemplateDeclarations != FormatStyle::BTDS_No; } if (Previous.is(TT_FunctionAnnotationRParen) && - State.Line->Type != LT_PreprocessorDirective) + State.Line->Type != LT_PreprocessorDirective) { return true; + } if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) && - Current.isNot(TT_LeadingJavaAnnotation)) + Current.isNot(TT_LeadingJavaAnnotation)) { return true; + } } if (Style.isJavaScript() && Previous.is(tok::r_paren) && @@ -534,21 +558,25 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // name. !Style.isJavaScript()) || (Current.is(tok::kw_operator) && !Previous.is(tok::coloncolon))) && - !Previous.is(tok::kw_template) && CurrentState.BreakBeforeParameter) + !Previous.is(tok::kw_template) && CurrentState.BreakBeforeParameter) { return true; + } // The following could be precomputed as they do not depend on the state. // However, as they should take effect only if the UnwrappedLine does not fit // into the ColumnLimit, they are checked here in the ContinuationIndenter. 
if (Style.ColumnLimit != 0 && Previous.is(BK_Block) && - Previous.is(tok::l_brace) && !Current.isOneOf(tok::r_brace, tok::comment)) + Previous.is(tok::l_brace) && + !Current.isOneOf(tok::r_brace, tok::comment)) { return true; + } if (Current.is(tok::lessless) && ((Previous.is(tok::identifier) && Previous.TokenText == "endl") || (Previous.Tok.isLiteral() && (Previous.TokenText.endswith("\\n\"") || - Previous.TokenText == "\'\\n\'")))) + Previous.TokenText == "\'\\n\'")))) { return true; + } if (Previous.is(TT_BlockComment) && Previous.IsMultiline) return true; @@ -639,18 +667,21 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, PPColumnCorrection = -1; } - if (!DryRun) + if (!DryRun) { Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces, State.Column + Spaces + PPColumnCorrection); + } // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance // declaration unless there is multiple inheritance. if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma && - Current.is(TT_InheritanceColon)) + Current.is(TT_InheritanceColon)) { CurrentState.NoLineBreak = true; + } if (Style.BreakInheritanceList == FormatStyle::BILS_AfterColon && - Previous.is(TT_InheritanceColon)) + Previous.is(TT_InheritanceColon)) { CurrentState.NoLineBreak = true; + } if (Current.is(TT_SelectorName) && !CurrentState.ObjCSelectorNameFound) { unsigned MinIndent = std::max( @@ -684,8 +715,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // caaaaaaaaaaaall( // caaaaaaaaaaaaaaaaaaaaaaall(aaaaaaaaaaaaaa, aaaaaaaaa)))); Current.FakeLParens.size() > 0 && - Current.FakeLParens.back() > prec::Unknown) + Current.FakeLParens.back() > prec::Unknown) { CurrentState.NoLineBreak = true; + } if (Previous.is(TT_TemplateString) && Previous.opensScope()) CurrentState.NoLineBreak = true; @@ -699,14 +731,15 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, if 
(CurrentState.AvoidBinPacking && startsNextParameter(Current, Style)) CurrentState.NoLineBreak = true; if (startsSegmentOfBuilderTypeCall(Current) && - State.Column > getNewLineColumn(State)) + State.Column > getNewLineColumn(State)) { CurrentState.ContainsUnwrappedBuilder = true; + } if (Current.is(TT_LambdaArrow) && Style.Language == FormatStyle::LK_Java) CurrentState.NoLineBreak = true; if (Current.isMemberAccess() && Previous.is(tok::r_paren) && (Previous.MatchingParen && - (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) + (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) { // If there is a function call with long parameters, break before trailing // calls. This prevents things like: // EXPECT_CALL(SomeLongParameter).Times( @@ -714,6 +747,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // We don't want to do this for short parameters as they can just be // indexes. CurrentState.NoLineBreak = true; + } // Don't allow the RHS of an operator to be split over multiple lines unless // there is a line-break right after the operator. 
@@ -740,8 +774,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, if ((!BreakBeforeOperator && !(HasTwoOperands && Style.AlignOperands != FormatStyle::OAS_DontAlign)) || - (!CurrentState.LastOperatorWrapped && BreakBeforeOperator)) + (!CurrentState.LastOperatorWrapped && BreakBeforeOperator)) { CurrentState.NoLineBreakInOperand = true; + } } State.Column += Spaces; @@ -787,8 +822,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, if (Previous.MatchingParen) { const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); if (Next && Next->isMemberAccess() && State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].CallContinuation == 0) + State.Stack[State.Stack.size() - 2].CallContinuation == 0) { CurrentState.LastSpace = State.Column; + } } } } @@ -821,8 +857,9 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // to avoid unnecessary line breaks that just work around this penalty. if (NextNonComment->is(tok::lessless) && CurrentState.FirstLessLess == 0 && (State.Column <= Style.ColumnLimit / 3 || - CurrentState.BreakBeforeParameter)) + CurrentState.BreakBeforeParameter)) { Penalty += Style.PenaltyBreakFirstLessLess; + } State.Column = getNewLineColumn(State); @@ -838,9 +875,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // member(value), // looooooooooooooooong_member( // looooooooooong_call(param_1, param_2, param_3)) - if (State.Column > State.FirstIndent) + if (State.Column > State.FirstIndent) { Penalty += Style.PenaltyIndentedWhitespace * (State.Column - State.FirstIndent); + } // Indent nested blocks relative to this column, unless in a very specific // JavaScript special case where: @@ -855,8 +893,9 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if (!Current.is(TT_LambdaArrow) && (!Style.isJavaScript() || Current.NestingLevel != 0 || !PreviousNonComment || !PreviousNonComment->is(tok::equal) || - 
!Current.isOneOf(Keywords.kw_async, Keywords.kw_function))) + !Current.isOneOf(Keywords.kw_async, Keywords.kw_function))) { CurrentState.NestedBlockIndent = State.Column; + } if (NextNonComment->isMemberAccess()) { if (CurrentState.CallContinuation == 0) @@ -889,25 +928,29 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // }]; // Thus, we set LastSpace of the next higher NestingLevel, to which we move // when we consume all of the "}"'s FakeRParens at the "{". - if (State.Stack.size() > 1) + if (State.Stack.size() > 1) { State.Stack[State.Stack.size() - 2].LastSpace = std::max(CurrentState.LastSpace, CurrentState.Indent) + Style.ContinuationIndentWidth; + } } if ((PreviousNonComment && PreviousNonComment->isOneOf(tok::comma, tok::semi) && !CurrentState.AvoidBinPacking) || - Previous.is(TT_BinaryOperator)) + Previous.is(TT_BinaryOperator)) { CurrentState.BreakBeforeParameter = false; + } if (PreviousNonComment && (PreviousNonComment->isOneOf(TT_TemplateCloser, TT_JavaAnnotation) || PreviousNonComment->ClosesRequiresClause) && - Current.NestingLevel == 0) + Current.NestingLevel == 0) { CurrentState.BreakBeforeParameter = false; + } if (NextNonComment->is(tok::question) || - (PreviousNonComment && PreviousNonComment->is(tok::question))) + (PreviousNonComment && PreviousNonComment->is(tok::question))) { CurrentState.BreakBeforeParameter = true; + } if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore) CurrentState.BreakBeforeParameter = false; @@ -932,11 +975,12 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if (!Current.isTrailingComment()) CurrentState.LastSpace = State.Column; - if (Current.is(tok::lessless)) + if (Current.is(tok::lessless)) { // If we are breaking before a "<<", we always want to indent relative to // RHS. This is necessary only for "<<", as we special-case it and don't // always indent relative to the RHS. CurrentState.LastSpace += 3; // 3 -> width of "<< ". 
+ } State.StartOfLineLevel = Current.NestingLevel; State.LowestLevelOnLine = Current.NestingLevel; @@ -965,19 +1009,22 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, !PreviousNonComment->isOneOf( TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation, TT_LeadingJavaAnnotation) && - Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope()) + Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope()) { CurrentState.BreakBeforeParameter = true; + } // If we break after { or the [ of an array initializer, we should also break // before the corresponding } or ]. if (PreviousNonComment && (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - opensProtoMessageField(*PreviousNonComment, Style))) + opensProtoMessageField(*PreviousNonComment, Style))) { CurrentState.BreakBeforeClosingBrace = true; + } - if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) + if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) { CurrentState.BreakBeforeClosingParen = Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent; + } if (CurrentState.AvoidBinPacking) { // If we are breaking after '(', '{', '<', or this is the break after a ':' @@ -995,15 +1042,17 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, !State.Line->MustBeDeclaration) || (Style.PackConstructorInitializers != FormatStyle::PCIS_NextLine && PreviousIsBreakingCtorInitializerColon) || - Previous.is(TT_DictLiteral)) + Previous.is(TT_DictLiteral)) { CurrentState.BreakBeforeParameter = true; + } // If we are breaking after a ':' to start a member initializer list, // and we allow all arguments on the next line, we should not break // before the next parameter. 
if (PreviousIsBreakingCtorInitializerColon && - Style.PackConstructorInitializers == FormatStyle::PCIS_NextLine) + Style.PackConstructorInitializers == FormatStyle::PCIS_NextLine) { CurrentState.BreakBeforeParameter = false; + } } return Penalty; @@ -1017,8 +1066,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { const auto &CurrentState = State.Stack.back(); if (CurrentState.IsCSharpGenericTypeConstraint && - Current.isNot(TT_CSharpGenericTypeConstraint)) + Current.isNot(TT_CSharpGenericTypeConstraint)) { return CurrentState.ColonPos + 2; + } const FormatToken &Previous = *Current.Previous; // If we are continuing an expression, we want to use the continuation indent. @@ -1032,14 +1082,16 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { // Java specific bits. if (Style.Language == FormatStyle::LK_Java && - Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends)) + Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends)) { return std::max(CurrentState.LastSpace, CurrentState.Indent + Style.ContinuationIndentWidth); + } if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths && - State.Line->First->is(tok::kw_enum)) + State.Line->First->is(tok::kw_enum)) { return (Style.IndentWidth * State.Line->First->IndentLevel) + Style.IndentWidth; + } if (NextNonComment->is(tok::l_brace) && NextNonComment->is(BK_Block)) return Current.NestingLevel == 0 ? 
State.FirstIndent : CurrentState.Indent; @@ -1071,22 +1123,26 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { // } if (Current.is(tok::r_paren) && State.Stack.size() > 1 && (!Current.Next || - Current.Next->isOneOf(tok::semi, tok::kw_const, tok::l_brace))) + Current.Next->isOneOf(tok::semi, tok::kw_const, tok::l_brace))) { return State.Stack[State.Stack.size() - 2].LastSpace; + } if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && - Current.is(tok::r_paren) && State.Stack.size() > 1) + Current.is(tok::r_paren) && State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; + } if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope()) return State.Stack[State.Stack.size() - 2].LastSpace; if (Current.is(tok::identifier) && Current.Next && (Current.Next->is(TT_DictLiteral) || ((Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && - Current.Next->isOneOf(tok::less, tok::l_brace)))) + Current.Next->isOneOf(tok::less, tok::l_brace)))) { return CurrentState.Indent; + } if (NextNonComment->is(TT_ObjCStringLiteral) && - State.StartOfStringLiteral != 0) + State.StartOfStringLiteral != 0) { return State.StartOfStringLiteral - 1; + } if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) return State.StartOfStringLiteral; if (NextNonComment->is(tok::lessless) && CurrentState.FirstLessLess != 0) @@ -1139,14 +1195,16 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { TT_AttributeParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || (!Style.IndentWrappedFunctionNames && - NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) + NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) { return std::max(CurrentState.LastSpace, CurrentState.Indent); + } if (NextNonComment->is(TT_SelectorName)) { if (!CurrentState.ObjCSelectorNameFound) { 
unsigned MinIndent = CurrentState.Indent; - if (shouldIndentWrappedSelectorName(Style, State.Line->Type)) + if (shouldIndentWrappedSelectorName(Style, State.Line->Type)) { MinIndent = std::max(MinIndent, State.FirstIndent + Style.ContinuationIndentWidth); + } // If LongestObjCSelectorName is 0, we are indenting the first // part of an ObjC selector (or a selector component which is // not colon-aligned due to block formatting). @@ -1170,54 +1228,65 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (NextNonComment->is(tok::colon) && NextNonComment->is(TT_ObjCMethodExpr)) return CurrentState.ColonPos; if (NextNonComment->is(TT_ArraySubscriptLSquare)) { - if (CurrentState.StartOfArraySubscripts != 0) + if (CurrentState.StartOfArraySubscripts != 0) { return CurrentState.StartOfArraySubscripts; - else if (Style.isCSharp()) // C# allows `["key"] = value` inside object - // initializers. + } else if (Style.isCSharp()) { // C# allows `["key"] = value` inside object + // initializers. return CurrentState.Indent; + } return ContinuationIndent; } // This ensure that we correctly format ObjC methods calls without inputs, // i.e. 
where the last element isn't selector like: [callee method]; if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 && - NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr)) + NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr)) { return CurrentState.Indent; + } if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) || - Previous.isOneOf(tok::coloncolon, tok::equal, TT_JsTypeColon)) + Previous.isOneOf(tok::coloncolon, tok::equal, TT_JsTypeColon)) { return ContinuationIndent; + } if (PreviousNonComment && PreviousNonComment->is(tok::colon) && - PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) + PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) { return ContinuationIndent; + } if (NextNonComment->is(TT_CtorInitializerComma)) return CurrentState.Indent; if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) { return CurrentState.Indent; + } if (PreviousNonComment && PreviousNonComment->is(TT_InheritanceColon) && - Style.BreakInheritanceList == FormatStyle::BILS_AfterColon) + Style.BreakInheritanceList == FormatStyle::BILS_AfterColon) { return CurrentState.Indent; + } if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, - TT_InheritanceComma)) + TT_InheritanceComma)) { return State.FirstIndent + Style.ConstructorInitializerIndentWidth; + } if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() && - !Current.isOneOf(tok::colon, tok::comment)) + !Current.isOneOf(tok::colon, tok::comment)) { return ContinuationIndent; + } if (Current.is(TT_ProtoExtensionLSquare)) return CurrentState.Indent; - if (Current.isBinaryOperator() && CurrentState.UnindentOperator) + if (Current.isBinaryOperator() && CurrentState.UnindentOperator) { return CurrentState.Indent - Current.Tok.getLength() - 
Current.SpacesRequiredBefore; + } if (Current.isOneOf(tok::comment, TT_BlockComment, TT_LineComment) && - NextNonComment->isBinaryOperator() && CurrentState.UnindentOperator) + NextNonComment->isBinaryOperator() && CurrentState.UnindentOperator) { return CurrentState.Indent - NextNonComment->Tok.getLength() - NextNonComment->SpacesRequiredBefore; + } if (CurrentState.Indent == State.FirstIndent && PreviousNonComment && - !PreviousNonComment->isOneOf(tok::r_brace, TT_CtorInitializerComma)) + !PreviousNonComment->isOneOf(tok::r_brace, TT_CtorInitializerComma)) { // Ensure that we fall back to the continuation indent width instead of // just flushing continuations left. return CurrentState.Indent + Style.ContinuationIndentWidth; + } return CurrentState.Indent; } @@ -1254,11 +1323,13 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) CurrentState.LastOperatorWrapped = Newline; if (Current.is(TT_ConditionalExpr) && Current.Previous && - !Current.Previous->is(TT_ConditionalExpr)) + !Current.Previous->is(TT_ConditionalExpr)) { CurrentState.LastOperatorWrapped = Newline; + } if (Current.is(TT_ArraySubscriptLSquare) && - CurrentState.StartOfArraySubscripts == 0) + CurrentState.StartOfArraySubscripts == 0) { CurrentState.StartOfArraySubscripts = State.Column; + } auto IsWrappedConditional = [](const FormatToken &Tok) { if (!(Tok.is(TT_ConditionalExpr) && Tok.is(tok::question))) @@ -1281,9 +1352,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, CurrentState.QuestionColumn = State.Column; } if (!Current.opensScope() && !Current.closesScope() && - !Current.is(TT_PointerOrReference)) + !Current.is(TT_PointerOrReference)) { State.LowestLevelOnLine = std::min(State.LowestLevelOnLine, Current.NestingLevel); + } if (Current.isMemberAccess()) CurrentState.StartOfFunctionCall = !Current.NextOperator ? 
0 : State.Column; if (Current.is(TT_SelectorName)) @@ -1316,9 +1388,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Style.PackConstructorInitializers > FormatStyle::PCIS_BinPack) CurrentState.AvoidBinPacking = true; } - if (Current.is(TT_InheritanceColon)) + if (Current.is(TT_InheritanceColon)) { CurrentState.Indent = State.FirstIndent + Style.ConstructorInitializerIndentWidth; + } if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline) CurrentState.NestedBlockIndent = State.Column + Current.ColumnWidth + 1; if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow)) @@ -1362,13 +1435,14 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column + 1; - if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) + if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column + 1; - else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) + } else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column; - else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && - !Current.isStringLiteral()) + } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && + !Current.isStringLiteral()) { State.StartOfStringLiteral = 0; + } State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; @@ -1461,8 +1535,9 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, if (PrecedenceLevel > prec::Unknown) NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); if (PrecedenceLevel != prec::Conditional && !Current.is(TT_UnaryOperator) && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) + Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { NewParenState.StartOfFunctionCall = 
State.Column; + } // Indent conditional expressions, unless they are chained "else-if" // conditionals. Never indent expression where the 'operator' is ',', ';' or @@ -1515,8 +1590,9 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, // Don't allow '<' or '(' in C# generic type constraints to start new scopes. if (Current.isOneOf(tok::less, tok::l_paren) && - CurrentState.IsCSharpGenericTypeConstraint) + CurrentState.IsCSharpGenericTypeConstraint) { return; + } if (Current.MatchingParen && Current.is(BK_Block)) { moveStateToNewBlock(State); @@ -1596,8 +1672,9 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, // If this '[' opens an ObjC call, determine whether all parameters fit // into one line and put one per line if they don't. if (getLengthToMatchingParen(Current, State.Stack) + State.Column > - getColumnLimit(State)) + getColumnLimit(State)) { BreakBeforeParameter = true; + } } else { // For ColumnLimit = 0, we have to figure out whether there is or has to // be a line break within this call. 
@@ -1660,8 +1737,9 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) || (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || State.NextToken->is(TT_TemplateCloser) || - (Current.is(tok::greater) && Current.is(TT_DictLiteral)))) + (Current.is(tok::greater) && Current.is(TT_DictLiteral)))) { State.Stack.pop_back(); + } auto &CurrentState = State.Stack.back(); @@ -1684,8 +1762,9 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { getLengthToMatchingParen(CurrentScopeOpener, State.Stack) + CurrentScopeOpener.TotalLength - Current.TotalLength - 1; if (State.Column + Current.ColumnWidth + NecessarySpaceInLine <= - Style.ColumnLimit) + Style.ColumnLimit) { CurrentState.BreakBeforeParameter = false; + } } } @@ -1972,9 +2051,10 @@ ContinuationIndenter::getRawStringStyle(const FormatToken &Current, if (!Delimiter) return None; auto RawStringStyle = RawStringFormats.getDelimiterStyle(*Delimiter); - if (!RawStringStyle && Delimiter->empty()) + if (!RawStringStyle && Delimiter->empty()) { RawStringStyle = RawStringFormats.getEnclosingFunctionStyle( getEnclosingFunctionName(Current)); + } if (!RawStringStyle) return None; RawStringStyle->ColumnLimit = getColumnLimit(State); @@ -1991,8 +2071,9 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, // don't support. 
if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || Style.isCSharp() || Style.isJson() || !Style.BreakStringLiterals || - !AllowBreak) + !AllowBreak) { return nullptr; + } // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and @@ -2039,8 +2120,9 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, // If a comment token switches formatting, like // /* clang-format on */, we don't want to break it further, // but we may still want to adjust its indentation. - switchesFormatting(Current)) + switchesFormatting(Current)) { return nullptr; + } return std::make_unique( Current, StartColumn, Current.OriginalColumn, !Current.Previous, State.Line->InPPDirective, Encoding, Style, Whitespaces.useCRLF()); @@ -2049,15 +2131,17 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, Current.Previous->isNot(TT_ImplicitStringLiteral))) { bool RegularComments = [&]() { for (const FormatToken *T = &Current; T && T->is(TT_LineComment); - T = T->Next) + T = T->Next) { if (!(T->TokenText.startswith("//") || T->TokenText.startswith("#"))) return false; + } return true; }(); if (!Style.ReflowComments || CommentPragmasRegex.match(Current.TokenText.substr(2)) || - switchesFormatting(Current) || !RegularComments) + switchesFormatting(Current) || !RegularComments) { return nullptr; + } return std::make_unique( Current, StartColumn, /*InPPDirective=*/false, Encoding, Style); } @@ -2146,11 +2230,12 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, if (Split.first == StringRef::npos) { // No break opportunity - update the penalty and continue with the next // logical line. - if (LineIndex < EndIndex - 1) + if (LineIndex < EndIndex - 1) { // The last line's penalty is handled in addNextStateToQueue() or when // calling replaceWhitespaceAfterLastLine below. 
Penalty += Style.PenaltyExcessCharacter * (ContentStartColumn + RemainingTokenColumns - ColumnLimit); + } LLVM_DEBUG(llvm::dbgs() << " No break opportunity.\n"); break; } @@ -2226,9 +2311,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // The current line fits after compressing the whitespace - reflow // the next line into it if possible. TryReflow = true; - if (!DryRun) + if (!DryRun) { Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); + } // When we continue on the same line, leave one space between content. ContentStartColumn += ToSplitColumns + 1; Penalty += ExcessCharactersPenalty; @@ -2272,9 +2358,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first << ", " << Split.second << "\n"); - if (!DryRun) + if (!DryRun) { Token->insertBreak(LineIndex, TailOffset, Split, ContentIndent, Whitespaces); + } Penalty += NewBreakPenalty; TailOffset += Split.first + Split.second; @@ -2286,10 +2373,11 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // line. if (LineIndex + 1 != EndIndex) { unsigned NextLineIndex = LineIndex + 1; - if (NewBreakBefore) + if (NewBreakBefore) { // After breaking a line, try to reflow the next line into the current // one once RemainingTokenColumns fits. TryReflow = true; + } if (TryReflow) { // We decided that we want to try reflowing the next line into the // current one. 
@@ -2407,9 +2495,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Penalty += Style.PenaltyExcessCharacter * (ContentStartColumn + RemainingTokenColumns - ColumnLimit); - if (!DryRun) + if (!DryRun) { Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine, Whitespaces); + } ContentStartColumn = Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true); RemainingTokenColumns = Token->getRemainingLength( @@ -2457,12 +2546,14 @@ bool ContinuationIndenter::nextIsMultilineString(const LineState &State) { if (Current.IsMultiline) return true; if (Current.getNextNonComment() && - Current.getNextNonComment()->isStringLiteral()) + Current.getNextNonComment()->isStringLiteral()) { return true; // Implicit concatenation. + } if (Style.ColumnLimit != 0 && Style.BreakStringLiterals && State.Column + Current.ColumnWidth + Current.UnbreakableTailLength > - Style.ColumnLimit) + Style.ColumnLimit) { return true; // String will be split. + } return false; } diff --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp index b7b1123773ce08..f6edcd13ecd639 100644 --- a/clang/lib/Format/DefinitionBlockSeparator.cpp +++ b/clang/lib/Format/DefinitionBlockSeparator.cpp @@ -45,8 +45,9 @@ void DefinitionBlockSeparator::separateBlocks( auto LikelyDefinition = [&](const AnnotatedLine *Line, bool ExcludeEnum = false) { if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) || - Line->startsWithNamespace()) + Line->startsWithNamespace()) { return true; + } int BracketLevel = 0; for (const FormatToken *CurrentToken = Line->First; CurrentToken; CurrentToken = CurrentToken->Next) { @@ -54,8 +55,9 @@ void DefinitionBlockSeparator::separateBlocks( if ((CurrentToken->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union) || (Style.isJavaScript() && - CurrentToken->is(ExtraKeywords.kw_function)))) + CurrentToken->is(ExtraKeywords.kw_function)))) { return true; + } if (!ExcludeEnum && 
CurrentToken->is(tok::kw_enum)) return true; } @@ -92,8 +94,9 @@ void DefinitionBlockSeparator::separateBlocks( return; if (IsAccessSpecifierToken(TargetToken) || (OpeningLineIndex > 0 && - IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First))) + IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First))) { return; + } if (!TargetLine->Affected) return; Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert, @@ -156,8 +159,9 @@ void DefinitionBlockSeparator::separateBlocks( if (NextLine->MightBeFunctionDecl && NextLine->mightBeFunctionDefinition() && NextLine->First->NewlinesBefore == 1 && - OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro)) + OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro)) { return true; + } } if ((Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare))) @@ -194,8 +198,9 @@ void DefinitionBlockSeparator::separateBlocks( // misrecognition. if (OpeningLineIndex > 0 && Lines[OpeningLineIndex]->First->is(tok::l_brace) && - Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace)) + Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace)) { --OpeningLineIndex; + } OpeningLine = Lines[OpeningLineIndex]; // Closing a function definition. if (LikelyDefinition(OpeningLine)) { @@ -209,8 +214,9 @@ void DefinitionBlockSeparator::separateBlocks( // Avoid duplicated replacement. if (TargetToken->isNot(tok::l_brace)) InsertReplacement(NewlineCount); - } else if (IsNeverStyle) + } else if (IsNeverStyle) { InsertReplacement(OpeningLineIndex != 0); + } } } @@ -226,8 +232,9 @@ void DefinitionBlockSeparator::separateBlocks( if (!TargetToken->closesScope() && !IsPPConditional(OpeningLineIndex)) { // Check whether current line may precede a definition line. 
while (OpeningLineIndex + 1 < Lines.size() && - MayPrecedeDefinition(/*Direction=*/0)) + MayPrecedeDefinition(/*Direction=*/0)) { ++OpeningLineIndex; + } TargetLine = Lines[OpeningLineIndex]; if (!LikelyDefinition(TargetLine)) { OpeningLineIndex = I + 1; @@ -235,16 +242,18 @@ void DefinitionBlockSeparator::separateBlocks( TargetToken = TargetLine->First; InsertReplacement(NewlineCount); } - } else if (IsNeverStyle) + } else if (IsNeverStyle) { InsertReplacement(/*NewlineToInsert=*/1); + } } } - for (const auto &R : Whitespaces.generateReplacements()) + for (const auto &R : Whitespaces.generateReplacements()) { // The add method returns an Error instance which simulates program exit // code through overloading boolean operator, thus false here indicates // success. if (Result.add(R)) return; + } } } // namespace format } // namespace clang diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 10945f455120ea..65a391dde3d574 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -675,12 +675,14 @@ template <> struct MappingTraits { // former for backwards compatibility. if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None && Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) { - if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All) + if (Style.AlwaysBreakAfterDefinitionReturnType == + FormatStyle::DRTBS_All) { Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; - else if (Style.AlwaysBreakAfterDefinitionReturnType == - FormatStyle::DRTBS_TopLevel) + } else if (Style.AlwaysBreakAfterDefinitionReturnType == + FormatStyle::DRTBS_TopLevel) { Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevelDefinitions; + } } IO.mapOptional("AlwaysBreakBeforeMultilineStrings", @@ -704,8 +706,9 @@ template <> struct MappingTraits { // BreakInheritance was not, initialize the latter from the // former for backwards compatibility. 
if (BreakBeforeInheritanceComma && - Style.BreakInheritanceList == FormatStyle::BILS_BeforeColon) + Style.BreakInheritanceList == FormatStyle::BILS_BeforeColon) { Style.BreakInheritanceList = FormatStyle::BILS_BeforeComma; + } IO.mapOptional("BreakBeforeTernaryOperators", Style.BreakBeforeTernaryOperators); @@ -719,8 +722,9 @@ template <> struct MappingTraits { // BreakConstructorInitializers was not, initialize the latter from the // former for backwards compatibility. if (BreakConstructorInitializersBeforeComma && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon) { Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; + } IO.mapOptional("BreakAfterJavaFieldAnnotations", Style.BreakAfterJavaFieldAnnotations); @@ -1944,8 +1948,9 @@ class JavaScriptRequoter : public TokenAnalyzer { (Style.JavaScriptQuotes == FormatStyle::JSQS_Single && !Input.startswith("\"")) || (Style.JavaScriptQuotes == FormatStyle::JSQS_Double && - !Input.startswith("\'"))) + !Input.startswith("\'"))) { continue; + } // Change start and end quote. bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single; @@ -2052,8 +2057,9 @@ class Formatter : public TokenAnalyzer { if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) return true; if (Tok->is(TT_TemplateCloser) && - Tok->Previous->is(TT_TemplateCloser)) + Tok->Previous->is(TT_TemplateCloser)) { return true; + } } } } @@ -2069,11 +2075,15 @@ class Formatter : public TokenAnalyzer { continue; // Don't treat space in `void foo() &&` as evidence. 
if (const auto *Prev = Tok->getPreviousNonComment()) { - if (Prev->is(tok::r_paren) && Prev->MatchingParen) - if (const auto *Func = Prev->MatchingParen->getPreviousNonComment()) + if (Prev->is(tok::r_paren) && Prev->MatchingParen) { + if (const auto *Func = + Prev->MatchingParen->getPreviousNonComment()) { if (Func->isOneOf(TT_FunctionDeclarationName, TT_StartOfName, - TT_OverloadedOperator)) + TT_OverloadedOperator)) { continue; + } + } + } } bool SpaceBefore = Tok->hasWhitespaceBefore(); bool SpaceAfter = Tok->Next->hasWhitespaceBefore(); @@ -2109,10 +2119,11 @@ class Formatter : public TokenAnalyzer { : FormatStyle::PAS_Right; Style.ReferenceAlignment = FormatStyle::RAS_Pointer; } - if (Style.Standard == FormatStyle::LS_Auto) + if (Style.Standard == FormatStyle::LS_Auto) { Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines) ? FormatStyle::LS_Latest : FormatStyle::LS_Cpp03; + } BinPackInconclusiveFunctions = HasBinPackedFunction || !HasOnePerLineFunction; } @@ -2165,8 +2176,9 @@ class TrailingCommaInserter : public TokenAnalyzer { continue; if (!(FormatTok->is(tok::r_square) && Matching->is(TT_ArrayInitializerLSquare)) && - !(FormatTok->is(tok::r_brace) && Matching->is(TT_DictLiteral))) + !(FormatTok->is(tok::r_brace) && Matching->is(TT_DictLiteral))) { continue; + } FormatToken *Prev = FormatTok->getPreviousNonComment(); if (Prev->is(tok::comma) || Prev->is(tok::semi)) continue; @@ -2286,8 +2298,9 @@ class Cleaner : public TokenAnalyzer { if (AnnotatedLines[CurrentLine]->startsWithNamespace()) { if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine, - DeletedLines)) + DeletedLines)) { return false; + } CurrentLine = NewLine; continue; } @@ -2308,8 +2321,9 @@ class Cleaner : public TokenAnalyzer { // Check if the empty namespace is actually affected by changed ranges. 
if (!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange( AnnotatedLines[InitLine]->First->Tok.getLocation(), - AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc()))) + AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc()))) { return false; + } for (unsigned i = InitLine; i <= CurrentLine; ++i) DeletedLines.insert(i); @@ -2325,10 +2339,12 @@ class Cleaner : public TokenAnalyzer { void cleanupPair(FormatToken *Start, LeftKind LK, RightKind RK, bool DeleteLeft) { auto NextNotDeleted = [this](const FormatToken &Tok) -> FormatToken * { - for (auto *Res = Tok.Next; Res; Res = Res->Next) + for (auto *Res = Tok.Next; Res; Res = Res->Next) { if (!Res->is(tok::comment) && - DeletedTokens.find(Res) == DeletedTokens.end()) + DeletedTokens.find(Res) == DeletedTokens.end()) { return Res; + } + } return nullptr; }; for (auto *Left = Start; Left;) { @@ -2511,8 +2527,9 @@ class ObjCHeaderStyleGuesser : public TokenAnalyzer { for (auto Line : AnnotatedLines) { if (Line->First && (Line->First->TokenText.startswith("#") || Line->First->TokenText == "__pragma" || - Line->First->TokenText == "_Pragma")) + Line->First->TokenText == "_Pragma")) { continue; + } for (const FormatToken *FormatTok = Line->First; FormatTok; FormatTok = FormatTok->Next) { if ((FormatTok->Previous && FormatTok->Previous->is(tok::at) && @@ -2568,10 +2585,12 @@ struct JavaImportDirective { // Determines whether 'Ranges' intersects with ('Start', 'End'). 
static bool affectsRange(ArrayRef Ranges, unsigned Start, unsigned End) { - for (auto Range : Ranges) + for (auto Range : Ranges) { if (Range.getOffset() < End && - Range.getOffset() + Range.getLength() > Start) + Range.getOffset() + Range.getLength() > Start) { return true; + } + } return false; } @@ -2647,7 +2666,7 @@ static void sortCppIncludes(const FormatStyle &Style, SmallVector Indices = llvm::to_vector<16>(llvm::seq(0, Includes.size())); - if (Style.SortIncludes == FormatStyle::SI_CaseInsensitive) + if (Style.SortIncludes == FormatStyle::SI_CaseInsensitive) { llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) { const auto LHSFilenameLower = Includes[LHSI].Filename.lower(); const auto RHSFilenameLower = Includes[RHSI].Filename.lower(); @@ -2656,20 +2675,22 @@ static void sortCppIncludes(const FormatStyle &Style, std::tie(Includes[RHSI].Priority, RHSFilenameLower, Includes[RHSI].Filename); }); - else + } else { llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) { return std::tie(Includes[LHSI].Priority, Includes[LHSI].Filename) < std::tie(Includes[RHSI].Priority, Includes[RHSI].Filename); }); + } // The index of the include on which the cursor will be put after // sorting/deduplicating. unsigned CursorIndex; // The offset from cursor to the end of line. unsigned CursorToEOLOffset; - if (Cursor) + if (Cursor) { std::tie(CursorIndex, CursorToEOLOffset) = FindCursorIndex(Includes, Indices, *Cursor); + } // Deduplicate #includes. Indices.erase(std::unique(Indices.begin(), Indices.end(), @@ -2688,8 +2709,9 @@ static void sortCppIncludes(const FormatStyle &Style, // blocks. This we handle below by generating the updated #include blocks and // comparing it to the original. 
if (Indices.size() == Includes.size() && llvm::is_sorted(Indices) && - Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Preserve) + Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Preserve) { return; + } std::string result; for (unsigned Index : Indices) { @@ -2697,8 +2719,9 @@ static void sortCppIncludes(const FormatStyle &Style, result += "\n"; if (Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Regroup && - CurrentCategory != Includes[Index].Category) + CurrentCategory != Includes[Index].Category) { result += "\n"; + } } result += Includes[Index].Text; if (Cursor && CursorIndex == Index) @@ -2712,8 +2735,9 @@ static void sortCppIncludes(const FormatStyle &Style, // If the #includes are out of order, we generate a single replacement fixing // the entire range of blocks. Otherwise, no replacement is generated. if (replaceCRLF(result) == replaceCRLF(std::string(Code.substr( - IncludesBeginOffset, IncludesBlockSize)))) + IncludesBeginOffset, IncludesBlockSize)))) { return; + } auto Err = Replaces.add(tooling::Replacement( FileName, Includes.front().Offset, IncludesBlockSize, result)); @@ -2782,11 +2806,13 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, if (Trimmed.contains(RawStringTermination)) FormattingOff = false; - if (Trimmed == "// clang-format off" || Trimmed == "/* clang-format off */") + if (Trimmed == "// clang-format off" || + Trimmed == "/* clang-format off */") { FormattingOff = true; - else if (Trimmed == "// clang-format on" || - Trimmed == "/* clang-format on */") + } else if (Trimmed == "// clang-format on" || + Trimmed == "/* clang-format on */") { FormattingOff = false; + } const bool EmptyLineSkipped = Trimmed.empty() && @@ -2907,8 +2933,9 @@ static void sortJavaImports(const FormatStyle &Style, if (!result.empty()) { result += "\n"; if (CurrentIsStatic != Imports[Index].IsStatic || - CurrentImportGroup != JavaImportGroups[Index]) + CurrentImportGroup != 
JavaImportGroups[Index]) { result += "\n"; + } } for (StringRef CommentLine : Imports[Index].AssociatedCommentLines) { result += CommentLine; @@ -2922,8 +2949,9 @@ static void sortJavaImports(const FormatStyle &Style, // If the imports are out of order, we generate a single replacement fixing // the entire block. Otherwise, no replacement is generated. if (replaceCRLF(result) == replaceCRLF(std::string(Code.substr( - Imports.front().Offset, ImportsBlockSize)))) + Imports.front().Offset, ImportsBlockSize)))) { return; + } auto Err = Replaces.add(tooling::Replacement(FileName, Imports.front().Offset, ImportsBlockSize, result)); @@ -3012,8 +3040,9 @@ tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code, if (isLikelyXml(Code)) return Replaces; if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript && - isMpegTS(Code)) + isMpegTS(Code)) { return Replaces; + } if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript) return sortJavaScriptImports(Style, Code, Ranges, FileName); if (Style.Language == FormatStyle::LanguageKind::LK_Java) @@ -3227,46 +3256,54 @@ reformat(const FormatStyle &Style, StringRef Code, }); } - if (Style.InsertBraces) + if (Style.InsertBraces) { Passes.emplace_back([&](const Environment &Env) { return BracesInserter(Env, Expanded).process(); }); + } - if (Style.RemoveBracesLLVM) + if (Style.RemoveBracesLLVM) { Passes.emplace_back([&](const Environment &Env) { return BracesRemover(Env, Expanded).process(); }); + } - if (Style.FixNamespaceComments) + if (Style.FixNamespaceComments) { Passes.emplace_back([&](const Environment &Env) { return NamespaceEndCommentsFixer(Env, Expanded).process(); }); + } - if (Style.SortUsingDeclarations) + if (Style.SortUsingDeclarations) { Passes.emplace_back([&](const Environment &Env) { return UsingDeclarationsSorter(Env, Expanded).process(); }); + } } - if (Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave) + if (Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave) { 
Passes.emplace_back([&](const Environment &Env) { return DefinitionBlockSeparator(Env, Expanded).process(); }); + } - if (Style.isJavaScript() && Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) + if (Style.isJavaScript() && + Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) { Passes.emplace_back([&](const Environment &Env) { return JavaScriptRequoter(Env, Expanded).process(); }); + } Passes.emplace_back([&](const Environment &Env) { return Formatter(Env, Expanded, Status).process(); }); if (Style.isJavaScript() && - Style.InsertTrailingCommas == FormatStyle::TCS_Wrapped) + Style.InsertTrailingCommas == FormatStyle::TCS_Wrapped) { Passes.emplace_back([&](const Environment &Env) { return TrailingCommaInserter(Env, Expanded).process(); }); + } auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn, NextStartColumn, LastStartColumn); @@ -3409,18 +3446,21 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Java; if (FileName.endswith_insensitive(".js") || FileName.endswith_insensitive(".mjs") || - FileName.endswith_insensitive(".ts")) + FileName.endswith_insensitive(".ts")) { return FormatStyle::LK_JavaScript; // (module) JavaScript or TypeScript. 
+ } if (FileName.endswith(".m") || FileName.endswith(".mm")) return FormatStyle::LK_ObjC; if (FileName.endswith_insensitive(".proto") || - FileName.endswith_insensitive(".protodevel")) + FileName.endswith_insensitive(".protodevel")) { return FormatStyle::LK_Proto; + } if (FileName.endswith_insensitive(".textpb") || FileName.endswith_insensitive(".pb.txt") || FileName.endswith_insensitive(".textproto") || - FileName.endswith_insensitive(".asciipb")) + FileName.endswith_insensitive(".asciipb")) { return FormatStyle::LK_TextProto; + } if (FileName.endswith_insensitive(".td")) return FormatStyle::LK_TableGen; if (FileName.endswith_insensitive(".cs")) @@ -3484,13 +3524,15 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, StringRef Source = ""; if (std::error_code ec = parseConfiguration(llvm::MemoryBufferRef(StyleName, Source), &Style, - AllowUnknownOptions)) + AllowUnknownOptions)) { return make_string_error("Error parsing -style: " + ec.message()); - if (Style.InheritsParentConfig) + } + if (Style.InheritsParentConfig) { ChildFormatTextToApply.emplace_back( llvm::MemoryBuffer::getMemBuffer(StyleName, Source, false)); - else + } else { return Style; + } } // User provided clang-format file using -style=file:path/to/format/file. 
@@ -3499,9 +3541,10 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, auto ConfigFile = StyleName.substr(5); llvm::ErrorOr> Text = loadAndParseConfigFile(ConfigFile, FS, &Style, AllowUnknownOptions); - if (auto EC = Text.getError()) + if (auto EC = Text.getError()) { return make_string_error("Error reading " + ConfigFile + ": " + EC.message()); + } LLVM_DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); @@ -3555,8 +3598,9 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, auto Status = FS->status(Directory); if (!Status || - Status->getType() != llvm::sys::fs::file_type::directory_file) + Status->getType() != llvm::sys::fs::file_type::directory_file) { continue; + } for (const auto &F : FilesToLookFor) { SmallString<128> ConfigFile(Directory); @@ -3608,10 +3652,11 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, } } } - if (!UnsuitableConfigFiles.empty()) + if (!UnsuitableConfigFiles.empty()) { return make_string_error("Configuration file(s) do(es) not support " + getLanguageName(Style.Language) + ": " + UnsuitableConfigFiles); + } if (!ChildFormatTextToApply.empty()) { LLVM_DEBUG(llvm::dbgs() diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 5577918d709275..2c0fee6975c2bc 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -106,8 +106,9 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State, State.NextToken->Previous->getPreviousNonComment(); if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) || - LBrace->Next->is(TT_DesignatedInitializerPeriod)) + LBrace->Next->is(TT_DesignatedInitializerPeriod)) { return 0; + } // Calculate the number of code points we have to format this list. As the // first token is already placed, we have to subtract it. 
@@ -172,15 +173,17 @@ static unsigned CodePointsBetween(const FormatToken *Begin, void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // FIXME: At some point we might want to do this for other lists, too. if (!Token->MatchingParen || - !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) + !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { return; + } // In C++11 braced list style, we should not format in columns unless they // have many items (20 or more) or we allow bin-packing of function call // arguments. if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && - Commas.size() < 19) + Commas.size() < 19) { return; + } // Limit column layout for JavaScript array initializers to 20 or more items // for now to introduce it carefully. We can become more aggressive if this @@ -238,8 +241,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // Consume trailing comments so the are included in EndOfLineItemLength. if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && - ItemEnd->Next->isTrailingComment()) + ItemEnd->Next->isTrailingComment()) { ItemEnd = ItemEnd->Next; + } } EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); // If there is a trailing comma in the list, the next item will start at the @@ -300,8 +304,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) return true; return false; - }()) + }()) { continue; + } // Ignore layouts that are bound to violate the column limit. 
if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 187b30fd55a7e0..214a52780888b6 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -85,10 +85,11 @@ ArrayRef FormatTokenLexer::lex() { if (Style.Language == FormatStyle::LK_TextProto) tryParsePythonComment(); tryMergePreviousTokens(); - if (Style.isCSharp()) + if (Style.isCSharp()) { // This needs to come after tokens have been merged so that C# // string literals are correctly identified. handleCSharpVerbatimAndInterpolatedStrings(); + } if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); @@ -335,8 +336,9 @@ bool FormatTokenLexer::tryMergeNullishCoalescingEqual() { auto &NullishCoalescing = *(Tokens.end() - 2); auto &Equal = *(Tokens.end() - 1); if (NullishCoalescing->getType() != TT_NullCoalescingOperator || - !Equal->is(tok::equal)) + !Equal->is(tok::equal)) { return false; + } NullishCoalescing->Tok.setKind(tok::equal); // no '??=' in clang tokens. NullishCoalescing->TokenText = StringRef(NullishCoalescing->TokenText.begin(), @@ -577,8 +579,9 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() { // Deal with multiline strings. 
if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") || - CSharpStringLiteral->TokenText.startswith(R"($@")"))) + CSharpStringLiteral->TokenText.startswith(R"($@")"))) { return; + } const char *StrBegin = Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size(); @@ -792,13 +795,14 @@ bool FormatTokenLexer::tryMergeConflictMarkers() { LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); TokenType Type = TT_Unknown; - if (LineStart == "<<<<<<<" || LineStart == ">>>>") + if (LineStart == "<<<<<<<" || LineStart == ">>>>") { Type = TT_ConflictStart; - else if (LineStart == "|||||||" || LineStart == "=======" || - LineStart == "====") + } else if (LineStart == "|||||||" || LineStart == "=======" || + LineStart == "====") { Type = TT_ConflictAlternative; - else if (LineStart == ">>>>>>>" || LineStart == "<<<<") + } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { Type = TT_ConflictEnd; + } if (Type != TT_Unknown) { FormatToken *Next = Tokens.back(); @@ -945,12 +949,13 @@ FormatToken *FormatTokenLexer::getNextToken() { while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') { unsigned SkippedWhitespace = 0; if (FormatTok->TokenText.size() > 2 && - (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n')) + (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n')) { SkippedWhitespace = 3; - else if (FormatTok->TokenText[1] == '\n') + } else if (FormatTok->TokenText[1] == '\n') { SkippedWhitespace = 2; - else + } else { break; + } ++FormatTok->NewlinesBefore; WhitespaceLength += SkippedWhitespace; @@ -1063,18 +1068,21 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) { if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && - Tok.is(tok::char_constant)) + Tok.is(tok::char_constant)) { Tok.Tok.setKind(tok::string_literal); + } if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || - Tok.TokenText == "/* clang-format 
on */")) + Tok.TokenText == "/* clang-format on */")) { FormattingDisabled = false; + } Tok.Finalized = FormattingDisabled; if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || - Tok.TokenText == "/* clang-format off */")) + Tok.TokenText == "/* clang-format off */")) { FormattingDisabled = true; + } } void FormatTokenLexer::resetLexer(unsigned Offset) { diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp index c0b3345404decc..dd12298a57fff0 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -94,9 +94,10 @@ std::pair QualifierAlignmentFixer::analyze( if (!OriginalCode.equals(Fix.getReplacementText())) { auto Err = NonNoOpFixes.add(Fix); - if (Err) + if (Err) { llvm::errs() << "Error adding replacements : " << llvm::toString(std::move(Err)) << "\n"; + } } } return {NonNoOpFixes, 0}; @@ -108,9 +109,10 @@ static void replaceToken(const SourceManager &SourceMgr, auto Replacement = tooling::Replacement(SourceMgr, Range, NewText); auto Err = Fixes.add(Replacement); - if (Err) + if (Err) { llvm::errs() << "Error while rearranging Qualifier : " << llvm::toString(std::move(Err)) << "\n"; + } } static void removeToken(const SourceManager &SourceMgr, @@ -238,8 +240,9 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( // Move to the end of any template class members e.g. // `Foo::iterator`. if (EndTemplate->startsSequence(TT_TemplateCloser, tok::coloncolon, - tok::identifier)) + tok::identifier)) { EndTemplate = EndTemplate->Next->Next; + } } if (EndTemplate && EndTemplate->Next && !EndTemplate->Next->isOneOf(tok::equal, tok::l_paren)) { @@ -263,8 +266,9 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( // Move to the end of any template class members e.g. // `Foo::iterator`. 
if (Next && Next->startsSequence(TT_TemplateCloser, tok::coloncolon, - tok::identifier)) + tok::identifier)) { return Tok; + } assert(Next && "Missing template opener"); Next = Next->Next; } @@ -324,12 +328,14 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeLeft( } if (Tok->is(TT_TemplateOpener) && Tok->Next && (Tok->Next->is(tok::identifier) || Tok->Next->isSimpleTypeSpecifier()) && - Tok->Next->Next && Tok->Next->Next->is(QualifierType)) + Tok->Next->Next && Tok->Next->Next->is(QualifierType)) { rotateTokens(SourceMgr, Fixes, Tok->Next, Tok->Next->Next, /*Left=*/true); + } if (Tok->startsSequence(tok::identifier) && Tok->Next) { if (Tok->Previous && - Tok->Previous->isOneOf(tok::star, tok::ampamp, tok::amp)) + Tok->Previous->isOneOf(tok::star, tok::ampamp, tok::amp)) { return Tok; + } const FormatToken *Next = Tok->Next; // The case `std::Foo const` -> `const std::Foo &&` while (Next && Next->isOneOf(tok::identifier, tok::coloncolon)) @@ -422,12 +428,13 @@ LeftRightQualifierAlignmentFixer::analyze( Tok = Tok->Next) { if (Tok->is(tok::comment)) continue; - if (RightAlign) + if (RightAlign) { Tok = analyzeRight(SourceMgr, Keywords, Fixes, Tok, Qualifier, QualifierToken); - else + } else { Tok = analyzeLeft(SourceMgr, Keywords, Fixes, Tok, Qualifier, QualifierToken); + } } } return {Fixes, 0}; @@ -458,11 +465,12 @@ void QualifierAlignmentFixer::PrepareLeftRightOrdering( if (QualifierToken != tok::kw_typeof && QualifierToken != tok::identifier) Qualifiers.push_back(QualifierToken); - if (left) + if (left) { // Reverse the order for left aligned items. 
LeftOrder.insert(LeftOrder.begin(), s); - else + } else { RightOrder.push_back(s); + } } } @@ -479,9 +487,10 @@ bool LeftRightQualifierAlignmentFixer::isPossibleMacro(const FormatToken *Tok) { return false; if (!Tok->is(tok::identifier)) return false; - if (Tok->TokenText.upper() == Tok->TokenText.str()) + if (Tok->TokenText.upper() == Tok->TokenText.str()) { // T,K,U,V likely could be template arguments return (Tok->TokenText.size() != 1); + } return false; } diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index 5e7976f40c5250..c9de4868bf84b8 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -107,12 +107,13 @@ bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) { if (LHS.Category != RHS.Category) return LHS.Category < RHS.Category; if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT || - LHS.Category == JsModuleReference::ReferenceCategory::ALIAS) + LHS.Category == JsModuleReference::ReferenceCategory::ALIAS) { // Side effect imports and aliases might be ordering sensitive. Consider // them equal so that they maintain their relative order in the stable sort // below. This retains transitivity because LHS.Category == RHS.Category // here. return false; + } // Empty URLs sort *last* (for export {...};). if (LHS.URL.empty() != RHS.URL.empty()) return LHS.URL.empty() < RHS.URL.empty(); @@ -171,8 +172,9 @@ class JavaScriptImportSorter : public TokenAnalyzer { // in a single group. if (!Reference.IsExport && (Reference.IsExport != References[I + 1].IsExport || - Reference.Category != References[I + 1].Category)) + Reference.Category != References[I + 1].Category)) { ReferencesText += "\n"; + } } } llvm::StringRef PreviousText = getSourceText(InsertionPoint); @@ -193,8 +195,10 @@ class JavaScriptImportSorter : public TokenAnalyzer { // Separate references from the main code body of the file. 
if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2 && !(FirstNonImportLine->First->is(tok::comment) && - FirstNonImportLine->First->TokenText.trim() == "// clang-format on")) + FirstNonImportLine->First->TokenText.trim() == + "// clang-format on")) { ReferencesText += "\n"; + } LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n" << PreviousText << "\nwith:\n" @@ -389,11 +393,12 @@ class JavaScriptImportSorter : public TokenAnalyzer { Current = Current->Next; } skipComments(); - if (Start.isInvalid() || References.empty()) + if (Start.isInvalid() || References.empty()) { // After the first file level comment, consider line comments to be part // of the import that immediately follows them by using the previously // set Start. Start = Line->First->Tok.getLocation(); + } if (!Current) { // Only comments on this line. Could be the first non-import line. FirstNonImportLine = Line; @@ -462,13 +467,14 @@ class JavaScriptImportSorter : public TokenAnalyzer { // URL = TokenText without the quotes. Reference.URL = Current->TokenText.substr(1, Current->TokenText.size() - 2); - if (Reference.URL.startswith("..")) + if (Reference.URL.startswith("..")) { Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE_PARENT; - else if (Reference.URL.startswith(".")) + } else if (Reference.URL.startswith(".")) { Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; - else + } else { Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE; + } } return true; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 7f1c4b8e61b6bb..dbd503d25862b0 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -54,8 +54,9 @@ static bool isLambdaParameterList(const FormatToken *Left) { // Skip <...> if present. 
if (Left->Previous && Left->Previous->is(tok::greater) && Left->Previous->MatchingParen && - Left->Previous->MatchingParen->is(TT_TemplateOpener)) + Left->Previous->MatchingParen->is(TT_TemplateOpener)) { Left = Left->Previous->MatchingParen; + } // Check for `[...]`. return Left->Previous && Left->Previous->is(tok::r_square) && @@ -98,8 +99,10 @@ class AnnotatingParser { return false; if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 && (!Previous.Previous->MatchingParen || - !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen))) + !Previous.Previous->MatchingParen->is( + TT_OverloadedOperatorLParen))) { return false; + } } FormatToken *Left = CurrentToken->Previous; @@ -117,8 +120,9 @@ class AnnotatingParser { Contexts.back().ContextType = Context::TemplateArgument; if (Style.Language == FormatStyle::LK_Java && - CurrentToken->is(tok::question)) + CurrentToken->is(tok::question)) { next(); + } while (CurrentToken) { if (CurrentToken->is(tok::greater)) { @@ -133,8 +137,9 @@ class AnnotatingParser { (isKeywordWithCondition(*Line.First) || CurrentToken->getStartOfNonWhitespace() == CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset( - -1))) + -1))) { return false; + } Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // In TT_Proto, we must distignuish between: @@ -144,10 +149,11 @@ class AnnotatingParser { // In TT_TextProto, map does not occur. 
if (Style.Language == FormatStyle::LK_TextProto || (Style.Language == FormatStyle::LK_Proto && Left->Previous && - Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) + Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { CurrentToken->setType(TT_DictLiteral); - else + } else { CurrentToken->setType(TT_TemplateCloser); + } next(); return true; } @@ -159,8 +165,9 @@ class AnnotatingParser { if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext && !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto && - Style.Language != FormatStyle::LK_TextProto)) + Style.Language != FormatStyle::LK_TextProto)) { return false; + } // If a && or || is found and interpreted as a binary operator, this set // of angles is likely part of something like "a < b && c > d". If the // angles are inside an expression, the ||/&& might also be a binary @@ -170,15 +177,17 @@ class AnnotatingParser { if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && CurrentToken->Previous->is(TT_BinaryOperator) && Contexts[Contexts.size() - 2].IsExpression && - !Line.startsWith(tok::kw_template)) + !Line.startsWith(tok::kw_template)) { return false; + } updateParameterCount(Left, CurrentToken); if (Style.Language == FormatStyle::LK_Proto) { if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) { if (CurrentToken->is(tok::colon) || (CurrentToken->isOneOf(tok::l_brace, tok::less) && - Previous->isNot(tok::colon))) + Previous->isNot(tok::colon))) { Previous->setType(TT_SelectorName); + } } } if (!consumeToken()) @@ -232,8 +241,9 @@ class AnnotatingParser { if (FormatToken *MaybeSel = OpeningParen.Previous) { // @selector( starts a selector. 
if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && - MaybeSel->Previous->is(tok::at)) + MaybeSel->Previous->is(tok::at)) { StartsObjCMethodExpr = true; + } } if (OpeningParen.is(TT_OverloadedOperatorLParen)) { @@ -356,8 +366,9 @@ class AnnotatingParser { if (CurrentToken->Previous->is(TT_PointerOrReference) && CurrentToken->Previous->Previous->isOneOf(tok::l_paren, - tok::coloncolon)) + tok::coloncolon)) { ProbablyFunctionType = true; + } if (CurrentToken->is(tok::comma)) MightBeFunctionType = false; if (CurrentToken->Previous->is(TT_BinaryOperator)) @@ -366,10 +377,12 @@ class AnnotatingParser { if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next && (CurrentToken->Next->is(tok::l_paren) || - (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration))) + (CurrentToken->Next->is(tok::l_square) && + Line.MustBeDeclaration))) { OpeningParen.setType(OpeningParen.Next->is(tok::caret) ? TT_ObjCBlockLParen : TT_FunctionTypeLParen); + } OpeningParen.MatchingParen = CurrentToken; CurrentToken->MatchingParen = &OpeningParen; @@ -379,10 +392,12 @@ class AnnotatingParser { // function bodies, e.g.: // auto my_lambda = MACRO((Type *type, int i) { .. body .. 
}); for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken; - Tok = Tok->Next) + Tok = Tok->Next) { if (Tok->is(TT_BinaryOperator) && - Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) + Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) { Tok->setType(TT_PointerOrReference); + } + } } if (StartsObjCMethodExpr) { @@ -398,14 +413,17 @@ class AnnotatingParser { if (OpeningParen.is(TT_TypeDeclarationParen)) CurrentToken->setType(TT_TypeDeclarationParen); if (OpeningParen.Previous && - OpeningParen.Previous->is(TT_JavaAnnotation)) + OpeningParen.Previous->is(TT_JavaAnnotation)) { CurrentToken->setType(TT_JavaAnnotation); + } if (OpeningParen.Previous && - OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) + OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) { CurrentToken->setType(TT_LeadingJavaAnnotation); + } if (OpeningParen.Previous && - OpeningParen.Previous->is(TT_AttributeSquare)) + OpeningParen.Previous->is(TT_AttributeSquare)) { CurrentToken->setType(TT_AttributeSquare); + } if (!HasMultipleLines) OpeningParen.setPackingKind(PPK_Inconclusive); @@ -424,16 +442,18 @@ class AnnotatingParser { OpeningParen.setType(TT_Unknown); if (CurrentToken->is(tok::comma) && CurrentToken->Next && !CurrentToken->Next->HasUnescapedNewline && - !CurrentToken->Next->isTrailingComment()) + !CurrentToken->Next->isTrailingComment()) { HasMultipleParametersOnALine = true; + } bool ProbablyFunctionTypeLParen = (CurrentToken->is(tok::l_paren) && CurrentToken->Next && CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret)); if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) || CurrentToken->Previous->isSimpleTypeSpecifier()) && !(CurrentToken->is(tok::l_brace) || - (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) + (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) { Contexts.back().IsExpression = false; + } if (CurrentToken->isOneOf(tok::semi, tok::colon)) { MightBeObjCForRangeLoop = false; if (PossibleObjCForInToken) { @@ -498,13 
+518,15 @@ class AnnotatingParser { // Limit this to being an access modifier that follows. if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected, tok::comment, tok::kw_class, tok::kw_static, - tok::l_square, Keywords.kw_internal)) + tok::l_square, Keywords.kw_internal)) { return true; + } // incase its a [XXX] retval func(.... if (AttrTok->Next && - AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) + AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) { return true; + } return false; } @@ -530,8 +552,9 @@ class AnnotatingParser { // [[foo(:)]]. if (AttrTok->is(tok::colon) || AttrTok->startsSequence(tok::identifier, tok::identifier) || - AttrTok->startsSequence(tok::r_paren, tok::identifier)) + AttrTok->startsSequence(tok::r_paren, tok::identifier)) { return false; + } if (AttrTok->is(tok::ellipsis)) return true; AttrTok = AttrTok->Next; @@ -670,13 +693,13 @@ class AnnotatingParser { if (CurrentToken->is(tok::r_square)) { if (IsCpp11AttributeSpecifier) CurrentToken->setType(TT_AttributeSquare); - if (IsCSharpAttributeSpecifier) + if (IsCSharpAttributeSpecifier) { CurrentToken->setType(TT_AttributeSquare); - else if (((CurrentToken->Next && - CurrentToken->Next->is(tok::l_paren)) || - (CurrentToken->Previous && - CurrentToken->Previous->Previous == Left)) && - Left->is(TT_ObjCMethodExpr)) { + } else if (((CurrentToken->Next && + CurrentToken->Next->is(tok::l_paren)) || + (CurrentToken->Previous && + CurrentToken->Previous->Previous == Left)) && + Left->is(TT_ObjCMethodExpr)) { // An ObjC method call is rarely followed by an open parenthesis. It // also can't be composed of just one token, unless it's a macro that // will be expanded to more tokens. @@ -690,8 +713,9 @@ class AnnotatingParser { // before the r_square is tagged as a selector name component. 
if (!ColonFound && CurrentToken->Previous && CurrentToken->Previous->is(TT_Unknown) && - canBeObjCSelectorComponent(*CurrentToken->Previous)) + canBeObjCSelectorComponent(*CurrentToken->Previous)) { CurrentToken->Previous->setType(TT_SelectorName); + } // determineStarAmpUsage() thinks that '*' '[' is allocating an // array of pointers, but if '[' starts a selector then '*' is a // binary operator. @@ -700,8 +724,9 @@ class AnnotatingParser { } // An arrow after an ObjC method expression is not a lambda arrow. if (CurrentToken->getType() == TT_ObjCMethodExpr && - CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow)) + CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow)) { CurrentToken->Next->overwriteFixedType(TT_Unknown); + } Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // FirstObjCSelectorName is set when a colon is found. This does @@ -741,15 +766,17 @@ class AnnotatingParser { Left->setType(TT_ObjCMethodExpr); StartsObjCMethodExpr = true; Contexts.back().ColonIsObjCMethodExpr = true; - if (Parent && Parent->is(tok::r_paren)) + if (Parent && Parent->is(tok::r_paren)) { // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. 
Parent->setType(TT_CastRParen); + } } ColonFound = true; } if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) && - !ColonFound) + !ColonFound) { Left->setType(TT_ArrayInitializerLSquare); + } FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; @@ -790,8 +817,9 @@ class AnnotatingParser { if (OpeningBrace.is(BK_BracedInit)) Contexts.back().IsExpression = true; if (Style.isJavaScript() && OpeningBrace.Previous && - OpeningBrace.Previous->is(TT_JsTypeColon)) + OpeningBrace.Previous->is(TT_JsTypeColon)) { Contexts.back().IsExpression = false; + } unsigned CommaCount = 0; while (CurrentToken) { @@ -801,8 +829,9 @@ class AnnotatingParser { CurrentToken->MatchingParen = &OpeningBrace; if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) { if (OpeningBrace.ParentBracket == tok::l_brace && - couldBeInStructArrayInitializer() && CommaCount > 0) + couldBeInStructArrayInitializer() && CommaCount > 0) { Contexts.back().ContextType = Context::StructArrayInitializer; + } } next(); return true; @@ -820,8 +849,9 @@ class AnnotatingParser { Style.Language == FormatStyle::LK_TextProto) { OpeningBrace.setType(TT_DictLiteral); if (Previous->Tok.getIdentifierInfo() || - Previous->is(tok::string_literal)) + Previous->is(tok::string_literal)) { Previous->setType(TT_SelectorName); + } } if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown)) OpeningBrace.setType(TT_DictLiteral); @@ -948,12 +978,13 @@ class AnnotatingParser { Contexts.back().LongestObjCSelectorName == 0 || UnknownIdentifierInMethodDeclaration) { Tok->Previous->setType(TT_SelectorName); - if (!Contexts.back().FirstObjCSelectorName) + if (!Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName = Tok->Previous; - else if (Tok->Previous->ColumnWidth > - Contexts.back().LongestObjCSelectorName) + } else if (Tok->Previous->ColumnWidth > + Contexts.back().LongestObjCSelectorName) { Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; + } 
Tok->Previous->ParameterIndex = Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; @@ -968,17 +999,18 @@ class AnnotatingParser { FormatToken *Prev = Tok->getPreviousNonComment(); if (!Prev) break; - if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept)) + if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept)) { Tok->setType(TT_CtorInitializerColon); - else if (Prev->is(tok::kw_try)) { + } else if (Prev->is(tok::kw_try)) { // Member initializer list within function try block. FormatToken *PrevPrev = Prev->getPreviousNonComment(); if (!PrevPrev) break; if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept)) Tok->setType(TT_CtorInitializerColon); - } else + } else { Tok->setType(TT_InheritanceColon); + } } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next && (Tok->Next->isOneOf(tok::r_paren, tok::comma) || (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next && @@ -999,8 +1031,9 @@ class AnnotatingParser { break; case tok::kw_if: if (CurrentToken && - CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) + CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) { next(); + } LLVM_FALLTHROUGH; case tok::kw_while: if (CurrentToken && CurrentToken->is(tok::l_paren)) { @@ -1013,8 +1046,9 @@ class AnnotatingParser { if (Style.isJavaScript()) { // x.for and {for: ...} if ((Tok->Previous && Tok->Previous->is(tok::period)) || - (Tok->Next && Tok->Next->is(tok::colon))) + (Tok->Next && Tok->Next->is(tok::colon))) { break; + } // JS' for await ( ... 
if (CurrentToken && CurrentToken->is(Keywords.kw_await)) next(); @@ -1046,9 +1080,11 @@ class AnnotatingParser { if (Line.MustBeDeclaration && Contexts.size() == 1 && !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) && !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen) && - (!Tok->Previous || !Tok->Previous->isOneOf(tok::kw___attribute, - TT_LeadingJavaAnnotation))) + (!Tok->Previous || + !Tok->Previous->isOneOf(tok::kw___attribute, + TT_LeadingJavaAnnotation))) { Line.MightBeFunctionDecl = true; + } break; case tok::l_square: if (!parseSquare()) @@ -1102,20 +1138,23 @@ class AnnotatingParser { break; case tok::kw_operator: if (Style.Language == FormatStyle::LK_TextProto || - Style.Language == FormatStyle::LK_Proto) + Style.Language == FormatStyle::LK_Proto) { break; + } while (CurrentToken && !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { if (CurrentToken->isOneOf(tok::star, tok::amp)) CurrentToken->setType(TT_PointerOrReference); consumeToken(); if (CurrentToken && CurrentToken->is(tok::comma) && - CurrentToken->Previous->isNot(tok::kw_operator)) + CurrentToken->Previous->isNot(tok::kw_operator)) { break; + } if (CurrentToken && CurrentToken->Previous->isOneOf( TT_BinaryOperator, TT_UnaryOperator, tok::comma, - tok::star, tok::arrow, tok::amp, tok::ampamp)) + tok::star, tok::arrow, tok::amp, tok::ampamp)) { CurrentToken->Previous->setType(TT_OverloadedOperator); + } } if (CurrentToken && CurrentToken->is(tok::l_paren)) CurrentToken->setType(TT_OverloadedOperatorLParen); @@ -1136,8 +1175,9 @@ class AnnotatingParser { // Declarations cannot be conditional expressions, this can only be part // of a type declaration. if (Line.MustBeDeclaration && !Contexts.back().IsExpression && - Style.isJavaScript()) + Style.isJavaScript()) { break; + } if (Style.isCSharp()) { // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be // nullable types. 
@@ -1176,8 +1216,9 @@ class AnnotatingParser { break; case tok::identifier: if (Tok->isOneOf(Keywords.kw___has_include, - Keywords.kw___has_include_next)) + Keywords.kw___has_include_next)) { parseHasInclude(); + } if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && Tok->Next->isNot(tok::l_paren)) { Tok->setType(TT_CSharpGenericTypeConstraint); @@ -1186,8 +1227,9 @@ class AnnotatingParser { break; case tok::arrow: if (Tok->isNot(TT_LambdaArrow) && Tok->Previous && - Tok->Previous->is(tok::kw_noexcept)) + Tok->Previous->is(tok::kw_noexcept)) { Tok->setType(TT_TrailingReturnArrow); + } break; default: break; @@ -1231,8 +1273,9 @@ class AnnotatingParser { // Mark tokens up to the trailing line comments as implicit string // literals. if (CurrentToken->isNot(tok::comment) && - !CurrentToken->TokenText.startswith("//")) + !CurrentToken->TokenText.startswith("//")) { CurrentToken->setType(TT_ImplicitStringLiteral); + } next(); } } @@ -1252,7 +1295,8 @@ class AnnotatingParser { void parsePragma() { next(); // Consume "pragma". if (CurrentToken && - CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option, Keywords.kw_region)) { + CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option, + Keywords.kw_region)) { bool IsMark = CurrentToken->is(Keywords.kw_mark); next(); next(); // Consume first token (so we fix leading whitespace). @@ -1326,11 +1370,12 @@ class AnnotatingParser { while (CurrentToken) { FormatToken *Tok = CurrentToken; next(); - if (Tok->is(tok::l_paren)) + if (Tok->is(tok::l_paren)) { parseParens(); - else if (Tok->isOneOf(Keywords.kw___has_include, - Keywords.kw___has_include_next)) + } else if (Tok->isOneOf(Keywords.kw___has_include, + Keywords.kw___has_include_next)) { parseHasInclude(); + } } return Type; } @@ -1396,8 +1441,9 @@ class AnnotatingParser { // an import in this sense. 
if (Line.First->is(tok::kw_export) && CurrentToken->is(Keywords.kw_from) && CurrentToken->Next && - CurrentToken->Next->isStringLiteral()) + CurrentToken->Next->isStringLiteral()) { ImportStatement = true; + } if (isClosureImportStatement(*CurrentToken)) ImportStatement = true; } @@ -1410,9 +1456,10 @@ class AnnotatingParser { return LT_ImportStatement; if (Line.startsWith(TT_ObjCMethodSpecifier)) { - if (Contexts.back().FirstObjCSelectorName) + if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; + } return LT_ObjCMethodDecl; } @@ -1455,8 +1502,9 @@ class AnnotatingParser { TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause, TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace, - TT_CompoundRequirementLBrace, TT_BracedListLBrace)) + TT_CompoundRequirementLBrace, TT_BracedListLBrace)) { CurrentToken->setType(TT_Unknown); + } CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; CurrentToken->FakeLParens.clear(); @@ -1570,8 +1618,9 @@ class AnnotatingParser { return false; if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_concept, - tok::kw_struct, tok::kw_using)) + tok::kw_struct, tok::kw_using)) { return false; + } return true; } @@ -1580,8 +1629,10 @@ class AnnotatingParser { // using `export type ...`. 
if (Style.isJavaScript() && (Line.startsWith(Keywords.kw_type, tok::identifier) || - Line.startsWith(tok::kw_export, Keywords.kw_type, tok::identifier))) + Line.startsWith(tok::kw_export, Keywords.kw_type, + tok::identifier))) { return false; + } return !Current.Previous || Current.Previous->isNot(tok::kw_operator); }; @@ -1602,8 +1653,9 @@ class AnnotatingParser { break; if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && Previous->isOneOf(tok::star, tok::amp, tok::ampamp) && - Previous->Previous && Previous->Previous->isNot(tok::equal)) + Previous->Previous && Previous->Previous->isNot(tok::equal)) { Previous->setType(TT_PointerOrReference); + } } } } else if (Current.is(tok::lessless) && @@ -1624,11 +1676,13 @@ class AnnotatingParser { } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); - Previous = Previous->Previous) + Previous = Previous->Previous) { Previous->setType(TT_PointerOrReference); + } if (Line.MustBeDeclaration && - Contexts.front().ContextType != Context::CtorInitializer) + Contexts.front().ContextType != Context::CtorInitializer) { Contexts.back().IsExpression = false; + } } else if (Current.is(tok::kw_new)) { Contexts.back().CanBeExpression = false; } else if (Current.is(tok::semi) || @@ -1695,8 +1749,9 @@ class AnnotatingParser { FormatToken *PriorLeadingIdentifier = LeadingIdentifier->Previous; // Skip back past explicit decoration if (PriorLeadingIdentifier && - PriorLeadingIdentifier->is(tok::kw_explicit)) + PriorLeadingIdentifier->is(tok::kw_explicit)) { PriorLeadingIdentifier = PriorLeadingIdentifier->Previous; + } return PriorLeadingIdentifier && (PriorLeadingIdentifier->is(TT_TemplateCloser) || @@ -1709,9 +1764,10 @@ class AnnotatingParser { } void determineTokenType(FormatToken &Current) { - if (!Current.is(TT_Unknown)) + if (!Current.is(TT_Unknown)) { // The token type is already known. 
return; + } if ((Style.isJavaScript() || Style.isCSharp()) && Current.is(tok::exclaim)) { @@ -1798,15 +1854,17 @@ class AnnotatingParser { Style.Language != FormatStyle::LK_TextProto)) { Current.setType(TT_BinaryOperator); } else if (Current.is(tok::comment)) { - if (Current.TokenText.startswith("/*")) - if (Current.TokenText.endswith("*/")) + if (Current.TokenText.startswith("/*")) { + if (Current.TokenText.endswith("*/")) { Current.setType(TT_BlockComment); - else + } else { // The lexer has for some reason determined a comment here. But we // cannot really handle it, if it isn't properly terminated. Current.Tok.setKind(tok::unknown); - else + } + } else { Current.setType(TT_LineComment); + } } else if (Current.is(tok::l_paren)) { if (lParenStartsCppCast(Current)) Current.setType(TT_CppCastLParen); @@ -1817,19 +1875,22 @@ class AnnotatingParser { !Current.Next->isBinaryOperator() && !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, tok::comma, tok::period, tok::arrow, - tok::coloncolon)) + tok::coloncolon)) { if (FormatToken *AfterParen = Current.MatchingParen->Next) { // Make sure this isn't the return type of an Obj-C block declaration if (AfterParen->isNot(tok::caret)) { - if (FormatToken *BeforeParen = Current.MatchingParen->Previous) + if (FormatToken *BeforeParen = Current.MatchingParen->Previous) { if (BeforeParen->is(tok::identifier) && !BeforeParen->is(TT_TypenameMacro) && BeforeParen->TokenText == BeforeParen->TokenText.upper() && (!BeforeParen->Previous || - BeforeParen->Previous->ClosesTemplateDeclaration)) + BeforeParen->Previous->ClosesTemplateDeclaration)) { Current.setType(TT_FunctionAnnotationRParen); + } + } } } + } } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() && Style.Language != FormatStyle::LK_Java) { // In Java & JavaScript, "@..." is a decorator or annotation. 
In ObjC, it @@ -1849,12 +1910,13 @@ class AnnotatingParser { } else if (Current.is(tok::period)) { FormatToken *PreviousNoComment = Current.getPreviousNonComment(); if (PreviousNoComment && - PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) + PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) { Current.setType(TT_DesignatedInitializerPeriod); - else if (Style.Language == FormatStyle::LK_Java && Current.Previous && - Current.Previous->isOneOf(TT_JavaAnnotation, - TT_LeadingJavaAnnotation)) + } else if (Style.Language == FormatStyle::LK_Java && Current.Previous && + Current.Previous->isOneOf(TT_JavaAnnotation, + TT_LeadingJavaAnnotation)) { Current.setType(Current.Previous->getType()); + } } else if (canBeObjCSelectorComponent(Current) && // FIXME(bug 36976): ObjC return types shouldn't use // TT_CastRParen. @@ -1904,8 +1966,9 @@ class AnnotatingParser { return false; if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof, - Keywords.kw_as)) + Keywords.kw_as)) { return false; + } if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in)) return false; @@ -1927,15 +1990,17 @@ class AnnotatingParser { PreviousNotConst->Previous && PreviousNotConst->Previous->is(tok::hash); - if (PreviousNotConst->is(TT_TemplateCloser)) + if (PreviousNotConst->is(TT_TemplateCloser)) { return PreviousNotConst && PreviousNotConst->MatchingParen && PreviousNotConst->MatchingParen->Previous && PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); + } if (PreviousNotConst->is(tok::r_paren) && - PreviousNotConst->is(TT_TypeDeclarationParen)) + PreviousNotConst->is(TT_TypeDeclarationParen)) { return true; + } // If is a preprocess keyword like #define. 
if (IsPPKeyword) @@ -1967,11 +2032,13 @@ class AnnotatingParser { if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) && LeftOfParens->MatchingParen) { auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment(); - if (Prev && Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast, - tok::kw_reinterpret_cast, tok::kw_static_cast)) + if (Prev && + Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast, + tok::kw_reinterpret_cast, tok::kw_static_cast)) { // FIXME: Maybe we should handle identifiers ending with "_cast", // e.g. any_cast? return true; + } } return false; } @@ -1980,8 +2047,9 @@ class AnnotatingParser { bool rParenEndsCast(const FormatToken &Tok) { // C-style casts are only used in C++, C# and Java. if (!Style.isCSharp() && !Style.isCpp() && - Style.Language != FormatStyle::LK_Java) + Style.Language != FormatStyle::LK_Java) { return false; + } // Empty parens aren't casts and there are no casts at the end of the line. if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen) @@ -1994,8 +2062,9 @@ class AnnotatingParser { if (LeftOfParens->is(tok::r_paren) && LeftOfParens->isNot(TT_CastRParen)) { if (!LeftOfParens->MatchingParen || - !LeftOfParens->MatchingParen->Previous) + !LeftOfParens->MatchingParen->Previous) { return false; + } LeftOfParens = LeftOfParens->MatchingParen->Previous; } @@ -2022,21 +2091,24 @@ class AnnotatingParser { // as a (void *foo) cast. // void operator delete(void *foo) ATTRIB; if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous && - LeftOfParens->Previous->is(tok::kw_operator)) + LeftOfParens->Previous->is(tok::kw_operator)) { return false; + } // If there is an identifier (or with a few exceptions a keyword) right // before the parentheses, this is unlikely to be a cast. 
if (LeftOfParens->Tok.getIdentifierInfo() && !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case, - tok::kw_delete)) + tok::kw_delete)) { return false; + } // Certain other tokens right before the parentheses are also signals that // this cannot be a cast. if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator, - TT_TemplateCloser, tok::ellipsis)) + TT_TemplateCloser, tok::ellipsis)) { return false; + } } if (Tok.Next->is(tok::question)) @@ -2051,8 +2123,9 @@ class AnnotatingParser { if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const, tok::kw_requires, tok::kw_throw, tok::arrow, Keywords.kw_override, Keywords.kw_final) || - isCpp11AttributeSpecifier(*Tok.Next)) + isCpp11AttributeSpecifier(*Tok.Next)) { return false; + } // As Java has no function types, a "(" after the ")" likely means that this // is a cast. @@ -2062,8 +2135,9 @@ class AnnotatingParser { // If a (non-string) literal follows, this is likely a cast. if (Tok.Next->isNot(tok::string_literal) && (Tok.Next->Tok.isLiteral() || - Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) + Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) { return true; + } // Heuristically try to determine whether the parentheses contain a type. auto IsQualifiedPointerOrReference = [](FormatToken *T) { @@ -2112,9 +2186,10 @@ class AnnotatingParser { // Certain token types inside the parentheses mean that this can't be a // cast. for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok; - Token = Token->Next) + Token = Token->Next) { if (Token->is(TT_BinaryOperator)) return false; + } // If the following token is an identifier or 'this', this is a cast. All // cases where this can be something else are handled above. @@ -2124,8 +2199,9 @@ class AnnotatingParser { // Look for a cast `( x ) (`. 
if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) { if (Tok.Previous->is(tok::identifier) && - Tok.Previous->Previous->is(tok::l_paren)) + Tok.Previous->Previous->is(tok::l_paren)) { return true; + } } if (!Tok.Next->Next) @@ -2137,13 +2213,15 @@ class AnnotatingParser { bool NextIsUnary = Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star); if (!NextIsUnary || Tok.Next->is(tok::plus) || - !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) + !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) { return false; + } // Search for unexpected tokens. for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen; - Prev = Prev->Previous) + Prev = Prev->Previous) { if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) return false; + } return true; } @@ -2163,8 +2241,9 @@ class AnnotatingParser { TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi, tok::equal, tok::question, tok::l_square, tok::l_brace, tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield, - tok::kw_delete, tok::kw_return, tok::kw_throw)) + tok::kw_delete, tok::kw_return, tok::kw_throw)) { return true; + } // We put sizeof here instead of only in determineStarAmpUsage. 
In the cases // where the unary `+` operator is overloaded, it is reasonable to write @@ -2205,8 +2284,9 @@ class AnnotatingParser { if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_noexcept) || NextToken->canBePointerOrReferenceQualifier() || - (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) + (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) { return TT_PointerOrReference; + } if (PrevToken->is(tok::coloncolon)) return TT_PointerOrReference; @@ -2226,16 +2306,18 @@ class AnnotatingParser { if (PrevToken->Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, - tok::kw_false, tok::r_brace)) + tok::kw_false, tok::r_brace)) { return TT_BinaryOperator; + } const FormatToken *NextNonParen = NextToken; while (NextNonParen && NextNonParen->is(tok::l_paren)) NextNonParen = NextNonParen->getNextNonComment(); if (NextNonParen && (NextNonParen->Tok.isLiteral() || NextNonParen->isOneOf(tok::kw_true, tok::kw_false) || - NextNonParen->isUnaryOperator())) + NextNonParen->isUnaryOperator())) { return TT_BinaryOperator; + } // If we know we're in a template argument, there are no named declarations. // Thus, having an identifier on the right-hand side indicates a binary @@ -2321,8 +2403,9 @@ class ExpressionParser { // expression. 
while (Current && (Current->is(tok::kw_return) || (Current->is(tok::colon) && - Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) + Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) { next(); + } if (!Current || Precedence > PrecedenceArrowAndPeriod) return; @@ -2364,8 +2447,9 @@ class ExpressionParser { (Current->MatchingParen || Current->is(TT_TemplateString))) || (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || (CurrentPrecedence == prec::Conditional && - Precedence == prec::Assignment && Current->is(tok::colon))) + Precedence == prec::Assignment && Current->is(tok::colon))) { break; + } // Consume scopes: (), [], <> and {} // In addition to that we handle require clauses as scope, so that the @@ -2430,8 +2514,9 @@ class ExpressionParser { (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) || ((Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && - NextNonComment->is(tok::less)))) + NextNonComment->is(tok::less)))) { return prec::Assignment; + } if (Current->is(TT_JsComputedPropertyName)) return prec::Assignment; if (Current->is(TT_LambdaArrow)) @@ -2440,24 +2525,28 @@ class ExpressionParser { return prec::Assignment; if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) || (Current->is(tok::comment) && NextNonComment && - NextNonComment->is(TT_SelectorName))) + NextNonComment->is(TT_SelectorName))) { return 0; + } if (Current->is(TT_RangeBasedForLoopColon)) return prec::Comma; if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && - Current->is(Keywords.kw_instanceof)) + Current->is(Keywords.kw_instanceof)) { return prec::Relational; + } if (Style.isJavaScript() && - Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) + Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) { return prec::Relational; + } if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) return Current->getPrecedence(); if (Current->isOneOf(tok::period, tok::arrow)) return 
PrecedenceArrowAndPeriod; if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, - Keywords.kw_throws)) + Keywords.kw_throws)) { return 0; + } } return -1; } @@ -2485,9 +2574,10 @@ class ExpressionParser { next(); } parse(PrecedenceArrowAndPeriod); - for (FormatToken *Token : llvm::reverse(Tokens)) + for (FormatToken *Token : llvm::reverse(Tokens)) { // The actual precedence doesn't matter. addFakeParenthesis(Token, prec::Unknown); + } } void parseConditionalExpr() { @@ -2511,8 +2601,9 @@ class ExpressionParser { Current = Current->Next; while (Current && (Current->NewlinesBefore == 0 || SkipPastLeadingComments) && - Current->isTrailingComment()) + Current->isTrailingComment()) { Current = Current->Next; + } } const FormatStyle &Style; @@ -2603,8 +2694,9 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, if (Next->isOneOf(tok::kw_new, tok::kw_delete)) { // For 'new[]' and 'delete[]'. if (Next->Next && - Next->Next->startsSequence(tok::l_square, tok::r_square)) + Next->Next->startsSequence(tok::l_square, tok::r_square)) { Next = Next->Next->Next; + } continue; } if (Next->startsSequence(tok::l_square, tok::r_square)) { @@ -2668,8 +2760,9 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, return true; // Empty parentheses. // If there is an &/&& after the r_paren, this is likely a function. 
if (Next->MatchingParen->Next && - Next->MatchingParen->Next->is(TT_PointerOrReference)) + Next->MatchingParen->Next->is(TT_PointerOrReference)) { return true; + } // Check for K&R C function definitions (and C++ function definitions with // unnamed parameters), e.g.: @@ -2682,8 +2775,9 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, // return !b; // } if (IsCpp && Next->Next && Next->Next->is(tok::identifier) && - !Line.endsWith(tok::semi)) + !Line.endsWith(tok::semi)) { return true; + } for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { @@ -2694,11 +2788,13 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, continue; } if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || - Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) + Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) { return true; + } if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) || - Tok->Tok.isLiteral()) + Tok->Tok.isLiteral()) { return false; + } } return false; } @@ -2709,8 +2805,9 @@ bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const { if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel || Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevelDefinitions) && - Line.Level > 0) + Line.Level > 0) { return false; + } switch (Style.AlwaysBreakAfterReturnType) { case FormatStyle::RTBS_None: @@ -2746,11 +2843,12 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { Current->setType(TT_FunctionDeclarationName); const FormatToken *Prev = Current->Previous; if (Current->is(TT_LineComment)) { - if (Prev->is(BK_BracedInit) && Prev->opensScope()) + if (Prev->is(BK_BracedInit) && Prev->opensScope()) { Current->SpacesRequiredBefore = (Style.Cpp11BracedListStyle && !Style.SpacesInParentheses) ? 
0 : 1; - else + } else { Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; + } // If we find a trailing comment, iterate backwards to determine whether // it seems to relate to a specific parameter. If so, break before that @@ -2766,8 +2864,9 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { break; if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { if (!Parameter->Previous->is(TT_CtorInitializerComma) && - Parameter->HasUnescapedNewline) + Parameter->HasUnescapedNewline) { Parameter->MustBreakBefore = true; + } break; } } @@ -2784,8 +2883,9 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { Current->MustBreakBefore = Current->MustBreakBefore || mustBreakBefore(Line, *Current); if (!Current->MustBreakBefore && InFunctionDecl && - Current->is(TT_FunctionDeclarationName)) + Current->is(TT_FunctionDeclarationName)) { Current->MustBreakBefore = mustBreakForReturnType(Line); + } } Current->CanBreakBefore = @@ -2799,11 +2899,12 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { if (Current->MustBreakBefore || Prev->Children.size() > 1 || (Prev->Children.size() == 1 && Prev->Children[0]->First->MustBreakBefore) || - Current->IsMultiline) + Current->IsMultiline) { Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; - else + } else { Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + ChildSize + Current->SpacesRequiredBefore; + } if (Current->is(TT_CtorInitializerColon)) InFunctionDecl = false; @@ -2836,8 +2937,9 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { Current->Role->precomputeFormattingInfos(Current); if (Current->MatchingParen && Current->MatchingParen->opensBlockOrBlockTypeList(Style) && - IndentLevel > 0) + IndentLevel > 0) { --IndentLevel; + } Current->IndentLevel = IndentLevel; if (Current->opensBlockOrBlockTypeList(Style)) ++IndentLevel; @@ -2924,8 +3026,9 @@ unsigned 
TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(TT_JsTypeColon)) return 35; if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || - (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) { return 100; + } // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()". if (Left.opensScope() && Right.closesScope()) return 200; @@ -2943,13 +3046,15 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 35; if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, TT_ArrayInitializerLSquare, - TT_DesignatedInitializerLSquare, TT_AttributeSquare)) + TT_DesignatedInitializerLSquare, TT_AttributeSquare)) { return 500; + } } if (Left.is(tok::coloncolon) || - (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) + (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) { return 500; + } if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || Right.is(tok::kw_operator)) { if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) @@ -2974,8 +3079,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 1000; if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, - TT_CtorInitializerColon)) + TT_CtorInitializerColon)) { return 2; + } if (Right.isMemberAccess()) { // Breaking before the "./->" of a chained call/member access is reasonably @@ -3032,17 +3138,20 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // open paren (we'll prefer breaking after the protocol list's opening // angle bracket, if present). 
if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous && - Left.Previous->isOneOf(tok::identifier, tok::greater)) + Left.Previous->isOneOf(tok::identifier, tok::greater)) { return 500; + } if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0) return Style.PenaltyBreakOpenParenthesis; if (Left.is(tok::l_paren) && InFunctionDecl && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) + Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { return 100; + } if (Left.is(tok::l_paren) && Left.Previous && - (Left.Previous->is(tok::kw_for) || Left.Previous->isIf())) + (Left.Previous->is(tok::kw_for) || Left.Previous->isIf())) { return 1000; + } if (Left.is(tok::equal) && InFunctionDecl) return 110; if (Right.is(tok::r_brace)) @@ -3054,8 +3163,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // here unless the style does not want us to place all arguments on the // next line. if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign && - (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) + (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) { return 0; + } if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle) return 19; return Left.ParameterCount > 1 ? 
Style.PenaltyBreakBeforeFirstCallParameter @@ -3068,21 +3178,25 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 60; if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous && Left.Previous->isLabelString() && - (Left.NextOperator || Left.OperatorIndex != 0)) + (Left.NextOperator || Left.OperatorIndex != 0)) { return 50; + } if (Right.is(tok::plus) && Left.isLabelString() && - (Right.NextOperator || Right.OperatorIndex != 0)) + (Right.NextOperator || Right.OperatorIndex != 0)) { return 25; + } if (Left.is(tok::comma)) return 1; if (Right.is(tok::lessless) && Left.isLabelString() && - (Right.NextOperator || Right.OperatorIndex != 1)) + (Right.NextOperator || Right.OperatorIndex != 1)) { return 25; + } if (Right.is(tok::lessless)) { // Breaking at a << is really cheap. - if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) + if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) { // Slightly prefer to break before the first one in log-like statements. return 2; + } return 1; } if (Left.ClosesTemplateDeclaration) @@ -3106,11 +3220,13 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always) return true; if (Right.is(TT_OverloadedOperatorLParen) && - Style.SpaceBeforeParensOptions.AfterOverloadedOperator) + Style.SpaceBeforeParensOptions.AfterOverloadedOperator) { return true; + } if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses && - Right.ParameterCount > 0) + Right.ParameterCount > 0) { return true; + } return false; } @@ -3118,23 +3234,26 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) const { if (Left.is(tok::kw_return) && - !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) + !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) { return true; + } if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon)) return false; if 
(Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java) return true; if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && - Left.Tok.getObjCKeywordID() == tok::objc_property) + Left.Tok.getObjCKeywordID() == tok::objc_property) { return true; + } if (Right.is(tok::hashhash)) return Left.is(tok::hash); if (Left.isOneOf(tok::hashhash, tok::hash)) return Right.is(tok::hash); if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) || (Left.is(tok::l_brace) && Left.isNot(BK_Block) && - Right.is(tok::r_brace) && Right.isNot(BK_Block))) + Right.is(tok::r_brace) && Right.isNot(BK_Block))) { return Style.SpaceInEmptyParentheses; + } if (Style.SpacesInConditionalStatement) { const FormatToken *LeftParen = nullptr; if (Left.is(tok::l_paren)) @@ -3142,8 +3261,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, else if (Right.is(tok::r_paren) && Right.MatchingParen) LeftParen = Right.MatchingParen; if (LeftParen && LeftParen->Previous && - isKeywordWithCondition(*LeftParen->Previous)) + isKeywordWithCondition(*LeftParen->Previous)) { return true; + } } // auto{x} auto(x) @@ -3152,18 +3272,21 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, // operator co_await(x) if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous && - Left.Previous->is(tok::kw_operator)) + Left.Previous->is(tok::kw_operator)) { return false; + } // co_await (x), co_yield (x), co_return (x) if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) && - !Right.isOneOf(tok::semi, tok::r_paren)) + !Right.isOneOf(tok::semi, tok::r_paren)) { return true; + } - if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) + if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) { return (Right.is(TT_CastRParen) || (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) ? 
Style.SpacesInCStyleCastParentheses : Style.SpacesInParentheses; + } if (Right.isOneOf(tok::semi, tok::comma)) return false; if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) { @@ -3179,8 +3302,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::at) && Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, tok::numeric_constant, tok::l_paren, tok::l_brace, - tok::kw_true, tok::kw_false)) + tok::kw_true, tok::kw_false)) { return false; + } if (Left.is(tok::colon)) return !Left.is(TT_ObjCMethodExpr); if (Left.is(tok::coloncolon)) @@ -3196,9 +3320,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, } return false; } - if (Right.is(tok::ellipsis)) + if (Right.is(tok::ellipsis)) { return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous && Left.Previous->is(tok::kw_case)); + } if (Left.is(tok::l_square) && Right.is(tok::amp)) return Style.SpacesInSquareBrackets; if (Right.is(TT_PointerOrReference)) { @@ -3215,15 +3340,18 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, // after pointer qualifiers. 
if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After || Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) && - (Left.is(TT_AttributeParen) || Left.canBePointerOrReferenceQualifier())) + (Left.is(TT_AttributeParen) || + Left.canBePointerOrReferenceQualifier())) { return true; + } if (Left.Tok.isLiteral()) return true; // for (auto a = 0, b = 0; const auto & c : {1, 2, 3}) if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next && - Right.Next->Next->is(TT_RangeBasedForLoopColon)) + Right.Next->Next->is(TT_RangeBasedForLoopColon)) { return getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left; + } return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) && (getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left || @@ -3234,15 +3362,17 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && (!Left.is(TT_PointerOrReference) || (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right && - !Line.IsMultiVariableDeclStmt))) + !Line.IsMultiVariableDeclStmt))) { return true; + } if (Left.is(TT_PointerOrReference)) { // Add a space if the next token is a pointer qualifier and the style // requires spaces before pointer qualifiers. 
if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before || Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) && - Right.canBePointerOrReferenceQualifier()) + Right.canBePointerOrReferenceQualifier()) { return true; + } // & 1 if (Right.Tok.isLiteral()) return true; @@ -3251,19 +3381,22 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return true; // foo() -> const Bar * override/final if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) && - !Right.is(TT_StartOfName)) + !Right.is(TT_StartOfName)) { return true; + } // & { if (Right.is(tok::l_brace) && Right.is(BK_Block)) return true; // for (auto a = 0, b = 0; const auto& c : {1, 2, 3}) if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next && - Right.Next->is(TT_RangeBasedForLoopColon)) + Right.Next->is(TT_RangeBasedForLoopColon)) { return getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right; + } if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, - tok::l_paren)) + tok::l_paren)) { return false; + } if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right) return false; // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone, @@ -3275,15 +3408,17 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Line.IsMultiVariableDeclStmt && (Left.NestingLevel == Line.First->NestingLevel || ((Left.NestingLevel == Line.First->NestingLevel + 1) && - startsWithInitStatement(Line)))) + startsWithInitStatement(Line)))) { return false; + } return Left.Previous && !Left.Previous->isOneOf( tok::l_paren, tok::coloncolon, tok::l_square); } // Ensure right pointer alignment with ellipsis e.g. 
int *...P if (Left.is(tok::ellipsis) && Left.Previous && - Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp)) + Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp)) { return Style.PointerAlignment != FormatStyle::PAS_Right; + } if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; @@ -3321,11 +3456,12 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Previous) { if (Previous->endsSequence(tok::kw_operator)) return Style.PointerAlignment != FormatStyle::PAS_Left; - if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) + if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) { return (Style.PointerAlignment != FormatStyle::PAS_Left) || (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After) || (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both); + } } } const auto SpaceRequiredForArrayInitializerLSquare = @@ -3337,13 +3473,14 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, LSquareTok.endsSequence(tok::l_square, tok::colon, TT_SelectorName)); }; - if (Left.is(tok::l_square)) + if (Left.is(tok::l_square)) { return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) && SpaceRequiredForArrayInitializerLSquare(Left, Style)) || (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare, TT_LambdaLSquare) && Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); - if (Right.is(tok::r_square)) + } + if (Right.is(tok::r_square)) { return Right.MatchingParen && ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) && SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen, @@ -3353,23 +3490,27 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, TT_StructuredBindingLSquare, TT_LambdaLSquare)) || Right.MatchingParen->is(TT_AttributeParen)); + } if (Right.is(tok::l_square) && !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, TT_DesignatedInitializerLSquare, TT_StructuredBindingLSquare, TT_AttributeSquare) && 
!Left.isOneOf(tok::numeric_constant, TT_DictLiteral) && !(!Left.is(tok::r_square) && Style.SpaceBeforeSquareBrackets && - Right.is(TT_ArraySubscriptLSquare))) + Right.is(TT_ArraySubscriptLSquare))) { return false; + } if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) return !Left.Children.empty(); // No spaces in "{}". if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) || (Right.is(tok::r_brace) && Right.MatchingParen && - Right.MatchingParen->isNot(BK_Block))) + Right.MatchingParen->isNot(BK_Block))) { return Style.Cpp11BracedListStyle ? Style.SpacesInParentheses : true; - if (Left.is(TT_BlockComment)) + } + if (Left.is(TT_BlockComment)) { // No whitespace in x(/*foo=*/1), except for JavaScript. return Style.isJavaScript() || !Left.TokenText.endswith("=*/"); + } // Space between template and attribute. // e.g. template [[nodiscard]] ... @@ -3379,30 +3520,37 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.is(tok::l_paren)) { if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen)) return spaceRequiredBeforeParens(Right); - if (Left.isOneOf(TT_RequiresClause, TT_RequiresClauseInARequiresExpression)) + if (Left.isOneOf(TT_RequiresClause, + TT_RequiresClauseInARequiresExpression)) { return Style.SpaceBeforeParensOptions.AfterRequiresInClause || spaceRequiredBeforeParens(Right); - if (Left.is(TT_RequiresExpression)) + } + if (Left.is(TT_RequiresExpression)) { return Style.SpaceBeforeParensOptions.AfterRequiresInExpression || spaceRequiredBeforeParens(Right); + } if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) || - (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) + (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) { return true; - if (Left.is(TT_ForEachMacro)) + } + if (Left.is(TT_ForEachMacro)) { return Style.SpaceBeforeParensOptions.AfterForeachMacros || spaceRequiredBeforeParens(Right); - if (Left.is(TT_IfMacro)) + } + if (Left.is(TT_IfMacro)) { return 
Style.SpaceBeforeParensOptions.AfterIfMacros || spaceRequiredBeforeParens(Right); + } if (Line.Type == LT_ObjCDecl) return true; if (Left.is(tok::semi)) return true; if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch, tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) || - Left.isIf(Line.Type != LT_PreprocessorDirective)) + Left.isIf(Line.Type != LT_PreprocessorDirective)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); + } // TODO add Operator overloading specific Options to // SpaceBeforeParensOptions @@ -3410,50 +3558,58 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return spaceRequiredBeforeParens(Right); // Function declaration or definition if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) { - if (Line.mightBeFunctionDefinition()) + if (Line.mightBeFunctionDefinition()) { return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName || spaceRequiredBeforeParens(Right); - else + } else { return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName || spaceRequiredBeforeParens(Right); + } } // Lambda if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) && - Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) + Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) { return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName || spaceRequiredBeforeParens(Right); + } if (!Left.Previous || Left.Previous->isNot(tok::period)) { - if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) + if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); - if (Left.isOneOf(tok::kw_new, tok::kw_delete)) + } + if (Left.isOneOf(tok::kw_new, tok::kw_delete)) { return ((!Line.MightBeFunctionDecl || !Left.Previous) && Style.SpaceBeforeParens != FormatStyle::SBPO_Never) || 
spaceRequiredBeforeParens(Right); + } if (Left.is(tok::r_square) && Left.MatchingParen && Left.MatchingParen->Previous && - Left.MatchingParen->Previous->is(tok::kw_delete)) + Left.MatchingParen->Previous->is(tok::kw_delete)) { return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) || spaceRequiredBeforeParens(Right); + } } // Handle builtins like identifiers. if (Line.Type != LT_PreprocessorDirective && - (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) + (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) { return spaceRequiredBeforeParens(Right); + } return false; } if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) return false; - if (Right.is(TT_UnaryOperator)) + if (Right.is(TT_UnaryOperator)) { return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); + } if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, tok::r_paren) || Left.isSimpleTypeSpecifier()) && Right.is(tok::l_brace) && Right.getNextNonComment() && - Right.isNot(BK_Block)) + Right.isNot(BK_Block)) { return false; + } if (Left.is(tok::period) || Right.is(tok::period)) return false; // u#str, U#str, L#str, u8#str @@ -3462,33 +3618,38 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, (Left.TokenText == "L" || Left.TokenText == "u" || Left.TokenText == "U" || Left.TokenText == "u8" || Left.TokenText == "LR" || Left.TokenText == "uR" || - Left.TokenText == "UR" || Left.TokenText == "u8R")) + Left.TokenText == "UR" || Left.TokenText == "u8R")) { return false; + } if (Left.is(TT_TemplateCloser) && Left.MatchingParen && Left.MatchingParen->Previous && (Left.MatchingParen->Previous->is(tok::period) || - Left.MatchingParen->Previous->is(tok::coloncolon))) + Left.MatchingParen->Previous->is(tok::coloncolon))) { // Java call to generic function with explicit type: // A.>>DoSomething(); // A::>>DoSomething(); // With a Java 8 method reference. 
return false; + } if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) return false; - if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) + if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) { // Objective-C dictionary literal -> no space after opening brace. return false; + } if (Right.is(tok::r_brace) && Right.MatchingParen && - Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) + Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) { // Objective-C dictionary literal -> no space before closing brace. return false; + } if (Right.getType() == TT_TrailingAnnotation && Right.isOneOf(tok::amp, tok::ampamp) && Left.isOneOf(tok::kw_const, tok::kw_volatile) && - (!Right.Next || Right.Next->is(tok::semi))) + (!Right.Next || Right.Next->is(tok::semi))) { // Match const and volatile ref-qualifiers without any additional // qualifiers such as // void Fn() const &; return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left; + } return true; } @@ -3517,18 +3678,21 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; // Space between `module :` and `import :`. if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) && - Right.is(TT_ModulePartitionColon)) + Right.is(TT_ModulePartitionColon)) { return true; + } // No space between import foo:bar but keep a space between import :bar; if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon)) return false; // No space between :bar; if (Left.is(TT_ModulePartitionColon) && - Right.isOneOf(tok::identifier, tok::kw_private)) + Right.isOneOf(tok::identifier, tok::kw_private)) { return false; + } if (Left.is(tok::ellipsis) && Right.is(tok::identifier) && - Line.First->is(Keywords.kw_import)) + Line.First->is(Keywords.kw_import)) { return false; + } // Space in __attribute__((attr)) ::type. 
if (Left.is(TT_AttributeParen) && Right.is(tok::coloncolon)) return true; @@ -3536,20 +3700,24 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) && - !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) + !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) { return true; + } if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) && - Right.is(TT_TemplateOpener)) + Right.is(TT_TemplateOpener)) { return true; + } } else if (Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { if (Right.is(tok::period) && Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, - Keywords.kw_repeated, Keywords.kw_extend)) + Keywords.kw_repeated, Keywords.kw_extend)) { return true; + } if (Right.is(tok::l_paren) && - Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) + Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) { return true; + } if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName)) return true; // Slashes occur in text protocol extension syntax: [type/type] { ... }. @@ -3557,8 +3725,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) && - Right.isOneOf(tok::l_brace, tok::less)) + Right.isOneOf(tok::l_brace, tok::less)) { return !Style.Cpp11BracedListStyle; + } // A percent is probably part of a formatting specification, such as %lld. if (Left.is(tok::percent)) return false; @@ -3626,11 +3795,13 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; // space between keywords and paren e.g. 
"using (" - if (Right.is(tok::l_paren)) + if (Right.is(tok::l_paren)) { if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when, - Keywords.kw_lock)) + Keywords.kw_lock)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); + } + } // space between method modifier and opening parenthesis of a tuple return // type @@ -3639,15 +3810,17 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, Keywords.kw_internal, Keywords.kw_abstract, Keywords.kw_sealed, Keywords.kw_override, Keywords.kw_async, Keywords.kw_unsafe) && - Right.is(tok::l_paren)) + Right.is(tok::l_paren)) { return true; + } } else if (Style.isJavaScript()) { if (Left.is(TT_FatArrow)) return true; // for await ( ... if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous && - Left.Previous->is(tok::kw_for)) + Left.Previous->is(tok::kw_for)) { return true; + } if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && Right.MatchingParen) { const FormatToken *Next = Right.MatchingParen->getNextNonComment(); @@ -3657,21 +3830,25 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; } if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || - (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) { return false; + } // In tagged template literals ("html`bar baz`"), there is no space between // the tag identifier and the template string. 
if (Keywords.IsJavaScriptIdentifier(Left, /* AcceptIdentifierName= */ false) && - Right.is(TT_TemplateString)) + Right.is(TT_TemplateString)) { return false; + } if (Right.is(tok::star) && - Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) + Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) { return false; + } if (Right.isOneOf(tok::l_brace, tok::l_square) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield, - Keywords.kw_extends, Keywords.kw_implements)) + Keywords.kw_extends, Keywords.kw_implements)) { return true; + } if (Right.is(tok::l_paren)) { // JS methods can use some keywords as names (e.g. `delete()`). if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo()) @@ -3679,12 +3856,14 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // Valid JS method names can include keywords, e.g. `foo.delete()` or // `bar.instanceof()`. Recognize call positions by preceding period. if (Left.Previous && Left.Previous->is(tok::period) && - Left.Tok.getIdentifierInfo()) + Left.Tok.getIdentifierInfo()) { return false; + } // Additional unary JavaScript operators that need a space after. if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof, - tok::kw_void)) + tok::kw_void)) { return true; + } } // `foo as const;` casts into a const type. 
if (Left.endsSequence(tok::kw_const, Keywords.kw_as)) @@ -3697,17 +3876,21 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, (Left.is(Keywords.kw_of) && Left.Previous && (Left.Previous->is(tok::identifier) || Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) && - (!Left.Previous || !Left.Previous->is(tok::period))) + (!Left.Previous || !Left.Previous->is(tok::period))) { return true; + } if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && - Left.Previous->is(tok::period) && Right.is(tok::l_paren)) + Left.Previous->is(tok::period) && Right.is(tok::l_paren)) { return false; + } if (Left.is(Keywords.kw_as) && - Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) + Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) { return true; + } if (Left.is(tok::kw_default) && Left.Previous && - Left.Previous->is(tok::kw_export)) + Left.Previous->is(tok::kw_export)) { return true; + } if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace)) return true; if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) @@ -3715,53 +3898,61 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator)) return false; if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) && - Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) + Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) { return false; + } if (Left.is(tok::ellipsis)) return false; if (Left.is(TT_TemplateCloser) && !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square, - Keywords.kw_implements, Keywords.kw_extends)) + Keywords.kw_implements, Keywords.kw_extends)) { // Type assertions ('expr') are not followed by whitespace. Other // locations that should have whitespace following are identified by the // above set of follower tokens. 
return false; + } if (Right.is(TT_NonNullAssertion)) return false; if (Left.is(TT_NonNullAssertion) && - Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) + Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) { return true; // "x! as string", "x! in y" + } } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) return true; - if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) + if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) { return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); + } if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected) || Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract, Keywords.kw_native)) && - Right.is(TT_TemplateOpener)) + Right.is(TT_TemplateOpener)) { return true; + } } if (Left.is(TT_ImplicitStringLiteral)) return Right.hasWhitespaceBefore(); if (Line.Type == LT_ObjCMethodDecl) { if (Left.is(TT_ObjCMethodSpecifier)) return true; - if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) + if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) { // Don't space between ')' and or ')' and 'new'. 'new' is not a // keyword in Objective-C, and '+ (instancetype)new;' is a standard class // method declaration. 
return false; + } } if (Line.Type == LT_ObjCProperty && - (Right.is(tok::equal) || Left.is(tok::equal))) + (Right.is(tok::equal) || Left.is(tok::equal))) { return false; + } if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) || - Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) + Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) { return true; + } if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen)) return true; if (Right.is(tok::comma)) @@ -3773,11 +3964,13 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon) return false; if (Right.is(TT_RangeBasedForLoopColon) && - !Style.SpaceBeforeRangeBasedForLoopColon) + !Style.SpaceBeforeRangeBasedForLoopColon) { return false; - if (Left.is(TT_BitFieldColon)) + } + if (Left.is(TT_BitFieldColon)) { return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both || Style.BitFieldColonSpacing == FormatStyle::BFCS_After; + } if (Right.is(tok::colon)) { if (Line.First->isOneOf(tok::kw_default, tok::kw_case)) return Style.SpaceBeforeCaseColon; @@ -3796,17 +3989,19 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; if (Right.is(TT_CSharpNamedArgumentColon)) return false; - if (Right.is(TT_BitFieldColon)) + if (Right.is(TT_BitFieldColon)) { return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both || Style.BitFieldColonSpacing == FormatStyle::BFCS_Before; + } return true; } // Do not merge "- -" into "--". if ((Left.isOneOf(tok::minus, tok::minusminus) && Right.isOneOf(tok::minus, tok::minusminus)) || (Left.isOneOf(tok::plus, tok::plusplus) && - Right.isOneOf(tok::plus, tok::plusplus))) + Right.isOneOf(tok::plus, tok::plusplus))) { return true; + } if (Left.is(TT_UnaryOperator)) { if (!Right.is(tok::l_paren)) { // The alternative operators for ~ and ! are "compl" and "not". 
@@ -3827,9 +4022,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // If the next token is a binary operator or a selector name, we have // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly. - if (Left.is(TT_CastRParen)) + if (Left.is(TT_CastRParen)) { return Style.SpaceAfterCStyleCast || Right.isOneOf(TT_BinaryOperator, TT_SelectorName); + } auto ShouldAddSpacesInAngles = [this, &Right]() { if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always) @@ -3841,29 +4037,34 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(tok::greater) && Right.is(tok::greater)) { if (Style.Language == FormatStyle::LK_TextProto || - (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) + (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) { return !Style.Cpp11BracedListStyle; + } return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && ((Style.Standard < FormatStyle::LS_Cpp11) || ShouldAddSpacesInAngles()); } if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) || Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || - (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) + (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) { return false; + } if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) && - Right.getPrecedence() == prec::Assignment) + Right.getPrecedence() == prec::Assignment) { return false; + } if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) && - (Left.is(tok::identifier) || Left.is(tok::kw_this))) + (Left.is(tok::identifier) || Left.is(tok::kw_this))) { return false; - if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) + } + if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) { // Generally don't remove existing spaces between an identifier and "::". 
// The identifier might actually be a macro name such as ALWAYS_INLINE. If // this turns out to be too lenient, add analysis of the identifier itself. return Right.hasWhitespaceBefore(); + } if (Right.is(tok::coloncolon) && - !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) + !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) { // Put a space between < and :: in vector< ::std::string > return (Left.is(TT_TemplateOpener) && ((Style.Standard < FormatStyle::LS_Cpp11) || @@ -3872,26 +4073,33 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, tok::kw___super, TT_TemplateOpener, TT_TemplateCloser)) || (Left.is(tok::l_paren) && Style.SpacesInParentheses); + } if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return ShouldAddSpacesInAngles(); // Space before TT_StructuredBindingLSquare. - if (Right.is(TT_StructuredBindingLSquare)) + if (Right.is(TT_StructuredBindingLSquare)) { return !Left.isOneOf(tok::amp, tok::ampamp) || getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right; + } // Space before & or && following a TT_StructuredBindingLSquare. 
if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) && - Right.isOneOf(tok::amp, tok::ampamp)) + Right.isOneOf(tok::amp, tok::ampamp)) { return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left; + } if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && - !Right.is(tok::r_paren))) + !Right.is(tok::r_paren))) { return true; + } if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && - Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) + Left.MatchingParen && + Left.MatchingParen->is(TT_OverloadedOperatorLParen)) { return false; + } if (Right.is(tok::less) && Left.isNot(tok::l_paren) && - Line.startsWith(tok::hash)) + Line.startsWith(tok::hash)) { return true; + } if (Right.is(TT_TrailingUnaryOperator)) return false; if (Left.is(TT_RegexLiteral)) @@ -3940,51 +4148,59 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Style.isCSharp()) { if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) && - Style.BraceWrapping.AfterFunction) + Style.BraceWrapping.AfterFunction) { return true; + } if (Right.is(TT_CSharpNamedArgumentColon) || - Left.is(TT_CSharpNamedArgumentColon)) + Left.is(TT_CSharpNamedArgumentColon)) { return false; + } if (Right.is(TT_CSharpGenericTypeConstraint)) return true; if (Right.Next && Right.Next->is(TT_FatArrow) && (Right.is(tok::numeric_constant) || - (Right.is(tok::identifier) && Right.TokenText == "_"))) + (Right.is(tok::identifier) && Right.TokenText == "_"))) { return true; + } // Break after C# [...] and before public/protected/private/internal. if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) && (Right.isAccessSpecifier(/*ColonRequired=*/false) || - Right.is(Keywords.kw_internal))) + Right.is(Keywords.kw_internal))) { return true; + } // Break between ] and [ but only when there are really 2 attributes. 
if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) && - Left.is(tok::r_square) && Right.is(tok::l_square)) + Left.is(tok::r_square) && Right.is(tok::l_square)) { return true; + } } else if (Style.isJavaScript()) { // FIXME: This might apply to other languages and token kinds. if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous && - Left.Previous->is(tok::string_literal)) + Left.Previous->is(tok::string_literal)) { return true; + } if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 && Left.Previous && Left.Previous->is(tok::equal) && Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export, tok::kw_const) && // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match // above. - !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) + !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) { // Object literals on the top level of a file are treated as "enum-style". // Each key/value pair is put on a separate line, instead of bin-packing. return true; + } if (Left.is(tok::l_brace) && Line.Level == 0 && (Line.startsWith(tok::kw_enum) || Line.startsWith(tok::kw_const, tok::kw_enum) || Line.startsWith(tok::kw_export, tok::kw_enum) || - Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) + Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) { // JavaScript top-level enum key/value pairs are put on separate lines // instead of bin-packing. return true; + } if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous && Left.Previous->is(TT_FatArrow)) { // JS arrow function (=> {...}). @@ -4005,17 +4221,19 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, } if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && - !Left.Children.empty()) + !Left.Children.empty()) { // Support AllowShortFunctionsOnASingleLine for JavaScript. 
return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty || (Left.NestingLevel == 0 && Line.Level == 0 && Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly); + } } else if (Style.Language == FormatStyle::LK_Java) { if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && - Right.Next->is(tok::string_literal)) + Right.Next->is(tok::string_literal)) { return true; + } } else if (Style.Language == FormatStyle::LK_Cpp || Style.Language == FormatStyle::LK_ObjC || Style.Language == FormatStyle::LK_Proto || @@ -4036,8 +4254,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, // [ // ] if (Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) && - !Right.is(tok::r_square)) + !Right.is(tok::r_square)) { return true; + } // Always break after successive entries. // 1, // 2 @@ -4053,28 +4272,33 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, const FormatToken *BeforeClosingBrace = nullptr; if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || (Style.isJavaScript() && Left.is(tok::l_paren))) && - Left.isNot(BK_Block) && Left.MatchingParen) + Left.isNot(BK_Block) && Left.MatchingParen) { BeforeClosingBrace = Left.MatchingParen->Previous; - else if (Right.MatchingParen && - (Right.MatchingParen->isOneOf(tok::l_brace, - TT_ArrayInitializerLSquare) || - (Style.isJavaScript() && Right.MatchingParen->is(tok::l_paren)))) + } else if (Right.MatchingParen && + (Right.MatchingParen->isOneOf(tok::l_brace, + TT_ArrayInitializerLSquare) || + (Style.isJavaScript() && + Right.MatchingParen->is(tok::l_paren)))) { BeforeClosingBrace = &Left; + } if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || - BeforeClosingBrace->isTrailingComment())) + BeforeClosingBrace->isTrailingComment())) { return true; + } } - if (Right.is(tok::comment)) + if (Right.is(tok::comment)) { return Left.isNot(BK_BracedInit) && 
Left.isNot(TT_CtorInitializerColon) && (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); + } if (Left.isTrailingComment()) return true; if (Left.IsUnterminatedLiteral) return true; if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) && - Right.Next->is(tok::string_literal)) + Right.Next->is(tok::string_literal)) { return true; + } if (Right.is(TT_RequiresClause)) { switch (Style.RequiresClausePosition) { case FormatStyle::RCPS_OwnLine: @@ -4105,29 +4329,36 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, } if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) { if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon && - (Left.is(TT_CtorInitializerComma) || Right.is(TT_CtorInitializerColon))) + (Left.is(TT_CtorInitializerComma) || + Right.is(TT_CtorInitializerColon))) { return true; + } if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon && - Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) + Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) { return true; + } } if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine && Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && - Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) + Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) { return true; + } // Break only if we have multiple inheritance. if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma && - Right.is(TT_InheritanceComma)) + Right.is(TT_InheritanceComma)) { return true; + } if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma && - Left.is(TT_InheritanceComma)) + Left.is(TT_InheritanceComma)) { return true; - if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) + } + if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) { // Multiline raw string literals are special wrt. line breaks. 
The author // has made a deliberate choice and might have aligned the contents of the // string literal accordingly. Thus, we try keep existing line breaks. return Right.IsMultiline && Right.NewlinesBefore > 0; + } if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous && Left.Previous->is(tok::equal))) && Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) { @@ -4147,28 +4378,32 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Style.BraceWrapping.AfterEnum) { if (Line.startsWith(tok::kw_enum) || - Line.startsWith(tok::kw_typedef, tok::kw_enum)) + Line.startsWith(tok::kw_typedef, tok::kw_enum)) { return true; + } // Ensure BraceWrapping for `public enum A {`. if (AccessSpecifier && FirstNonComment->Next && - FirstNonComment->Next->is(tok::kw_enum)) + FirstNonComment->Next->is(tok::kw_enum)) { return true; + } } // Ensure BraceWrapping for `public interface A {`. if (Style.BraceWrapping.AfterClass && ((AccessSpecifier && FirstNonComment->Next && FirstNonComment->Next->is(Keywords.kw_interface)) || - Line.startsWith(Keywords.kw_interface))) + Line.startsWith(Keywords.kw_interface))) { return true; + } return (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); } if (Left.is(TT_ObjCBlockLBrace) && - Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) + Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) { return true; + } // Ensure wrapping after __attribute__((XX)) and @interface etc. 
if (Left.is(TT_AttributeParen) && Right.is(TT_ObjCDecl)) @@ -4176,26 +4411,30 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Left.is(TT_LambdaLBrace)) { if (IsFunctionArgument(Left) && - Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) + Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) { return false; + } if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None || Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline || (!Left.Children.empty() && - Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) + Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) { return true; + } } if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) && - Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) + Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) { return true; + } // Put multiple Java annotation on a new line. if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && Left.is(TT_LeadingJavaAnnotation) && Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && - (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) + (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) { return true; + } if (Right.is(TT_ProtoExtensionLSquare)) return true; @@ -4318,8 +4557,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // Language-specific stuff. if (Style.isCSharp()) { if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) || - Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) + Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) { return false; + } // Only break after commas for generic type constraints. 
if (Line.First->is(TT_CSharpGenericTypeConstraint)) return Left.is(TT_CSharpGenericTypeConstraintComma); @@ -4328,11 +4568,13 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; } else if (Style.Language == FormatStyle::LK_Java) { if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, - Keywords.kw_implements)) + Keywords.kw_implements)) { return false; + } if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, - Keywords.kw_implements)) + Keywords.kw_implements)) { return true; + } } else if (Style.isJavaScript()) { const FormatToken *NonComment = Right.getPreviousNonComment(); if (NonComment && @@ -4342,16 +4584,19 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected, Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set, Keywords.kw_async, - Keywords.kw_await)) + Keywords.kw_await)) { return false; // Otherwise automatic semicolon insertion would trigger. + } if (Right.NestingLevel == 0 && (Left.Tok.getIdentifierInfo() || Left.isOneOf(tok::r_square, tok::r_paren)) && - Right.isOneOf(tok::l_square, tok::l_paren)) + Right.isOneOf(tok::l_square, tok::l_paren)) { return false; // Otherwise automatic semicolon insertion would trigger. 
+ } if (NonComment && NonComment->is(tok::identifier) && - NonComment->TokenText == "asserts") + NonComment->TokenText == "asserts") { return false; + } if (Left.is(TT_FatArrow) && Right.is(tok::l_brace)) return false; if (Left.is(TT_JsTypeColon)) @@ -4394,13 +4639,15 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Right.isOneOf(Keywords.kw_module, tok::kw_namespace, Keywords.kw_function, tok::kw_class, tok::kw_enum, Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, - Keywords.kw_let, tok::kw_const)) + Keywords.kw_let, tok::kw_const)) { // See grammar for 'declare' statements at: // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10 return false; + } if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && - Right.isOneOf(tok::identifier, tok::string_literal)) + Right.isOneOf(tok::identifier, tok::string_literal)) { return false; // must not break in "module foo { ...}" + } if (Right.is(TT_TemplateString) && Right.closesScope()) return false; // Don't split tagged template literal so there is a break between the tag @@ -4417,17 +4664,19 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) return !Right.is(tok::l_paren); - if (Right.is(TT_PointerOrReference)) + if (Right.is(TT_PointerOrReference)) { return Line.IsMultiVariableDeclStmt || (getTokenPointerOrReferenceAlignment(Right) == FormatStyle::PAS_Right && (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); + } if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || - Right.is(tok::kw_operator)) + Right.is(tok::kw_operator)) { return true; + } if (Left.is(TT_PointerOrReference)) return false; - if (Right.isTrailingComment()) + if (Right.isTrailingComment()) { // We rely on MustBreakBefore being set correctly here as we should not // change the "binding" behavior of a comment. 
// The first comment in a braced lists is always interpreted as belonging to @@ -4436,6 +4685,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Left.is(BK_BracedInit) || (Left.is(TT_CtorInitializerColon) && Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon); + } if (Left.is(tok::question) && Right.is(tok::colon)) return false; if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) @@ -4447,12 +4697,14 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(TT_InheritanceColon)) return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon; if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && - Left.isNot(TT_SelectorName)) + Left.isNot(TT_SelectorName)) { return true; + } if (Right.is(tok::colon) && - !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) + !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) { return false; + } if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { if (Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { @@ -4484,17 +4736,20 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // repeated: [ ... ] if (((Right.is(tok::l_brace) || Right.is(tok::less)) && Right.is(TT_DictLiteral)) || - Right.is(TT_ArrayInitializerLSquare)) + Right.is(TT_ArrayInitializerLSquare)) { return false; + } } return true; } if (Right.is(tok::r_square) && Right.MatchingParen && - Right.MatchingParen->is(TT_ProtoExtensionLSquare)) + Right.MatchingParen->is(TT_ProtoExtensionLSquare)) { return false; + } if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && - Right.Next->is(TT_ObjCMethodExpr))) + Right.Next->is(TT_ObjCMethodExpr))) { return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls. 
+ } if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) return true; if (Right.is(tok::kw_concept)) @@ -4506,8 +4761,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Left.ClosesRequiresClause) return true; if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, - TT_OverloadedOperator)) + TT_OverloadedOperator)) { return false; + } if (Left.is(TT_RangeBasedForLoopColon)) return true; if (Right.is(TT_RangeBasedForLoopColon)) @@ -4515,36 +4771,44 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener)) return true; if ((Left.is(tok::greater) && Right.is(tok::greater)) || - (Left.is(tok::less) && Right.is(tok::less))) + (Left.is(tok::less) && Right.is(tok::less))) { return false; + } if (Right.is(TT_BinaryOperator) && Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || - Right.getPrecedence() != prec::Assignment)) + Right.getPrecedence() != prec::Assignment)) { return true; + } if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || - Left.is(tok::kw_operator)) + Left.is(tok::kw_operator)) { return false; + } if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && - Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) + Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) { return false; + } if (Left.is(tok::equal) && Right.is(tok::l_brace) && - !Style.Cpp11BracedListStyle) + !Style.Cpp11BracedListStyle) { return false; + } if (Left.is(tok::l_paren) && - Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen)) + Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen)) { return false; + } if (Left.is(tok::l_paren) && Left.Previous && - (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) + (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) { return false; + } if (Right.is(TT_ImplicitStringLiteral)) return false; if 
(Right.is(TT_TemplateCloser)) return false; if (Right.is(tok::r_square) && Right.MatchingParen && - Right.MatchingParen->is(TT_LambdaLSquare)) + Right.MatchingParen->is(TT_LambdaLSquare)) { return false; + } // We only break before r_brace if there was a corresponding break before // the l_brace, which is tracked by BreakBeforeClosingBrace. @@ -4554,21 +4818,24 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // We only break before r_paren if we're in a block indented context. if (Right.is(tok::r_paren)) { if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent || - !Right.MatchingParen) + !Right.MatchingParen) { return false; + } const FormatToken *Previous = Right.MatchingParen->Previous; return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf())); } // Allow breaking after a trailing annotation, e.g. after a method // declaration. - if (Left.is(TT_TrailingAnnotation)) + if (Left.is(TT_TrailingAnnotation)) { return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, tok::less, tok::coloncolon); + } if (Right.is(tok::kw___attribute) || - (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) + (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) { return !Left.is(TT_AttributeSquare); + } if (Left.is(tok::identifier) && Right.is(tok::string_literal)) return true; @@ -4581,17 +4848,21 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(TT_CtorInitializerColon)) return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon; if (Left.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) { return false; + } if (Right.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) { return true; + } if (Left.is(TT_InheritanceComma) && - 
Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) + Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) { return false; + } if (Right.is(TT_InheritanceComma) && - Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) + Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) { return true; + } if (Left.is(TT_ArrayInitializerLSquare)) return true; if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) @@ -4600,11 +4871,13 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, !Left.isOneOf(tok::arrowstar, tok::lessless) && Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || - Left.getPrecedence() == prec::Assignment)) + Left.getPrecedence() == prec::Assignment)) { return true; + } if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) || - (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) + (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) { return false; + } auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine; if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) { diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index f92c36b449ae5f..b8a535b8d527f5 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -92,8 +92,9 @@ class LevelIndentTracker { if (static_cast(LevelIndent) - Offset >= 0) LevelIndent -= Offset; if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) && - !Line.InPPDirective) + !Line.InPPDirective) { IndentForLevel[Line.Level] = LevelIndent; + } } private: @@ -103,25 +104,30 @@ class LevelIndentTracker { /// characters to the left from their level. 
int getIndentOffset(const FormatToken &RootToken) { if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || - Style.isCSharp()) + Style.isCSharp()) { return 0; + } auto IsAccessModifier = [this, &RootToken]() { - if (RootToken.isAccessSpecifier(Style.isCpp())) + if (RootToken.isAccessSpecifier(Style.isCpp())) { return true; - else if (RootToken.isObjCAccessSpecifier()) + } else if (RootToken.isObjCAccessSpecifier()) { return true; + } // Handle Qt signals. else if ((RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && - RootToken.Next && RootToken.Next->is(tok::colon))) + RootToken.Next && RootToken.Next->is(tok::colon))) { return true; - else if (RootToken.Next && - RootToken.Next->isOneOf(Keywords.kw_slots, Keywords.kw_qslots) && - RootToken.Next->Next && RootToken.Next->Next->is(tok::colon)) + } else if (RootToken.Next && + RootToken.Next->isOneOf(Keywords.kw_slots, + Keywords.kw_qslots) && + RootToken.Next->Next && RootToken.Next->Next->is(tok::colon)) { return true; + } // Handle malformed access specifier e.g. 'private' without trailing ':'. - else if (!RootToken.Next && RootToken.isAccessSpecifier(false)) + else if (!RootToken.Next && RootToken.isAccessSpecifier(false)) { return true; + } return false; }; @@ -205,12 +211,13 @@ class LineJoiner { const AnnotatedLine *Current = *Next; IndentTracker.nextLine(*Current); unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End); - if (MergedLines > 0 && Style.ColumnLimit == 0) + if (MergedLines > 0 && Style.ColumnLimit == 0) { // Disallow line merging if there is a break at the start of one of the // input lines. 
for (unsigned i = 0; i < MergedLines; ++i) if (Next[i + 1]->First->NewlinesBefore > 0) MergedLines = 0; + } if (!DryRun) for (unsigned i = 0; i < MergedLines; ++i) join(*Next[0], *Next[i + 1]); @@ -237,8 +244,9 @@ class LineJoiner { if (NextLine.Type == LT_Invalid || NextLine.First->MustBreakBefore) return 0; if (TheLine->InPPDirective && - (!NextLine.InPPDirective || NextLine.First->HasUnescapedNewline)) + (!NextLine.InPPDirective || NextLine.First->HasUnescapedNewline)) { return 0; + } if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) return 0; @@ -254,8 +262,9 @@ class LineJoiner { if (TheLine->Last->is(TT_FunctionLBrace) && TheLine->First == TheLine->Last && !Style.BraceWrapping.SplitEmptyFunction && - NextLine.First->is(tok::r_brace)) + NextLine.First->is(tok::r_brace)) { return tryMergeSimpleBlock(I, E, Limit); + } const auto *PreviousLine = I != AnnotatedLines.begin() ? I[-1] : nullptr; // Handle empty record blocks where the brace has already been wrapped. @@ -267,22 +276,25 @@ class LineJoiner { if (Tok && Tok->is(tok::comment)) Tok = Tok->getNextNonComment(); - if (Tok && Tok->getNamespaceToken()) + if (Tok && Tok->getNamespaceToken()) { return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock ? tryMergeSimpleBlock(I, E, Limit) : 0; + } if (Tok && Tok->is(tok::kw_typedef)) Tok = Tok->getNextNonComment(); if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, - tok::kw_extern, Keywords.kw_interface)) + tok::kw_extern, Keywords.kw_interface)) { return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock ? 
tryMergeSimpleBlock(I, E, Limit) : 0; + } if (Tok && Tok->is(tok::kw_template) && - Style.BraceWrapping.SplitEmptyRecord && EmptyBlock) + Style.BraceWrapping.SplitEmptyRecord && EmptyBlock) { return 0; + } } auto ShouldMergeShortFunctions = [this, &I, &NextLine, PreviousLine, @@ -290,8 +302,9 @@ class LineJoiner { if (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All) return true; if (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && - NextLine.First->is(tok::r_brace)) + NextLine.First->is(tok::r_brace)) { return true; + } if (Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly) { @@ -439,8 +452,9 @@ class LineJoiner { tok::ObjCKeywordKind kwId = PreviousLine->First->Next->Tok.getObjCKeywordID(); if (kwId == tok::objc_autoreleasepool || - kwId == tok::objc_synchronized) + kwId == tok::objc_synchronized) { return 0; + } } break; @@ -469,8 +483,9 @@ class LineJoiner { const FormatToken *PreviousPrevious = Previous->getPreviousNonComment(); if (PreviousPrevious && - PreviousPrevious->isOneOf(tok::kw_class, tok::kw_struct)) + PreviousPrevious->isOneOf(tok::kw_class, tok::kw_struct)) { return 0; + } } } } @@ -552,8 +567,9 @@ class LineJoiner { : 0; } if (TheLine->InPPDirective && - (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) + (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { return tryMergeSimplePPDirective(I, E, Limit); + } return 0; } @@ -578,16 +594,19 @@ class LineJoiner { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always && I[1]->First->is(tok::l_brace) && - Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) + Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) { return 0; + } if (I[1]->InPPDirective != (*I)->InPPDirective || - (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) + (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) { return 0; + } Limit = limitConsideringMacros(I + 1, E, Limit); AnnotatedLine &Line = 
**I; if (!Line.First->is(tok::kw_do) && !Line.First->is(tok::kw_else) && - !Line.Last->is(tok::kw_else) && Line.Last->isNot(tok::r_paren)) + !Line.Last->is(tok::kw_else) && Line.Last->isNot(tok::r_paren)) { return 0; + } // Only merge `do while` if `do` is the only statement on the line. if (Line.First->is(tok::kw_do) && !Line.Last->is(tok::kw_do)) return 0; @@ -595,14 +614,16 @@ class LineJoiner { return 0; // Don't merge with loops, ifs, a single semicolon or a line comment. if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while, - TT_ForEachMacro, TT_LineComment)) + TT_ForEachMacro, TT_LineComment)) { return 0; + } // Only inline simple if's (no nested if or else), unless specified if (Style.AllowShortIfStatementsOnASingleLine == FormatStyle::SIS_WithoutElse) { if (I + 2 != E && Line.startsWith(tok::kw_if) && - I[2]->First->is(tok::kw_else)) + I[2]->First->is(tok::kw_else)) { return 0; + } } return 1; } @@ -612,8 +633,9 @@ class LineJoiner { SmallVectorImpl::const_iterator E, unsigned Limit) { if (Limit == 0 || I + 1 == E || - I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) + I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) { return 0; + } if (I[0]->Last->is(tok::l_brace) || I[1]->First->is(tok::l_brace)) return 0; unsigned NumStmts = 0; @@ -631,8 +653,9 @@ class LineJoiner { break; if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, tok::kw_while) || - EndsWithComment) + EndsWithComment) { return 0; + } if (Line->First->is(tok::comment)) { if (Level != Line->Level) return 0; @@ -671,14 +694,16 @@ class LineJoiner { // FIXME: If an option to allow short exception handling clauses on a single // line is added, change this to not return for @try and friends. if (Style.Language != FormatStyle::LK_Java && - Line.First->isOneOf(tok::at, tok::minus, tok::plus)) + Line.First->isOneOf(tok::at, tok::minus, tok::plus)) { return 0; + } // Check that the current line allows merging. 
This depends on whether we // are in a control flow statements as well as several style flags. if (Line.First->is(tok::kw_case) || - (Line.First->Next && Line.First->Next->is(tok::kw_else))) + (Line.First->Next && Line.First->Next->is(tok::kw_else))) { return 0; + } // default: in switch statement if (Line.First->is(tok::kw_default)) { const FormatToken *Tok = Line.First->getNextNonComment(); @@ -692,42 +717,48 @@ class LineJoiner { if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) return 0; if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Empty && - !I[1]->First->is(tok::r_brace)) + !I[1]->First->is(tok::r_brace)) { return 0; + } // Don't merge when we can't except the case when // the control statement block is empty if (!Style.AllowShortIfStatementsOnASingleLine && Line.First->isOneOf(tok::kw_if, tok::kw_else) && !Style.BraceWrapping.AfterControlStatement && - !I[1]->First->is(tok::r_brace)) + !I[1]->First->is(tok::r_brace)) { return 0; + } if (!Style.AllowShortIfStatementsOnASingleLine && Line.First->isOneOf(tok::kw_if, tok::kw_else) && Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always && - I + 2 != E && !I[2]->First->is(tok::r_brace)) + I + 2 != E && !I[2]->First->is(tok::r_brace)) { return 0; + } if (!Style.AllowShortLoopsOnASingleLine && Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for, TT_ForEachMacro) && !Style.BraceWrapping.AfterControlStatement && - !I[1]->First->is(tok::r_brace)) + !I[1]->First->is(tok::r_brace)) { return 0; + } if (!Style.AllowShortLoopsOnASingleLine && Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for, TT_ForEachMacro) && Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always && - I + 2 != E && !I[2]->First->is(tok::r_brace)) + I + 2 != E && !I[2]->First->is(tok::r_brace)) { return 0; + } // FIXME: Consider an option to allow short exception handling clauses on // a single line. // FIXME: This isn't covered by tests. 
// FIXME: For catch, __except, __finally the first token on the line // is '}', so this isn't correct here. if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch, - Keywords.kw___except, tok::kw___finally)) + Keywords.kw___except, tok::kw___finally)) { return 0; + } } if (Line.Last->is(tok::l_brace)) { @@ -785,8 +816,9 @@ class LineJoiner { // } if (Line.First == Line.Last && Line.First->isNot(TT_FunctionLBrace) && Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_MultiLine) + FormatStyle::BWACS_MultiLine) { return 0; + } return 2; } @@ -820,8 +852,9 @@ class LineJoiner { SmallVectorImpl::const_iterator E, unsigned Limit) { if (I[0]->InPPDirective && I + 1 != E && - !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) + !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { return Limit < 2 ? 0 : Limit - 2; + } return Limit; } @@ -923,10 +956,11 @@ class LineFormatter { const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); FormatToken &Previous = *State.NextToken->Previous; if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) || - Previous.Children.size() == 0) + Previous.Children.size() == 0) { // The previous token does not open a block. Nothing to do. We don't // assert so that we can simply call this function for all tokens. return true; + } if (NewLine) { const ParenState &P = State.Stack.back(); @@ -937,8 +971,9 @@ class LineFormatter { if (Style.LambdaBodyIndentation == FormatStyle::LBI_OuterScope && P.NestedBlockIndent == P.LastSpace) { if (State.NextToken->MatchingParen && - State.NextToken->MatchingParen->is(TT_LambdaLBrace)) + State.NextToken->MatchingParen->is(TT_LambdaLBrace)) { State.Stack.pop_back(); + } if (LBrace->is(TT_LambdaLBrace)) AdditionalIndent = 0; } @@ -968,8 +1003,9 @@ class LineFormatter { // If the child line exceeds the column limit, we wouldn't want to merge it. // We add +2 for the trailing " }". 
if (Style.ColumnLimit > 0 && - Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) + Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) { return false; + } if (!DryRun) { Whitespaces->replaceWhitespace( @@ -1141,9 +1177,10 @@ class OptimizingLineFormatter : public LineFormatter { if (Count > 50000) Node->State.IgnoreStackForComparison = true; - if (!Seen.insert(&Node->State).second) + if (!Seen.insert(&Node->State).second) { // State already examined with lower penalty. continue; + } FormatDecision LastFormat = Node->State.NextToken->getDecision(); if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) @@ -1208,10 +1245,11 @@ class OptimizingLineFormatter : public LineFormatter { LLVM_DEBUG({ printLineState(Node->Previous->State); - if (Node->NewLine) + if (Node->NewLine) { llvm::dbgs() << "Penalty for placing " << Node->Previous->State.NextToken->Tok.getName() << " on a new line: " << Penalty << "\n"; + } }); } } @@ -1291,26 +1329,28 @@ unsigned UnwrappedLineFormatter::format( (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || (Style.isCSharp() && TheLine.InPPDirective); // don't split #regions in C# - if (Style.ColumnLimit == 0) + if (Style.ColumnLimit == 0) { NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, FirstLine ? FirstStartColumn : 0, DryRun); - else if (FitsIntoOneLine) + } else if (FitsIntoOneLine) { Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, FirstLine ? FirstStartColumn : 0, DryRun); - else + } else { Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, FirstLine ? FirstStartColumn : 0, DryRun); + } RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); } else { // If no token in the current line is affected, we still need to format // affected children. 
- if (TheLine.ChildrenAffected) + if (TheLine.ChildrenAffected) { for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) if (!Tok->Children.empty()) format(Tok->Children, DryRun); + } // Adapt following lines on the current indent level to the same level // unless the current \c AnnotatedLine is not at the beginning of a line. @@ -1323,13 +1363,14 @@ unsigned UnwrappedLineFormatter::format( StartsNewLine && ((PreviousLine && PreviousLine->Affected) || TheLine.LeadingEmptyLinesAffected); // Format the first token. - if (ReformatLeadingWhitespace) + if (ReformatLeadingWhitespace) { formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, TheLine.First->OriginalColumn, TheLine.First->OriginalColumn); - else + } else { Whitespaces->addUntouchableToken(*TheLine.First, TheLine.InPPDirective); + } // Notify the WhitespaceManager about the unchanged whitespace. for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next) @@ -1365,8 +1406,9 @@ void UnwrappedLineFormatter::formatFirstToken( (!RootToken.Next || (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)) && // Do not remove empty lines before namespace closing "}". - !getNamespaceToken(&Line, Lines)) + !getNamespaceToken(&Line, Lines)) { Newlines = std::min(Newlines, 1u); + } // Remove empty lines at the start of nested blocks (lambdas/arrow functions) if (PreviousLine == nullptr && Line.Level > 0) Newlines = std::min(Newlines, 1u); @@ -1381,8 +1423,9 @@ void UnwrappedLineFormatter::formatFirstToken( !PreviousLine->startsWithNamespace() && !(PrevPrevLine && PrevPrevLine->startsWithNamespace() && PreviousLine->startsWith(tok::l_brace)) && - !startsExternCBlock(*PreviousLine)) + !startsExternCBlock(*PreviousLine)) { Newlines = 1; + } // Insert or remove empty line before access specifiers. 
if (PreviousLine && RootToken.isAccessSpecifier()) { @@ -1443,8 +1486,9 @@ void UnwrappedLineFormatter::formatFirstToken( if (!Style.isJavaScript() && Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && (Line.Type == LT_PreprocessorDirective || - Line.Type == LT_ImportStatement)) + Line.Type == LT_ImportStatement)) { Indent = 0; + } Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, /*IsAligned=*/false, diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index cb7a17f157ee0f..a20562dd77a4cd 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -358,10 +358,11 @@ void UnwrappedLineParser::parse() { // If we found an include guard then all preprocessor directives (other than // the guard) are over-indented by one. - if (IncludeGuard == IG_Found) + if (IncludeGuard == IG_Found) { for (auto &Line : Lines) if (Line.InPPDirective && Line.Level > 0) --Line.Level; + } // Create line with eof token. pushToken(FormatTok); @@ -406,8 +407,9 @@ void UnwrappedLineParser::parseFile() { // do not have a chance to be put on a line of their own until this point. // Here we add this newline before end-of-file comments. if (Style.Language == FormatStyle::LK_TextProto && - !CommentsBeforeNextToken.empty()) + !CommentsBeforeNextToken.empty()) { addUnwrappedLine(); + } flushComments(true); addUnwrappedLine(); } @@ -506,18 +508,19 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, addUnwrappedLine(); break; case tok::l_brace: - if (NextLBracesType != TT_Unknown) + if (NextLBracesType != TT_Unknown) { FormatTok->setFinalizedType(NextLBracesType); - else if (FormatTok->Previous && - FormatTok->Previous->ClosesRequiresClause) { + } else if (FormatTok->Previous && + FormatTok->Previous->ClosesRequiresClause) { // We need the 'default' case here to correctly parse a function // l_brace. 
ParseDefault(); continue; } if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) && - tryToParseBracedList()) + tryToParseBracedList()) { continue; + } parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, /*KeepBraces=*/true, /*UnindentWhitesmithsBraces=*/false, CanContainBracedList, @@ -529,12 +532,14 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, case tok::r_brace: if (OpeningBrace) { if (!Style.RemoveBracesLLVM || - !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) + !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { return false; + } if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || IsPrecededByCommentOrPPDirective || - precededByCommentOrPPDirective()) + precededByCommentOrPPDirective()) { return false; + } const FormatToken *Next = Tokens->peekNextToken(); return Next->isNot(tok::comment) || Next->NewlinesBefore > 0; } @@ -565,8 +570,10 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, break; } if (!SwitchLabelEncountered && - (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) + (Style.IndentCaseLabels || + (Line->InPPDirective && Line->Level == 1))) { ++Line->Level; + } SwitchLabelEncountered = true; parseStructuralElement(); break; @@ -610,7 +617,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { switch (Tok->Tok.getKind()) { case tok::l_brace: if (Style.isJavaScript() && PrevTok) { - if (PrevTok->isOneOf(tok::colon, tok::less)) + if (PrevTok->isOneOf(tok::colon, tok::less)) { // A ':' indicates this code is in a type, or a braced list // following a label in an object literal ({a: {b: 1}}). // A '<' could be an object used in a comparison, but that is nonsense @@ -621,9 +628,10 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // trigger BK_Block. In both cases, this must be parsed as an inline // braced init. 
Tok->setBlockKind(BK_BracedInit); - else if (PrevTok->is(tok::r_paren)) + } else if (PrevTok->is(tok::r_paren)) { // `) { }` can only occur in function or method declarations in JS. Tok->setBlockKind(BK_Block); + } } else { Tok->setBlockKind(BK_Unknown); } @@ -1050,10 +1058,11 @@ void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { Line += Lines.size(); if (Unreachable || - (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) + (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { PPStack.push_back({PP_Unreachable, Line}); - else + } else { PPStack.push_back({PP_Conditional, Line}); + } } void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { @@ -1107,7 +1116,7 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) { // If there's a #ifndef on the first line, and the only lines before it are // comments, it could be an include guard. bool MaybeIncludeGuard = IfNDef; - if (IncludeGuard == IG_Inited && MaybeIncludeGuard) + if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { for (auto &Line : Lines) { if (!Line.Tokens.front().Tok->is(tok::comment)) { MaybeIncludeGuard = false; @@ -1115,6 +1124,7 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) { break; } } + } --PPBranchLevel; parsePPUnknown(); ++PPBranchLevel; @@ -1143,8 +1153,9 @@ void UnwrappedLineParser::parsePPEndIf() { // If the #endif of a potential include guard is the last thing in the file, // then we found an include guard. 
if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && - Style.IndentPPDirectives != FormatStyle::PPDIS_None) + Style.IndentPPDirectives != FormatStyle::PPDIS_None) { IncludeGuard = IG_Found; + } } void UnwrappedLineParser::parsePPDefine() { @@ -1178,8 +1189,9 @@ void UnwrappedLineParser::parsePPDefine() { FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); nextToken(); if (FormatTok->Tok.getKind() == tok::l_paren && - !FormatTok->hasWhitespaceBefore()) + !FormatTok->hasWhitespaceBefore()) { parseParens(); + } if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) Line->Level += PPBranchLevel + 1; addUnwrappedLine(); @@ -1302,8 +1314,9 @@ static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, return false; if (!isC78Type(*Tok) && - !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) + !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { return false; + } if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) return false; @@ -1332,8 +1345,9 @@ void UnwrappedLineParser::parseModuleImport() { // Mark tokens up to the trailing line comments as implicit string // literals. 
if (FormatTok->isNot(tok::comment) && - !FormatTok->TokenText.startswith("//")) + !FormatTok->TokenText.startswith("//")) { FormatTok->setFinalizedType(TT_ImplicitStringLiteral); + } nextToken(); } } @@ -1386,11 +1400,13 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && (PreviousMustBeValue || Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, - tok::minusminus))) + tok::minusminus))) { return addUnwrappedLine(); + } if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && - isJSDeclOrStmt(Keywords, Next)) + isJSDeclOrStmt(Keywords, Next)) { return addUnwrappedLine(); + } } void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, @@ -1430,40 +1446,46 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, case tok::kw_protected: case tok::kw_private: if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || - Style.isCSharp()) + Style.isCSharp()) { nextToken(); - else + } else { parseAccessSpecifier(); + } return; case tok::kw_if: - if (Style.isJavaScript() && Line->MustBeDeclaration) + if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; + } parseIfThenElse(IfKind); return; case tok::kw_for: case tok::kw_while: - if (Style.isJavaScript() && Line->MustBeDeclaration) + if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; + } parseForOrWhileLoop(); return; case tok::kw_do: - if (Style.isJavaScript() && Line->MustBeDeclaration) + if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; + } parseDoWhile(); return; case tok::kw_switch: - if (Style.isJavaScript() && Line->MustBeDeclaration) + if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'switch: string' field declaration. 
break; + } parseSwitch(); return; case tok::kw_default: - if (Style.isJavaScript() && Line->MustBeDeclaration) + if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'default: string' field declaration. break; + } nextToken(); if (FormatTok->is(tok::colon)) { parseLabel(); @@ -1481,9 +1503,10 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, return; case tok::kw_try: case tok::kw___try: - if (Style.isJavaScript() && Line->MustBeDeclaration) + if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; + } parseTryCatch(); return; case tok::kw_extern: @@ -1617,21 +1640,24 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, nextToken(); if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_Always) + FormatStyle::BWACS_Always) { addUnwrappedLine(); + } parseBlock(); } addUnwrappedLine(); return; case tok::objc_synchronized: nextToken(); - if (FormatTok->is(tok::l_paren)) + if (FormatTok->is(tok::l_paren)) { // Skip synchronization object parseParens(); + } if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_Always) + FormatStyle::BWACS_Always) { addUnwrappedLine(); + } parseBlock(); } addUnwrappedLine(); @@ -1680,8 +1706,9 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, Keywords.kw_CF_CLOSED_ENUM, - Keywords.kw_NS_CLOSED_ENUM)) + Keywords.kw_NS_CLOSED_ENUM)) { parseEnum(); + } break; case tok::kw_struct: case tok::kw_union: @@ -1693,13 +1720,15 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, nextToken(); // In Java, classes have an implicit static member "class". 
if (Style.Language == FormatStyle::LK_Java && FormatTok && - FormatTok->is(tok::kw_class)) + FormatTok->is(tok::kw_class)) { nextToken(); + } if (Style.isJavaScript() && FormatTok && - FormatTok->Tok.getIdentifierInfo()) + FormatTok->Tok.getIdentifierInfo()) { // JavaScript only has pseudo keywords, all keywords are allowed to // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 nextToken(); + } break; case tok::semi: nextToken(); @@ -1728,8 +1757,9 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, case tok::caret: nextToken(); if (FormatTok->Tok.isAnyIdentifier() || - FormatTok->isSimpleTypeSpecifier()) + FormatTok->isSimpleTypeSpecifier()) { nextToken(); + } if (FormatTok->is(tok::l_paren)) parseParens(); if (FormatTok->is(tok::l_brace)) @@ -1748,8 +1778,9 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, // If necessary, we could set the type to something different than // TT_FunctionLBrace. if (Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_Always) + FormatStyle::BWACS_Always) { addUnwrappedLine(); + } } else if (Style.BraceWrapping.AfterFunction) { addUnwrappedLine(); } @@ -2206,8 +2237,9 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, // replace this by using parseAssignmentExpression() inside. 
do { if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && - tryToParseChildBlock()) + tryToParseChildBlock()) { continue; + } if (Style.isJavaScript()) { if (FormatTok->is(Keywords.kw_function) || FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { @@ -2336,10 +2368,12 @@ void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { case tok::identifier: if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function) || - FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) + FormatTok->startsSequence(Keywords.kw_async, + Keywords.kw_function))) { tryToParseJSFunction(); - else + } else { nextToken(); + } break; case tok::kw_requires: { auto RequiresToken = FormatTok; @@ -2554,8 +2588,10 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, ElseLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); if (parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, - /*MunchSemi=*/true, KeepElseBraces) == IfStmtKind::IfOnly) + /*MunchSemi=*/true, + KeepElseBraces) == IfStmtKind::IfOnly) { Kind = IfStmtKind::IfElseIf; + } addUnwrappedLine(); } else if (FormatTok->is(tok::kw_if)) { const FormatToken *Previous = Tokens->getPreviousToken(); @@ -2676,8 +2712,9 @@ void UnwrappedLineParser::parseTryCatch() { ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && FormatTok->is(Keywords.kw_finally)) || (FormatTok->isObjCAtKeyword(tok::objc_catch) || - FormatTok->isObjCAtKeyword(tok::objc_finally)))) + FormatTok->isObjCAtKeyword(tok::objc_finally)))) { break; + } nextToken(); while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->is(tok::l_paren)) { @@ -2719,13 +2756,14 @@ void UnwrappedLineParser::parseNamespace() { } else { while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, tok::l_square, tok::period, tok::l_paren) || - (Style.isCSharp() && FormatTok->is(tok::kw_union))) + (Style.isCSharp() && FormatTok->is(tok::kw_union))) { if 
(FormatTok->is(tok::l_square)) parseSquare(); else if (FormatTok->is(tok::l_paren)) parseParens(); else nextToken(); + } } if (FormatTok->is(tok::l_brace)) { if (ShouldBreakBeforeBrace(Style, InitialToken)) @@ -2884,8 +2922,9 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { FormatStyle::BWACS_Always) { addUnwrappedLine(); if (!Style.IndentCaseBlocks && - Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { ++Line->Level; + } } parseStructuralElement(); } @@ -3411,8 +3450,9 @@ bool UnwrappedLineParser::parseEnum() { } if (!Style.AllowShortEnumsOnASingleLine && - ShouldBreakBeforeBrace(Style, InitialToken)) + ShouldBreakBeforeBrace(Style, InitialToken)) { addUnwrappedLine(); + } // Parse enum body. nextToken(); if (!Style.AllowShortEnumsOnASingleLine) { @@ -3697,8 +3737,9 @@ void UnwrappedLineParser::parseObjCProtocolList() { nextToken(); // Early exit in case someone forgot a close angle. if (FormatTok->isOneOf(tok::semi, tok::l_brace) || - FormatTok->isObjCAtKeyword(tok::objc_end)) + FormatTok->isObjCAtKeyword(tok::objc_end)) { return; + } } while (!eof() && FormatTok->isNot(tok::greater)); nextToken(); // Skip '>'. } @@ -3743,9 +3784,10 @@ void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { // The base class can also have lightweight generics applied to it. if (FormatTok->is(tok::less)) parseObjCLightweightGenerics(); - } else if (FormatTok->is(tok::l_paren)) + } else if (FormatTok->is(tok::l_paren)) { // Skip category, if present. parseParens(); + } if (FormatTok->is(tok::less)) parseObjCProtocolList(); @@ -3777,11 +3819,12 @@ void UnwrappedLineParser::parseObjCLightweightGenerics() { nextToken(); // Early exit in case someone forgot a close angle. 
if (FormatTok->isOneOf(tok::semi, tok::l_brace) || - FormatTok->isObjCAtKeyword(tok::objc_end)) + FormatTok->isObjCAtKeyword(tok::objc_end)) { break; - if (FormatTok->is(tok::less)) + } + if (FormatTok->is(tok::less)) { ++NumOpenAngles; - else if (FormatTok->is(tok::greater)) { + } else if (FormatTok->is(tok::greater)) { assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); --NumOpenAngles; } @@ -3795,9 +3838,10 @@ bool UnwrappedLineParser::parseObjCProtocol() { assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); nextToken(); - if (FormatTok->is(tok::l_paren)) + if (FormatTok->is(tok::l_paren)) { // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". return false; + } // The definition/declaration form, // @protocol Foo @@ -3845,8 +3889,9 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { // parsing the structural element, i.e. the declaration or expression for // `export default`. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && - !FormatTok->isStringLiteral()) + !FormatTok->isStringLiteral()) { return; + } while (!eof()) { if (FormatTok->is(tok::semi)) @@ -3943,8 +3988,9 @@ continuesLineCommentSection(const FormatToken &FormatTok, StringRef IndentContent = FormatTok.TokenText; if (FormatTok.TokenText.startswith("//") || - FormatTok.TokenText.startswith("/*")) + FormatTok.TokenText.startswith("/*")) { IndentContent = FormatTok.TokenText.substr(2); + } if (CommentPragmasRegex.match(IndentContent)) return false; @@ -4117,8 +4163,9 @@ void UnwrappedLineParser::distributeComments( continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); } if (!FormatTok->ContinuesLineCommentSection && - (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) + (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { ShouldPushCommentsInCurrentLine = false; + } if (ShouldPushCommentsInCurrentLine) pushToken(FormatTok); else @@ -4180,8 +4227,9 @@ void UnwrappedLineParser::readToken(int LevelDifference) { // before 
the preprocessor directive, at the same level as the // preprocessor directive, as we consider them to apply to the directive. if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && - PPBranchLevel > 0) + PPBranchLevel > 0) { Line->Level += PPBranchLevel; + } flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); PreviousWasComment = FormatTok->is(tok::comment); @@ -4190,8 +4238,9 @@ void UnwrappedLineParser::readToken(int LevelDifference) { } if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && - !Line->InPPDirective) + !Line->InPPDirective) { continue; + } if (!FormatTok->is(tok::comment)) { distributeComments(Comments, FormatTok); diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index f20383800ab43f..fe43981e27d006 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -167,11 +167,12 @@ void WhitespaceManager::calculateLineBreakInformation() { // If there are multiple changes in this token, sum up all the changes until // the end of the line. 
- if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0) + if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0) { LastOutsideTokenChange->TokenLength += Changes[i - 1].TokenLength + Changes[i - 1].Spaces; - else + } else { LastOutsideTokenChange = &Changes[i - 1]; + } Changes[i].PreviousEndOfTokenColumn = Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; @@ -227,13 +228,14 @@ void WhitespaceManager::calculateLineBreakInformation() { Change.StartOfBlockComment = nullptr; Change.IndentationOffset = 0; if (Change.Tok->is(tok::comment)) { - if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken) + if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken) { LastBlockComment = &Change; - else { - if ((Change.StartOfBlockComment = LastBlockComment)) + } else { + if ((Change.StartOfBlockComment = LastBlockComment)) { Change.IndentationOffset = Change.StartOfTokenColumn - Change.StartOfBlockComment->StartOfTokenColumn; + } } } else { LastBlockComment = nullptr; @@ -303,18 +305,21 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, for (unsigned i = Start; i != End; ++i) { if (ScopeStack.size() != 0 && Changes[i].indentAndNestingLevel() < - Changes[ScopeStack.back()].indentAndNestingLevel()) + Changes[ScopeStack.back()].indentAndNestingLevel()) { ScopeStack.pop_back(); + } // Compare current token to previous non-comment token to ensure whether // it is in a deeper scope or not. 
unsigned PreviousNonComment = i - 1; while (PreviousNonComment > Start && - Changes[PreviousNonComment].Tok->is(tok::comment)) + Changes[PreviousNonComment].Tok->is(tok::comment)) { --PreviousNonComment; + } if (i != Start && Changes[i].indentAndNestingLevel() > - Changes[PreviousNonComment].indentAndNestingLevel()) + Changes[PreviousNonComment].indentAndNestingLevel()) { ScopeStack.push_back(i); + } bool InsideNestedScope = ScopeStack.size() != 0; bool ContinuedStringLiteral = i > Start && @@ -337,10 +342,11 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, Changes[i].Spaces += Shift; // FIXME: This is a workaround that should be removed when we fix // http://llvm.org/PR53699. An assertion later below verifies this. - if (Changes[i].NewlinesBefore == 0) + if (Changes[i].NewlinesBefore == 0) { Changes[i].Spaces = std::max(Changes[i].Spaces, static_cast(Changes[i].Tok->SpacesRequiredBefore)); + } } // This is for function parameters that are split across multiple lines, @@ -358,8 +364,9 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, // Continued function declaration if (ScopeStart > Start + 1 && - Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)) + Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)) { return true; + } // Continued function call if (ScopeStart > Start + 1 && @@ -367,13 +374,15 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, Changes[ScopeStart - 1].Tok->is(tok::l_paren) && Changes[ScopeStart].Tok->isNot(TT_LambdaLSquare)) { if (Changes[i].Tok->MatchingParen && - Changes[i].Tok->MatchingParen->is(TT_LambdaLBrace)) + Changes[i].Tok->MatchingParen->is(TT_LambdaLBrace)) { return false; + } if (Changes[ScopeStart].NewlinesBefore > 0) return false; if (Changes[i].Tok->is(tok::l_brace) && - Changes[i].Tok->is(BK_BracedInit)) + Changes[i].Tok->is(BK_BracedInit)) { return true; + } return Style.BinPackArguments; } @@ -387,16 +396,18 @@ 
AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, // Continued ternary operator if (Changes[i].Tok->Previous && - Changes[i].Tok->Previous->is(TT_ConditionalExpr)) + Changes[i].Tok->Previous->is(TT_ConditionalExpr)) { return true; + } // Continued direct-list-initialization using braced list. if (ScopeStart > Start + 1 && Changes[ScopeStart - 2].Tok->is(tok::identifier) && Changes[ScopeStart - 1].Tok->is(tok::l_brace) && Changes[i].Tok->is(tok::l_brace) && - Changes[i].Tok->is(BK_BracedInit)) + Changes[i].Tok->is(BK_BracedInit)) { return true; + } // Continued braced list. if (ScopeStart > Start + 1 && @@ -406,8 +417,9 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, for (unsigned OuterScopeStart : llvm::reverse(ScopeStack)) { // Lambda. if (OuterScopeStart > Start && - Changes[OuterScopeStart - 1].Tok->is(TT_LambdaLBrace)) + Changes[OuterScopeStart - 1].Tok->is(TT_LambdaLBrace)) { return false; + } } if (Changes[ScopeStart].NewlinesBefore > 0) return false; @@ -532,10 +544,11 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, // We need to adjust the StartOfTokenColumn of each Change that is on a line // containing any matching token to be aligned and located after such token. auto AlignCurrentSequence = [&] { - if (StartOfSequence > 0 && StartOfSequence < EndOfSequence) + if (StartOfSequence > 0 && StartOfSequence < EndOfSequence) { AlignTokenSequence(Style, StartOfSequence, EndOfSequence, WidthLeft + WidthAnchor, RightJustify, Matches, Changes); + } WidthLeft = 0; WidthAnchor = 0; WidthRight = 0; @@ -567,8 +580,9 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, // A new line starts, re-initialize line status tracking bools. // Keep the match state if a string literal is continued on this line. 
if (i == 0 || !Changes[i].Tok->is(tok::string_literal) || - !Changes[i - 1].Tok->is(tok::string_literal)) + !Changes[i - 1].Tok->is(tok::string_literal)) { FoundMatchOnLine = false; + } LineIsComment = true; } @@ -601,12 +615,12 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, unsigned ChangeWidthLeft = Changes[i].StartOfTokenColumn; unsigned ChangeWidthAnchor = 0; unsigned ChangeWidthRight = 0; - if (RightJustify) { + if (RightJustify) if (ACS.PadOperators) ChangeWidthAnchor = Changes[i].TokenLength; else ChangeWidthLeft += Changes[i].TokenLength; - } else + else ChangeWidthRight = Changes[i].TokenLength; for (unsigned j = i + 1; j != e && Changes[j].NewlinesBefore == 0; ++j) { ChangeWidthRight += Changes[j].Spaces; @@ -749,9 +763,10 @@ void WhitespaceManager::alignConsecutiveMacros() { !FoundMatchOnLine && !(LineIsComment && Style.AlignConsecutiveMacros.AcrossComments); - if (EmptyLineBreak || NoMatchBreak) + if (EmptyLineBreak || NoMatchBreak) { AlignMacroSequence(StartOfSequence, EndOfSequence, MinColumn, MaxColumn, FoundMatchOnLine, AlignMacrosMatches, Changes); + } // A new line starts, re-initialize line status tracking bools. 
FoundMatchOnLine = false; @@ -857,8 +872,9 @@ void WhitespaceManager::alignConsecutiveDeclarations() { if (!Next->Tok.getIdentifierInfo()) break; if (Next->isOneOf(TT_StartOfName, TT_FunctionDeclarationName, - tok::kw_operator)) + tok::kw_operator)) { return false; + } } return true; }, @@ -1198,9 +1214,10 @@ bool WhitespaceManager::isSplitCell(const CellDescription &Cell) { if (Cell.HasSplit) return true; for (const auto *Next = Cell.NextColumnElement; Next != nullptr; - Next = Next->NextColumnElement) + Next = Next->NextColumnElement) { if (Next->HasSplit) return true; + } return false; } @@ -1372,12 +1389,13 @@ void WhitespaceManager::generateChanges() { } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; - if (C.ContinuesPPDirective) + if (C.ContinuesPPDirective) { appendEscapedNewlineText(ReplacementText, C.NewlinesBefore, C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); - else + } else { appendNewlineText(ReplacementText, C.NewlinesBefore); + } // FIXME: This assert should hold if we computed the column correctly. // assert((int)C.StartOfTokenColumn >= C.Spaces); appendIndentText( @@ -1395,8 +1413,9 @@ void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) { SourceMgr.getFileOffset(Range.getBegin()); // Don't create a replacement, if it does not change anything. if (StringRef(SourceMgr.getCharacterData(Range.getBegin()), - WhitespaceLength) == Text) + WhitespaceLength) == Text) { return; + } auto Err = Replaces.add(tooling::Replacement( SourceMgr, CharSourceRange::getCharRange(Range), Text)); // FIXME: better error handling. 
For now, just print an error message in the From 793bb7049db012df4fe3ff6e8abc320677845400 Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Wed, 25 May 2022 10:09:58 +0800 Subject: [PATCH 423/908] [InstCombine] add test for trunc-shl-trunc ; NFC --- .../Transforms/InstCombine/trunc-shift-trunc.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll index 756f5dcf2b27b2..e578b604c9d6ae 100644 --- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll @@ -19,6 +19,19 @@ define i8 @trunc_lshr_trunc(i64 %a) { ret i8 %d } +define <2 x i8> @trunc_shl_trunc(<2 x i64> %a) { +; CHECK-LABEL: @trunc_shl_trunc( +; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; CHECK-NEXT: ret <2 x i8> [[D]] +; + %b = trunc <2 x i64> %a to <2 x i32> + %c = shl <2 x i32> %b, + %d = trunc <2 x i32> %c to <2 x i8> + ret <2 x i8> %d +} + define <2 x i8> @trunc_lshr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_lshr_trunc_uniform( ; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], From 269e3f7369d2286a1e711a7fa0442ca64817ff9f Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Wed, 25 May 2022 10:21:39 +0800 Subject: [PATCH 424/908] [InstCombine] [NFC] Move transforms for truncated shifts into narrowBinOp Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D126056 --- .../InstCombine/InstCombineCasts.cpp | 49 ++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index fd76ea6bbcc731..d000d79d5e3834 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -636,10 
+636,12 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { /// Try to narrow the width of math or bitwise logic instructions by pulling a /// truncate ahead of binary operators. -/// TODO: Transforms for truncated shifts should be moved into here. Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) { Type *SrcTy = Trunc.getSrcTy(); Type *DestTy = Trunc.getType(); + unsigned SrcWidth = SrcTy->getScalarSizeInBits(); + unsigned DestWidth = DestTy->getScalarSizeInBits(); + if (!isa(SrcTy) && !shouldChangeType(SrcTy, DestTy)) return nullptr; @@ -682,7 +684,30 @@ Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) { } break; } - + case Instruction::LShr: + case Instruction::AShr: { + // trunc (*shr (trunc A), C) --> trunc(*shr A, C) + Value *A; + Constant *C; + if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) { + unsigned MaxShiftAmt = SrcWidth - DestWidth; + // If the shift is small enough, all zero/sign bits created by the shift + // are removed by the trunc. + if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, + APInt(SrcWidth, MaxShiftAmt)))) { + auto *OldShift = cast(Trunc.getOperand(0)); + bool IsExact = OldShift->isExact(); + auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true); + ShAmt = Constant::mergeUndefsWith(ShAmt, C); + Value *Shift = + OldShift->getOpcode() == Instruction::AShr + ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact) + : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact); + return CastInst::CreateTruncOrBitCast(Shift, DestTy); + } + } + break; + } default: break; } @@ -870,26 +895,6 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { // TODO: Mask high bits with 'and'. 
} - // trunc (*shr (trunc A), C) --> trunc(*shr A, C) - if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) { - unsigned MaxShiftAmt = SrcWidth - DestWidth; - - // If the shift is small enough, all zero/sign bits created by the shift are - // removed by the trunc. - if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, - APInt(SrcWidth, MaxShiftAmt)))) { - auto *OldShift = cast(Src); - bool IsExact = OldShift->isExact(); - auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true); - ShAmt = Constant::mergeUndefsWith(ShAmt, C); - Value *Shift = - OldShift->getOpcode() == Instruction::AShr - ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact) - : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact); - return CastInst::CreateTruncOrBitCast(Shift, DestTy); - } - } - if (Instruction *I = narrowBinOp(Trunc)) return I; From a1ffba8d528681d55c901a997beedbc69946eb90 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 25 May 2022 10:30:32 +0800 Subject: [PATCH 425/908] [C++20] [Coroutines] Conform the updates for CWG issue 2585 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the updates in CWG issue 2585 https://cplusplus.github.io/CWG/issues/2585.html, we shouldn't find an allocation function with (size, p0, …, pn) in global scope. 
Reviewed By: erichkeane Differential Revision: https://reviews.llvm.org/D126187 --- clang/docs/ReleaseNotes.rst | 7 +- clang/lib/Sema/SemaCoroutine.cpp | 86 +++++++++++-------- clang/test/CodeGenCoroutines/coro-alloc-2.cpp | 30 +++++++ 3 files changed, 87 insertions(+), 36 deletions(-) create mode 100644 clang/test/CodeGenCoroutines/coro-alloc-2.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a889c74a55239f..7a54a33685471c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -149,8 +149,11 @@ Bug Fixes because there is no way to fully qualify the enumerator name, so this "extension" was unintentional and useless. This fixes `Issue 42372 `_. -- Clang shouldn't lookup allocation function in global scope for coroutines - in case it found the allocation function name in the promise_type body. +- Clang will now find and emit a call to an allocation function in a + promise_type body for coroutines if there is any allocation function + declaration in the scope of promise_type. Additionally, to implement CWG2585, + a coroutine will no longer generate a call to a global allocation function + with the signature (std::size_t, p0, ..., pn). This fixes Issue `Issue 54881 `_. Improvements to Clang's diagnostics diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index b43b0b3eb957cd..55f1c2989578da 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1239,6 +1239,41 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() { return true; } +// Collect placement arguments for allocation function of coroutine FD. +// Return true if we collect placement arguments succesfully. Return false, +// otherwise. 
+static bool collectPlacementArgs(Sema &S, FunctionDecl &FD, SourceLocation Loc, + SmallVectorImpl &PlacementArgs) { + if (auto *MD = dyn_cast(&FD)) { + if (MD->isInstance() && !isLambdaCallOperator(MD)) { + ExprResult ThisExpr = S.ActOnCXXThis(Loc); + if (ThisExpr.isInvalid()) + return false; + ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get()); + if (ThisExpr.isInvalid()) + return false; + PlacementArgs.push_back(ThisExpr.get()); + } + } + + for (auto *PD : FD.parameters()) { + if (PD->getType()->isDependentType()) + continue; + + // Build a reference to the parameter. + auto PDLoc = PD->getLocation(); + ExprResult PDRefExpr = + S.BuildDeclRefExpr(PD, PD->getOriginalType().getNonReferenceType(), + ExprValueKind::VK_LValue, PDLoc); + if (PDRefExpr.isInvalid()) + return false; + + PlacementArgs.push_back(PDRefExpr.get()); + } + + return true; +} + bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // Form and check allocation and deallocation calls. assert(!IsPromiseDependentType && @@ -1255,13 +1290,7 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // allocated, followed by the coroutine function's arguments. If a matching // allocation function exists, use it. Otherwise, use an allocation function // that just takes the requested size. - - FunctionDecl *OperatorNew = nullptr; - FunctionDecl *OperatorDelete = nullptr; - FunctionDecl *UnusedResult = nullptr; - bool PassAlignment = false; - SmallVector PlacementArgs; - + // // [dcl.fct.def.coroutine]p9 // An implementation may need to allocate additional storage for a // coroutine. @@ -1288,31 +1317,12 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // and p_i denotes the i-th function parameter otherwise. For a non-static // member function, q_1 is an lvalue that denotes *this; any other q_i is an // lvalue that denotes the parameter copy corresponding to p_i. 
- if (auto *MD = dyn_cast(&FD)) { - if (MD->isInstance() && !isLambdaCallOperator(MD)) { - ExprResult ThisExpr = S.ActOnCXXThis(Loc); - if (ThisExpr.isInvalid()) - return false; - ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get()); - if (ThisExpr.isInvalid()) - return false; - PlacementArgs.push_back(ThisExpr.get()); - } - } - for (auto *PD : FD.parameters()) { - if (PD->getType()->isDependentType()) - continue; - // Build a reference to the parameter. - auto PDLoc = PD->getLocation(); - ExprResult PDRefExpr = - S.BuildDeclRefExpr(PD, PD->getOriginalType().getNonReferenceType(), - ExprValueKind::VK_LValue, PDLoc); - if (PDRefExpr.isInvalid()) - return false; - - PlacementArgs.push_back(PDRefExpr.get()); - } + FunctionDecl *OperatorNew = nullptr; + FunctionDecl *OperatorDelete = nullptr; + FunctionDecl *UnusedResult = nullptr; + bool PassAlignment = false; + SmallVector PlacementArgs; bool PromiseContainNew = [this, &PromiseType]() -> bool { DeclarationName NewName = @@ -1330,8 +1340,10 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // The allocation function's name is looked up by searching for it in the // scope of the promise type. // - If any declarations are found, ... - // - Otherwise, a search is performed in the global scope. - Sema::AllocationFunctionScope NewScope = PromiseContainNew ? Sema::AFS_Class : Sema::AFS_Global; + // - If no declarations are found in the scope of the promise type, a search + // is performed in the global scope. + Sema::AllocationFunctionScope NewScope = + PromiseContainNew ? Sema::AFS_Class : Sema::AFS_Global; S.FindAllocationFunctions(Loc, SourceRange(), NewScope, /*DeleteScope*/ Sema::AFS_Both, PromiseType, @@ -1339,13 +1351,19 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { OperatorNew, UnusedResult, /*Diagnose*/ false); }; + // We don't expect to call to global operator new with (size, p0, …, pn). 
+ // So if we choose to lookup the allocation function in global scope, we + // shouldn't lookup placement arguments. + if (PromiseContainNew && !collectPlacementArgs(S, FD, Loc, PlacementArgs)) + return false; + LookupAllocationFunction(); // [dcl.fct.def.coroutine]p9 // If no viable function is found ([over.match.viable]), overload resolution // is performed again on a function call created by passing just the amount of // space required as an argument of type std::size_t. - if (!OperatorNew && !PlacementArgs.empty()) { + if (!OperatorNew && !PlacementArgs.empty() && PromiseContainNew) { PlacementArgs.clear(); LookupAllocationFunction(); } diff --git a/clang/test/CodeGenCoroutines/coro-alloc-2.cpp b/clang/test/CodeGenCoroutines/coro-alloc-2.cpp new file mode 100644 index 00000000000000..8f4b8c5241f606 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-alloc-2.cpp @@ -0,0 +1,30 @@ +// Tests that we wouldn't generate an allocation call in global scope with (std::size_t, p0, ..., pn) +// RUN: %clang_cc1 %s -std=c++20 -S -triple x86_64-unknown-linux-gnu -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s +#include "Inputs/coroutine.h" + +namespace std { +typedef decltype(sizeof(int)) size_t; +} + +struct Allocator {}; + +struct resumable { + struct promise_type { + + resumable get_return_object() { return {}; } + auto initial_suspend() { return std::suspend_always(); } + auto final_suspend() noexcept { return std::suspend_always(); } + void unhandled_exception() {} + void return_void(){}; + }; +}; + +void *operator new(std::size_t, void *); + +resumable f1(void *) { + co_return; +} + +// CHECK: coro.alloc: +// CHECK-NEXT: [[SIZE:%.+]] = call [[BITWIDTH:.+]] @llvm.coro.size.[[BITWIDTH]]() +// CHECK-NEXT: call {{.*}} ptr @_Znwm([[BITWIDTH]] noundef [[SIZE]]) From 80c4910f3d4c8b36120bcdb6670d15c693f0f0df Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Tue, 24 May 2022 22:43:37 -0400 Subject: [PATCH 426/908] Revert "[MachineSink] replace MachineLoop with 
MachineCycle" This reverts commit 62a9b36fcf728b104ea87e6eb84c0be69b779df7. Cause build failure on lldb incremental buildbot: https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/43994/changes --- llvm/include/llvm/ADT/GenericCycleImpl.h | 50 ----- llvm/include/llvm/ADT/GenericCycleInfo.h | 15 -- .../llvm/CodeGen/MachineCycleAnalysis.h | 35 ---- llvm/include/llvm/CodeGen/MachineSSAContext.h | 2 - llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 100 ++++----- llvm/lib/CodeGen/MachineSink.cpp | 190 +++++++++--------- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 - llvm/test/CodeGen/AArch64/loop-sink-limit.mir | 4 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 - llvm/test/CodeGen/ARM/O3-pipeline.ll | 1 - llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 - llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 - llvm/test/CodeGen/X86/opt-pipeline.ll | 1 - llvm/test/CodeGen/X86/pr38795.ll | 93 +++++---- llvm/test/CodeGen/X86/switch-phi-const.ll | 4 +- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 36 ++-- 16 files changed, 198 insertions(+), 340 deletions(-) diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index ea2847f8c8ee62..17f3a793c985b2 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -66,44 +66,6 @@ void GenericCycle::getExitBlocks( } } -template -auto GenericCycle::getCyclePreheader() const -> BlockT * { - BlockT *Predecessor = getCyclePredecessor(); - if (!Predecessor) - return nullptr; - - assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!"); - - if (succ_size(Predecessor) != 1) - return nullptr; - - // Make sure we are allowed to hoist instructions into the predecessor. - if (!Predecessor->isLegalToHoistInto()) - return nullptr; - - return Predecessor; -} - -template -auto GenericCycle::getCyclePredecessor() const -> BlockT * { - if (!isReducible()) - return nullptr; - - BlockT *Out = nullptr; - - // Loop over the predecessors of the header node... 
- BlockT *Header = getHeader(); - for (const auto Pred : predecessors(Header)) { - if (!contains(Pred)) { - if (Out && Out != Pred) - return nullptr; - Out = Pred; - } - } - - return Out; -} - /// \brief Helper class for computing cycle information. template class GenericCycleInfoCompute { using BlockT = typename ContextT::BlockT; @@ -364,18 +326,6 @@ auto GenericCycleInfo::getCycle(const BlockT *Block) const return nullptr; } -/// \brief get the depth for the cycle which containing a given block. -/// -/// \returns the depth for the innermost cycle containing \p Block or 0 if it is -/// not contained in any cycle. -template -unsigned GenericCycleInfo::getCycleDepth(const BlockT *Block) const { - CycleT *Cycle = getCycle(Block); - if (!Cycle) - return 0; - return Cycle->getDepth(); -} - #ifndef NDEBUG /// \brief Validate the internal consistency of the cycle tree. /// diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index 970664b8571544..5c2190411c1e04 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -100,10 +100,6 @@ template class GenericCycle { BlockT *getHeader() const { return Entries[0]; } - const SmallVectorImpl & getEntries() const { - return Entries; - } - /// \brief Return whether \p Block is an entry block of the cycle. bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); } @@ -128,16 +124,6 @@ template class GenericCycle { /// branched to. void getExitBlocks(SmallVectorImpl &TmpStorage) const; - /// Return the preheader block for this cycle. Pre-header is well-defined for - /// reducible cycle in docs/LoopTerminology.rst as: the only one entering - /// block and its only edge is to the entry block. Return null for irreducible - /// cycles. - BlockT *getCyclePreheader() const; - - /// If the cycle has exactly one entry with exactly one predecessor, return - /// it, otherwise return nullptr. 
- BlockT *getCyclePredecessor() const; - /// Iteration over child cycles. //@{ using const_child_iterator_base = @@ -253,7 +239,6 @@ template class GenericCycleInfo { const ContextT &getSSAContext() const { return Context; } CycleT *getCycle(const BlockT *Block) const; - unsigned getCycleDepth(const BlockT *Block) const; CycleT *getTopLevelParentCycle(const BlockT *Block) const; /// Move \p Child to \p NewParent by manipulating Children vectors. diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h index bdcf64a2075337..54d9860e54c9a5 100644 --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -16,8 +16,6 @@ #include "llvm/ADT/GenericCycleInfo.h" #include "llvm/CodeGen/MachineSSAContext.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/InitializePasses.h" namespace llvm { @@ -27,39 +25,6 @@ extern template class GenericCycle; using MachineCycleInfo = GenericCycleInfo; using MachineCycle = MachineCycleInfo::CycleT; -/// Legacy analysis pass which computes a \ref MachineCycleInfo. -class MachineCycleInfoWrapperPass : public MachineFunctionPass { - MachineFunction *F = nullptr; - MachineCycleInfo CI; - -public: - static char ID; - - MachineCycleInfoWrapperPass(); - - MachineCycleInfo &getCycleInfo() { return CI; } - const MachineCycleInfo &getCycleInfo() const { return CI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; -}; - -class MachineCycleInfoPrinterPass : public MachineFunctionPass { -public: - static char ID; - - MachineCycleInfoPrinterPass(); - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; - -// TODO: add this function to GenericCycle template after implementing IR -// version. 
-bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I); - } // end namespace llvm #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h index f59d7cf8a52295..c945b8fefc80d3 100644 --- a/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -28,8 +28,6 @@ template class DominatorTreeBase; inline auto successors(MachineBasicBlock *BB) { return BB->successors(); } inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); } -inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); } -inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); } template <> class GenericSSAContext { const MachineRegisterInfo *RegInfo = nullptr; diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index be171c2aa409dc..42a5e2b7af01ba 100644 --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -8,15 +8,50 @@ #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/InitializePasses.h" using namespace llvm; template class llvm::GenericCycleInfo; template class llvm::GenericCycle; +namespace { + +/// Legacy analysis pass which computes a \ref MachineCycleInfo. 
+class MachineCycleInfoWrapperPass : public MachineFunctionPass { + MachineFunction *F = nullptr; + MachineCycleInfo CI; + +public: + static char ID; + + MachineCycleInfoWrapperPass(); + + MachineCycleInfo &getCycleInfo() { return CI; } + const MachineCycleInfo &getCycleInfo() const { return CI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + + // TODO: verify analysis +}; + +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +} // namespace + char MachineCycleInfoWrapperPass::ID = 0; MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() @@ -76,62 +111,3 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) { CI.print(errs()); return false; } - -bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { - MachineFunction *MF = I.getParent()->getParent(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - const TargetSubtargetInfo &ST = MF->getSubtarget(); - const TargetRegisterInfo *TRI = ST.getRegisterInfo(); - const TargetInstrInfo *TII = ST.getInstrInfo(); - - // The instruction is cycle invariant if all of its operands are. - for (const MachineOperand &MO : I.operands()) { - if (!MO.isReg()) - continue; - - Register Reg = MO.getReg(); - if (Reg == 0) - continue; - - // An instruction that uses or defines a physical register can't e.g. be - // hoisted, so mark this as not invariant. - if (Register::isPhysicalRegister(Reg)) { - if (MO.isUse()) { - // If the physreg has no defs anywhere, it's just an ambient register - // and we can freely move its uses. 
Alternatively, if it's allocatable, - // it could get allocated to something with a def during allocation. - // However, if the physreg is known to always be caller saved/restored - // then this use is safe to hoist. - if (!MRI->isConstantPhysReg(Reg) && - !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && - !TII->isIgnorableUse(MO)) - return false; - // Otherwise it's safe to move. - continue; - } else if (!MO.isDead()) { - // A def that isn't dead can't be moved. - return false; - } else if (any_of(Cycle->getEntries(), - [&](const MachineBasicBlock *Block) { - return Block->isLiveIn(Reg); - })) { - // If the reg is live into any header of the cycle we can't hoist an - // instruction which would clobber it. - return false; - } - } - - if (!MO.isUse()) - continue; - - assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); - - // If the cycle contains the definition of an operand, then the instruction - // isn't cycle invariant. - if (Cycle->contains(MRI->getVRegDef(Reg)->getParent())) - return false; - } - - // If we got this far, the instruction is cycle invariant! 
- return true; -} diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 797433e7a2809c..966b012725d81f 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -96,18 +95,18 @@ static cl::opt SinkLoadBlocksThreshold( cl::init(20), cl::Hidden); static cl::opt - SinkInstsIntoCycle("sink-insts-to-avoid-spills", - cl::desc("Sink instructions into cycles to avoid " - "register spills"), - cl::init(false), cl::Hidden); - -static cl::opt SinkIntoCycleLimit( - "machine-sink-cycle-limit", - cl::desc("The maximum number of instructions considered for cycle sinking."), +SinkInstsIntoLoop("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into loops to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt SinkIntoLoopLimit( + "machine-sink-loop-limit", + cl::desc("The maximum number of instructions considered for loop sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); +STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -120,7 +119,7 @@ namespace { MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree - MachineCycleInfo *CI; + MachineLoopInfo *LI; MachineBlockFrequencyInfo *MBFI; const 
MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -181,9 +180,8 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -234,9 +232,9 @@ namespace { MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, - SmallVectorImpl &Candidates); - bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); + void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, + SmallVectorImpl &Candidates); + bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -263,7 +261,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) @@ -380,27 +378,26 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { return false; } -void MachineSinking::FindCycleSinkCandidates( - MachineCycle *Cycle, MachineBasicBlock *BB, +void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); + LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " "target\n"); continue; } - if (!isCycleInvariant(Cycle, MI)) { - 
LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); + if (!L->isLoopInvariant(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) @@ -412,7 +409,7 @@ void MachineSinking::FindCycleSinkCandidates( if (!MRI->hasOneDef(MO.getReg())) continue; - LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } @@ -428,12 +425,22 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); - CI = &getAnalysis().getCycleInfo(); + LI = &getAnalysis(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + // MachineSink currently uses MachineLoopInfo, which only recognizes natural + // loops. As such, we could sink instructions into irreducible cycles, which + // would be non-profitable. + // WARNING: The current implementation of hasStoreBetween() is incorrect for + // sinking into irreducible cycles (PR53990), this bailout is currently + // necessary for correctness, not just profitability. 
+ ReversePostOrderTraversal RPOT(&*MF.begin()); + if (containsIrreducibleCFG(RPOT, *LI)) + return false; + bool EverMadeChange = false; while (true) { @@ -466,33 +473,32 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } - if (SinkInstsIntoCycle) { - SmallVector Cycles(CI->toplevel_begin(), - CI->toplevel_end()); - for (auto *Cycle : Cycles) { - MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + if (SinkInstsIntoLoop) { + SmallVector Loops(LI->begin(), LI->end()); + for (auto *L : Loops) { + MachineBasicBlock *Preheader = LI->findLoopPreheader(L); if (!Preheader) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); continue; } SmallVector Candidates; - FindCycleSinkCandidates(Cycle, Preheader, Candidates); + FindLoopSinkCandidates(L, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoCycleLimit) { - LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " + if (i++ == SinkIntoLoopLimit) { + LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " "be analysed."); break; } - if (!SinkIntoCycle(Cycle, *I)) + if (!SinkIntoLoop(L, *I)) break; EverMadeChange = true; - ++NumCycleSunk; + ++NumLoopSunk; } } } @@ -514,12 +520,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an - // unreachable cycle there may be nowhere to stop. + // unreachable loop there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; - // Cache all successors, sorted by frequency info and cycle depth. 
+ // Cache all successors, sorted by frequency info and loop depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. @@ -638,16 +644,13 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; - // Avoid breaking back edge. From == To means backedge for single BB cycle. + // Avoid breaking back edge. From == To means backedge for single BB loop. if (!SplitEdges || FromBB == ToBB) return false; - MachineCycle *FromCycle = CI->getCycle(FromBB); - MachineCycle *ToCycle = CI->getCycle(ToBB); - - // Check for backedges of more "complex" cycles. - if (FromCycle == ToCycle && FromCycle && - (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB)) + // Check for backedges of more "complex" loops. + if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && + LI->isLoopHeader(ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -750,9 +753,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (!PDT->dominates(SuccToSinkTo, MBB)) return true; - // It is profitable to sink an instruction from a deeper cycle to a shallower - // cycle, even if the latter post-dominates the former (PR21115). - if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo)) + // It is profitable to sink an instruction from a deeper loop to a shallower + // loop, even if the latter post-dominates the former (PR21115). + if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. 
@@ -773,11 +776,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - MachineCycle *MCycle = CI->getCycle(MBB); + MachineLoop *ML = LI->getLoopFor(MBB); - // If the instruction is not inside a cycle, it is not profitable to sink MI to + // If the instruction is not inside a loop, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. - if (!MCycle) + if (!ML) return false; auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { @@ -795,7 +798,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, return false; }; - // If this instruction is inside a Cycle and sinking this instruction can make + // If this instruction is inside a loop and sinking this instruction can make // more registers live range shorten, it is still prifitable. for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. @@ -823,15 +826,14 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, return false; } else { MachineInstr *DefMI = MRI->getVRegDef(Reg); - MachineCycle *Cycle = CI->getCycle(DefMI->getParent()); - // DefMI is defined outside of cycle. There should be no live range - // impact for this operand. Defination outside of cycle means: - // 1: defination is outside of cycle. - // 2: defination is in this cycle, but it is a PHI in the cycle header. - if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() && - Cycle->getHeader() == DefMI->getParent())) + // DefMI is defined outside of loop. There should be no live range + // impact for this operand. Defination outside of loop means: + // 1: defination is outside of loop. + // 2: defination is in this loop, but it is a PHI in the loop header. 
+ if (LI->getLoopFor(DefMI->getParent()) != ML || + (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) continue; - // The DefMI is defined inside the cycle. + // The DefMI is defined inside the loop. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { @@ -841,8 +843,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, } } - // If MI is in cycle and all its operands are alive across the whole cycle or - // if no operand sinking make register pressure set exceed limit, it is + // If MI is in loop and all its operands are alive across the whole loop or if + // no operand sinking make register pressure set exceed limit, it is // profitable to sink MI. return true; } @@ -874,14 +876,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccs.push_back(DTChild->getBlock()); } - // Sort Successors according to their cycle depth or block frequency info. + // Sort Successors according to their loop depth or block frequency info. llvm::stable_sort( AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? LHSFreq < RHSFreq - : CI->getCycleDepth(L) < CI->getCycleDepth(R); + : LI->getLoopDepth(L) < LI->getLoopDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); @@ -896,7 +898,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); - // loop over all the operands of the specified instruction. If there is + // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. 
// SuccToSinkTo - This is the successor to sink this instruction to, once we @@ -943,7 +945,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies - // higher priority, otherwise prioritize smaller cycle depths. + // higher priority, otherwise prioritize smaller loop depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; @@ -966,7 +968,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, } // It is not possible to sink an instruction into its own block. This can - // happen with cycles. + // happen with loops. if (MBB == SuccToSinkTo) return nullptr; @@ -1220,70 +1222,68 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Sink instructions into cycles if profitable. This especially tries to -/// prevent register spills caused by register pressure if there is little to no -/// overhead moving instructions into cycles. -bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); - assert(Preheader && "Cycle sink needs a preheader block"); +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. 
+bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + assert(Preheader && "Loop sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); - if (!Cycle->contains(MI.getParent())) { - LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); + if (!L->contains(&MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every cycle + // the instruction (and thus possibly executing it on every loop // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. 
if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " + LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); CanSink = false; break; } - LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << + LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { - LLVM_DEBUG( - dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); return false; } if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { - LLVM_DEBUG( - dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); return false; } - LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); + LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, I); @@ -1407,11 +1407,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, TryBreak = true; } - // Don't sink instructions into a cycle. 
- if (!TryBreak && CI->getCycle(SuccToSinkTo) && - (!CI->getCycle(SuccToSinkTo)->isReducible() || - CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { - LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n"); + // Don't sink instructions into a loop. + if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { + LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); TryBreak = true; } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 2f205f9ab37356..0f532f4fe3240d 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -132,7 +132,6 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir index 0a886925ffab49..bdb99f296fcabe 100644 --- a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir +++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir @@ -1,10 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-cycle-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK1 # # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-cycle-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK2 --- | diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 684774b18308e0..e23bea234be9fa 
100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -296,7 +296,6 @@ ; GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Common Subexpression Elimination ; GCN-O1-NEXT: MachinePostDominator Tree Construction -; GCN-O1-NEXT: Machine Cycle Info Analysis ; GCN-O1-NEXT: Machine code sinking ; GCN-O1-NEXT: Peephole Optimizations ; GCN-O1-NEXT: Remove dead machine instructions @@ -575,7 +574,6 @@ ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Common Subexpression Elimination ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction -; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Machine code sinking ; GCN-O1-OPTS-NEXT: Peephole Optimizations ; GCN-O1-OPTS-NEXT: Remove dead machine instructions @@ -863,7 +861,6 @@ ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Common Subexpression Elimination ; GCN-O2-NEXT: MachinePostDominator Tree Construction -; GCN-O2-NEXT: Machine Cycle Info Analysis ; GCN-O2-NEXT: Machine code sinking ; GCN-O2-NEXT: Peephole Optimizations ; GCN-O2-NEXT: Remove dead machine instructions @@ -1164,7 +1161,6 @@ ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Common Subexpression Elimination ; GCN-O3-NEXT: MachinePostDominator Tree Construction -; GCN-O3-NEXT: Machine Cycle Info Analysis ; GCN-O3-NEXT: Machine code sinking ; GCN-O3-NEXT: Peephole Optimizations ; GCN-O3-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 098422a4a770d5..d2507bf6365b6d 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -91,7 +91,6 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: 
Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index eff5184e9c84c8..d67fed77d35699 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -111,7 +111,6 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 21b8b8f7842b18..89b14bef9bce3e 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -90,7 +90,6 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 8552ebc348cbb9..e83266f72e488f 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -107,7 +107,6 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index d805dcad8b6e64..b526e4f471b1a2 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,13 +32,14 @@ define 
dso_local void @fn() { ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_16: # %for.inc +; CHECK-NEXT: .LBB0_15: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_20 Depth 2 +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -55,7 +56,7 @@ define dso_local void @fn() { ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_8 +; CHECK-NEXT: jg .LBB0_9 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -64,12 +65,10 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: jne .LBB0_15 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -78,82 +77,82 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_9: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_10 +; 
CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_7 -; CHECK-NEXT: .LBB0_11: # %af +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: movl $0, %edi +; CHECK-NEXT: movb %cl, %dl +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.7: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_12 -; CHECK-NEXT: .LBB0_17: # %if.end39 +; CHECK-NEXT: jne .LBB0_8 +; CHECK-NEXT: .LBB0_16: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.18: # %if.then41 +; CHECK-NEXT: je .LBB0_18 +; CHECK-NEXT: # %bb.17: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_19: # %for.end46 +; CHECK-NEXT: .LBB0_18: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_20 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_8: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb %dl, %dh -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_20: # %for.cond47 +; CHECK-NEXT: .LBB0_19: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: # %bb.21: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 +; 
CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: # %bb.20: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: .LBB0_9: # %ae +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: .LBB0_10: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: # %bb.12: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: # %bb.14: # %if.end26 +; CHECK-NEXT: je .LBB0_15 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_16 -; CHECK-NEXT: # %bb.15: # %if.then31 +; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: # %bb.14: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_16 +; CHECK-NEXT: jmp .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_17 -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: jmp .LBB0_6 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll index 981a60d09545f7..e28169217c01e7 100644 --- 
a/llvm/test/CodeGen/X86/switch-phi-const.ll +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -93,12 +93,12 @@ define void @switch_trunc_phi_const(i32 %x) { ; CHECK: # %bb.0: # %bb0 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: movzbl %dil, %ecx +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 ; CHECK-NEXT: decl %ecx ; CHECK-NEXT: cmpl $54, %ecx ; CHECK-NEXT: ja .LBB1_8 ; CHECK-NEXT: # %bb.1: # %bb0 -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 ; CHECK-NEXT: jmpq *.LJTI1_0(,%rcx,8) ; CHECK-NEXT: .LBB1_8: # %default ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index 0f8bb837f82a51..b44895293b4117 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,6 +1377,8 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1386,24 +1388,20 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_3 -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi +; ENABLE-NEXT: je LBB16_4 +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something -; ENABLE-NEXT: jmp LBB16_5 -; ENABLE-NEXT: LBB16_3: -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_5: ## %for.inc +; ENABLE-NEXT: LBB16_4: ## %for.inc ; ENABLE-NEXT: 
## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_5 -; ENABLE-NEXT: ## %bb.6: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_4 +; ENABLE-NEXT: ## %bb.5: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1420,6 +1418,8 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,24 +1429,20 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_3 -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi +; DISABLE-NEXT: je LBB16_4 +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something -; DISABLE-NEXT: jmp LBB16_5 -; DISABLE-NEXT: LBB16_3: -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_5: ## %for.inc +; DISABLE-NEXT: LBB16_4: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_5 -; DISABLE-NEXT: ## %bb.6: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_4 +; DISABLE-NEXT: ## %bb.5: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx From b06049bc3b79be4a037a60dec4ad19a68e196a8f Mon Sep 17 00:00:00 2001 From: Konstantin Varlamov Date: Tue, 24 May 2022 19:56:02 -0700 Subject: [PATCH 427/908] [libc++][NFC] Add more tests to `move_{iterator,sentinel}`. 
More test coverage for the parts added by the One Ranges Proposal. Differential Revision: https://reviews.llvm.org/D124906 --- libcxx/include/__iterator/move_iterator.h | 15 +- libcxx/include/__iterator/move_sentinel.h | 2 +- libcxx/include/iterator | 60 +++++-- .../move.iter.nonmember/iter_move.pass.cpp | 89 ++++++++++ .../move.iter.nonmember/iter_swap.pass.cpp | 90 ++++++++++ .../move.iter.op.-/sentinel.pass.cpp | 99 +++++++++++ .../move.iter.op.const/default.pass.cpp | 33 ++-- .../move.iter.op.conv/base.pass.cpp | 117 +++++++++++++ .../tested_elsewhere.pass.cpp | 13 -- .../move.iter.op.incr/post.pass.cpp | 54 ++++-- .../difference_type.pass.cpp | 20 ++- .../move.iter.op.star/op_star.pass.cpp | 41 +++-- .../move.iterator/types.pass.cpp | 161 +++++++++++------- .../move.sentinel/assign.converting.pass.cpp | 7 +- .../move.sentinel/base.pass.cpp | 3 + .../concept_conformance.compile.pass.cpp | 15 ++ .../move.sentinel/ctor.converting.pass.cpp | 5 + .../move.sentinel/ctor.default.pass.cpp | 7 + .../move.sentinel/ctor.sentinel.pass.cpp | 5 + .../move.sentinel/op_eq.pass.cpp | 42 ++--- .../reverse.iter.nonmember/iter_move.pass.cpp | 45 +---- .../reverse.iter.nonmember/iter_swap.pass.cpp | 52 +----- .../reverse.iterators/types.compile.pass.cpp | 2 + .../range.lazy.split.inner/iter_move.pass.cpp | 34 ++-- .../range.lazy.split.inner/iter_swap.pass.cpp | 42 +++-- .../ranges_uninitialized_move.pass.cpp | 47 +---- .../ranges_uninitialized_move_n.pass.cpp | 49 +----- libcxx/test/support/test_iterators.h | 71 ++++++++ 28 files changed, 856 insertions(+), 364 deletions(-) create mode 100644 libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp create mode 100644 libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp create mode 100644 libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.-/sentinel.pass.cpp create mode 100644 
libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/base.pass.cpp delete mode 100644 libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/tested_elsewhere.pass.cpp diff --git a/libcxx/include/__iterator/move_iterator.h b/libcxx/include/__iterator/move_iterator.h index 5beb8319d1728c..e0c7e73a9cc3b3 100644 --- a/libcxx/include/__iterator/move_iterator.h +++ b/libcxx/include/__iterator/move_iterator.h @@ -87,10 +87,13 @@ class _LIBCPP_TEMPLATE_VIS move_iterator >::type reference; #endif // _LIBCPP_STD_VER > 17 -#if _LIBCPP_STD_VER > 17 - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14 explicit move_iterator(_Iter __i) : __current_(std::move(__i)) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator& operator++() { ++__current_; return *this; } + +#if _LIBCPP_STD_VER > 17 _LIBCPP_HIDE_FROM_ABI constexpr move_iterator() requires is_constructible_v<_Iter> : __current_() {} @@ -117,9 +120,6 @@ class _LIBCPP_TEMPLATE_VIS move_iterator _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](difference_type __n) const { return ranges::iter_move(__current_ + __n); } - _LIBCPP_HIDE_FROM_ABI constexpr - move_iterator& operator++() { ++__current_; return *this; } - _LIBCPP_HIDE_FROM_ABI constexpr auto operator++(int) requires forward_iterator<_Iter> @@ -130,9 +130,6 @@ class _LIBCPP_TEMPLATE_VIS move_iterator _LIBCPP_HIDE_FROM_ABI constexpr void operator++(int) { ++__current_; } #else - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14 - explicit move_iterator(_Iter __i) : __current_(std::move(__i)) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14 move_iterator() : __current_() {} @@ -163,8 +160,6 @@ class _LIBCPP_TEMPLATE_VIS move_iterator _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14 reference operator[](difference_type __n) const { return static_cast(__current_[__n]); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14 - 
move_iterator& operator++() { ++__current_; return *this; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14 move_iterator operator++(int) { move_iterator __tmp(*this); ++__current_; return __tmp; } #endif // _LIBCPP_STD_VER > 17 diff --git a/libcxx/include/__iterator/move_sentinel.h b/libcxx/include/__iterator/move_sentinel.h index abacc91be84240..5adf877b3490e8 100644 --- a/libcxx/include/__iterator/move_sentinel.h +++ b/libcxx/include/__iterator/move_sentinel.h @@ -31,7 +31,7 @@ class _LIBCPP_TEMPLATE_VIS move_sentinel move_sentinel() = default; _LIBCPP_HIDE_FROM_ABI constexpr - explicit move_sentinel(_Sent __s) : __last_(_VSTD::move(__s)) {} + explicit move_sentinel(_Sent __s) : __last_(std::move(__s)) {} template requires convertible_to diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 1f0390e83a59f1..e98372527cc026 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -386,12 +386,13 @@ constexpr insert_iterator inserter(Container& x, ranges::iterator_t class move_iterator { public: - typedef Iterator iterator_type; - typedef typename iterator_traits::difference_type difference_type; - typedef Iterator pointer; - typedef typename iterator_traits::value_type value_type; - typedef typename iterator_traits::iterator_category iterator_category; - typedef value_type&& reference; + using iterator_type = Iterator; + using iterator_concept = input_iterator_tag; // From C++20 + using iterator_category = see below; // not always present starting from C++20 + using value_type = iter_value_t; // Until C++20, iterator_traits::value_type + using difference_type = iter_difference_t; // Until C++20, iterator_traits::difference_type; + using pointer = Iterator; + using reference = iter_rvalue_reference_t; // Until C++20, value_type&& constexpr move_iterator(); // all the constexprs are in C++17 constexpr explicit move_iterator(Iterator i); @@ -399,18 +400,40 @@ public: constexpr move_iterator(const move_iterator& u); template constexpr 
move_iterator& operator=(const move_iterator& u); - constexpr iterator_type base() const; + + constexpr iterator_type base() const; // Until C++20 + constexpr const Iterator& base() const & noexcept; // From C++20 + constexpr Iterator base() &&; // From C++20 + constexpr reference operator*() const; - constexpr pointer operator->() const; + constexpr pointer operator->() const; // Removed in C++20 constexpr move_iterator& operator++(); - constexpr move_iterator operator++(int); + constexpr auto operator++(int); // Return type was move_iterator until C++20 constexpr move_iterator& operator--(); constexpr move_iterator operator--(int); constexpr move_iterator operator+(difference_type n) const; constexpr move_iterator& operator+=(difference_type n); constexpr move_iterator operator-(difference_type n) const; constexpr move_iterator& operator-=(difference_type n); - constexpr unspecified operator[](difference_type n) const; + constexpr reference operator[](difference_type n) const; // Return type unspecified until C++20 + + template S> + friend constexpr bool + operator==(const move_iterator& x, const move_sentinel& y); // Since C++20 + template S> + friend constexpr iter_difference_t + operator-(const move_sentinel& x, const move_iterator& y); // Since C++20 + template S> + friend constexpr iter_difference_t + operator-(const move_iterator& x, const move_sentinel& y); // Since C++20 + friend constexpr iter_rvalue_reference_t + iter_move(const move_iterator& i) + noexcept(noexcept(ranges::iter_move(i.current))); // Since C++20 + template Iterator2> + friend constexpr void + iter_swap(const move_iterator& x, const move_iterator& y) + noexcept(noexcept(ranges::iter_swap(x.current, y.current))); // Since C++20 + private: Iterator current; // exposition only }; @@ -452,6 +475,23 @@ constexpr move_iterator operator+( // constexpr in C++17 template // constexpr in C++17 constexpr move_iterator make_move_iterator(const Iterator& i); +template +class move_sentinel { +public: 
+ constexpr move_sentinel(); + constexpr explicit move_sentinel(S s); + template + requires convertible_to + constexpr move_sentinel(const move_sentinel& s); + template + requires assignable_from + constexpr move_sentinel& operator=(const move_sentinel& s); + + constexpr S base() const; +private: + S last; // exposition only +}; + // [default.sentinel], default sentinel struct default_sentinel_t; inline constexpr default_sentinel_t default_sentinel{}; diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp new file mode 100644 index 00000000000000..a9e6d17720ab3c --- /dev/null +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp @@ -0,0 +1,89 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// +// +// friend constexpr iter_rvalue_reference_t +// iter_move(const move_iterator& i) +// noexcept(noexcept(ranges::iter_move(i.current))); // Since C++20 + +#include + +#include +#include +#include +#include "test_iterators.h" +#include "test_macros.h" + +int global; + +template +struct MaybeNoexceptMove { + int x; + using value_type = int; + using difference_type = ptrdiff_t; + + constexpr friend value_type&& iter_move(MaybeNoexceptMove) noexcept(IsNoexcept) { + return std::move(global); + } + + int& operator*() const { static int a; return a; } + + MaybeNoexceptMove& operator++(); + MaybeNoexceptMove operator++(int); +}; +using ThrowingBase = MaybeNoexceptMove; +using NoexceptBase = MaybeNoexceptMove; +static_assert(std::input_iterator); +ASSERT_NOT_NOEXCEPT(std::ranges::iter_move(std::declval())); +ASSERT_NOEXCEPT(std::ranges::iter_move(std::declval())); + +constexpr bool test() { + // Can use `iter_move` with a regular array. + { + int a[] = {0, 1, 2}; + + std::move_iterator i(a); + static_assert(std::same_as); + assert(iter_move(i) == 0); + + ++i; + assert(iter_move(i) == 1); + } + + // Check that the `iter_move` customization point is being used. + { + int a[] = {0, 1, 2}; + + int iter_move_invocations = 0; + adl::Iterator base = adl::Iterator::TrackMoves(a, iter_move_invocations); + std::move_iterator i(base); + int x = iter_move(i); + assert(x == 0); + assert(iter_move_invocations == 1); + } + + // Check the `noexcept` specification. 
+ { + using ThrowingIter = std::move_iterator; + ASSERT_NOT_NOEXCEPT(iter_move(std::declval())); + using NoexceptIter = std::move_iterator; + ASSERT_NOEXCEPT(iter_move(std::declval())); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp new file mode 100644 index 00000000000000..075930dcb0a073 --- /dev/null +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// +// +// template Iterator2> +// friend constexpr void +// iter_swap(const move_iterator& x, const move_iterator& y) +// noexcept(noexcept(ranges::iter_swap(x.current, y.current))); // Since C++20 + +#include + +#include +#include +#include +#include "test_iterators.h" +#include "test_macros.h" + +template +struct MaybeNoexceptSwap { + using value_type = int; + using difference_type = ptrdiff_t; + + constexpr friend void iter_swap(MaybeNoexceptSwap, MaybeNoexceptSwap) noexcept(IsNoexcept) { + } + + int& operator*() const { static int x; return x; } + + MaybeNoexceptSwap& operator++(); + MaybeNoexceptSwap operator++(int); +}; +using ThrowingBase = MaybeNoexceptSwap; +using NoexceptBase = MaybeNoexceptSwap; +static_assert(std::input_iterator); +ASSERT_NOT_NOEXCEPT(std::ranges::iter_swap(std::declval(), std::declval())); 
+ASSERT_NOEXCEPT(std::ranges::iter_swap(std::declval(), std::declval())); + +constexpr bool test() { + // Can use `iter_swap` with a regular array. + { + int a[] = {0, 1, 2}; + + std::move_iterator b(a); + std::move_iterator e(a + 2); + assert(a[0] == 0); + assert(a[2] == 2); + + static_assert(std::same_as); + iter_swap(b, e); + assert(a[0] == 2); + assert(a[2] == 0); + } + + // Check that the `iter_swap` customization point is being used. + { + int iter_swap_invocations = 0; + adl::Iterator base1 = adl::Iterator::TrackSwaps(iter_swap_invocations); + adl::Iterator base2 = adl::Iterator::TrackSwaps(iter_swap_invocations); + std::move_iterator i1(base1), i2(base2); + iter_swap(i1, i2); + assert(iter_swap_invocations == 1); + + iter_swap(i2, i1); + assert(iter_swap_invocations == 2); + } + + // Check the `noexcept` specification. + { + using ThrowingIter = std::move_iterator; + ASSERT_NOT_NOEXCEPT(iter_swap(std::declval(), std::declval())); + using NoexceptIter = std::move_iterator; + ASSERT_NOEXCEPT(iter_swap(std::declval(), std::declval())); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.-/sentinel.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.-/sentinel.pass.cpp new file mode 100644 index 00000000000000..d84a38c8c6e0b0 --- /dev/null +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.-/sentinel.pass.cpp @@ -0,0 +1,99 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// + +// move_iterator + +// template S> +// friend constexpr iter_difference_t +// operator-(const move_sentinel& x, const move_iterator& y); // Since C++20 +// template S> +// friend constexpr iter_difference_t +// operator-(const move_iterator& x, const move_sentinel& y); // Since C++20 + +#include +#include + +#include "test_macros.h" +#include "test_iterators.h" + +// The `operator-` calls the underlying iterator and sentinel's `operator-`. + +struct CustomIt { + using value_type = int; + using difference_type = int; + using reference = int&; + using pointer = int*; + using iterator_category = std::input_iterator_tag; + + CustomIt() = default; + TEST_CONSTEXPR_CXX17 explicit CustomIt(int* p) : p_(p) {} + int& operator*() const; + CustomIt& operator++(); + CustomIt operator++(int); + constexpr friend difference_type operator-(const CustomIt& a, const CustomIt& b) { return a.p_ - b.p_; } + int *p_ = nullptr; +}; + +template > +constexpr void test_one() { + int arr[] = {3, 1, 4}; + + const std::move_iterator it_a{It(arr)}; + const std::move_iterator it_b{It(arr + 1)}; + + const std::move_sentinel sent_a{Sent(It(arr))}; + const std::move_sentinel sent_b{Sent(It(arr + 1))}; + const std::move_sentinel sent_c{Sent(It(arr + 2))}; + + ASSERT_SAME_TYPE(decltype(it_a - sent_a), std::iter_difference_t); + ASSERT_SAME_TYPE(decltype(sent_a - it_a), std::iter_difference_t); + + // it_a + assert(it_a - sent_a == 0); + assert(sent_a - it_a == 0); + + assert(it_a - sent_b == -1); + assert(sent_b - it_a == 1); + + assert(it_a - sent_c == -2); + assert(sent_c - it_a == 2); + + // it_b + assert(it_b - sent_a == 1); + assert(sent_a - it_b == -1); + + assert(it_b - sent_b == 0); + assert(sent_b - it_b == 0); + + assert(it_b - sent_c == -1); + assert(sent_c - it_b == 1); +} + +constexpr bool 
test() { + test_one(); + test_one>(); + test_one>(); + test_one>(); + test_one>(); + test_one(); + test_one(); + test_one>(); + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp index 04d3d6e1866a1a..d095182c560d83 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp @@ -13,33 +13,42 @@ // move_iterator(); // // constexpr in C++17 +// +// requires the underlying iterator to be default-constructible (extension). #include +#include #include "test_macros.h" #include "test_iterators.h" +#if TEST_STD_VER > 17 +struct NoDefaultCtr : forward_iterator { + NoDefaultCtr() = delete; +}; + +LIBCPP_STATIC_ASSERT( std::is_default_constructible_v>>); +LIBCPP_STATIC_ASSERT(!std::is_default_constructible_v>); +#endif + template -void -test() -{ +void test() { std::move_iterator r; (void)r; } -int main(int, char**) -{ - // we don't have a test iterator that is both input and default-constructible, so not testing that case - test >(); - test >(); - test >(); - test(); +int main(int, char**) { + // we don't have a test iterator that is both input and default-constructible, so not testing that case + test >(); + test >(); + test >(); + test(); #if TEST_STD_VER > 14 - { + { constexpr std::move_iterator it; (void)it; - } + } #endif return 0; diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/base.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/base.pass.cpp new file mode 100644 index 00000000000000..6e348b59be16bd --- /dev/null +++ 
b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/base.pass.cpp @@ -0,0 +1,117 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// +// constexpr iterator_type base() const; // Until C++20 +// constexpr const Iterator& base() const & noexcept; // From C++20 +// constexpr Iterator base() &&; // From C++20 + +#include + +#include +#include "test_iterators.h" +#include "test_macros.h" + +struct MoveOnlyIterator { + using It = int*; + + It it_; + + using iterator_category = std::input_iterator_tag; + using value_type = int; + using difference_type = std::ptrdiff_t; + using reference = int&; + + TEST_CONSTEXPR explicit MoveOnlyIterator(It it) : it_(it) {} + MoveOnlyIterator(MoveOnlyIterator&&) = default; + MoveOnlyIterator& operator=(MoveOnlyIterator&&) = default; + MoveOnlyIterator(const MoveOnlyIterator&) = delete; + MoveOnlyIterator& operator=(const MoveOnlyIterator&) = delete; + + TEST_CONSTEXPR reference operator*() const { return *it_; } + + TEST_CONSTEXPR_CXX14 MoveOnlyIterator& operator++() { ++it_; return *this; } + TEST_CONSTEXPR_CXX14 MoveOnlyIterator operator++(int) { return MoveOnlyIterator(it_++); } + + friend TEST_CONSTEXPR bool operator==(const MoveOnlyIterator& x, const MoveOnlyIterator& y) {return x.it_ == y.it_;} + friend TEST_CONSTEXPR bool operator!=(const MoveOnlyIterator& x, const MoveOnlyIterator& y) {return x.it_ != y.it_;} + + friend TEST_CONSTEXPR It base(const MoveOnlyIterator& i) { return i.it_; } +}; + +#if TEST_STD_VER > 17 +static_assert( std::input_iterator); +#endif +static_assert(!std::is_copy_constructible::value, ""); + +template +TEST_CONSTEXPR_CXX14 void 
test_one() { + // Non-const lvalue. + { + int a[] = {1, 2, 3}; + + auto i = std::move_iterator(It(a)); +#if TEST_STD_VER > 17 + ASSERT_SAME_TYPE(decltype(i.base()), const It&); + ASSERT_NOEXCEPT(i.base()); +#else + ASSERT_SAME_TYPE(decltype(i.base()), It); +#endif + assert(i.base() == It(a)); + + ++i; + assert(i.base() == It(a + 1)); + } + + // Const lvalue. + { + int a[] = {1, 2, 3}; + + const auto i = std::move_iterator(It(a)); +#if TEST_STD_VER > 17 + ASSERT_SAME_TYPE(decltype(i.base()), const It&); + ASSERT_NOEXCEPT(i.base()); +#else + ASSERT_SAME_TYPE(decltype(i.base()), It); +#endif + assert(i.base() == It(a)); + } + + // Rvalue. + { + int a[] = {1, 2, 3}; + + auto i = std::move_iterator(It(a)); + ASSERT_SAME_TYPE(decltype(std::move(i).base()), It); + assert(std::move(i).base() == It(a)); + } +} + +TEST_CONSTEXPR_CXX14 bool test() { + test_one >(); + test_one >(); + test_one >(); + test_one >(); + test_one(); + test_one(); +#if TEST_STD_VER > 17 + test_one>(); +#endif + + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER > 14 + static_assert(test()); +#endif + + return 0; +} diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/tested_elsewhere.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/tested_elsewhere.pass.cpp deleted file mode 100644 index 1f764da05d6b5f..00000000000000 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.conv/tested_elsewhere.pass.cpp +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -int main(int, char**) -{ - - return 0; -} diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.incr/post.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.incr/post.pass.cpp index a33bb08fa0979a..e2b831b717b906 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.incr/post.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.incr/post.pass.cpp @@ -10,9 +10,7 @@ // move_iterator -// move_iterator operator++(int); -// -// constexpr in C++17 +// constexpr auto operator++(int); // Return type was move_iterator until C++20 #include #include @@ -31,30 +29,27 @@ void test_single_pass(It i, It x) { #endif template -void -test(It i, It x) -{ +void test(It i, It x) { std::move_iterator r(i); std::move_iterator rr = r++; assert(r.base() == x); assert(rr.base() == i); } -int main(int, char**) -{ - char s[] = "123"; +int main(int, char**) { + char s[] = "123"; #if TEST_STD_VER > 17 - test_single_pass(cpp17_input_iterator(s), cpp17_input_iterator(s + 1)); + test_single_pass(cpp17_input_iterator(s), cpp17_input_iterator(s + 1)); #else - test(cpp17_input_iterator(s), cpp17_input_iterator(s+1)); + test(cpp17_input_iterator(s), cpp17_input_iterator(s+1)); #endif - test(forward_iterator(s), forward_iterator(s+1)); - test(bidirectional_iterator(s), bidirectional_iterator(s+1)); - test(random_access_iterator(s), random_access_iterator(s+1)); - test(s, s+1); + test(forward_iterator(s), forward_iterator(s+1)); + test(bidirectional_iterator(s), bidirectional_iterator(s+1)); + test(random_access_iterator(s), random_access_iterator(s+1)); + test(s, s+1); #if TEST_STD_VER > 14 - { + { constexpr const char *p = "123456789"; typedef std::move_iterator MI; constexpr MI 
it1 = std::make_move_iterator(p); @@ -63,7 +58,32 @@ int main(int, char**) constexpr MI it3 = std::make_move_iterator(p) ++; static_assert(it1 == it3, ""); static_assert(it2 != it3, ""); - } + } +#endif + +#if TEST_STD_VER > 17 + // Forward iterators return a copy. + { + int a[] = {1, 2, 3}; + using MoveIter = std::move_iterator>; + + MoveIter i = MoveIter(forward_iterator(a)); + ASSERT_SAME_TYPE(decltype(i++), MoveIter); + auto j = i++; + assert(base(j.base()) == a); + assert(base(i.base()) == a + 1); + } + + // Non-forward iterators return void. + { + int a[] = {1, 2, 3}; + using MoveIter = std::move_iterator>; + + MoveIter i = MoveIter(cpp20_input_iterator(a)); + ASSERT_SAME_TYPE(decltype(i++), void); + i++; + assert(base(i.base()) == a + 1); + } #endif return 0; diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.index/difference_type.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.index/difference_type.pass.cpp index ecf3b61058b7f5..df199873ffaec8 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.index/difference_type.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.index/difference_type.pass.cpp @@ -10,10 +10,7 @@ // move_iterator -// requires RandomAccessIterator -// unspecified operator[](difference_type n) const; -// -// constexpr in C++17 +// constexpr reference operator[](difference_type n) const; // Return type unspecified until C++20 #include #include @@ -65,5 +62,20 @@ int main(int, char**) } #endif +#if TEST_STD_VER > 17 + // Ensure the `iter_move` customization point is being used. 
+ { + int a[] = {0, 1, 2}; + + int iter_moves = 0; + adl::Iterator i = adl::Iterator::TrackMoves(a, iter_moves); + std::move_iterator mi(i); + + auto x = mi[0]; + assert(x == 0); + assert(iter_moves == 1); + } +#endif + return 0; } diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.star/op_star.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.star/op_star.pass.cpp index e6e826d835256e..22f601e6ca30fb 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.star/op_star.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.star/op_star.pass.cpp @@ -18,6 +18,7 @@ #include #include +#include "test_iterators.h" #include "test_macros.h" class A @@ -47,28 +48,42 @@ struct do_nothing }; -int main(int, char**) -{ - { - A a; - test(&a, A()); - } +int main(int, char**) { + { + A a; + test(&a, A()); + } #if TEST_STD_VER >= 11 - { - int i; - std::unique_ptr p(&i); - test(&p, std::unique_ptr(&i)); - } + { + int i; + std::unique_ptr p(&i); + test(&p, std::unique_ptr(&i)); + } #endif #if TEST_STD_VER > 14 - { + { constexpr const char *p = "123456789"; typedef std::move_iterator MI; constexpr MI it1 = std::make_move_iterator(p); constexpr MI it2 = std::make_move_iterator(p+1); static_assert(*it1 == p[0], ""); static_assert(*it2 == p[1], ""); - } + } +#endif + +#if TEST_STD_VER > 17 + // Ensure the `iter_move` customization point is being used. 
+ { + int a[] = {0, 1, 2}; + + int iter_moves = 0; + adl::Iterator i = adl::Iterator::TrackMoves(a, iter_moves); + std::move_iterator mi(i); + + auto x = *mi; + assert(x == 0); + assert(iter_moves == 1); + } #endif return 0; diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp index 2d0dadfeec8f14..f262b4d29eebe4 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp @@ -15,11 +15,13 @@ // template // class move_iterator { // public: -// typedef Iter iterator_type; -// typedef Iter::difference_type difference_type; -// typedef Iter pointer; -// typedef Iter::value_type value_type; -// typedef value_type&& reference; +// using iterator_type = Iterator; +// using iterator_concept = input_iterator_tag; // From C++20 +// using iterator_category = see below; // not always present starting from C++20 +// using value_type = iter_value_t; // Until C++20, iterator_traits::value_type +// using difference_type = iter_difference_t; // Until C++20, iterator_traits::difference_type; +// using pointer = Iterator; +// using reference = iter_rvalue_reference_t; // Until C++20, value_type&& // }; #include @@ -28,6 +30,37 @@ #include "test_macros.h" #include "test_iterators.h" +struct FooIter { + using iterator_category = std::bidirectional_iterator_tag; + using value_type = void*; + using difference_type = void*; + using pointer = void*; + using reference = char&; + bool& operator*() const; +}; + +#if TEST_STD_VER > 17 +template <> +struct std::indirectly_readable_traits { + using value_type = int; +}; +template <> +struct std::incrementable_traits { + using difference_type = char; +}; +#endif + +#if TEST_STD_VER > 17 +// Not using `FooIter::value_type`. 
+static_assert(std::is_same_v::value_type, int>); +// Not using `FooIter::difference_type`. +static_assert(std::is_same_v::difference_type, char>); +static_assert(std::is_same_v::reference, bool&>); + +#else +static_assert(std::is_same::reference, char&>::value, ""); +#endif + template struct DummyIt { typedef std::forward_iterator_tag iterator_category; @@ -40,71 +73,79 @@ struct DummyIt { }; template -void -test() -{ - typedef std::move_iterator R; - typedef std::iterator_traits T; - static_assert((std::is_same::value), ""); - static_assert((std::is_same::value), ""); - static_assert((std::is_same::value), ""); - static_assert((std::is_same::value), ""); - static_assert((std::is_same::value), ""); +void test() { + typedef std::move_iterator R; + typedef std::iterator_traits T; + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + #if TEST_STD_VER > 17 - if constexpr (std::is_same_v) { - static_assert((std::is_same::value), ""); - } else { - static_assert((std::is_same::value), ""); - } + static_assert((std::is_same_v>), ""); #else + static_assert((std::is_same::value), ""); +#endif + +#if TEST_STD_VER > 17 + if constexpr (std::is_same_v) { + static_assert((std::is_same::value), ""); + } else { static_assert((std::is_same::value), ""); + } +#else + static_assert((std::is_same::value), ""); +#endif + +#if TEST_STD_VER > 17 + static_assert(std::is_same_v); #endif } -int main(int, char**) -{ - test >(); - test >(); - test >(); - test >(); - test(); +int main(int, char**) { + test >(); + test >(); + test >(); + test >(); + test(); + #if TEST_STD_VER >= 11 - { - typedef DummyIt T; - typedef std::move_iterator It; - static_assert(std::is_same::value, ""); - } - { - typedef DummyIt > T; - typedef std::move_iterator It; - static_assert(std::is_same >::value, ""); - } - { - // Check that move_iterator uses whatever reference type it's given - // when 
it's not a reference. - typedef DummyIt T; - typedef std::move_iterator It; - static_assert(std::is_same::value, ""); - } - { - typedef DummyIt T; - typedef std::move_iterator It; - static_assert(std::is_same::value, ""); - } - { - typedef DummyIt T; - typedef std::move_iterator It; - static_assert(std::is_same::value, ""); - } + { + typedef DummyIt T; + typedef std::move_iterator It; + static_assert(std::is_same::value, ""); + } + { + typedef DummyIt > T; + typedef std::move_iterator It; + static_assert(std::is_same >::value, ""); + } + { + // Check that move_iterator uses whatever reference type it's given + // when it's not a reference. + typedef DummyIt T; + typedef std::move_iterator It; + static_assert(std::is_same::value, ""); + } + { + typedef DummyIt T; + typedef std::move_iterator It; + static_assert(std::is_same::value, ""); + } + { + typedef DummyIt T; + typedef std::move_iterator It; + static_assert(std::is_same::value, ""); + } #endif #if TEST_STD_VER > 17 - test>(); - static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); - static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); - static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); - static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); - static_assert(std::is_same_v::iterator_concept, std::input_iterator_tag>); + test>(); + static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); + static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); + static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); + static_assert(std::is_same_v>::iterator_concept, std::input_iterator_tag>); + static_assert(std::is_same_v::iterator_concept, std::input_iterator_tag>); #endif return 0; diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/assign.converting.pass.cpp 
b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/assign.converting.pass.cpp index 03ef6e2b085f1a..57f9ad70b7adaf 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/assign.converting.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/assign.converting.pass.cpp @@ -21,7 +21,7 @@ #include struct NonAssignable { - NonAssignable& operator=(int i); + NonAssignable& operator=(int i); }; static_assert(std::semiregular); static_assert(std::is_assignable_v); @@ -29,17 +29,22 @@ static_assert(!std::assignable_from); constexpr bool test() { + // Assigning from an lvalue. { std::move_sentinel m(42); std::move_sentinel m2; m2 = m; assert(m2.base() == 42L); } + + // Assigning from an rvalue. { std::move_sentinel m2; m2 = std::move_sentinel(43); assert(m2.base() == 43L); } + + // SFINAE checks. { static_assert( std::is_assignable_v, std::move_sentinel>); static_assert(!std::is_assignable_v, std::move_sentinel>); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/base.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/base.pass.cpp index bfb202ac36b692..b3b0293c3d48b4 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/base.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/base.pass.cpp @@ -22,6 +22,7 @@ constexpr bool test() { + // The sentinel type is a value. { auto m = std::move_sentinel(42); const auto& cm = m; @@ -34,6 +35,8 @@ constexpr bool test() ASSERT_SAME_TYPE(decltype(std::move(m).base()), int); ASSERT_SAME_TYPE(decltype(std::move(cm).base()), int); } + + // The sentinel type is a pointer. 
{ int a[] = {1, 2, 3}; auto m = std::move_sentinel(a); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp index 88e0f77ea39fab..80dc4de7311664 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/concept_conformance.compile.pass.cpp @@ -19,6 +19,7 @@ void test() { + // Pointer. { using It = int*; static_assert( std::sentinel_for, std::move_iterator>); @@ -28,6 +29,8 @@ void test() static_assert( std::sentinel_for>, std::move_iterator>); static_assert( std::sized_sentinel_for>, std::move_iterator>); } + + // `Cpp17InputIterator`. { using It = cpp17_input_iterator; static_assert( std::sentinel_for>, std::move_iterator>); @@ -35,6 +38,8 @@ void test() static_assert( std::sentinel_for>, std::move_iterator>); static_assert( std::sized_sentinel_for>, std::move_iterator>); } + + // `std::input_iterator`. { using It = cpp20_input_iterator; static_assert( std::sentinel_for>, std::move_iterator>); @@ -42,6 +47,8 @@ void test() static_assert( std::sentinel_for>, std::move_iterator>); static_assert( std::sized_sentinel_for>, std::move_iterator>); } + + // `std::forward_iterator`. { using It = forward_iterator; static_assert( std::sentinel_for, std::move_iterator>); @@ -51,6 +58,8 @@ void test() static_assert( std::sentinel_for>, std::move_iterator>); static_assert( std::sized_sentinel_for>, std::move_iterator>); } + + // `std::bidirectional_iterator`. { using It = bidirectional_iterator; static_assert( std::sentinel_for, std::move_iterator>); @@ -60,6 +69,8 @@ void test() static_assert( std::sentinel_for>, std::move_iterator>); static_assert( std::sized_sentinel_for>, std::move_iterator>); } + + // `std::random_access_iterator`. 
{ using It = random_access_iterator; static_assert( std::sentinel_for, std::move_iterator>); @@ -69,6 +80,8 @@ void test() static_assert( std::sentinel_for>, std::move_iterator>); static_assert( std::sized_sentinel_for>, std::move_iterator>); } + + // `std::contiguous_iterator`. { using It = contiguous_iterator; static_assert( std::sentinel_for, std::move_iterator>); @@ -78,6 +91,8 @@ void test() static_assert( std::sentinel_for>, std::move_iterator>); static_assert( std::sized_sentinel_for>, std::move_iterator>); } + + // `std::contiguous_iterator` with the spaceship operator. { using It = three_way_contiguous_iterator; static_assert( std::sentinel_for, std::move_iterator>); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.converting.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.converting.pass.cpp index b77d4836cd4e53..5f5be0c7f1591e 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.converting.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.converting.pass.cpp @@ -31,15 +31,20 @@ static_assert(!std::convertible_to); constexpr bool test() { + // Constructing from an lvalue. { std::move_sentinel m(42); std::move_sentinel m2 = m; assert(m2.base() == 42L); } + + // Constructing from an rvalue. { std::move_sentinel m2 = std::move_sentinel(43); assert(m2.base() == 43L); } + + // SFINAE checks. 
{ static_assert( std::is_convertible_v, std::move_sentinel>); static_assert( std::is_convertible_v, std::move_sentinel>); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.default.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.default.pass.cpp index 655825de7d6f28..c446373eff2b66 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.default.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.default.pass.cpp @@ -19,14 +19,20 @@ constexpr bool test() { + + // The underlying sentinel is an integer. { std::move_sentinel m; assert(m.base() == 0); } + + // The underlying sentinel is a pointer. { std::move_sentinel m; assert(m.base() == nullptr); } + + // The underlying sentinel is a user-defined type with an explicit default constructor. { struct S { explicit S() = default; @@ -35,6 +41,7 @@ constexpr bool test() std::move_sentinel m; assert(m.base().i == 3); } + return true; } diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.sentinel.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.sentinel.pass.cpp index 226a24a2bee8ea..1fc2843f465aa6 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.sentinel.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/ctor.sentinel.pass.cpp @@ -19,17 +19,22 @@ constexpr bool test() { + // The underlying sentinel is an integer. { static_assert(!std::is_convertible_v>); std::move_sentinel m(42); assert(m.base() == 42); } + + // The underlying sentinel is a pointer. { static_assert(!std::is_convertible_v>); int i = 42; std::move_sentinel m(&i); assert(m.base() == &i); } + + // The underlying sentinel is a user-defined type with an explicit default constructor. 
{ struct S { explicit S() = default; diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/op_eq.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/op_eq.pass.cpp index fef17c5d188bb7..9519eb2a9fad6c 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/op_eq.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.sentinel/op_eq.pass.cpp @@ -38,30 +38,25 @@ static_assert( HasNotEquals, std::move_sentinel, std::move_sentinel>); template -constexpr bool test_one() -{ - { - char s[] = "abc"; - const auto it = std::move_iterator(It(s)); - const auto sent1 = std::move_sentinel>(sentinel_wrapper(It(s))); - const auto sent2 = std::move_sentinel>(sentinel_wrapper(It(s + 1))); - ASSERT_SAME_TYPE(decltype(it == sent1), bool); - assert( (it == sent1)); - assert(!(it != sent1)); - assert(!(it == sent2)); - assert( (it != sent2)); - assert( (sent1 == it)); - assert(!(sent1 != it)); - assert(!(sent2 == it)); - assert( (sent2 != it)); - static_assert(!HasEquals); - static_assert(!HasLess); - } - return true; +constexpr void test_one() { + char s[] = "abc"; + const auto it = std::move_iterator(It(s)); + const auto sent1 = std::move_sentinel>(sentinel_wrapper(It(s))); + const auto sent2 = std::move_sentinel>(sentinel_wrapper(It(s + 1))); + ASSERT_SAME_TYPE(decltype(it == sent1), bool); + assert( (it == sent1)); + assert(!(it != sent1)); + assert(!(it == sent2)); + assert( (it != sent2)); + assert( (sent1 == it)); + assert(!(sent1 != it)); + assert(!(sent2 == it)); + assert( (sent2 != it)); + static_assert(!HasEquals); + static_assert(!HasLess); } -constexpr bool test() -{ +constexpr bool test() { test_one>(); test_one>(); test_one>(); @@ -75,8 +70,7 @@ constexpr bool test() return true; } -int main(int, char**) -{ +int main(int, char**) { test(); static_assert(test()); diff --git 
a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp index acf91784ef9b46..712425a0c44ff2 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp @@ -20,48 +20,9 @@ #include #include #include +#include "test_iterators.h" #include "test_macros.h" -namespace adl { - -struct Iterator { - using value_type = int; - using difference_type = ptrdiff_t; - - value_type* ptr_ = nullptr; - int* iter_move_invocations_ = nullptr; - - constexpr Iterator() = default; - constexpr explicit Iterator(int* p, int& iter_moves) : ptr_(p), iter_move_invocations_(&iter_moves) {} - - constexpr value_type& operator*() const { return *ptr_; } - - Iterator& operator++() { ++ptr_; return *this; } - Iterator operator++(int) { - Iterator prev = *this; - ++ptr_; - return prev; - } - - constexpr Iterator& operator--() { --ptr_; return *this; } - constexpr Iterator operator--(int) { - Iterator prev = *this; - --ptr_; - return prev; - } - - constexpr friend value_type&& iter_move(Iterator iter) { - if (iter.iter_move_invocations_) { - ++(*iter.iter_move_invocations_); - } - return std::move(*iter); - } - - friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.ptr_ == rhs.ptr_; } -}; - -} // namespace adl - constexpr bool test() { // Can use `iter_move` with a regular array. { @@ -76,13 +37,13 @@ constexpr bool test() { assert(iter_move(ri) == 1); } - // Ensure the `iter_move` customization point is being used. + // Check that the `iter_move` customization point is being used. 
{ constexpr int N = 3; int a[N] = {0, 1, 2}; int iter_move_invocations = 0; - adl::Iterator i(a + N, iter_move_invocations); + adl::Iterator i = adl::Iterator::TrackMoves(a + N, iter_move_invocations); std::reverse_iterator ri(i); int x = iter_move(ri); assert(x == 2); diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp index 3ae27368007ab3..aaad0eb51e2f87 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp @@ -22,47 +22,9 @@ #include #include #include +#include "test_iterators.h" #include "test_macros.h" -namespace adl { - -struct Iterator { - using value_type = int; - using difference_type = ptrdiff_t; - - value_type* ptr_ = nullptr; - int* iter_swap_invocations_ = nullptr; - - constexpr Iterator() = default; - constexpr explicit Iterator(int& iter_swaps) : iter_swap_invocations_(&iter_swaps) {} - - value_type& operator*() const { return *ptr_; } - - Iterator& operator++() { ++ptr_; return *this; } - Iterator operator++(int) { - Iterator prev = *this; - ++ptr_; - return prev; - } - - Iterator& operator--() { --ptr_; return *this; } - Iterator operator--(int) { - Iterator prev = *this; - --ptr_; - return prev; - } - - constexpr friend void iter_swap(Iterator a, Iterator) { - if (a.iter_swap_invocations_) { - ++(*a.iter_swap_invocations_); - } - } - - friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.ptr_ == rhs.ptr_; } -}; - -} // namespace adl - constexpr bool test() { // Can use `iter_swap` with a regular array. { @@ -80,15 +42,17 @@ constexpr bool test() { assert(a[2] == 0); } - // Ensure the `iter_swap` customization point is being used. 
+ // Check that the `iter_swap` customization point is being used. { int iter_swap_invocations = 0; - adl::Iterator i1(iter_swap_invocations), i2(iter_swap_invocations); - std::reverse_iterator ri1(i1), ri2(i2); - iter_swap(i1, i2); + int a[] = {0, 1, 2}; + adl::Iterator base1 = adl::Iterator::TrackSwaps(a + 1, iter_swap_invocations); + adl::Iterator base2 = adl::Iterator::TrackSwaps(a + 2, iter_swap_invocations); + std::reverse_iterator ri1(base1), ri2(base2); + iter_swap(ri1, ri2); assert(iter_swap_invocations == 1); - iter_swap(i2, i1); + iter_swap(ri2, ri1); assert(iter_swap_invocations == 2); } diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/types.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/types.compile.pass.cpp index 419df883f1026a..f8c9ce72daa9d1 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/types.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/types.compile.pass.cpp @@ -82,7 +82,9 @@ struct std::incrementable_traits { using difference_type = char; }; +// Not using `FooIter::value_type`. static_assert(std::is_same_v::value_type, int>); +// Not using `FooIter::difference_type`. 
static_assert(std::is_same_v::difference_type, char>); #endif diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp index 7b648d110a8356..39f81a4a5e39b6 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp @@ -22,40 +22,40 @@ namespace adl { template -struct Iterator { +struct MaybeNoexceptIterator { using value_type = int; using difference_type = ptrdiff_t; value_type* ptr_ = nullptr; int* iter_move_invocations_ = nullptr; - constexpr Iterator() = default; - constexpr explicit Iterator(int* p, int& iter_moves) : ptr_(p), iter_move_invocations_(&iter_moves) {} + constexpr MaybeNoexceptIterator() = default; + constexpr explicit MaybeNoexceptIterator(int* p, int& iter_moves) : ptr_(p), iter_move_invocations_(&iter_moves) {} constexpr value_type& operator*() const { return *ptr_; } - Iterator& operator++() { ++ptr_; return *this; } - Iterator operator++(int) { - Iterator prev = *this; + MaybeNoexceptIterator& operator++() { ++ptr_; return *this; } + MaybeNoexceptIterator operator++(int) { + MaybeNoexceptIterator prev = *this; ++ptr_; return prev; } - constexpr Iterator& operator--() { --ptr_; return *this; } - constexpr Iterator operator--(int) { - Iterator prev = *this; + constexpr MaybeNoexceptIterator& operator--() { --ptr_; return *this; } + constexpr MaybeNoexceptIterator operator--(int) { + MaybeNoexceptIterator prev = *this; --ptr_; return prev; } - constexpr friend value_type&& iter_move(Iterator iter) noexcept(IsNoexcept) { + constexpr friend value_type&& iter_move(MaybeNoexceptIterator iter) noexcept(IsNoexcept) { if (iter.iter_move_invocations_) { ++(*iter.iter_move_invocations_); } return std::move(*iter); } - friend bool operator==(const 
Iterator& lhs, const Iterator& rhs) { return lhs.ptr_ == rhs.ptr_; } + friend bool operator==(const MaybeNoexceptIterator& lhs, const MaybeNoexceptIterator& rhs) { return lhs.ptr_ == rhs.ptr_; } }; template @@ -68,8 +68,12 @@ struct View : std::ranges::view_base { constexpr View(int& iter_move_invocations) : iter_moves(&iter_move_invocations) { } - constexpr adl::Iterator begin() { return adl::Iterator(a, *iter_moves); } - constexpr adl::Iterator end() { return adl::Iterator(a + N, *iter_moves); } + constexpr adl::MaybeNoexceptIterator begin() { + return adl::MaybeNoexceptIterator(a, *iter_moves); + } + constexpr adl::MaybeNoexceptIterator end() { + return adl::MaybeNoexceptIterator(a + N, *iter_moves); + } }; } // namespace adl @@ -134,7 +138,7 @@ constexpr bool test() { using ThrowingSplitView = std::ranges::lazy_split_view, adl::View>; using ThrowingValueType = std::ranges::iterator_t::value_type; using ThrowingIter = std::ranges::iterator_t; - ASSERT_NOT_NOEXCEPT(std::ranges::iter_move(std::declval>())); + ASSERT_NOT_NOEXCEPT(std::ranges::iter_move(std::declval>())); ASSERT_NOT_NOEXCEPT(iter_move(std::declval())); } @@ -142,7 +146,7 @@ constexpr bool test() { using NoexceptSplitView = std::ranges::lazy_split_view, adl::View>; using NoexceptValueType = std::ranges::iterator_t::value_type; using NoexceptIter = std::ranges::iterator_t; - ASSERT_NOEXCEPT(std::ranges::iter_move(std::declval>())); + ASSERT_NOEXCEPT(std::ranges::iter_move(std::declval>())); ASSERT_NOEXCEPT(iter_move(std::declval())); } } diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp index 52bf8d20fe82ce..4bcbcafaeab927 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp @@ -23,39 
+23,41 @@ namespace adl { template -struct Iterator { +struct MaybeNoexceptIterator { using value_type = int; using difference_type = ptrdiff_t; value_type* ptr_ = nullptr; int* iter_swap_invocations_ = nullptr; - constexpr Iterator() = default; - constexpr explicit Iterator(int& iter_swaps) : iter_swap_invocations_(&iter_swaps) {} + constexpr MaybeNoexceptIterator() = default; + constexpr explicit MaybeNoexceptIterator(int& iter_swaps) : iter_swap_invocations_(&iter_swaps) {} value_type& operator*() const { return *ptr_; } - Iterator& operator++() { ++ptr_; return *this; } - Iterator operator++(int) { - Iterator prev = *this; + MaybeNoexceptIterator& operator++() { ++ptr_; return *this; } + MaybeNoexceptIterator operator++(int) { + MaybeNoexceptIterator prev = *this; ++ptr_; return prev; } - Iterator& operator--() { --ptr_; return *this; } - Iterator operator--(int) { - Iterator prev = *this; + MaybeNoexceptIterator& operator--() { --ptr_; return *this; } + MaybeNoexceptIterator operator--(int) { + MaybeNoexceptIterator prev = *this; --ptr_; return prev; } - constexpr friend void iter_swap(Iterator a, Iterator) noexcept(IsNoexcept) { + constexpr friend void iter_swap(MaybeNoexceptIterator a, MaybeNoexceptIterator) noexcept(IsNoexcept) { if (a.iter_swap_invocations_) { ++(*a.iter_swap_invocations_); } } - friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.ptr_ == rhs.ptr_; } + friend bool operator==(const MaybeNoexceptIterator& lhs, const MaybeNoexceptIterator& rhs) { + return lhs.ptr_ == rhs.ptr_; + } }; template @@ -66,8 +68,12 @@ struct View : std::ranges::view_base { constexpr View(int& iter_swap_invocations) : iter_swaps(&iter_swap_invocations) { } - constexpr adl::Iterator begin() { return adl::Iterator(*iter_swaps); } - constexpr adl::Iterator end() { return adl::Iterator(*iter_swaps); } + constexpr adl::MaybeNoexceptIterator begin() { + return adl::MaybeNoexceptIterator(*iter_swaps); + } + constexpr adl::MaybeNoexceptIterator 
end() { + return adl::MaybeNoexceptIterator(*iter_swaps); + } }; } // namespace adl @@ -189,8 +195,9 @@ constexpr bool test() { using ThrowingSplitView = std::ranges::lazy_split_view, adl::View>; using ThrowingValueType = std::ranges::iterator_t::value_type; using ThrowingIter = std::ranges::iterator_t; - ASSERT_NOT_NOEXCEPT( - std::ranges::iter_swap(std::declval>(), std::declval>())); + ASSERT_NOT_NOEXCEPT(std::ranges::iter_swap( + std::declval>(), + std::declval>())); ASSERT_NOT_NOEXCEPT(iter_swap(std::declval(), std::declval())); } @@ -198,8 +205,9 @@ constexpr bool test() { using NoexceptSplitView = std::ranges::lazy_split_view, adl::View>; using NoexceptValueType = std::ranges::iterator_t::value_type; using NoexceptIter = std::ranges::iterator_t; - ASSERT_NOEXCEPT( - std::ranges::iter_swap(std::declval>(), std::declval>())); + ASSERT_NOEXCEPT(std::ranges::iter_swap( + std::declval>(), + std::declval>())); ASSERT_NOEXCEPT(iter_swap(std::declval(), std::declval())); } } diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp index 44317ad9e1bc7a..c317efca64b291 100644 --- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp +++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp @@ -41,40 +41,6 @@ struct NotConvertibleFromInt {}; static_assert(!std::is_invocable_v); -namespace adl { - -static int iter_move_invocations = 0; - -template -struct Iterator { - using value_type = T; - using difference_type = int; - using iterator_concept = std::input_iterator_tag; - - T* ptr = nullptr; - - Iterator() = default; - explicit Iterator(int* p) : ptr(p) {} - - T& operator*() const { return *ptr; } - - Iterator& operator++() { ++ptr; return *this; } - Iterator operator++(int) { - 
Iterator prev = *this; - ++ptr; - return prev; - } - - friend T&& iter_move(Iterator iter) { - ++iter_move_invocations; - return std::move(*iter); - } - - friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.ptr == rhs.ptr; } -}; - -} // namespace adl - int main(int, char**) { // An empty range -- no default constructors should be invoked. { @@ -411,17 +377,18 @@ int main(int, char**) { constexpr int N = 3; int in[N] = {1, 2, 3}; Buffer out; - adl::Iterator begin(in); - adl::Iterator end(in + N); + int iter_moves = 0; + adl::Iterator begin = adl::Iterator::TrackMoves(in, iter_moves); + adl::Iterator end = adl::Iterator::TrackMoves(in + N, iter_moves); std::ranges::uninitialized_move(begin, end, out.begin(), out.end()); - assert(adl::iter_move_invocations == 3); - adl::iter_move_invocations = 0; + assert(iter_moves == 3); + iter_moves = 0; std::ranges::subrange range(begin, end); std::ranges::uninitialized_move(range, out); - assert(adl::iter_move_invocations == 3); - adl::iter_move_invocations = 0; + assert(iter_moves == 3); + iter_moves = 0; } return 0; diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp index edd0e7811bc8b2..70a68880e0f44c 100644 --- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp +++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp @@ -24,8 +24,8 @@ #include "../buffer.h" #include "../counted.h" -#include "test_macros.h" #include "test_iterators.h" +#include "test_macros.h" // TODO(varconst): consolidate the ADL checks into a single file. // Because this is a variable and not a function, it's guaranteed that ADL won't be used. 
However, @@ -38,40 +38,6 @@ struct NotConvertibleFromInt {}; static_assert(!std::is_invocable_v); -namespace adl { - -static int iter_move_invocations = 0; - -template -struct Iterator { - using value_type = T; - using difference_type = int; - using iterator_concept = std::input_iterator_tag; - - T* ptr = nullptr; - - Iterator() = default; - explicit Iterator(int* p) : ptr(p) {} - - T& operator*() const { return *ptr; } - - Iterator& operator++() { ++ptr; return *this; } - Iterator operator++(int) { - Iterator prev = *this; - ++ptr; - return prev; - } - - friend T&& iter_move(Iterator iter) { - ++iter_move_invocations; - return std::move(*iter); - } - - friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.ptr == rhs.ptr; } -}; - -} // namespace adl - int main(int, char**) { // An empty range -- no default constructors should be invoked. { @@ -187,17 +153,18 @@ int main(int, char**) { constexpr int N = 3; int in[N] = {1, 2, 3}; Buffer out; - adl::Iterator begin(in); - adl::Iterator end(in + N); + int iter_moves = 0; + adl::Iterator begin = adl::Iterator::TrackMoves(in, iter_moves); + adl::Iterator end = adl::Iterator::TrackMoves(in + N, iter_moves); std::ranges::uninitialized_move(begin, end, out.begin(), out.end()); - assert(adl::iter_move_invocations == 3); - adl::iter_move_invocations = 0; + assert(iter_moves == 3); + iter_moves = 0; std::ranges::subrange range(begin, end); std::ranges::uninitialized_move(range, out); - assert(adl::iter_move_invocations == 3); - adl::iter_move_invocations = 0; + assert(iter_moves == 3); + iter_moves = 0; } return 0; diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h index 9377b5fc56098b..6d3a8e2699edda 100644 --- a/libcxx/test/support/test_iterators.h +++ b/libcxx/test/support/test_iterators.h @@ -721,6 +721,77 @@ class sized_sentinel { private: decltype(base(std::declval())) base_; }; + +namespace adl { + +class Iterator { + public: + using value_type = int; + using 
difference_type = ptrdiff_t; + + private: + value_type* ptr_ = nullptr; + int* iter_moves_ = nullptr; + int* iter_swaps_ = nullptr; + + constexpr Iterator(int* p, int* iter_moves, int* iter_swaps) + : ptr_(p) + , iter_moves_(iter_moves) + , iter_swaps_(iter_swaps) {} + + public: + constexpr Iterator() = default; + static constexpr Iterator TrackMoves(int* p, int& iter_moves) { + return Iterator(p, &iter_moves, /*iter_swaps=*/nullptr); + } + static constexpr Iterator TrackSwaps(int& iter_swaps) { + return Iterator(/*p=*/nullptr, /*iter_moves=*/nullptr, &iter_swaps); + } + static constexpr Iterator TrackSwaps(int* p, int& iter_swaps) { + return Iterator(p, /*iter_moves=*/nullptr, &iter_swaps); + } + + constexpr int iter_moves() const { assert(iter_moves_); return *iter_moves_; } + constexpr int iter_swaps() const { assert(iter_swaps_); return *iter_swaps_; } + + constexpr value_type& operator*() const { return *ptr_; } + + constexpr Iterator operator+(difference_type n) const { + return Iterator(ptr_ + n, iter_moves_, iter_swaps_); + } + + constexpr Iterator& operator++() { ++ptr_; return *this; } + constexpr Iterator operator++(int) { + Iterator prev = *this; + ++ptr_; + return prev; + } + + constexpr Iterator& operator--() { --ptr_; return *this; } + constexpr Iterator operator--(int) { + Iterator prev = *this; + --ptr_; + return prev; + } + + constexpr friend void iter_swap(Iterator a, Iterator) { + if (a.iter_swaps_) { + ++(*a.iter_swaps_); + } + } + + constexpr friend value_type&& iter_move(Iterator iter) { + if (iter.iter_moves_) { + ++(*iter.iter_moves_); + } + return std::move(*iter); + } + + constexpr friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.ptr_ == rhs.ptr_; } +}; + +} // namespace adl + #endif // TEST_STD_VER > 17 #endif // SUPPORT_TEST_ITERATORS_H From e59f648d698efe58b96e9b6224449b2b8cfa872a Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 24 May 2022 22:14:56 +0000 Subject: [PATCH 428/908] Move GCC-compatible 
pod-packing change to v15/old behavior available at v14 and below Since this didn't make it into the v14 release - anyone requesting the v14 ABI shouldn't get this GCC-compatible change that isn't backwards compatible with v14 Clang. Differential Revision: https://reviews.llvm.org/D126334 --- clang/docs/ReleaseNotes.rst | 2 +- clang/include/clang/Basic/LangOptions.h | 5 +---- clang/lib/AST/RecordLayoutBuilder.cpp | 2 +- clang/lib/Frontend/CompilerInvocation.cpp | 4 ---- clang/test/SemaCXX/class-layout.cpp | 6 +++--- 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7a54a33685471c..a457a8fb2efe44 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -431,7 +431,7 @@ ABI Changes in Clang attribute is also specified on the member. Clang historically did perform such packing. Clang now matches the gcc behavior (except on Darwin and PS4). You can switch back to the old ABI behavior with the flag: - ``-fclang-abi-compat=13.0``. + ``-fclang-abi-compat=14.0``. OpenMP Support in Clang ----------------------- diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index d442b4d96e7652..6e7763a4a2b471 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -214,12 +214,9 @@ class LangOptions : public LangOptionsBase { /// global-scope inline variables incorrectly. Ver12, - /// Attempt to be ABI-compatible with code generated by Clang 13.0.x. - /// This causes clang to not pack non-POD members of packed structs. - Ver13, - /// Attempt to be ABI-compatible with code generated by Clang 14.0.x. /// This causes clang to mangle dependent nested names incorrectly. + /// This causes clang to pack non-POD members of packed structs. 
Ver14, /// Conform to the underlying platform's C and C++ ABIs as closely diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 202823da7baaea..8827e956fc2f47 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1890,7 +1890,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D, llvm::Triple Target = Context.getTargetInfo().getTriple(); bool FieldPacked = (Packed && (!FieldClass || FieldClass->isPOD() || Context.getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver13 || + LangOptions::ClangABI::Ver14 || Target.isPS4() || Target.isOSDarwin())) || D->hasAttr(); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 88125dad8a925d..32b084dfedecc5 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3516,8 +3516,6 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Args, OPT_fclang_abi_compat_EQ, "11.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver12) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA); - else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13) - GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver14) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "14.0", SA); @@ -4010,8 +4008,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Opts.setClangABICompat(LangOptions::ClangABI::Ver11); else if (Major <= 12) Opts.setClangABICompat(LangOptions::ClangABI::Ver12); - else if (Major <= 13) - Opts.setClangABICompat(LangOptions::ClangABI::Ver13); else if (Major <= 14) Opts.setClangABICompat(LangOptions::ClangABI::Ver14); } else if (Ver != "latest") { diff --git a/clang/test/SemaCXX/class-layout.cpp b/clang/test/SemaCXX/class-layout.cpp index 79fa677071109c..940966950d8bea 100644 --- 
a/clang/test/SemaCXX/class-layout.cpp +++ b/clang/test/SemaCXX/class-layout.cpp @@ -1,9 +1,9 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -fsyntax-only -verify -std=c++98 -Wno-inaccessible-base // RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -fsyntax-only -verify -std=c++11 -Wno-inaccessible-base -// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -fsyntax-only -verify -std=c++11 -Wno-inaccessible-base -DCLANG_ABI_COMPAT=13 +// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -fsyntax-only -verify -std=c++11 -Wno-inaccessible-base -DCLANG_ABI_COMPAT=14 // RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -fsyntax-only -verify -std=c++11 -Wno-inaccessible-base -DCLANG_ABI_COMPAT=6 // RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -fsyntax-only -verify -std=c++11 -Wno-inaccessible-base -fclang-abi-compat=6 -DCLANG_ABI_COMPAT=6 -// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -fsyntax-only -verify -std=c++11 -Wno-inaccessible-base -fclang-abi-compat=13 -DCLANG_ABI_COMPAT=13 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -fsyntax-only -verify -std=c++11 -Wno-inaccessible-base -fclang-abi-compat=14 -DCLANG_ABI_COMPAT=14 // expected-no-diagnostics #define SA(n, p) int a##n[(p) ? 1 : -1] @@ -620,7 +620,7 @@ struct t2 { char c2; t1 v1; } __attribute__((packed)); -#if defined(CLANG_ABI_COMPAT) && CLANG_ABI_COMPAT <= 13 +#if defined(CLANG_ABI_COMPAT) && CLANG_ABI_COMPAT <= 14 _Static_assert(_Alignof(t1) == 4, ""); _Static_assert(_Alignof(t2) == 1, ""); #else From fd937366579e68e4413cda13b1b78a74b234be81 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 24 May 2022 07:20:55 +0100 Subject: [PATCH 429/908] [RISCV] Replace untested code with assert We found untested code where negative frame indices were ostensibly handled despite it being in a block guarded by !MFI.isFixedObjectIndex. 
While the implementation of MachineFrameInfo::isFixedObjectIndex suggests this is possible (i.e., if a frame index was more negative - less than the number of fixed objects), I couldn't find any test in tree -- for any target -- where a negative frame index wasn't also a fixed object offset. I couldn't find a way of creating such an object with the public MachineFrameInfo creation APIs. Even MachineFrameInfo::getObjectIndexBegin starts counting at the negative number of fixed objects, so such frame indices wouldn't be covered by loops using the provided begin/end methods. Given all this, an assert that any object encountered in the block is non-negative seems reasonable. Reviewed By: StephenFan, kito-cheng Differential Revision: https://reviews.llvm.org/D126278 --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index f77443386eb9d8..5be78a5ccc86a4 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -757,8 +757,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // objects to the required alignment. if (MFI.getStackID(FI) == TargetStackID::Default) { Offset += StackOffset::getFixed(MFI.getStackSize()); - if (FI < 0) - Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); + assert(FI >= 0 && "Unhandled negative frame index"); } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { // Ensure the base of the RVV stack is correctly aligned: add on the // alignment padding. From 360411957b6f40f50b35097b7abedd91250f2434 Mon Sep 17 00:00:00 2001 From: Mehdi Chinoune Date: Mon, 23 May 2022 16:25:41 -0500 Subject: [PATCH 430/908] [flang][MSVC] Fix building with `/permissive-` flag CLOCK_REALTIME is POSIX defined and never available with MSVC, even without /permissive-.
The difference is that the template is never instantiated and the compiler ignores the undefined identifier. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D125262 --- flang/runtime/time-intrinsic.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp index 7ca3a22ee2cb66..5f7b50964fe2a9 100644 --- a/flang/runtime/time-intrinsic.cpp +++ b/flang/runtime/time-intrinsic.cpp @@ -62,10 +62,13 @@ template double GetCpuTime(fallback_implementation) { #define CLOCKID CLOCK_THREAD_CPUTIME_ID #elif defined CLOCK_MONOTONIC #define CLOCKID CLOCK_MONOTONIC -#else +#elif defined CLOCK_REALTIME #define CLOCKID CLOCK_REALTIME +#else +#undef CLOCKID #endif +#ifdef CLOCKID // POSIX implementation using clock_gettime. This is only enabled where // clock_gettime is available. template @@ -80,6 +83,7 @@ double GetCpuTime(preferred_implementation, // Return some negative value to represent failure. return -1.0; } +#endif using count_t = std::int64_t; using unsigned_count_t = std::uint64_t; @@ -136,6 +140,7 @@ constexpr unsigned_count_t DS_PER_SEC{10u}; constexpr unsigned_count_t MS_PER_SEC{1'000u}; constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u}; +#ifdef CLOCKID template count_t GetSystemClockCount(int kind, preferred_implementation, // We need some dummy parameters to pass to decltype(clock_gettime). @@ -156,6 +161,7 @@ count_t GetSystemClockCount(int kind, preferred_implementation, return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1); } } +#endif template count_t GetSystemClockCountRate(int kind, preferred_implementation, From 66db5312bd6676cae721b3ad9f76b68af7909e7c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 May 2022 19:10:40 -0700 Subject: [PATCH 431/908] [RISCV] Fix vnsrl/vnsra isel patterns that are dropping VL. We were incorrectly using VLMax instead of the passed VL. 
Reviewed By: khchen, reames Differential Revision: https://reviews.llvm.org/D126319 --- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 2 +- .../RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 15ad6424b19dc2..939b3e3d2f5441 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -826,7 +826,7 @@ multiclass VPatNarrowShiftSplatExt_WX(instruction_name#"_WX_"#vti.LMul.MX) - wti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>; + wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll index 917498f08cc3a9..e1fc31960582c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll @@ -18,7 +18,7 @@ define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) { define <8 x i8> @vnsra_v8i16_v8i8_scalar_sext(<8 x i16> %x, i8 %y) { ; CHECK-LABEL: vnsra_v8i16_v8i8_scalar_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vnsra.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <8 x i8> poison, i8 %y, i8 0 @@ -32,7 +32,7 @@ define <8 x i8> @vnsra_v8i16_v8i8_scalar_sext(<8 x i16> %x, i8 %y) { define <8 x i8> @vnsra_v8i16_v8i8_scalar_zext(<8 x i16> %x, i8 %y) { ; CHECK-LABEL: vnsra_v8i16_v8i8_scalar_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vnsra.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <8 x i8> poison, i8 %y, i8 0 @@ -59,7 +59,7 @@ define <4 x i16> @vnsra_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) { define <4 x i16> 
@vnsra_v4i32_v4i16_scalar_sext(<4 x i32> %x, i16 %y) { ; CHECK-LABEL: vnsra_v4i32_v4i16_scalar_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vnsra.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <4 x i16> poison, i16 %y, i16 0 @@ -73,7 +73,7 @@ define <4 x i16> @vnsra_v4i32_v4i16_scalar_sext(<4 x i32> %x, i16 %y) { define <4 x i16> @vnsra_v4i32_v4i16_scalar_zext(<4 x i32> %x, i16 %y) { ; CHECK-LABEL: vnsra_v4i32_v4i16_scalar_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vnsra.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <4 x i16> poison, i16 %y, i16 0 @@ -100,7 +100,7 @@ define <2 x i32> @vnsra_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) { define <2 x i32> @vnsra_v2i64_v2i32_scalar_sext(<2 x i64> %x, i32 %y) { ; CHECK-LABEL: vnsra_v2i64_v2i32_scalar_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vnsra.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <2 x i32> poison, i32 %y, i32 0 @@ -114,7 +114,7 @@ define <2 x i32> @vnsra_v2i64_v2i32_scalar_sext(<2 x i64> %x, i32 %y) { define <2 x i32> @vnsra_v2i64_v2i32_scalar_zext(<2 x i64> %x, i32 %y) { ; CHECK-LABEL: vnsra_v2i64_v2i32_scalar_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vnsra.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <2 x i32> poison, i32 %y, i32 0 @@ -174,7 +174,7 @@ define <8 x i8> @vnsrl_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) { define <8 x i8> @vnsrl_v8i16_v8i8_scalar_sext(<8 x i16> %x, i8 %y) { ; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vnsrl.wx v8, v8, a0 ; CHECK-NEXT: ret 
%insert = insertelement <8 x i8> poison, i8 %y, i16 0 @@ -188,7 +188,7 @@ define <8 x i8> @vnsrl_v8i16_v8i8_scalar_sext(<8 x i16> %x, i8 %y) { define <8 x i8> @vnsrl_v8i16_v8i8_scalar_zext(<8 x i16> %x, i8 %y) { ; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vnsrl.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <8 x i8> poison, i8 %y, i16 0 @@ -215,7 +215,7 @@ define <4 x i16> @vnsrl_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) { define <4 x i16> @vnsrl_v4i32_v4i16_scalar_sext(<4 x i32> %x, i16 %y) { ; CHECK-LABEL: vnsrl_v4i32_v4i16_scalar_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vnsrl.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <4 x i16> poison, i16 %y, i16 0 @@ -229,7 +229,7 @@ define <4 x i16> @vnsrl_v4i32_v4i16_scalar_sext(<4 x i32> %x, i16 %y) { define <4 x i16> @vnsrl_v4i32_v4i16_scalar_zext(<4 x i32> %x, i16 %y) { ; CHECK-LABEL: vnsrl_v4i32_v4i16_scalar_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vnsrl.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <4 x i16> poison, i16 %y, i16 0 @@ -256,7 +256,7 @@ define <2 x i32> @vnsrl_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) { define <2 x i32> @vnsrl_v2i64_v2i32_scalar_sext(<2 x i64> %x, i32 %y) { ; CHECK-LABEL: vnsrl_v2i64_v2i32_scalar_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vnsrl.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <2 x i32> poison, i32 %y, i32 0 @@ -270,7 +270,7 @@ define <2 x i32> @vnsrl_v2i64_v2i32_scalar_sext(<2 x i64> %x, i32 %y) { define <2 x i32> @vnsrl_v2i64_v2i32_scalar_zext(<2 x i64> %x, i32 %y) { ; CHECK-LABEL: vnsrl_v2i64_v2i32_scalar_zext: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vnsrl.wx v8, v8, a0 ; CHECK-NEXT: ret %insert = insertelement <2 x i32> poison, i32 %y, i32 0 From 232bf8189ef7d574a468bd5bfd1e84e962f7f16e Mon Sep 17 00:00:00 2001 From: Sunil Kuravinakop Date: Tue, 24 May 2022 23:55:08 -0500 Subject: [PATCH 432/908] [OpenMP] atomic compare fail : Parser & AST support This is a support for " #pragma omp atomic compare fail ". It has Parser & AST support for now. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D123235 --- clang/include/clang/AST/ASTNodeTraverser.h | 11 ++ clang/include/clang/AST/OpenMPClause.h | 127 ++++++++++++++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 5 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Parse/Parser.h | 2 + clang/include/clang/Sema/Sema.h | 3 + clang/lib/AST/OpenMPClause.cpp | 31 +++++ clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/OpenMPKinds.cpp | 14 ++ clang/lib/Parse/ParseOpenMP.cpp | 46 ++++++- clang/lib/Sema/SemaOpenMP.cpp | 93 +++++++++++++ clang/lib/Sema/TreeTransform.h | 7 + clang/lib/Serialization/ASTReader.cpp | 28 ++++ clang/lib/Serialization/ASTWriter.cpp | 8 ++ clang/test/OpenMP/atomic_ast_print.cpp | 40 ++++++ clang/test/OpenMP/atomic_messages.cpp | 18 +++ clang/tools/libclang/CIndex.cpp | 2 + flang/lib/Semantics/check-omp-structure.cpp | 1 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 +- 19 files changed, 444 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index f2c5c01ac88deb..cb5bbeb6448128 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -214,6 +214,10 @@ class ASTNodeTraverser } void Visit(const OMPClause *C) { + if (OMPFailClause::classof(C)) { + Visit(static_cast(C)); + return; + } getNodeDelegate().AddChild([=] { getNodeDelegate().Visit(C); for (const 
auto *S : C->children()) @@ -221,6 +225,13 @@ class ASTNodeTraverser }); } + void Visit(const OMPFailClause *C) { + getNodeDelegate().AddChild([=] { + getNodeDelegate().Visit(C); + const OMPClause *MOC = C->const_getMemoryOrderClause(); + Visit(MOC); + }); + } void Visit(const GenericSelectionExpr::ConstAssociation &A) { getNodeDelegate().AddChild([=] { getNodeDelegate().Visit(A); diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index a745df11434685..aa9503c565779e 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2266,6 +2266,133 @@ class OMPCompareClause final : public OMPClause { } }; +/// This represents 'fail' clause in the '#pragma omp atomic' +/// directive. +/// +/// \code +/// #pragma omp atomic compare fail +/// \endcode +/// In this example directive '#pragma omp atomic compare' has 'fail' clause. +class OMPFailClause final + : public OMPClause, + private llvm::TrailingObjects { + OMPClause *MemoryOrderClause; + + friend class OMPClauseReader; + friend TrailingObjects; + + /// Define the sizes of each trailing object array except the last one. This + /// is required for TrailingObjects to work properly. + size_t numTrailingObjects(OverloadToken) const { + // 2 locations: for '(' and argument location. + return 2; + } + + /// Sets the location of '(' in fail clause. + void setLParenLoc(SourceLocation Loc) { + *getTrailingObjects() = Loc; + } + + /// Sets the location of memoryOrder clause argument in fail clause. + void setArgumentLoc(SourceLocation Loc) { + *std::next(getTrailingObjects(), 1) = Loc; + } + + /// Sets the mem_order clause for 'atomic compare fail' directive. + void setMemOrderClauseKind(OpenMPClauseKind MemOrder) { + OpenMPClauseKind *MOCK = getTrailingObjects(); + *MOCK = MemOrder; + } + + /// Sets the mem_order clause for 'atomic compare fail' directive. 
+ void setMemOrderClause(OMPClause *MemoryOrderClauseParam) { + MemoryOrderClause = MemoryOrderClauseParam; + } +public: + /// Build 'fail' clause. + /// + /// \param StartLoc Starting location of the clause. + /// \param EndLoc Ending location of the clause. + OMPFailClause(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_fail, StartLoc, EndLoc) {} + + /// Build an empty clause. + OMPFailClause() + : OMPClause(llvm::omp::OMPC_fail, SourceLocation(), SourceLocation()) {} + + static OMPFailClause *CreateEmpty(const ASTContext &C); + static OMPFailClause *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc); + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_fail; + } + + void initFailClause(SourceLocation LParenLoc, OMPClause *MemOClause, + SourceLocation MemOrderLoc) { + + setLParenLoc(LParenLoc); + MemoryOrderClause = MemOClause; + setArgumentLoc(MemOrderLoc); + + OpenMPClauseKind ClauseKind = MemoryOrderClause->getClauseKind(); + OpenMPClauseKind MemClauseKind = llvm::omp::OMPC_unknown; + switch(ClauseKind) { + case llvm::omp::OMPC_acq_rel: + case llvm::omp::OMPC_acquire: + MemClauseKind = llvm::omp::OMPC_acquire; + break; + case llvm::omp::OMPC_relaxed: + case llvm::omp::OMPC_release: + MemClauseKind = llvm::omp::OMPC_relaxed; + break; + case llvm::omp::OMPC_seq_cst: + MemClauseKind = llvm::omp::OMPC_seq_cst; + break; + default : break; + } + setMemOrderClauseKind(MemClauseKind); + } + + /// Gets the location of '(' in fail clause. 
+ SourceLocation getLParenLoc() const { + return *getTrailingObjects(); + } + + OMPClause *getMemoryOrderClause() { return MemoryOrderClause; } + + const OMPClause *const_getMemoryOrderClause() const { + return static_cast(MemoryOrderClause); + } + + /// Gets the location of memoryOrder clause argument in fail clause. + SourceLocation getArgumentLoc() const { + return *std::next(getTrailingObjects(), 1); + } + + /// Gets the dependence kind in clause for 'depobj' directive. + OpenMPClauseKind getMemOrderClauseKind() const { + return *getTrailingObjects(); + } +}; + /// This represents 'seq_cst' clause in the '#pragma omp atomic' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index ae6442d75dd4b4..e29619a5cecc6f 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3346,6 +3346,11 @@ bool RecursiveASTVisitor::VisitOMPCompareClause(OMPCompareClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPFailClause(OMPFailClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPSeqCstClause(OMPSeqCstClause *) { return true; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 384a8ce3314d42..2969e24e3baf86 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10633,6 +10633,10 @@ def note_omp_atomic_compare: Note< "expect binary operator in conditional expression|expect '<', '>' or '==' as order operator|expect comparison in a form of 'x == e', 'e == x', 'x ordop expr', or 'expr ordop x'|" "expect lvalue for result value|expect scalar value|expect integer value|unexpected 'else' statement|expect '==' operator|expect an assignment statement 'v = x'|" "expect a 'if' statement|expect no more than two statements|expect a compound statement|expect 'else' statement|expect a form 
'r = x == e; if (r) ...'}0">; +def err_omp_atomic_fail_wrong_or_no_clauses : Error<"expected a memory order clause">; +def err_omp_atomic_fail_extra_mem_order_clauses : Error<"directive '#pragma omp atomic compare fail' cannot contain more than one memory order clause">; +def err_omp_atomic_fail_extra_clauses : Error<"directive '#pragma omp atomic compare' cannot contain more than one fail clause">; +def err_omp_atomic_fail_no_compare : Error<"expected 'compare' clause with the 'fail' modifier">; def err_omp_atomic_several_clauses : Error< "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">; def err_omp_several_mem_order_clauses : Error< diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a20866e410aa49..a4d90ce467e48e 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -433,6 +433,8 @@ class Parser : public CodeCompletionHandler { /// a statement expression and builds a suitable expression statement. StmtResult handleExprStmt(ExprResult E, ParsedStmtContext StmtCtx); + OMPClause *ParseOpenMPFailClause(OMPClause *Clause); + public: Parser(Preprocessor &PP, Sema &Actions, bool SkipFunctionBodies); ~Parser() override; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 94440ff9ce55e3..9cbbeadc3d74bf 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11357,6 +11357,9 @@ class Sema final { /// Called on well-formed 'compare' clause. OMPClause *ActOnOpenMPCompareClause(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed 'fail' clause. + OMPClause *ActOnOpenMPFailClause(SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed 'seq_cst' clause. 
OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index dc2d90e366bc76..acf80ed52c3d5d 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -127,6 +127,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_update: case OMPC_capture: case OMPC_compare: + case OMPC_fail: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -220,6 +221,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_update: case OMPC_capture: case OMPC_compare: + case OMPC_fail: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -412,6 +414,25 @@ OMPUpdateClause *OMPUpdateClause::CreateEmpty(const ASTContext &C, return Clause; } +OMPFailClause *OMPFailClause::Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation EndLoc) { + void *Mem = + C.Allocate(totalSizeToAlloc(2, 1), + alignof(OMPFailClause)); + auto *Clause = + new (Mem) OMPFailClause(StartLoc, EndLoc); + return Clause; +} + +OMPFailClause *OMPFailClause::CreateEmpty(const ASTContext &C) { + void *Mem = + C.Allocate(totalSizeToAlloc(2, 1), + alignof(OMPFailClause)); + auto *Clause = new (Mem) OMPFailClause(); + return Clause; +} + void OMPPrivateClause::setPrivateCopies(ArrayRef VL) { assert(VL.size() == varlist_size() && "Number of private copies is not the same as the preallocated buffer"); @@ -1847,6 +1868,16 @@ void OMPClausePrinter::VisitOMPCompareClause(OMPCompareClause *) { OS << "compare"; } +void OMPClausePrinter::VisitOMPFailClause(OMPFailClause *Node) { + OS << "fail"; + if(Node) { + OS << "("; + OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(), + static_cast(Node->getMemOrderClauseKind())); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPSeqCstClause(OMPSeqCstClause *) { OS << "seq_cst"; } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp 
index 77d5d95a1ad108..3779e1f349b0e3 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -557,6 +557,8 @@ void OMPClauseProfiler::VisitOMPCaptureClause(const OMPCaptureClause *) {} void OMPClauseProfiler::VisitOMPCompareClause(const OMPCompareClause *) {} +void OMPClauseProfiler::VisitOMPFailClause(const OMPFailClause *) {} + void OMPClauseProfiler::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseProfiler::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 2de81dcef3ec0d..5ff4e023fb3c18 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -366,6 +366,20 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'depend' clause type"); + case OMPC_fail: { + OpenMPClauseKind CK = static_cast(Type); + switch (CK) { + case OMPC_acquire: + return "acquire"; + case OMPC_relaxed: + return "relaxed"; + case OMPC_seq_cst: + return "seq_cst"; + default: + return "unknown"; + } + llvm_unreachable("Invalid OpenMP 'fail' clause modifier"); + } case OMPC_device: switch (Type) { case OMPC_DEVICE_unknown: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 48ee1b02921e2c..3d5c9c949760b7 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3234,6 +3234,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_write: case OMPC_capture: case OMPC_compare: + case OMPC_fail: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -3620,6 +3621,45 @@ OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind, Val.getValue().Loc, Val.getValue().RLoc); } +OMPClause *Parser::ParseOpenMPFailClause(OMPClause *Clause) { + + OMPFailClause *FailClause = static_cast(Clause); + SourceLocation LParenLoc; + if (Tok.is(tok::l_paren)) { + LParenLoc = 
Tok.getLocation(); + ConsumeAnyToken(); + } else { + Diag(diag::err_expected_lparen_after) << getOpenMPClauseName(OMPC_fail); + return Clause; + } + + + OpenMPClauseKind CKind = Tok.isAnnotation() + ? OMPC_unknown + : getOpenMPClauseKind(PP.getSpelling(Tok)); + if (CKind == OMPC_unknown) { + Diag(diag::err_omp_expected_clause) << ("atomic compare fail"); + return Clause; + } + OMPClause *MemoryOrderClause = ParseOpenMPClause(CKind, false); + SourceLocation MemOrderLoc; + // Store Memory Order SubClause for Sema. + if (MemoryOrderClause) { + MemOrderLoc = Tok.getLocation(); + } + + if (Tok.is(tok::r_paren)) { + FailClause->initFailClause(LParenLoc,MemoryOrderClause,MemOrderLoc); + ConsumeAnyToken(); + } else { + const IdentifierInfo *Arg = Tok.getIdentifierInfo(); + Diag(Tok, diag::err_expected_rparen_after) + << (Arg ? Arg->getName() : "atomic compare fail"); + } + + return Clause; +} + /// Parsing of OpenMP clauses like 'ordered'. /// /// ordered-clause: @@ -3652,7 +3692,11 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPClauseKind Kind, bool ParseOnly) { if (ParseOnly) return nullptr; - return Actions.ActOnOpenMPClause(Kind, Loc, Tok.getLocation()); + OMPClause *Clause = Actions.ActOnOpenMPClause(Kind, Loc, Tok.getLocation()); + if (Kind == llvm::omp::Clause::OMPC_fail) { + Clause = ParseOpenMPFailClause(Clause); + } + return Clause; } /// Parsing of OpenMP clauses with single expressions and some additional diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index ce65fe467c3fa0..1fdb7df1feea4c 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -12026,6 +12026,68 @@ bool OpenMPAtomicCompareCaptureChecker::checkStmt(Stmt *S, return checkType(ErrorInfo); } + +class OpenMPAtomicFailChecker { + +protected: + Sema &SemaRef; + ASTContext &Context; + +public: + // Error descriptor type which will be returned to Sema + unsigned int ErrorNo; + + OpenMPAtomicFailChecker(Sema &S) : SemaRef(S), Context(S.getASTContext()) 
{} + + /// Check if all results conform with spec in terms of lvalue/rvalue + /// and scalar type. + bool checkSubClause(ArrayRef Clauses, SourceLocation *ErrorLoc); + /// Return the error descriptor that will guide the error message emission. + unsigned getErrorDesc() const { return ErrorNo; } +}; + +bool OpenMPAtomicFailChecker::checkSubClause(ArrayRef Clauses, + SourceLocation *ErrorLoc) { + int NoOfFails = 0; + ErrorNo = 0; + SourceLocation ClauseLoc; + for (const OMPClause *C : Clauses) { + if (C->getClauseKind() == OMPC_fail) { + NoOfFails++; + const OMPFailClause *FC = static_cast(C); + const OMPClause *MemOrderC = FC->const_getMemoryOrderClause(); + /* Clauses contains OMPC_fail and the subclause */ + if (MemOrderC) { + OpenMPClauseKind ClauseKind = MemOrderC->getClauseKind(); + if ((ClauseKind == OMPC_acq_rel) || (ClauseKind == OMPC_acquire) || + (ClauseKind == OMPC_relaxed) || (ClauseKind == OMPC_release) || + (ClauseKind == OMPC_seq_cst)) { + switch(ClauseKind) { + case OMPC_acq_rel : + ClauseKind = OMPC_acquire; + break; + case OMPC_release : + ClauseKind = OMPC_relaxed; + break; + default : break; + } + continue; + } else { + ErrorNo = diag::err_omp_atomic_fail_wrong_or_no_clauses; + *ErrorLoc = MemOrderC->getBeginLoc(); + continue; + } + } + } + } + if (NoOfFails > 1) { + ErrorNo = diag::err_omp_atomic_fail_extra_clauses; + *ErrorLoc = ClauseLoc; + } + + return !ErrorNo; +} + } // namespace StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, @@ -12046,6 +12108,8 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, SourceLocation AtomicKindLoc; OpenMPClauseKind MemOrderKind = OMPC_unknown; SourceLocation MemOrderLoc; + llvm::omp::Clause SubClause = OMPC_unknown; + SourceLocation SubClauseLoc; bool MutexClauseEncountered = false; llvm::SmallSet EncounteredAtomicKinds; for (const OMPClause *C : Clauses) { @@ -12074,6 +12138,16 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, } break; } + case OMPC_fail: { + if 
(AtomicKind != OMPC_compare) { + Diag(C->getBeginLoc(), diag::err_omp_atomic_fail_no_compare) + << SourceRange(C->getBeginLoc(), C->getEndLoc()); + return StmtError(); + } + SubClause = OMPC_fail; + SubClauseLoc = C->getBeginLoc(); + break; + } case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -12552,6 +12626,17 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, CE = Checker.getCond(); // We reuse IsXLHSInRHSPart to tell if it is in the form 'x ordop expr'. IsXLHSInRHSPart = Checker.isXBinopExpr(); + if (SubClause == OMPC_fail) { + OpenMPAtomicFailChecker Checker(*this); + SourceLocation ErrorLoc, NoteLoc; + NoteLoc = ErrorLoc = Body->getBeginLoc(); + ErrorLoc = SubClauseLoc; + if (!Checker.checkSubClause(Clauses,&ErrorLoc)) { + unsigned ErrorNo = Checker.getErrorDesc(); + Diag(ErrorLoc, ErrorNo); + return StmtError(); + } + } } } @@ -16491,6 +16576,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_compare: Res = ActOnOpenMPCompareClause(StartLoc, EndLoc); break; + case OMPC_fail: + Res = ActOnOpenMPFailClause(StartLoc, EndLoc); + break; case OMPC_seq_cst: Res = ActOnOpenMPSeqCstClause(StartLoc, EndLoc); break; @@ -16644,6 +16732,11 @@ OMPClause *Sema::ActOnOpenMPCompareClause(SourceLocation StartLoc, return new (Context) OMPCompareClause(StartLoc, EndLoc); } +OMPClause *Sema::ActOnOpenMPFailClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return OMPFailClause::Create(Context, StartLoc, EndLoc); +} + OMPClause *Sema::ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc) { return new (Context) OMPSeqCstClause(StartLoc, EndLoc); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index fae712e04db865..c951b6ca5d6a03 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9559,6 +9559,13 @@ TreeTransform::TransformOMPCompareClause(OMPCompareClause *C) { return C; } +template +OMPClause * +TreeTransform::TransformOMPFailClause(OMPFailClause 
*C) { + // No need to rebuild this clause, no template-dependent parameters. + return C; +} + template OMPClause * TreeTransform::TransformOMPSeqCstClause(OMPSeqCstClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 76629a516cddd8..a669f30d95c701 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11744,6 +11744,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_compare: C = new (Context) OMPCompareClause(); break; + case llvm::omp::OMPC_fail: + C = OMPFailClause::CreateEmpty(Context); + break; case llvm::omp::OMPC_seq_cst: C = new (Context) OMPSeqCstClause(); break; @@ -12113,6 +12116,31 @@ void OMPClauseReader::VisitOMPCaptureClause(OMPCaptureClause *) {} void OMPClauseReader::VisitOMPCompareClause(OMPCompareClause *) {} +void OMPClauseReader::VisitOMPFailClause(OMPFailClause *C) { + C->setLParenLoc(Record.readSourceLocation()); + SourceLocation SourceLoc = Record.readSourceLocation(); + C->setArgumentLoc(SourceLoc); + OpenMPClauseKind CKind = Record.readEnum(); + C->setMemOrderClauseKind(CKind); + + SourceLocation EndLoc; + OMPClause *MemoryOrderClause = NULL; + switch(CKind) { + case llvm::omp::OMPC_acquire: + MemoryOrderClause = new (Context) OMPAcquireClause(SourceLoc, EndLoc); + break; + case llvm::omp::OMPC_relaxed: + MemoryOrderClause = new (Context) OMPRelaxedClause(SourceLoc, EndLoc); + break; + case llvm::omp::OMPC_seq_cst: + MemoryOrderClause = new (Context) OMPSeqCstClause(SourceLoc, EndLoc); + break; + default: + break; + } + C->setMemOrderClause(MemoryOrderClause); +} + void OMPClauseReader::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseReader::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 61be9265db0cce..8a14ce23866055 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6327,6 
+6327,14 @@ void OMPClauseWriter::VisitOMPCaptureClause(OMPCaptureClause *) {} void OMPClauseWriter::VisitOMPCompareClause(OMPCompareClause *) {} +void OMPClauseWriter::VisitOMPFailClause(OMPFailClause *C) { + // Record.AddSourceLocation(C->getLParenLoc()); + // Copied from VisitOMPUpdateClause + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getArgumentLoc()); + Record.writeEnum(C->getMemOrderClauseKind()); +} + void OMPClauseWriter::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseWriter::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/test/OpenMP/atomic_ast_print.cpp b/clang/test/OpenMP/atomic_ast_print.cpp index 201f62ab2117e3..c877cea915503b 100644 --- a/clang/test/OpenMP/atomic_ast_print.cpp +++ b/clang/test/OpenMP/atomic_ast_print.cpp @@ -226,6 +226,16 @@ T foo(T argc) { { v = a; if (a < b) { a = b; } } #pragma omp atomic compare capture hint(6) { v = a == b; if (v) a = c; } +#pragma omp atomic compare fail(acq_rel) + { if (a < c) { a = c; } } +#pragma omp atomic compare fail(acquire) + { if (a < c) { a = c; } } +#pragma omp atomic compare fail(release) + { if (a < c) { a = c; } } +#pragma omp atomic compare fail(relaxed) + { if (a < c) { a = c; } } +#pragma omp atomic compare fail(seq_cst) + { if (a < c) { a = c; } } #endif return T(); } @@ -1099,6 +1109,16 @@ int main(int argc, char **argv) { { v = a; if (a < b) { a = b; } } #pragma omp atomic compare capture hint(6) { v = a == b; if (v) a = c; } +#pragma omp atomic compare fail(acq_rel) + if(a < b) { a = b; } +#pragma omp atomic compare fail(acquire) + if(a < b) { a = b; } +#pragma omp atomic compare fail(release) + if(a < b) { a = b; } +#pragma omp atomic compare fail(relaxed) + if(a < b) { a = b; } +#pragma omp atomic compare fail(seq_cst) + if(a < b) { a = b; } #endif // CHECK-NEXT: #pragma omp atomic // CHECK-NEXT: a++; @@ -1429,6 +1449,26 @@ int main(int argc, char **argv) { // CHECK-51-NEXT: if (v) // CHECK-51-NEXT: a = c; // CHECK-51-NEXT: } + // 
CHECK-51-NEXT: #pragma omp atomic compare fail(acquire) + // CHECK-51-NEXT: if (a < b) { + // CHECK-51-NEXT: a = b; + // CHECK-51-NEXT: } + // CHECK-51-NEXT: #pragma omp atomic compare fail(acquire) + // CHECK-51-NEXT: if (a < b) { + // CHECK-51-NEXT: a = b; + // CHECK-51-NEXT: } + // CHECK-51-NEXT: #pragma omp atomic compare fail(relaxed) + // CHECK-51-NEXT: if (a < b) { + // CHECK-51-NEXT: a = b; + // CHECK-51-NEXT: } + // CHECK-51-NEXT: #pragma omp atomic compare fail(relaxed) + // CHECK-51-NEXT: if (a < b) { + // CHECK-51-NEXT: a = b; + // CHECK-51-NEXT: } + // CHECK-51-NEXT: #pragma omp atomic compare fail(seq_cst) + // CHECK-51-NEXT: if (a < b) { + // CHECK-51-NEXT: a = b; + // CHECK-51-NEXT: } // expect-note@+1 {{in instantiation of function template specialization 'foo' requested here}} return foo(a); } diff --git a/clang/test/OpenMP/atomic_messages.cpp b/clang/test/OpenMP/atomic_messages.cpp index 23fd24bfcf1185..656fd498c74aae 100644 --- a/clang/test/OpenMP/atomic_messages.cpp +++ b/clang/test/OpenMP/atomic_messages.cpp @@ -958,6 +958,24 @@ int mixed() { // expected-error@+1 {{directive '#pragma omp atomic' cannot contain more than one 'capture' clause}} #pragma omp atomic compare compare capture capture { v = a; if (a > b) a = b; } +// expected-error@+1 {{expected 'compare' clause with the 'fail' modifier}} +#pragma omp atomic fail(seq_cst) + if(v == a) { v = a; } +// expected-error@+1 {{expected '(' after 'fail'}} +#pragma omp atomic compare fail + if(v < a) { v = a; } +// expected-error@+1 {{expected a memory order clause}} +#pragma omp atomic compare fail(capture) + if(v < a) { v = a; } + // expected-error@+2 {{expected ')' after 'atomic compare fail'}} + // expected-warning@+1 {{extra tokens at the end of '#pragma omp atomic' are ignored}} +#pragma omp atomic compare fail(seq_cst | acquire) + if(v < a) { v = a; } +// expected-error@+1 {{directive '#pragma omp atomic' cannot contain more than one 'fail' clause}} +#pragma omp atomic compare 
fail(relaxed) fail(seq_cst) + if(v < a) { v = a; } + + #endif // expected-note@+1 {{in instantiation of function template specialization 'mixed' requested here}} return mixed(); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 97ce1c7533f23a..cf31524bbc4352 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2368,6 +2368,8 @@ void OMPClauseEnqueue::VisitOMPCaptureClause(const OMPCaptureClause *) {} void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} +void OMPClauseEnqueue::VisitOMPFailClause(const OMPFailClause *) {} + void OMPClauseEnqueue::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseEnqueue::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 83acaba367ffd2..73d660f33d995e 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1807,6 +1807,7 @@ CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind) CHECK_SIMPLE_CLAUSE(Align, OMPC_align) CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare) CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type) +CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 9575f6df6caeab..8218be1f966bfa 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -199,6 +199,7 @@ def OMPC_Write : Clause<"write"> { let clangClass = "OMPWriteClause"; } def OMPC_Update : Clause<"update"> { let clangClass = "OMPUpdateClause"; } def OMPC_Capture : Clause<"capture"> { let clangClass = "OMPCaptureClause"; } def OMPC_Compare : Clause<"compare"> { let clangClass = "OMPCompareClause"; } +def OMPC_Fail : Clause<"fail"> { let clangClass = 
"OMPFailClause"; } def OMPC_SeqCst : Clause<"seq_cst"> { let clangClass = "OMPSeqCstClause"; } def OMPC_AcqRel : Clause<"acq_rel"> { let clangClass = "OMPAcqRelClause"; } def OMPC_Acquire : Clause<"acquire"> { let clangClass = "OMPAcquireClause"; } @@ -569,7 +570,8 @@ def OMP_Atomic : Directive<"atomic"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause ]; } def OMP_Target : Directive<"target"> { From ca27f3e3b26ed5389d4a361f274e3be516eb282d Mon Sep 17 00:00:00 2001 From: Sunil Kuravinakop Date: Tue, 24 May 2022 23:59:19 -0500 Subject: [PATCH 433/908] [Clang][OpenMP] Support for omp nothing Patch to support "#pragma omp nothing" Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D123286 --- clang/lib/Basic/OpenMPKinds.cpp | 3 +++ clang/lib/Parse/ParseOpenMP.cpp | 10 +++++++++ clang/test/OpenMP/nothing_messages.cpp | 27 ++++++++++++++++++++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 4 files changed, 41 insertions(+) create mode 100644 clang/test/OpenMP/nothing_messages.cpp diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 5ff4e023fb3c18..dbfe2117b00a61 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -730,6 +730,9 @@ void clang::getOpenMPCaptureRegions( case OMPD_teams_loop: CaptureRegions.push_back(OMPD_teams); break; + case OMPD_nothing: + CaptureRegions.push_back(OMPD_nothing); + break; case OMPD_loop: // TODO: 'loop' may require different capture regions depending on the bind // clause or the parent directive when there is no bind clause. 
Use diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 3d5c9c949760b7..1e484f56441704 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2488,6 +2488,16 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( bool HasAssociatedStatement = true; switch (DKind) { + case OMPD_nothing: + if ((StmtCtx & ParsedStmtContext::AllowStandaloneOpenMPDirectives) == + ParsedStmtContext()) + Diag(Tok, diag::err_omp_immediate_directive) + << getOpenMPDirectiveName(DKind) << 0; + ConsumeToken(); + skipUntilPragmaOpenMPEnd(DKind); + if (Tok.is(tok::annot_pragma_openmp_end)) + ConsumeAnnotationToken(); + break; case OMPD_metadirective: { ConsumeToken(); SmallVector VMIs; diff --git a/clang/test/OpenMP/nothing_messages.cpp b/clang/test/OpenMP/nothing_messages.cpp new file mode 100644 index 00000000000000..cd6d0defe492fb --- /dev/null +++ b/clang/test/OpenMP/nothing_messages.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -verify=expected -fopenmp -ferror-limit 100 %s -Wuninitialized + +int mixed() { + int x = 0; + int d = 4; + +#pragma omp nothing + x=d; + + if(!x) { +#pragma omp nothing + x=d; + } + +// expected-error@+2 {{#pragma omp nothing' cannot be an immediate substatement}} + if(!x) +#pragma omp nothing + x=d; + +// expected-warning@+2 {{extra tokens at the end of '#pragma omp nothing' are ignored}} + if(!x) { +#pragma omp nothing seq_cst + x=d; + } + + return 0; +} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 8218be1f966bfa..b4abc64bc21185 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -629,6 +629,7 @@ def OMP_Requires : Directive<"requires"> { VersionedClause ]; } +def OMP_Nothing : Directive<"nothing"> {} def OMP_TargetData : Directive<"target data"> { let allowedClauses = [ VersionedClause, From 9698a445c664e6f0da5727364338ee99de537d6a Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: 
Wed, 25 May 2022 09:33:41 +0200 Subject: [PATCH 434/908] Fix warning by handling OMPC_fail in switch statement. --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 61e3661e59be0d..ebe65e5f72b3e7 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6317,6 +6317,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_bind: case OMPC_align: case OMPC_cancellation_construct_type: + case OMPC_fail: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } From 643df8fa8ef58d883cbb554c7e71910dc8a8673c Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 10 May 2022 15:27:52 -0400 Subject: [PATCH 435/908] [libc++] Make sure that all headers can be included with modules enabled This commit ensures that we can include all libc++ headers with modules enabled. It adds a test to ensure that this doesn't regress, which is necessary because our modules CI job does not build in all Standard modes. 
Differential Revision: https://reviews.llvm.org/D125331 --- libcxx/include/__filesystem/u8path.h | 8 + libcxx/include/barrier | 1 + libcxx/include/latch | 1 + libcxx/include/module.modulemap | 4 - libcxx/include/semaphore | 1 + libcxx/include/span | 1 + .../headers_declare_assertion_handler.sh.cpp | 16 +- libcxx/test/libcxx/clang_tidy.sh.cpp | 30 +- libcxx/test/libcxx/double_include.sh.cpp | 30 +- .../libcxx/min_max_macros.compile.pass.cpp | 30 +- libcxx/test/libcxx/modules_include.sh.cpp | 610 ++++++++++++++++++ .../test/libcxx/nasty_macros.compile.pass.cpp | 30 +- .../libcxx/no_assert_include.compile.pass.cpp | 30 +- libcxx/utils/generate_header_tests.py | 18 +- 14 files changed, 753 insertions(+), 57 deletions(-) create mode 100644 libcxx/test/libcxx/modules_include.sh.cpp diff --git a/libcxx/include/__filesystem/u8path.h b/libcxx/include/__filesystem/u8path.h index ee25521f2898f2..d35faa14bb8a0f 100644 --- a/libcxx/include/__filesystem/u8path.h +++ b/libcxx/include/__filesystem/u8path.h @@ -10,11 +10,19 @@ #ifndef _LIBCPP___FILESYSTEM_U8PATH_H #define _LIBCPP___FILESYSTEM_U8PATH_H +#include <__algorithm/unwrap_iter.h> #include <__availability> #include <__config> #include <__filesystem/path.h> +#include #include +// Only required on Windows for __widen_from_utf8, and included conservatively +// because it requires support for localization. 
+#if defined(_LIBCPP_WIN32API) +# include +#endif + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/libcxx/include/barrier b/libcxx/include/barrier index 36ea6f589d7e6e..f03c089a14b96c 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -50,6 +50,7 @@ namespace std #include <__config> #include <__thread/timed_backoff_policy.h> #include +#include #ifndef _LIBCPP_HAS_NO_TREE_BARRIER # include #endif diff --git a/libcxx/include/latch b/libcxx/include/latch index 21abbad2171fbc..85936750b4c113 100644 --- a/libcxx/include/latch +++ b/libcxx/include/latch @@ -44,6 +44,7 @@ namespace std #include <__availability> #include <__config> #include +#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 132793f1172c39..6a771ea69fe904 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -3,7 +3,6 @@ // include cycle. 
module std_config [system] [extern_c] { header "__config" - header "__config_site" export * } @@ -370,7 +369,6 @@ module std [system] { export * } module barrier { - requires cplusplus14 header "barrier" export * } @@ -698,7 +696,6 @@ module std [system] { } } module latch { - requires cplusplus14 header "latch" export * } @@ -906,7 +903,6 @@ module std [system] { export * } module semaphore { - requires cplusplus14 header "semaphore" export * } diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore index ce1fa6987c4a91..228cf773f0fd73 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -52,6 +52,7 @@ using binary_semaphore = counting_semaphore<1>; #include <__thread/timed_backoff_policy.h> #include <__threading_support> #include +#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/span b/libcxx/include/span index 98ecfd4fa61ada..15564d8378523b 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -138,6 +138,7 @@ template #include <__ranges/enable_borrowed_range.h> #include <__ranges/enable_view.h> #include <__ranges/size.h> +#include <__utility/forward.h> #include // for array #include // for byte #include // for iterators diff --git a/libcxx/test/libcxx/assertions/headers_declare_assertion_handler.sh.cpp b/libcxx/test/libcxx/assertions/headers_declare_assertion_handler.sh.cpp index 167da28a7add28..8c8be13b56b935 100644 --- a/libcxx/test/libcxx/assertions/headers_declare_assertion_handler.sh.cpp +++ b/libcxx/test/libcxx/assertions/headers_declare_assertion_handler.sh.cpp @@ -641,13 +641,13 @@ int main(int, char**) { return 0; } #endif // RUN: %{build} -DTEST_119 -#if defined(TEST_119) +#if defined(TEST_119) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif // RUN: %{build} -DTEST_120 -#if defined(TEST_120) && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) +#if defined(TEST_120) && __cplusplus >= 201103L && 
!defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif @@ -665,13 +665,13 @@ int main(int, char**) { return 0; } #endif // RUN: %{build} -DTEST_123 -#if defined(TEST_123) +#if defined(TEST_123) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif // RUN: %{build} -DTEST_124 -#if defined(TEST_124) +#if defined(TEST_124) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif @@ -695,7 +695,7 @@ int main(int, char**) { return 0; } #endif // RUN: %{build} -DTEST_128 -#if defined(TEST_128) +#if defined(TEST_128) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif @@ -713,7 +713,7 @@ int main(int, char**) { return 0; } #endif // RUN: %{build} -DTEST_131 -#if defined(TEST_131) +#if defined(TEST_131) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif @@ -725,7 +725,7 @@ int main(int, char**) { return 0; } #endif // RUN: %{build} -DTEST_133 -#if defined(TEST_133) +#if defined(TEST_133) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif @@ -743,7 +743,7 @@ int main(int, char**) { return 0; } #endif // RUN: %{build} -DTEST_136 -#if defined(TEST_136) +#if defined(TEST_136) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_assertion_handler); #endif diff --git a/libcxx/test/libcxx/clang_tidy.sh.cpp b/libcxx/test/libcxx/clang_tidy.sh.cpp index c1ace4d3201f43..d3cae11d864cfc 100644 --- a/libcxx/test/libcxx/clang_tidy.sh.cpp +++ b/libcxx/test/libcxx/clang_tidy.sh.cpp @@ -207,8 +207,10 @@ END-SCRIPT #if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) # include #endif -#include -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L && 
!defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) # include #endif #if __cplusplus >= 201103L @@ -217,8 +219,12 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include -#include +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif @@ -228,25 +234,33 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif diff --git a/libcxx/test/libcxx/double_include.sh.cpp b/libcxx/test/libcxx/double_include.sh.cpp index 39c5c9860309b2..8d3cfc0be31cd4 100644 --- a/libcxx/test/libcxx/double_include.sh.cpp +++ b/libcxx/test/libcxx/double_include.sh.cpp @@ -210,8 +210,10 @@ END-SCRIPT #if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) # include #endif -#include -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) # include #endif #if __cplusplus >= 201103L @@ -220,8 +222,12 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include -#include +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif @@ -231,25 +237,33 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif 
-#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif diff --git a/libcxx/test/libcxx/min_max_macros.compile.pass.cpp b/libcxx/test/libcxx/min_max_macros.compile.pass.cpp index faa3ec0ef25763..2f5aef69139f76 100644 --- a/libcxx/test/libcxx/min_max_macros.compile.pass.cpp +++ b/libcxx/test/libcxx/min_max_macros.compile.pass.cpp @@ -329,9 +329,11 @@ TEST_MACROS(); # include TEST_MACROS(); #endif -#include +#if __cplusplus >= 201103L +# include TEST_MACROS(); -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) +#endif +#if __cplusplus >= 201103L && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) # include TEST_MACROS(); #endif @@ -343,10 +345,14 @@ TEST_MACROS(); # include TEST_MACROS(); #endif -#include +#if __cplusplus >= 201103L +# include TEST_MACROS(); -#include +#endif +#if __cplusplus >= 201103L +# include TEST_MACROS(); +#endif #if __cplusplus >= 201103L # include TEST_MACROS(); @@ -359,8 +365,10 @@ TEST_MACROS(); # include TEST_MACROS(); #endif -#include +#if __cplusplus >= 201103L +# include TEST_MACROS(); +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L # include TEST_MACROS(); @@ -369,14 +377,18 @@ TEST_MACROS(); # include TEST_MACROS(); #endif -#include +#if __cplusplus >= 201103L +# include TEST_MACROS(); +#endif #if __cplusplus >= 201103L # include TEST_MACROS(); #endif -#include +#if __cplusplus >= 201103L +# include TEST_MACROS(); +#endif #if __cplusplus >= 201103L # include TEST_MACROS(); @@ -385,8 +397,10 @@ TEST_MACROS(); # include TEST_MACROS(); #endif -#include +#if __cplusplus >= 201103L +# include TEST_MACROS(); +#endif #if __cplusplus >= 201103L # include TEST_MACROS(); diff --git a/libcxx/test/libcxx/modules_include.sh.cpp 
b/libcxx/test/libcxx/modules_include.sh.cpp new file mode 100644 index 00000000000000..d165d53eb6a4d8 --- /dev/null +++ b/libcxx/test/libcxx/modules_include.sh.cpp @@ -0,0 +1,610 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Test that we can include each header in a TU while using modules. +// This is important notably because the LLDB data formatters use +// libc++ headers with modules enabled. + +// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME + +// GCC doesn't support -fcxx-modules +// UNSUPPORTED: gcc + +// The Windows headers don't appear to be compatible with modules +// UNSUPPORTED: windows + +// TODO: Some headers produce errors when we include them and the library has been +// configured without support for them, which breaks the modules build. +// UNSUPPORTED: libcpp-has-no-localization, libcpp-has-no-filesystem-library, libcpp-has-no-threads, libcpp-has-no-wide-characters + +// Prevent from generating deprecated warnings for this test. 
+#if defined(__DEPRECATED) +# undef __DEPRECATED +#endif + +#include <__config> + +/* +BEGIN-SCRIPT + +for i, header in enumerate(public_headers): + print("// {}: %{{cxx}} %s %{{flags}} %{{compile_flags}} -fmodules -fcxx-modules -fsyntax-only -DTEST_{}".format('RUN', i)) + if header in header_restrictions: + print("#if defined(TEST_{}) && {}".format(i, header_restrictions[header])) + else: + print("#if defined(TEST_{})".format(i)) + print("#include <{}>".format(header)) + print("#endif") + +END-SCRIPT +*/ + +// DO NOT MANUALLY EDIT ANYTHING BETWEEN THE MARKERS BELOW +// GENERATED-MARKER +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_0 +#if defined(TEST_0) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_1 +#if defined(TEST_1) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_2 +#if defined(TEST_2) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_3 +#if defined(TEST_3) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_4 +#if defined(TEST_4) && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_5 +#if defined(TEST_5) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_6 +#if defined(TEST_6) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_7 +#if defined(TEST_7) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_8 +#if defined(TEST_8) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_9 +#if defined(TEST_9) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules 
-fcxx-modules -fsyntax-only -DTEST_10 +#if defined(TEST_10) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_11 +#if defined(TEST_11) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_12 +#if defined(TEST_12) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_13 +#if defined(TEST_13) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_14 +#if defined(TEST_14) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_15 +#if defined(TEST_15) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_16 +#if defined(TEST_16) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_17 +#if defined(TEST_17) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_18 +#if defined(TEST_18) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_19 +#if defined(TEST_19) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_20 +#if defined(TEST_20) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_21 +#if defined(TEST_21) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_22 +#if defined(TEST_22) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_23 +#if defined(TEST_23) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_24 +#if 
defined(TEST_24) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_25 +#if defined(TEST_25) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_26 +#if defined(TEST_26) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_27 +#if defined(TEST_27) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_28 +#if defined(TEST_28) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_29 +#if defined(TEST_29) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_30 +#if defined(TEST_30) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_31 +#if defined(TEST_31) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_32 +#if defined(TEST_32) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_33 +#if defined(TEST_33) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_34 +#if defined(TEST_34) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_35 +#if defined(TEST_35) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_36 +#if defined(TEST_36) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_37 +#if defined(TEST_37) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_38 +#if defined(TEST_38) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_39 +#if 
defined(TEST_39) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_40 +#if defined(TEST_40) && !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_41 +#if defined(TEST_41) && !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_42 +#if defined(TEST_42) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_43 +#if defined(TEST_43) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_44 +#if defined(TEST_44) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_45 +#if defined(TEST_45) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_46 +#if defined(TEST_46) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_47 +#if defined(TEST_47) && !defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_48 +#if defined(TEST_48) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_49 +#if defined(TEST_49) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_50 +#if defined(TEST_50) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_51 +#if defined(TEST_51) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_52 +#if defined(TEST_52) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules 
-fcxx-modules -fsyntax-only -DTEST_53 +#if defined(TEST_53) && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_54 +#if defined(TEST_54) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_55 +#if defined(TEST_55) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_56 +#if defined(TEST_56) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_57 +#if defined(TEST_57) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_58 +#if defined(TEST_58) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_59 +#if defined(TEST_59) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_60 +#if defined(TEST_60) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_61 +#if defined(TEST_61) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_62 +#if defined(TEST_62) && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_63 +#if defined(TEST_63) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_64 +#if defined(TEST_64) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_65 +#if defined(TEST_65) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only 
-DTEST_66 +#if defined(TEST_66) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_67 +#if defined(TEST_67) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_68 +#if defined(TEST_68) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_69 +#if defined(TEST_69) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_70 +#if defined(TEST_70) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_71 +#if defined(TEST_71) && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_72 +#if defined(TEST_72) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_73 +#if defined(TEST_73) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_74 +#if defined(TEST_74) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_75 +#if defined(TEST_75) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_76 +#if defined(TEST_76) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_77 +#if defined(TEST_77) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_78 +#if defined(TEST_78) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_79 +#if defined(TEST_79) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules 
-fcxx-modules -fsyntax-only -DTEST_80 +#if defined(TEST_80) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_81 +#if defined(TEST_81) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_82 +#if defined(TEST_82) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_83 +#if defined(TEST_83) && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_84 +#if defined(TEST_84) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_85 +#if defined(TEST_85) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_86 +#if defined(TEST_86) && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_87 +#if defined(TEST_87) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_88 +#if defined(TEST_88) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_89 +#if defined(TEST_89) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_90 +#if defined(TEST_90) && __cplusplus > 202002L && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_91 +#if defined(TEST_91) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_92 +#if defined(TEST_92) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_93 +#if 
defined(TEST_93) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_94 +#if defined(TEST_94) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_95 +#if defined(TEST_95) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_96 +#if defined(TEST_96) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_97 +#if defined(TEST_97) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_98 +#if defined(TEST_98) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_99 +#if defined(TEST_99) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_100 +#if defined(TEST_100) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_101 +#if defined(TEST_101) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_102 +#if defined(TEST_102) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_103 +#if defined(TEST_103) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_104 +#if defined(TEST_104) && !defined(_LIBCPP_HAS_NO_THREADS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_105 +#if defined(TEST_105) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_106 +#if defined(TEST_106) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_107 +#if 
defined(TEST_107) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_108 +#if defined(TEST_108) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_109 +#if defined(TEST_109) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_110 +#if defined(TEST_110) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_111 +#if defined(TEST_111) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_112 +#if defined(TEST_112) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_113 +#if defined(TEST_113) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_114 +#if defined(TEST_114) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_115 +#if defined(TEST_115) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_116 +#if defined(TEST_116) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_117 +#if defined(TEST_117) && !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_118 +#if defined(TEST_118) && !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_119 +#if defined(TEST_119) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_120 +#if defined(TEST_120) && __cplusplus >= 201103L && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) +#include +#endif +// RUN: %{cxx} 
%s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_121 +#if defined(TEST_121) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_122 +#if defined(TEST_122) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_123 +#if defined(TEST_123) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_124 +#if defined(TEST_124) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_125 +#if defined(TEST_125) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_126 +#if defined(TEST_126) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_127 +#if defined(TEST_127) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_128 +#if defined(TEST_128) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_129 +#if defined(TEST_129) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_130 +#if defined(TEST_130) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_131 +#if defined(TEST_131) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_132 +#if defined(TEST_132) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} 
%{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_133 +#if defined(TEST_133) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_134 +#if defined(TEST_134) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_135 +#if defined(TEST_135) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_136 +#if defined(TEST_136) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_137 +#if defined(TEST_137) && __cplusplus >= 201103L +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_138 +#if defined(TEST_138) +#include +#endif +// RUN: %{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fsyntax-only -DTEST_139 +#if defined(TEST_139) +#include +#endif +// GENERATED-MARKER diff --git a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp index 9684df18ee318f..f6e04d2dbd75b5 100644 --- a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp +++ b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp @@ -324,8 +324,10 @@ END-SCRIPT #if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) # include #endif -#include -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) # include #endif #if __cplusplus >= 201103L @@ -334,8 +336,12 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include -#include +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif @@ -345,25 +351,33 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include +#if 
__cplusplus >= 201103L +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif diff --git a/libcxx/test/libcxx/no_assert_include.compile.pass.cpp b/libcxx/test/libcxx/no_assert_include.compile.pass.cpp index 1609c588c7b752..c4604a7d5da66c 100644 --- a/libcxx/test/libcxx/no_assert_include.compile.pass.cpp +++ b/libcxx/test/libcxx/no_assert_include.compile.pass.cpp @@ -207,8 +207,10 @@ END-SCRIPT #if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) # include #endif -#include -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) # include #endif #if __cplusplus >= 201103L @@ -217,8 +219,12 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include -#include +#if __cplusplus >= 201103L +# include +#endif +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif @@ -228,25 +234,33 @@ END-SCRIPT #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif #if __cplusplus >= 201103L # include #endif -#include +#if __cplusplus >= 201103L +# include +#endif #if __cplusplus >= 201103L # include #endif diff --git 
a/libcxx/utils/generate_header_tests.py b/libcxx/utils/generate_header_tests.py index 04d4d7de4a6e5a..fa9ab3cd7dcaae 100755 --- a/libcxx/utils/generate_header_tests.py +++ b/libcxx/utils/generate_header_tests.py @@ -39,18 +39,25 @@ "cwchar": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", "wchar.h": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", - "experimental/coroutine": "!defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES)", - - "experimental/regex": "!defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L", + "experimental/algorithm": "__cplusplus >= 201103L", + "experimental/coroutine": "__cplusplus >= 201103L && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES)", "experimental/deque": "__cplusplus >= 201103L", - "experimental/map": "__cplusplus >= 201103L", - "experimental/memory_resource": "__cplusplus >= 201103L", "experimental/forward_list": "__cplusplus >= 201103L", + "experimental/functional": "__cplusplus >= 201103L", + "experimental/iterator": "__cplusplus >= 201103L", "experimental/list": "__cplusplus >= 201103L", + "experimental/map": "__cplusplus >= 201103L", + "experimental/memory_resource": "__cplusplus >= 201103L", + "experimental/propagate_const": "__cplusplus >= 201103L", + "experimental/regex": "!defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L", "experimental/set": "__cplusplus >= 201103L", + "experimental/simd": "__cplusplus >= 201103L", + "experimental/span": "__cplusplus >= 201103L", "experimental/string": "__cplusplus >= 201103L", + "experimental/type_traits": "__cplusplus >= 201103L", "experimental/unordered_map": "__cplusplus >= 201103L", "experimental/unordered_set": "__cplusplus >= 201103L", + "experimental/utility": "__cplusplus >= 201103L", "experimental/vector": "__cplusplus >= 201103L", } @@ -133,6 +140,7 @@ def main(): produce(test.joinpath('libcxx/clang_tidy.sh.cpp'), variables) produce(test.joinpath('libcxx/double_include.sh.cpp'), variables) produce(test.joinpath('libcxx/min_max_macros.compile.pass.cpp'), variables) 
+ produce(test.joinpath('libcxx/modules_include.sh.cpp'), variables) produce(test.joinpath('libcxx/nasty_macros.compile.pass.cpp'), variables) produce(test.joinpath('libcxx/no_assert_include.compile.pass.cpp'), variables) produce(test.joinpath('libcxx/private_headers.verify.cpp'), variables) From 8a6698b5238228876e7e5eb20ed82167ab1b04db Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 May 2022 15:20:01 +0200 Subject: [PATCH 436/908] [ValueTracking] Loads with !dereferenceable metadata cannot be undef/poison A load with !dereferenceable or !dereferenceable_or_null metadata must return a well-defined (non-undef/poison) value. Effectively they imply !noundef. This is the same as we do for the dereferenceable(N) attribute. This should fix https://github.com/llvm/llvm-project/issues/55672, or at least the specific case discussed there. Differential Revision: https://reviews.llvm.org/D126296 --- llvm/lib/Analysis/ValueTracking.cpp | 4 +++- llvm/test/Transforms/InstCombine/freeze.ll | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index a32633589c94a6..c7586f271df103 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5191,7 +5191,9 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, } if (auto *I = dyn_cast(V)) - if (I->getMetadata(LLVMContext::MD_noundef)) + if (I->hasMetadata(LLVMContext::MD_noundef) || + I->hasMetadata(LLVMContext::MD_dereferenceable) || + I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)) return true; if (programUndefinedIfUndefOrPoison(V, PoisonOnly)) diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll index 43b4c4a90e49f4..92c1e9a1328b6b 100644 --- a/llvm/test/Transforms/InstCombine/freeze.ll +++ b/llvm/test/Transforms/InstCombine/freeze.ll @@ -834,8 +834,7 @@ define i8* @freeze_load_noundef(i8** %ptr) { define i8* 
@freeze_load_dereferenceable(i8** %ptr) { ; CHECK-LABEL: @freeze_load_dereferenceable( ; CHECK-NEXT: [[P:%.*]] = load i8*, i8** [[PTR:%.*]], align 8, !dereferenceable !1 -; CHECK-NEXT: [[P_FR:%.*]] = freeze i8* [[P]] -; CHECK-NEXT: ret i8* [[P_FR]] +; CHECK-NEXT: ret i8* [[P]] ; %p = load i8*, i8** %ptr, !dereferenceable !1 %p.fr = freeze i8* %p @@ -845,8 +844,7 @@ define i8* @freeze_load_dereferenceable(i8** %ptr) { define i8* @freeze_load_dereferenceable_or_null(i8** %ptr) { ; CHECK-LABEL: @freeze_load_dereferenceable_or_null( ; CHECK-NEXT: [[P:%.*]] = load i8*, i8** [[PTR:%.*]], align 8, !dereferenceable_or_null !1 -; CHECK-NEXT: [[P_FR:%.*]] = freeze i8* [[P]] -; CHECK-NEXT: ret i8* [[P_FR]] +; CHECK-NEXT: ret i8* [[P]] ; %p = load i8*, i8** %ptr, !dereferenceable_or_null !1 %p.fr = freeze i8* %p From b3c5c22c13785570ae2895cc53f407133b602643 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Wed, 25 May 2022 09:58:00 +0200 Subject: [PATCH 437/908] [mlir] Add `complex.atan2` operation. 
Differential Revision: https://reviews.llvm.org/D126357 --- .../mlir/Dialect/Complex/IR/ComplexOps.td | 18 ++++++++++++++++++ mlir/test/Dialect/Complex/ops.mlir | 3 +++ 2 files changed, 21 insertions(+) diff --git a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td index 4a7377e7fe6437..a8fcf86de3af21 100644 --- a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td +++ b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td @@ -75,6 +75,24 @@ def AddOp : ComplexArithmeticOp<"add"> { }]; } +//===----------------------------------------------------------------------===// +// Atan2 +//===----------------------------------------------------------------------===// + +def Atan2Op : ComplexArithmeticOp<"atan2"> { + let summary = "complex 2-argument arctangent"; + let description = [{ + For complex numbers it is expressed using complex logarithm + atan2(y, x) = -i * log((x + i * y) / sqrt(x**2 + y**2)) + + Example: + + ```mlir + %a = complex.atan2 %b, %c : complex + ``` + }]; +} + //===----------------------------------------------------------------------===// // ConstantOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Complex/ops.mlir b/mlir/test/Dialect/Complex/ops.mlir index 1b302af74b9c7b..f8902bc1b6a451 100644 --- a/mlir/test/Dialect/Complex/ops.mlir +++ b/mlir/test/Dialect/Complex/ops.mlir @@ -77,5 +77,8 @@ func.func @ops(%f: f32) { // CHECK: complex.rsqrt %[[C]] : complex %rsqrt = complex.rsqrt %complex : complex + // CHECK: complex.atan2 %[[C]], %[[C]] : complex + %atan2 = complex.atan2 %complex, %complex : complex + return } From 29a5a7c6d47aa9fd6c715508491c8f110f706869 Mon Sep 17 00:00:00 2001 From: Lewis Revill Date: Mon, 25 Apr 2022 12:24:09 +0100 Subject: [PATCH 438/908] [RISCV] Add pre-emit pass to make more instructions compressible When optimizing for size, this pass searches for instructions that are prevented from being compressed by one of 
the following: 1. The use of a single uncompressed register. 2. A base register + offset where the offset is too large to be compressed and the base register may or may not already be compressed. In the first case, if there is a compressed register available, then the uncompressed register is copied to the compressed register and its uses replaced. This is only done if there are enough uses that code size would be improved. In the second case, if a compressed register is available, then the original base register is copied and adjusted such that: new_base_register = base_register + adjustment base_register + large_offset = new_base_register + small_offset and the uses of the base register are replaced with the new base register. Again this is only done if there are enough uses for code size to be improved. This pass was authored by Lewis Revill, with large offset optimization added by Craig Blackmore. Differential Revision: https://reviews.llvm.org/D92105 --- llvm/lib/Target/RISCV/CMakeLists.txt | 1 + llvm/lib/Target/RISCV/RISCV.h | 3 + .../Target/RISCV/RISCVMakeCompressible.cpp | 382 ++++++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 6 +- llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + .../CodeGen/RISCV/make-compressible-rv64.mir | 341 +++++ llvm/test/CodeGen/RISCV/make-compressible.mir | 1133 +++++++++++++++++ 8 files changed, 1867 insertions(+), 1 deletion(-) create mode 100644 llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp create mode 100644 llvm/test/CodeGen/RISCV/make-compressible-rv64.mir create mode 100644 llvm/test/CodeGen/RISCV/make-compressible.mir diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 1e706a4b8dfb04..6e49b17c12b48a 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -21,6 +21,7 @@ add_public_tablegen_target(RISCVCommonTableGen) add_llvm_target(RISCVCodeGen RISCVAsmPrinter.cpp RISCVCallLowering.cpp + 
RISCVMakeCompressible.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp RISCVFrameLowering.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 5b28f0d365d0f2..9a8b0623875469 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -37,6 +37,9 @@ bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM); +FunctionPass *createRISCVMakeCompressibleOptPass(); +void initializeRISCVMakeCompressibleOptPass(PassRegistry &); + FunctionPass *createRISCVGatherScatterLoweringPass(); void initializeRISCVGatherScatterLoweringPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp new file mode 100644 index 00000000000000..1fc424411c12f4 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp @@ -0,0 +1,382 @@ +//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass searches for instructions that are prevented from being compressed +// by one of the following: +// +// 1. The use of a single uncompressed register. +// 2. A base register + offset where the offset is too large to be compressed +// and the base register may or may not be compressed. +// +// +// For case 1, if a compressed register is available, then the uncompressed +// register is copied to the compressed register and its uses are replaced. 
+// +// For example, storing zero uses the uncompressible zero register: +// sw zero, 0(a0) # if zero +// sw zero, 8(a0) # if zero +// sw zero, 4(a0) # if zero +// sw zero, 24(a0) # if zero +// +// If a compressed register (e.g. a1) is available, the above can be transformed +// to the following to improve code size: +// li a1, 0 +// c.sw a1, 0(a0) +// c.sw a1, 8(a0) +// c.sw a1, 4(a0) +// c.sw a1, 24(a0) +// +// +// For case 2, if a compressed register is available, then the original base +// is copied and adjusted such that: +// +// new_base_register = base_register + adjustment +// base_register + large_offset = new_base_register + small_offset +// +// For example, the following offsets are too large for c.sw: +// lui a2, 983065 +// sw a1, -236(a2) +// sw a1, -240(a2) +// sw a1, -244(a2) +// sw a1, -248(a2) +// sw a1, -252(a2) +// sw a0, -256(a2) +// +// If a compressed register is available (e.g. a3), a new base could be created +// such that the addresses can be accessed with a compressible offset, thus +// improving code size: +// lui a2, 983065 +// addi a3, a2, -256 +// c.sw a1, 20(a3) +// c.sw a1, 16(a3) +// c.sw a1, 12(a3) +// c.sw a1, 8(a3) +// c.sw a1, 4(a3) +// c.sw a0, 0(a3) +// +// +// This optimization is only applied if there are enough uses of the copied +// register for code size to be reduced.
+// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-make-compressible" +#define RISCV_COMPRESS_INSTRS_NAME "RISCV Make Compressible" + +namespace { + +struct RISCVMakeCompressibleOpt : public MachineFunctionPass { + static char ID; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) { + initializeRISCVMakeCompressibleOptPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; } +}; +} // namespace + +char RISCVMakeCompressibleOpt::ID = 0; +INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible", + RISCV_COMPRESS_INSTRS_NAME, false, false) + +// Return log2(widthInBytes) of load/store done by Opcode. +static unsigned log2LdstWidth(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode"); + case RISCV::LW: + case RISCV::SW: + case RISCV::FLW: + case RISCV::FSW: + return 2; + case RISCV::LD: + case RISCV::SD: + case RISCV::FLD: + case RISCV::FSD: + return 3; + } +} + +// Return a mask for the offset bits of a non-stack-pointer based compressed +// load/store. +static uint8_t compressedLDSTOffsetMask(unsigned Opcode) { + return 0x1f << log2LdstWidth(Opcode); +} + +// Return true if Offset fits within a compressed stack-pointer based +// load/store. +static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) { + return log2LdstWidth(Opcode) == 2 ? isShiftedUInt<6, 2>(Offset) + : isShiftedUInt<6, 3>(Offset); +} + +// Given an offset for a load/store, return the adjustment required to the base +// register such that the address can be accessed with a compressible offset. 
+// This will return 0 if the offset is already compressible. +static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) { + // Return the excess bits that do not fit in a compressible offset. + return Offset & ~compressedLDSTOffsetMask(Opcode); +} + +// Return true if Reg is in a compressed register class. +static bool isCompressedReg(Register Reg) { + return RISCV::GPRCRegClass.contains(Reg) || + RISCV::FPR32CRegClass.contains(Reg) || + RISCV::FPR64CRegClass.contains(Reg); +} + +// Return true if MI is a load for which there exists a compressed version. +static bool isCompressibleLoad(const MachineInstr &MI) { + const RISCVSubtarget &STI = MI.getMF()->getSubtarget(); + const unsigned Opcode = MI.getOpcode(); + + return Opcode == RISCV::LW || (!STI.is64Bit() && Opcode == RISCV::FLW) || + Opcode == RISCV::LD || Opcode == RISCV::FLD; +} + +// Return true if MI is a store for which there exists a compressed version. +static bool isCompressibleStore(const MachineInstr &MI) { + const RISCVSubtarget &STI = MI.getMF()->getSubtarget(); + const unsigned Opcode = MI.getOpcode(); + + return Opcode == RISCV::SW || (!STI.is64Bit() && Opcode == RISCV::FSW) || + Opcode == RISCV::SD || Opcode == RISCV::FSD; +} + +// Find a single register and/or large offset which, if compressible, would +// allow the given instruction to be compressed. +// +// Possible return values: +// +// {Reg, 0} - Uncompressed Reg needs replacing with a compressed +// register. +// {Reg, N} - Reg needs replacing with a compressed register and +// N needs adding to the new register. (Reg may be +// compressed or uncompressed). +// {RISCV::NoRegister, 0} - No suitable optimization found for this +// instruction. 
+static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { + const unsigned Opcode = MI.getOpcode(); + + if (isCompressibleLoad(MI) || isCompressibleStore(MI)) { + const MachineOperand &MOImm = MI.getOperand(2); + if (!MOImm.isImm()) + return RegImmPair(RISCV::NoRegister, 0); + + int64_t Offset = MOImm.getImm(); + int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode); + Register Base = MI.getOperand(1).getReg(); + + // Memory accesses via the stack pointer do not have a requirement for + // either of the registers to be compressible and can take a larger offset. + if (RISCV::SPRegClass.contains(Base)) { + if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust) + return RegImmPair(Base, NewBaseAdjust); + } else { + Register SrcDest = MI.getOperand(0).getReg(); + bool SrcDestCompressed = isCompressedReg(SrcDest); + bool BaseCompressed = isCompressedReg(Base); + + // If only Base and/or offset prevent compression, then return Base and + // any adjustment required to make the offset compressible. + if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed) + return RegImmPair(Base, NewBaseAdjust); + + // For loads, we can only change the base register since dest is defined + // rather than used. + // + // For stores, we can change SrcDest (and Base if SrcDest == Base) but + // cannot resolve an uncompressible offset in this case. + if (isCompressibleStore(MI)) { + if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) && + !NewBaseAdjust) + return RegImmPair(SrcDest, NewBaseAdjust); + } + } + } + return RegImmPair(RISCV::NoRegister, 0); +} + +// Check all uses after FirstMI of the given register, keeping a vector of +// instructions that would be compressible if the given register (and offset if +// applicable) were compressible. +// +// If there are enough uses for this optimization to improve code size and a +// compressed register is available, return that compressed register. 
+static Register analyzeCompressibleUses(MachineInstr &FirstMI, + RegImmPair RegImm, + SmallVectorImpl &MIs) { + MachineBasicBlock &MBB = *FirstMI.getParent(); + const TargetRegisterInfo *TRI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + + RegScavenger RS; + RS.enterBasicBlock(MBB); + + for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(), + E = MBB.instr_end(); + I != E; ++I) { + MachineInstr &MI = *I; + + // Determine if this is an instruction which would benefit from using the + // new register. + RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI); + if (CandidateRegImm.Reg == RegImm.Reg && + CandidateRegImm.Imm == RegImm.Imm) { + // Advance tracking since the value in the new register must be live for + // this instruction too. + RS.forward(I); + + MIs.push_back(&MI); + } + + // If RegImm.Reg is modified by this instruction, then we cannot optimize + // past this instruction. If the register is already compressed, then it may + // be possible to optimize a large offset in the current instruction - this + // will have been detected by the preceding call to + // getRegImmPairPreventingCompression. + if (MI.modifiesRegister(RegImm.Reg, TRI)) + break; + } + + // Adjusting the base costs one new uncompressed addi and therefore three uses + // are required for a code size reduction. If no base adjustment is required, + // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying" + // the zero register) and therefore two uses are required for a code size + // reduction. + if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3)) + return RISCV::NoRegister; + + // Find a compressible register which will be available from the first + // instruction we care about to the last. + const TargetRegisterClass *RCToScavenge; + + // Work out the compressed register class from which to scavenge.
+ if (RISCV::GPRRegClass.contains(RegImm.Reg)) + RCToScavenge = &RISCV::GPRCRegClass; + else if (RISCV::FPR32RegClass.contains(RegImm.Reg)) + RCToScavenge = &RISCV::FPR32CRegClass; + else if (RISCV::FPR64RegClass.contains(RegImm.Reg)) + RCToScavenge = &RISCV::FPR64CRegClass; + else + return RISCV::NoRegister; + + return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(), + /*RestoreAfter=*/false, /*SPAdj=*/0, + /*AllowSpill=*/false); +} + +// Update uses of the old register in the given instruction to the new register. +static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm, + Register NewReg) { + unsigned Opcode = MI.getOpcode(); + + // If this pass is extended to support more instructions, the check for + // definedness may need to be strengthened. + assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) && + "Unsupported instruction for this optimization."); + + // Update registers + for (MachineOperand &MO : MI.operands()) + if (MO.isReg() && MO.getReg() == OldRegImm.Reg) { + // Do not update operands that define the old register. + // + // The new register was scavenged for the range of instructions that are + // being updated, therefore it should not be defined within this range + // except possibly in the final instruction. + if (MO.isDef()) { + assert(isCompressibleLoad(MI)); + continue; + } + // Update reg + MO.setReg(NewReg); + } + + // Update offset + MachineOperand &MOImm = MI.getOperand(2); + int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode); + MOImm.setImm(NewOffset); +} + +bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) { + // This is a size optimization. + if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize()) + return false; + + const RISCVSubtarget &STI = Fn.getSubtarget(); + const RISCVInstrInfo &TII = *STI.getInstrInfo(); + + // This optimization only makes sense if compressed instructions are emitted. 
+ if (!STI.hasStdExtC()) + return false; + + for (MachineBasicBlock &MBB : Fn) { + LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); + for (MachineInstr &MI : MBB) { + // Determine if this instruction would otherwise be compressed if not for + // an uncompressible register or offset. + RegImmPair RegImm = getRegImmPairPreventingCompression(MI); + if (!RegImm.Reg && RegImm.Imm == 0) + continue; + + // Determine if there is a set of instructions for which replacing this + // register with a compressed register (and compressible offset if + // applicable) is possible and will allow compression. + SmallVector MIs; + Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs); + if (!NewReg) + continue; + + // Create the appropriate copy and/or offset. + if (RISCV::GPRRegClass.contains(RegImm.Reg)) { + assert(isInt<12>(RegImm.Imm)); + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg) + .addReg(RegImm.Reg) + .addImm(RegImm.Imm); + } else { + // If we are looking at replacing an FPR register we don't expect to + // have any offset. The only compressible FP instructions with an offset + // are loads and stores, for which the offset applies to the GPR operand + // not the FPR operand. + assert(RegImm.Imm == 0); + unsigned Opcode = RISCV::FPR32RegClass.contains(RegImm.Reg) + ? RISCV::FSGNJ_S + : RISCV::FSGNJ_D; + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), NewReg) + .addReg(RegImm.Reg) + .addReg(RegImm.Reg); + } + + // Update the set of instructions to use the compressed register and + // compressible offset instead. These instructions should now be + // compressible. + // TODO: Update all uses if RegImm.Imm == 0? Not just those that are + // expected to become compressible. + for (MachineInstr *UpdateMI : MIs) + updateOperands(*UpdateMI, RegImm, NewReg); + } + } + return true; +} + +/// Returns an instance of the Make Compressible Optimization pass. 
+FunctionPass *llvm::createRISCVMakeCompressibleOptPass() { + return new RISCVMakeCompressibleOpt(); +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 6bf670b20f1a66..e5bfe782d11319 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -46,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { RegisterTargetMachine Y(getTheRISCV64Target()); auto *PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); + initializeRISCVMakeCompressibleOptPass(*PR); initializeRISCVGatherScatterLoweringPass(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVSExtWRemovalPass(*PR); @@ -208,7 +209,10 @@ bool RISCVPassConfig::addGlobalInstructionSelect() { void RISCVPassConfig::addPreSched2() {} -void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } +void RISCVPassConfig::addPreEmitPass() { + addPass(&BranchRelaxationPassID); + addPass(createRISCVMakeCompressibleOptPass()); +} void RISCVPassConfig::addPreEmitPass2() { addPass(createRISCVExpandPseudoPass()); diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index c3c2807ed8379a..94c775c809ecaf 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -60,6 +60,7 @@ ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: RISCV Make Compressible ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 89b14bef9bce3e..c1f69756b921d3 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -151,6 +151,7 @@ ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 
'patchable-function' attribute ; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: RISCV Make Compressible ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/llvm/test/CodeGen/RISCV/make-compressible-rv64.mir b/llvm/test/CodeGen/RISCV/make-compressible-rv64.mir new file mode 100644 index 00000000000000..bfe8133162415a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/make-compressible-rv64.mir @@ -0,0 +1,341 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=riscv64 -mattr=+c -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck %s +--- | + + define void @store_common_value(i64* %a, i64* %b, i64* %c) #0 { + entry: + store i64 0, i64* %a, align 8 + store i64 0, i64* %b, align 8 + store i64 0, i64* %c, align 8 + ret void + } + + define void @store_common_ptr(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %p) #0 { + entry: + store volatile i64 1, i64* %p, align 8 + store volatile i64 3, i64* %p, align 8 + store volatile i64 5, i64* %p, align 8 + ret void + } + + define void @store_common_ptr_self(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %p) #0 { + entry: + %q = bitcast i64* %p to i64** + store volatile i64 1, i64* %p, align 8 + store volatile i64 3, i64* %p, align 8 + store volatile i64* %p, i64** %q, align 8 + ret void + } + + define void @load_common_ptr(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %p) #0 { + entry: + %g = load volatile i64, i64* %p, align 8 + %h = load volatile i64, i64* %p, align 8 + %i = load volatile i64, i64* %p, align 8 + ret void + } + + define void @store_large_offset(i64* %p) #0 { + entry: + %0 = getelementptr inbounds i64, i64* %p, i64 100 + store volatile i64 1, i64* %0, align 8 + %1 = getelementptr inbounds i64, i64* %p, i64 101 + store volatile i64 3, i64* %1, align 8 + %2 = getelementptr inbounds i64, i64* %p, i64 102 + store volatile i64 5, i64* %2, align 
8 + %3 = getelementptr inbounds i64, i64* %p, i64 103 + store volatile i64 7, i64* %3, align 8 + ret void + } + + define void @load_large_offset(i64* %p) #0 { + entry: + %0 = getelementptr inbounds i64, i64* %p, i64 100 + %a = load volatile i64, i64* %0, align 8 + %1 = getelementptr inbounds i64, i64* %p, i64 101 + %b = load volatile i64, i64* %1, align 8 + %2 = getelementptr inbounds i64, i64* %p, i64 102 + %c = load volatile i64, i64* %2, align 8 + %3 = getelementptr inbounds i64, i64* %p, i64 103 + %d = load volatile i64, i64* %3, align 8 + ret void + } + + define void @store_common_value_no_opt(i64* %a) #0 { + entry: + store i64 0, i64* %a, align 8 + ret void + } + + define void @store_common_ptr_no_opt(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %p) #0 { + entry: + store volatile i64 1, i64* %p, align 8 + ret void + } + + define void @load_common_ptr_no_opt(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %p) #0 { + entry: + %g = load volatile i64, i64* %p, align 8 + ret void + } + + define void @store_large_offset_no_opt(i64* %p) #0 { + entry: + %0 = getelementptr inbounds i64, i64* %p, i64 100 + store volatile i64 1, i64* %0, align 8 + %1 = getelementptr inbounds i64, i64* %p, i64 101 + store volatile i64 3, i64* %1, align 8 + ret void + } + + define void @load_large_offset_no_opt(i64* %p) #0 { + entry: + %0 = getelementptr inbounds i64, i64* %p, i64 100 + %a = load volatile i64, i64* %0, align 8 + %1 = getelementptr inbounds i64, i64* %p, i64 101 + %b = load volatile i64, i64* %1, align 8 + ret void + } + + attributes #0 = { minsize "target-features"="+c" } + +... 
+--- +name: store_common_value +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12 + + ; CHECK-LABEL: name: store_common_value + ; CHECK: liveins: $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x13 = ADDI $x0, 0 + ; CHECK-NEXT: SD $x13, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; CHECK-NEXT: SD $x13, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; CHECK-NEXT: SD $x13, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; CHECK-NEXT: PseudoRET + SD $x0, killed renamable $x10, 0 :: (store (s64) into %ir.a) + SD $x0, killed renamable $x11, 0 :: (store (s64) into %ir.b) + SD $x0, killed renamable $x12, 0 :: (store (s64) into %ir.c) + PseudoRET + +... +--- +name: store_common_ptr +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: store_common_ptr + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10 = ADDI $x0, 1 + ; CHECK-NEXT: $x11 = ADDI $x16, 0 + ; CHECK-NEXT: SD killed renamable $x10, $x11, 0 :: (volatile store (s64) into %ir.p) + ; CHECK-NEXT: renamable $x10 = ADDI $x0, 3 + ; CHECK-NEXT: SD killed renamable $x10, $x11, 0 :: (volatile store (s64) into %ir.p) + ; CHECK-NEXT: renamable $x10 = ADDI $x0, 5 + ; CHECK-NEXT: SD killed renamable $x10, killed $x11, 0 :: (volatile store (s64) into %ir.p) + ; CHECK-NEXT: PseudoRET + renamable $x10 = ADDI $x0, 1 + SD killed renamable $x10, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + renamable $x10 = ADDI $x0, 3 + SD killed renamable $x10, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + renamable $x10 = ADDI $x0, 5 + SD killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + PseudoRET + +... 
+--- +name: store_common_ptr_self +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: store_common_ptr_self + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10 = ADDI $x0, 1 + ; CHECK-NEXT: $x11 = ADDI $x16, 0 + ; CHECK-NEXT: SD killed renamable $x10, $x11, 0 :: (volatile store (s64) into %ir.p) + ; CHECK-NEXT: renamable $x10 = ADDI $x0, 3 + ; CHECK-NEXT: SD killed renamable $x10, $x11, 0 :: (volatile store (s64) into %ir.p) + ; CHECK-NEXT: SD killed $x11, $x11, 0 :: (volatile store (s64) into %ir.q) + ; CHECK-NEXT: PseudoRET + renamable $x10 = ADDI $x0, 1 + SD killed renamable $x10, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + renamable $x10 = ADDI $x0, 3 + SD killed renamable $x10, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + SD killed renamable $x16, renamable $x16, 0 :: (volatile store (s64) into %ir.q) + PseudoRET + +... +--- +name: load_common_ptr +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: load_common_ptr + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x11 = ADDI $x16, 0 + ; CHECK-NEXT: dead renamable $x10 = LD $x11, 0 :: (volatile load (s64) from %ir.p) + ; CHECK-NEXT: dead renamable $x10 = LD $x11, 0 :: (volatile load (s64) from %ir.p) + ; CHECK-NEXT: dead renamable $x10 = LD killed $x11, 0 :: (volatile load (s64) from %ir.p) + ; CHECK-NEXT: PseudoRET + dead renamable $x10 = LD renamable $x16, 0 :: (volatile load (s64) from %ir.p) + dead renamable $x10 = LD renamable $x16, 0 :: (volatile load (s64) from %ir.p) + dead renamable $x10 = LD killed renamable $x16, 0 :: (volatile load (s64) from %ir.p) + PseudoRET + +... 
+--- +name: store_large_offset +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: store_large_offset + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x11 = ADDI $x0, 1 + ; CHECK-NEXT: $x12 = ADDI $x10, 768 + ; CHECK-NEXT: SD killed renamable $x11, $x12, 32 :: (volatile store (s64) into %ir.0) + ; CHECK-NEXT: renamable $x11 = ADDI $x0, 3 + ; CHECK-NEXT: SD killed renamable $x11, $x12, 40 :: (volatile store (s64) into %ir.1) + ; CHECK-NEXT: renamable $x11 = ADDI $x0, 5 + ; CHECK-NEXT: SD killed renamable $x11, $x12, 48 :: (volatile store (s64) into %ir.2) + ; CHECK-NEXT: renamable $x11 = ADDI $x0, 7 + ; CHECK-NEXT: SD killed renamable $x11, killed $x12, 56 :: (volatile store (s64) into %ir.3) + ; CHECK-NEXT: PseudoRET + renamable $x11 = ADDI $x0, 1 + SD killed renamable $x11, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + renamable $x11 = ADDI $x0, 3 + SD killed renamable $x11, renamable $x10, 808 :: (volatile store (s64) into %ir.1) + renamable $x11 = ADDI $x0, 5 + SD killed renamable $x11, renamable $x10, 816 :: (volatile store (s64) into %ir.2) + renamable $x11 = ADDI $x0, 7 + SD killed renamable $x11, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3) + PseudoRET + +... 
+--- +name: load_large_offset +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: load_large_offset + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = ADDI $x10, 768 + ; CHECK-NEXT: dead renamable $x11 = LD $x12, 32 :: (volatile load (s64) from %ir.0) + ; CHECK-NEXT: dead renamable $x11 = LD $x12, 40 :: (volatile load (s64) from %ir.1) + ; CHECK-NEXT: dead renamable $x11 = LD $x12, 48 :: (volatile load (s64) from %ir.2) + ; CHECK-NEXT: dead renamable $x10 = LD killed $x12, 56 :: (volatile load (s64) from %ir.3) + ; CHECK-NEXT: PseudoRET + dead renamable $x11 = LD renamable $x10, 800 :: (volatile load (s64) from %ir.0) + dead renamable $x11 = LD renamable $x10, 808 :: (volatile load (s64) from %ir.1) + dead renamable $x11 = LD renamable $x10, 816 :: (volatile load (s64) from %ir.2) + dead renamable $x10 = LD killed renamable $x10, 824 :: (volatile load (s64) from %ir.3) + PseudoRET + +... +--- +name: store_common_value_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: store_common_value_no_opt + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SD $x0, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; CHECK-NEXT: PseudoRET + SD $x0, killed renamable $x10, 0 :: (store (s64) into %ir.a) + PseudoRET + +... +--- +name: store_common_ptr_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: store_common_ptr_no_opt + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10 = ADDI $x0, 1 + ; CHECK-NEXT: SD killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; CHECK-NEXT: PseudoRET + renamable $x10 = ADDI $x0, 1 + SD killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + PseudoRET + +... 
+--- +name: load_common_ptr_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: load_common_ptr_no_opt + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead renamable $x10 = LD killed renamable $x16, 0 :: (volatile load (s64) from %ir.p) + ; CHECK-NEXT: PseudoRET + dead renamable $x10 = LD killed renamable $x16, 0 :: (volatile load (s64) from %ir.p) + PseudoRET + +... +--- +name: store_large_offset_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: store_large_offset_no_opt + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x11 = ADDI $x0, 1 + ; CHECK-NEXT: SD killed renamable $x11, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + ; CHECK-NEXT: renamable $x11 = ADDI $x0, 3 + ; CHECK-NEXT: SD killed renamable $x11, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1) + ; CHECK-NEXT: PseudoRET + renamable $x11 = ADDI $x0, 1 + SD killed renamable $x11, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + renamable $x11 = ADDI $x0, 3 + SD killed renamable $x11, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1) + PseudoRET + +... +--- +name: load_large_offset_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: load_large_offset_no_opt + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead renamable $x11 = LD renamable $x10, 800 :: (volatile load (s64) from %ir.0) + ; CHECK-NEXT: dead renamable $x10 = LD killed renamable $x10, 808 :: (volatile load (s64) from %ir.1) + ; CHECK-NEXT: PseudoRET + dead renamable $x11 = LD renamable $x10, 800 :: (volatile load (s64) from %ir.0) + dead renamable $x10 = LD killed renamable $x10, 808 :: (volatile load (s64) from %ir.1) + PseudoRET + +... 
diff --git a/llvm/test/CodeGen/RISCV/make-compressible.mir b/llvm/test/CodeGen/RISCV/make-compressible.mir new file mode 100644 index 00000000000000..f49868402c24d0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/make-compressible.mir @@ -0,0 +1,1133 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=riscv32 -mattr=+c,+f,+d -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefix=RV32 %s +# RUN: llc -o - %s -mtriple=riscv64 -mattr=+c,+f,+d -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefix=RV64 %s +--- | + + define void @store_common_value(i32* %a, i32* %b, i32* %c) #0 { + entry: + store i32 0, i32* %a, align 4 + store i32 0, i32* %b, align 4 + store i32 0, i32* %c, align 4 + ret void + } + + define void @store_common_value_float(float* %a, float* %b, float* %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) #0 { + entry: + store float %j, float* %a, align 4 + store float %j, float* %b, align 4 + store float %j, float* %c, align 4 + ret void + } + + define void @store_common_value_double(double* %a, double* %b, double* %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j) #0 { + entry: + store double %j, double* %a, align 8 + store double %j, double* %b, align 8 + store double %j, double* %c, align 8 + ret void + } + + define void @store_common_ptr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32* %p) #0 { + entry: + store volatile i32 1, i32* %p, align 4 + store volatile i32 3, i32* %p, align 4 + store volatile i32 5, i32* %p, align 4 + ret void + } + + define void @store_common_ptr_self(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32* %p) #0 { + entry: + %q = bitcast i32* %p to i32** + store volatile i32 1, i32* %p, align 4 + store volatile i32 3, i32* %p, align 4 + store volatile i32* %p, i32** %q, align 4 + ret void + } + + define void @store_common_ptr_float(float %a, float %b, float %c, 
i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, float* %p) #0 { + entry: + store volatile float %a, float* %p, align 4 + store volatile float %b, float* %p, align 4 + store volatile float %c, float* %p, align 4 + ret void + } + + define void @store_common_ptr_double(double %a, double %b, double %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, double* %p) #0 { + entry: + store volatile double %a, double* %p, align 8 + store volatile double %b, double* %p, align 8 + store volatile double %c, double* %p, align 8 + ret void + } + + define void @load_common_ptr(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32* %p) #0 { + entry: + %g = load volatile i32, i32* %p, align 4 + %h = load volatile i32, i32* %p, align 4 + %i = load volatile i32, i32* %p, align 4 + ret void + } + + define void @load_common_ptr_float(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, float* %g) #0 { + entry: + %0 = load float, float* %g, align 4 + %arrayidx1 = getelementptr inbounds float, float* %g, i32 1 + %1 = load float, float* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float, float* %g, i32 2 + %2 = load float, float* %arrayidx2, align 4 + tail call void @load_common_ptr_float_1(float %0, float %1, float %2) + ret void + } + + declare void @load_common_ptr_float_1(float, float, float) #0 + + define void @load_common_ptr_double(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, double* %g) #0 { + entry: + %0 = load double, double* %g, align 8 + %arrayidx1 = getelementptr inbounds double, double* %g, i32 1 + %1 = load double, double* %arrayidx1, align 8 + %arrayidx2 = getelementptr inbounds double, double* %g, i32 2 + %2 = load double, double* %arrayidx2, align 8 + tail call void @load_common_ptr_double_1(double %0, double %1, double %2) + ret void + } + + declare void @load_common_ptr_double_1(double, double, double) #0 + + define void @store_large_offset(i32* %p) #0 { + entry: + %0 = getelementptr inbounds i32, i32* %p, i32 100 + store volatile i32 1, i32* %0, align 4 + %1 = 
getelementptr inbounds i32, i32* %p, i32 101 + store volatile i32 3, i32* %1, align 4 + %2 = getelementptr inbounds i32, i32* %p, i32 102 + store volatile i32 5, i32* %2, align 4 + %3 = getelementptr inbounds i32, i32* %p, i32 103 + store volatile i32 7, i32* %3, align 4 + ret void + } + + define void @store_large_offset_float(float* %p, float %a, float %b, float %c, float %d) #0 { + entry: + %0 = getelementptr inbounds float, float* %p, i32 100 + store volatile float %a, float* %0, align 4 + %1 = getelementptr inbounds float, float* %p, i32 101 + store volatile float %b, float* %1, align 4 + %2 = getelementptr inbounds float, float* %p, i32 102 + store volatile float %c, float* %2, align 4 + %3 = getelementptr inbounds float, float* %p, i32 103 + store volatile float %d, float* %3, align 4 + ret void + } + + define void @store_large_offset_double(double* %p, double %a, double %b, double %c, double %d) #0 { + entry: + %0 = getelementptr inbounds double, double* %p, i32 100 + store volatile double %a, double* %0, align 8 + %1 = getelementptr inbounds double, double* %p, i32 101 + store volatile double %b, double* %1, align 8 + %2 = getelementptr inbounds double, double* %p, i32 102 + store volatile double %c, double* %2, align 8 + %3 = getelementptr inbounds double, double* %p, i32 103 + store volatile double %d, double* %3, align 8 + ret void + } + + define void @load_large_offset(i32* %p) #0 { + entry: + %0 = getelementptr inbounds i32, i32* %p, i32 100 + %a = load volatile i32, i32* %0, align 4 + %1 = getelementptr inbounds i32, i32* %p, i32 101 + %b = load volatile i32, i32* %1, align 4 + %2 = getelementptr inbounds i32, i32* %p, i32 102 + %c = load volatile i32, i32* %2, align 4 + %3 = getelementptr inbounds i32, i32* %p, i32 103 + %d = load volatile i32, i32* %3, align 4 + ret void + } + + define void @load_large_offset_float(float* %p) #0 { + entry: + %arrayidx = getelementptr inbounds float, float* %p, i32 100 + %0 = load float, float* %arrayidx, align 4 + 
%arrayidx1 = getelementptr inbounds float, float* %p, i32 101 + %1 = load float, float* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float, float* %p, i32 102 + %2 = load float, float* %arrayidx2, align 4 + tail call void @load_large_offset_float_1(float %0, float %1, float %2) + ret void + } + + declare void @load_large_offset_float_1(float, float, float) #0 + + define void @load_large_offset_double(double* %p) #0 { + entry: + %arrayidx = getelementptr inbounds double, double* %p, i32 100 + %0 = load double, double* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds double, double* %p, i32 101 + %1 = load double, double* %arrayidx1, align 8 + %arrayidx2 = getelementptr inbounds double, double* %p, i32 102 + %2 = load double, double* %arrayidx2, align 8 + tail call void @load_large_offset_double_1(double %0, double %1, double %2) + ret void + } + + declare void @load_large_offset_double_1(double, double, double) #0 + + define void @store_common_value_no_opt(i32* %a) #0 { + entry: + store i32 0, i32* %a, align 4 + ret void + } + + define void @store_common_value_float_no_opt(float* %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h) #0 { + entry: + store float %h, float* %a, align 4 + ret void + } + + define void @store_common_value_double_no_opt(double* %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h) #0 { + entry: + store double %h, double* %a, align 8 + ret void + } + + define void @store_common_ptr_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32* %p) #0 { + entry: + store volatile i32 1, i32* %p, align 4 + ret void + } + + define void @store_common_ptr_float_no_opt(float %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, float* %p) #0 { + entry: + store volatile float %a, float* %p, align 4 + ret void + } + + define void @store_common_ptr_double_no_opt(double %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, double* %p) #0 { + entry: + store volatile double %a, double* %p, 
align 8 + ret void + } + + define void @load_common_ptr_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32* %p) #0 { + entry: + %g = load volatile i32, i32* %p, align 4 + ret void + } + + define float @load_common_ptr_float_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, float* %g) #0 { + entry: + %0 = load float, float* %g, align 4 + ret float %0 + } + + define double @load_common_ptr_double_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, double* %g) #0 { + entry: + %0 = load double, double* %g, align 8 + ret double %0 + } + + define void @store_large_offset_no_opt(i32* %p) #0 { + entry: + %0 = getelementptr inbounds i32, i32* %p, i32 100 + store volatile i32 1, i32* %0, align 4 + %1 = getelementptr inbounds i32, i32* %p, i32 101 + store volatile i32 3, i32* %1, align 4 + ret void + } + + define void @store_large_offset_float_no_opt(float* %p, float %a, float %b) #0 { + entry: + %0 = getelementptr inbounds float, float* %p, i32 100 + store volatile float %a, float* %0, align 4 + %1 = getelementptr inbounds float, float* %p, i32 101 + store volatile float %b, float* %1, align 4 + ret void + } + + define void @store_large_offset_double_no_opt(double* %p, double %a, double %b) #0 { + entry: + %0 = getelementptr inbounds double, double* %p, i32 100 + store volatile double %a, double* %0, align 8 + %1 = getelementptr inbounds double, double* %p, i32 101 + store volatile double %b, double* %1, align 8 + ret void + } + + define void @load_large_offset_no_opt(i32* %p) #0 { + entry: + %0 = getelementptr inbounds i32, i32* %p, i32 100 + %a = load volatile i32, i32* %0, align 4 + %1 = getelementptr inbounds i32, i32* %p, i32 101 + %b = load volatile i32, i32* %1, align 4 + ret void + } + + define { float, float } @load_large_offset_float_no_opt(float* %p) #0 { + entry: + %arrayidx = getelementptr inbounds float, float* %p, i32 100 + %0 = load float, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %p, i32 101 + %1 = load 
float, float* %arrayidx1, align 4 + %2 = insertvalue { float, float } undef, float %0, 0 + %3 = insertvalue { float, float } %2, float %1, 1 + ret { float, float } %3 + } + + define { double, double } @load_large_offset_double_no_opt(double* %p) #0 { + entry: + %arrayidx = getelementptr inbounds double, double* %p, i32 100 + %0 = load double, double* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds double, double* %p, i32 101 + %1 = load double, double* %arrayidx1, align 8 + %2 = insertvalue { double, double } undef, double %0, 0 + %3 = insertvalue { double, double } %2, double %1, 1 + ret { double, double } %3 + } + + attributes #0 = { minsize "target-features"="+c,+f,+d" } + +... +--- +name: store_common_value +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12 + + ; RV32-LABEL: name: store_common_value + ; RV32: liveins: $x10, $x11, $x12 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x13 = ADDI $x0, 0 + ; RV32-NEXT: SW $x13, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV32-NEXT: SW $x13, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; RV32-NEXT: SW $x13, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_value + ; RV64: liveins: $x10, $x11, $x12 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x13 = ADDI $x0, 0 + ; RV64-NEXT: SW $x13, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV64-NEXT: SW $x13, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; RV64-NEXT: SW $x13, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; RV64-NEXT: PseudoRET + SW $x0, killed renamable $x10, 0 :: (store (s32) into %ir.a) + SW $x0, killed renamable $x11, 0 :: (store (s32) into %ir.b) + SW $x0, killed renamable $x12, 0 :: (store (s32) into %ir.c) + PseudoRET + +... 
+--- +name: store_common_value_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12, $f16_f + + ; RV32-LABEL: name: store_common_value_float + ; RV32: liveins: $x10, $x11, $x12, $f16_f + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $f10_f = FSGNJ_S $f16_f, $f16_f + ; RV32-NEXT: FSW $f10_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV32-NEXT: FSW $f10_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; RV32-NEXT: FSW killed $f10_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_value_float + ; RV64: liveins: $x10, $x11, $x12, $f16_f + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSW renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV64-NEXT: FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; RV64-NEXT: FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; RV64-NEXT: PseudoRET + FSW renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) + FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) + PseudoRET + +... 
+--- +name: store_common_value_double +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12, $f16_d + + ; RV32-LABEL: name: store_common_value_double + ; RV32: liveins: $x10, $x11, $x12, $f16_d + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $f10_d = FSGNJ_D $f16_d, $f16_d + ; RV32-NEXT: FSD $f10_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV32-NEXT: FSD $f10_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; RV32-NEXT: FSD killed $f10_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_value_double + ; RV64: liveins: $x10, $x11, $x12, $f16_d + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $f10_d = FSGNJ_D $f16_d, $f16_d + ; RV64-NEXT: FSD $f10_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV64-NEXT: FSD $f10_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; RV64-NEXT: FSD killed $f10_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; RV64-NEXT: PseudoRET + FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + PseudoRET + +... 
+--- +name: store_common_ptr +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: store_common_ptr + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $x10 = ADDI $x0, 1 + ; RV32-NEXT: $x11 = ADDI $x16, 0 + ; RV32-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: renamable $x10 = ADDI $x0, 3 + ; RV32-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: renamable $x10 = ADDI $x0, 5 + ; RV32-NEXT: SW killed renamable $x10, killed $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_ptr + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $x10 = ADDI $x0, 1 + ; RV64-NEXT: $x11 = ADDI $x16, 0 + ; RV64-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: renamable $x10 = ADDI $x0, 3 + ; RV64-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: renamable $x10 = ADDI $x0, 5 + ; RV64-NEXT: SW killed renamable $x10, killed $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: PseudoRET + renamable $x10 = ADDI $x0, 1 + SW killed renamable $x10, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + renamable $x10 = ADDI $x0, 3 + SW killed renamable $x10, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + renamable $x10 = ADDI $x0, 5 + SW killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + PseudoRET + +... 
+--- +name: store_common_ptr_self +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: store_common_ptr_self + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $x10 = ADDI $x0, 1 + ; RV32-NEXT: $x11 = ADDI $x16, 0 + ; RV32-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: renamable $x10 = ADDI $x0, 3 + ; RV32-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: SW killed $x11, $x11, 0 :: (volatile store (s32) into %ir.q) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_ptr_self + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $x10 = ADDI $x0, 1 + ; RV64-NEXT: $x11 = ADDI $x16, 0 + ; RV64-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: renamable $x10 = ADDI $x0, 3 + ; RV64-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: SW killed $x11, $x11, 0 :: (volatile store (s32) into %ir.q) + ; RV64-NEXT: PseudoRET + renamable $x10 = ADDI $x0, 1 + SW killed renamable $x10, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + renamable $x10 = ADDI $x0, 3 + SW killed renamable $x10, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + SW killed renamable $x16, renamable $x16, 0 :: (volatile store (s32) into %ir.q) + PseudoRET + +... 
+--- +name: store_common_ptr_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16, $f10_f, $f11_f, $f12_f + + ; RV32-LABEL: name: store_common_ptr_float + ; RV32: liveins: $x16, $f10_f, $f11_f, $f12_f + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x10 = ADDI $x16, 0 + ; RV32-NEXT: FSW killed renamable $f10_f, $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: FSW killed renamable $f11_f, $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: FSW killed renamable $f12_f, killed $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_ptr_float + ; RV64: liveins: $x16, $f10_f, $f11_f, $f12_f + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSW killed renamable $f10_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: PseudoRET + FSW killed renamable $f10_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + PseudoRET + +... 
+--- +name: store_common_ptr_double +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16, $f10_d, $f11_d, $f12_d + + ; RV32-LABEL: name: store_common_ptr_double + ; RV32: liveins: $x16, $f10_d, $f11_d, $f12_d + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x10 = ADDI $x16, 0 + ; RV32-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV32-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV32-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_ptr_double + ; RV64: liveins: $x16, $f10_d, $f11_d, $f12_d + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x10 = ADDI $x16, 0 + ; RV64-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV64-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV64-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV64-NEXT: PseudoRET + FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + PseudoRET + +... 
+--- +name: load_common_ptr +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: load_common_ptr + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x11 = ADDI $x16, 0 + ; RV32-NEXT: dead renamable $x10 = LW $x11, 0 :: (volatile load (s32) from %ir.p) + ; RV32-NEXT: dead renamable $x10 = LW $x11, 0 :: (volatile load (s32) from %ir.p) + ; RV32-NEXT: dead renamable $x10 = LW killed $x11, 0 :: (volatile load (s32) from %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: load_common_ptr + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x11 = ADDI $x16, 0 + ; RV64-NEXT: dead renamable $x10 = LW $x11, 0 :: (volatile load (s32) from %ir.p) + ; RV64-NEXT: dead renamable $x10 = LW $x11, 0 :: (volatile load (s32) from %ir.p) + ; RV64-NEXT: dead renamable $x10 = LW killed $x11, 0 :: (volatile load (s32) from %ir.p) + ; RV64-NEXT: PseudoRET + dead renamable $x10 = LW renamable $x16, 0 :: (volatile load (s32) from %ir.p) + dead renamable $x10 = LW renamable $x16, 0 :: (volatile load (s32) from %ir.p) + dead renamable $x10 = LW killed renamable $x16, 0 :: (volatile load (s32) from %ir.p) + PseudoRET + +... 
+--- +name: load_common_ptr_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: load_common_ptr_float + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x10 = ADDI $x16, 0 + ; RV32-NEXT: renamable $f10_f = FLW $x10, 0 :: (load (s32) from %ir.g) + ; RV32-NEXT: renamable $f11_f = FLW $x10, 4 :: (load (s32) from %ir.arrayidx1) + ; RV32-NEXT: renamable $f12_f = FLW killed $x10, 8 :: (load (s32) from %ir.arrayidx2) + ; RV32-NEXT: PseudoTAIL target-flags(riscv-plt) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; RV64-LABEL: name: load_common_ptr_float + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $f10_f = FLW renamable $x16, 0 :: (load (s32) from %ir.g) + ; RV64-NEXT: renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1) + ; RV64-NEXT: renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2) + ; RV64-NEXT: PseudoTAIL target-flags(riscv-plt) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + renamable $f10_f = FLW renamable $x16, 0 :: (load (s32) from %ir.g) + renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1) + renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2) + PseudoTAIL target-flags(riscv-plt) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + +... 
+--- +name: load_common_ptr_double +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: load_common_ptr_double + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x10 = ADDI $x16, 0 + ; RV32-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g) + ; RV32-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1) + ; RV32-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2) + ; RV32-NEXT: PseudoTAIL target-flags(riscv-plt) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; RV64-LABEL: name: load_common_ptr_double + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x10 = ADDI $x16, 0 + ; RV64-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g) + ; RV64-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1) + ; RV64-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2) + ; RV64-NEXT: PseudoTAIL target-flags(riscv-plt) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g) + renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1) + renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2) + PseudoTAIL target-flags(riscv-plt) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + +... 
+--- +name: store_large_offset +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: store_large_offset + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $x11 = ADDI $x0, 1 + ; RV32-NEXT: $x12 = ADDI $x10, 384 + ; RV32-NEXT: SW killed renamable $x11, $x12, 16 :: (volatile store (s32) into %ir.0) + ; RV32-NEXT: renamable $x11 = ADDI $x0, 3 + ; RV32-NEXT: SW killed renamable $x11, $x12, 20 :: (volatile store (s32) into %ir.1) + ; RV32-NEXT: renamable $x11 = ADDI $x0, 5 + ; RV32-NEXT: SW killed renamable $x11, $x12, 24 :: (volatile store (s32) into %ir.2) + ; RV32-NEXT: renamable $x11 = ADDI $x0, 7 + ; RV32-NEXT: SW killed renamable $x11, killed $x12, 28 :: (volatile store (s32) into %ir.3) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_large_offset + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $x11 = ADDI $x0, 1 + ; RV64-NEXT: $x12 = ADDI $x10, 384 + ; RV64-NEXT: SW killed renamable $x11, $x12, 16 :: (volatile store (s32) into %ir.0) + ; RV64-NEXT: renamable $x11 = ADDI $x0, 3 + ; RV64-NEXT: SW killed renamable $x11, $x12, 20 :: (volatile store (s32) into %ir.1) + ; RV64-NEXT: renamable $x11 = ADDI $x0, 5 + ; RV64-NEXT: SW killed renamable $x11, $x12, 24 :: (volatile store (s32) into %ir.2) + ; RV64-NEXT: renamable $x11 = ADDI $x0, 7 + ; RV64-NEXT: SW killed renamable $x11, killed $x12, 28 :: (volatile store (s32) into %ir.3) + ; RV64-NEXT: PseudoRET + renamable $x11 = ADDI $x0, 1 + SW killed renamable $x11, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + renamable $x11 = ADDI $x0, 3 + SW killed renamable $x11, renamable $x10, 404 :: (volatile store (s32) into %ir.1) + renamable $x11 = ADDI $x0, 5 + SW killed renamable $x11, renamable $x10, 408 :: (volatile store (s32) into %ir.2) + renamable $x11 = ADDI $x0, 7 + SW killed renamable $x11, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3) + PseudoRET + +... 
+--- +name: store_large_offset_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f + + ; RV32-LABEL: name: store_large_offset_float + ; RV32: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x11 = ADDI $x10, 384 + ; RV32-NEXT: FSW killed renamable $f10_f, $x11, 16 :: (volatile store (s32) into %ir.0) + ; RV32-NEXT: FSW killed renamable $f11_f, $x11, 20 :: (volatile store (s32) into %ir.1) + ; RV32-NEXT: FSW killed renamable $f12_f, $x11, 24 :: (volatile store (s32) into %ir.2) + ; RV32-NEXT: FSW killed renamable $f13_f, killed $x11, 28 :: (volatile store (s32) into %ir.3) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_large_offset_float + ; RV64: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + ; RV64-NEXT: FSW killed renamable $f11_f, renamable $x10, 404 :: (volatile store (s32) into %ir.1) + ; RV64-NEXT: FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2) + ; RV64-NEXT: FSW killed renamable $f13_f, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3) + ; RV64-NEXT: PseudoRET + FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + FSW killed renamable $f11_f, renamable $x10, 404 :: (volatile store (s32) into %ir.1) + FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2) + FSW killed renamable $f13_f, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3) + PseudoRET + +... 
+--- +name: store_large_offset_double +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + + ; RV32-LABEL: name: store_large_offset_double + ; RV32: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x11 = ADDI $x10, 768 + ; RV32-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0) + ; RV32-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1) + ; RV32-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2) + ; RV32-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_large_offset_double + ; RV64: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x11 = ADDI $x10, 768 + ; RV64-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0) + ; RV64-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1) + ; RV64-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2) + ; RV64-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3) + ; RV64-NEXT: PseudoRET + FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1) + FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2) + FSD killed renamable $f13_d, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3) + PseudoRET + +... 
+--- +name: load_large_offset +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: load_large_offset + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x12 = ADDI $x10, 384 + ; RV32-NEXT: dead renamable $x11 = LW $x12, 16 :: (volatile load (s32) from %ir.0) + ; RV32-NEXT: dead renamable $x11 = LW $x12, 20 :: (volatile load (s32) from %ir.1) + ; RV32-NEXT: dead renamable $x11 = LW $x12, 24 :: (volatile load (s32) from %ir.2) + ; RV32-NEXT: dead renamable $x10 = LW killed $x12, 28 :: (volatile load (s32) from %ir.3) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: load_large_offset + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x12 = ADDI $x10, 384 + ; RV64-NEXT: dead renamable $x11 = LW $x12, 16 :: (volatile load (s32) from %ir.0) + ; RV64-NEXT: dead renamable $x11 = LW $x12, 20 :: (volatile load (s32) from %ir.1) + ; RV64-NEXT: dead renamable $x11 = LW $x12, 24 :: (volatile load (s32) from %ir.2) + ; RV64-NEXT: dead renamable $x10 = LW killed $x12, 28 :: (volatile load (s32) from %ir.3) + ; RV64-NEXT: PseudoRET + dead renamable $x11 = LW renamable $x10, 400 :: (volatile load (s32) from %ir.0) + dead renamable $x11 = LW renamable $x10, 404 :: (volatile load (s32) from %ir.1) + dead renamable $x11 = LW renamable $x10, 408 :: (volatile load (s32) from %ir.2) + dead renamable $x10 = LW killed renamable $x10, 412 :: (volatile load (s32) from %ir.3) + PseudoRET + +... 
+--- +name: load_large_offset_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: load_large_offset_float + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x11 = ADDI $x10, 384 + ; RV32-NEXT: renamable $f10_f = FLW $x11, 16 :: (load (s32) from %ir.arrayidx) + ; RV32-NEXT: renamable $f11_f = FLW $x11, 20 :: (load (s32) from %ir.arrayidx1) + ; RV32-NEXT: renamable $f12_f = FLW killed $x11, 24 :: (load (s32) from %ir.arrayidx2) + ; RV32-NEXT: PseudoTAIL target-flags(riscv-plt) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; RV64-LABEL: name: load_large_offset_float + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + ; RV64-NEXT: renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + ; RV64-NEXT: renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2) + ; RV64-NEXT: PseudoTAIL target-flags(riscv-plt) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2) + PseudoTAIL target-flags(riscv-plt) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + +... 
+--- +name: load_large_offset_double +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: load_large_offset_double + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x11 = ADDI $x10, 768 + ; RV32-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx) + ; RV32-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1) + ; RV32-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2) + ; RV32-NEXT: PseudoTAIL target-flags(riscv-plt) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; RV64-LABEL: name: load_large_offset_double + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x11 = ADDI $x10, 768 + ; RV64-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx) + ; RV64-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1) + ; RV64-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2) + ; RV64-NEXT: PseudoTAIL target-flags(riscv-plt) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) + renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) + renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2) + PseudoTAIL target-flags(riscv-plt) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + +... 
+--- +name: store_common_value_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: store_common_value_no_opt + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: SW $x0, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_value_no_opt + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: SW $x0, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV64-NEXT: PseudoRET + SW $x0, killed renamable $x10, 0 :: (store (s32) into %ir.a) + PseudoRET + +... +--- +name: store_common_value_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $f16_f + + ; RV32-LABEL: name: store_common_value_float_no_opt + ; RV32: liveins: $x10, $f16_f + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: FSW killed renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_value_float_no_opt + ; RV64: liveins: $x10, $f16_f + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSW killed renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV64-NEXT: PseudoRET + FSW killed renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + PseudoRET + +... +--- +name: store_common_value_double_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $f16_d + + ; RV32-LABEL: name: store_common_value_double_no_opt + ; RV32: liveins: $x10, $f16_d + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: FSD killed renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_value_double_no_opt + ; RV64: liveins: $x10, $f16_d + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSD killed renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV64-NEXT: PseudoRET + FSD killed renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + PseudoRET + +... 
+--- +name: store_common_ptr_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: store_common_ptr_no_opt + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $x10 = ADDI $x0, 1 + ; RV32-NEXT: SW killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_ptr_no_opt + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $x10 = ADDI $x0, 1 + ; RV64-NEXT: SW killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: PseudoRET + renamable $x10 = ADDI $x0, 1 + SW killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + PseudoRET + +... +--- +name: store_common_ptr_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16, $f10_f + + ; RV32-LABEL: name: store_common_ptr_float_no_opt + ; RV32: liveins: $x16, $f10_f + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: FSW killed renamable $f10_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_ptr_float_no_opt + ; RV64: liveins: $x16, $f10_f + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSW killed renamable $f10_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV64-NEXT: PseudoRET + FSW killed renamable $f10_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + PseudoRET + +... 
+--- +name: store_common_ptr_double_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16, $f10_d + + ; RV32-LABEL: name: store_common_ptr_double_no_opt + ; RV32: liveins: $x16, $f10_d + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: FSD killed renamable $f10_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_common_ptr_double_no_opt + ; RV64: liveins: $x16, $f10_d + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSD killed renamable $f10_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV64-NEXT: PseudoRET + FSD killed renamable $f10_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + PseudoRET + +... +--- +name: load_common_ptr_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: load_common_ptr_no_opt + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: dead renamable $x10 = LW killed renamable $x16, 0 :: (volatile load (s32) from %ir.p) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: load_common_ptr_no_opt + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: dead renamable $x10 = LW killed renamable $x16, 0 :: (volatile load (s32) from %ir.p) + ; RV64-NEXT: PseudoRET + dead renamable $x10 = LW killed renamable $x16, 0 :: (volatile load (s32) from %ir.p) + PseudoRET + +... 
+--- +name: load_common_ptr_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: load_common_ptr_float_no_opt + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $f10_f = FLW killed renamable $x16, 0 :: (load (s32) from %ir.g) + ; RV32-NEXT: PseudoRET implicit $f10_f + ; RV64-LABEL: name: load_common_ptr_float_no_opt + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $f10_f = FLW killed renamable $x16, 0 :: (load (s32) from %ir.g) + ; RV64-NEXT: PseudoRET implicit $f10_f + renamable $f10_f = FLW killed renamable $x16, 0 :: (load (s32) from %ir.g) + PseudoRET implicit $f10_f + +... +--- +name: load_common_ptr_double_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; RV32-LABEL: name: load_common_ptr_double_no_opt + ; RV32: liveins: $x16 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $f10_d = FLD killed renamable $x16, 0 :: (load (s64) from %ir.g) + ; RV32-NEXT: PseudoRET implicit $f10_d + ; RV64-LABEL: name: load_common_ptr_double_no_opt + ; RV64: liveins: $x16 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $f10_d = FLD killed renamable $x16, 0 :: (load (s64) from %ir.g) + ; RV64-NEXT: PseudoRET implicit $f10_d + renamable $f10_d = FLD killed renamable $x16, 0 :: (load (s64) from %ir.g) + PseudoRET implicit $f10_d + +... 
+--- +name: store_large_offset_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: store_large_offset_no_opt + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $x11 = ADDI $x0, 1 + ; RV32-NEXT: SW killed renamable $x11, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + ; RV32-NEXT: renamable $x11 = ADDI $x0, 3 + ; RV32-NEXT: SW killed renamable $x11, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_large_offset_no_opt + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $x11 = ADDI $x0, 1 + ; RV64-NEXT: SW killed renamable $x11, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + ; RV64-NEXT: renamable $x11 = ADDI $x0, 3 + ; RV64-NEXT: SW killed renamable $x11, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + ; RV64-NEXT: PseudoRET + renamable $x11 = ADDI $x0, 1 + SW killed renamable $x11, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + renamable $x11 = ADDI $x0, 3 + SW killed renamable $x11, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + PseudoRET + +... 
+--- +name: store_large_offset_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $f10_f, $f11_f + + ; RV32-LABEL: name: store_large_offset_float_no_opt + ; RV32: liveins: $x10, $f10_f, $f11_f + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + ; RV32-NEXT: FSW killed renamable $f11_f, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_large_offset_float_no_opt + ; RV64: liveins: $x10, $f10_f, $f11_f + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + ; RV64-NEXT: FSW killed renamable $f11_f, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + ; RV64-NEXT: PseudoRET + FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + FSW killed renamable $f11_f, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + PseudoRET + +... 
+--- +name: store_large_offset_double_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $f10_d, $f11_d + + ; RV32-LABEL: name: store_large_offset_double_no_opt + ; RV32: liveins: $x10, $f10_d, $f11_d + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + ; RV32-NEXT: FSD killed renamable $f11_d, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: store_large_offset_double_no_opt + ; RV64: liveins: $x10, $f10_d, $f11_d + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + ; RV64-NEXT: FSD killed renamable $f11_d, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1) + ; RV64-NEXT: PseudoRET + FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + FSD killed renamable $f11_d, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1) + PseudoRET + +... +--- +name: load_large_offset_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: load_large_offset_no_opt + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: dead renamable $x11 = LW renamable $x10, 400 :: (volatile load (s32) from %ir.0) + ; RV32-NEXT: dead renamable $x10 = LW killed renamable $x10, 404 :: (volatile load (s32) from %ir.1) + ; RV32-NEXT: PseudoRET + ; RV64-LABEL: name: load_large_offset_no_opt + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: dead renamable $x11 = LW renamable $x10, 400 :: (volatile load (s32) from %ir.0) + ; RV64-NEXT: dead renamable $x10 = LW killed renamable $x10, 404 :: (volatile load (s32) from %ir.1) + ; RV64-NEXT: PseudoRET + dead renamable $x11 = LW renamable $x10, 400 :: (volatile load (s32) from %ir.0) + dead renamable $x10 = LW killed renamable $x10, 404 :: (volatile load (s32) from %ir.1) + PseudoRET + +... 
+--- +name: load_large_offset_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: load_large_offset_float_no_opt + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + ; RV32-NEXT: renamable $f11_f = FLW killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + ; RV32-NEXT: PseudoRET implicit $f10_f, implicit $f11_f + ; RV64-LABEL: name: load_large_offset_float_no_opt + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + ; RV64-NEXT: renamable $f11_f = FLW killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + ; RV64-NEXT: PseudoRET implicit $f10_f, implicit $f11_f + renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + renamable $f11_f = FLW killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + PseudoRET implicit $f10_f, implicit $f11_f + +... 
+--- +name: load_large_offset_double_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; RV32-LABEL: name: load_large_offset_double_no_opt + ; RV32: liveins: $x10 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) + ; RV32-NEXT: renamable $f11_d = FLD killed renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) + ; RV32-NEXT: PseudoRET implicit $f10_d, implicit $f11_d + ; RV64-LABEL: name: load_large_offset_double_no_opt + ; RV64: liveins: $x10 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) + ; RV64-NEXT: renamable $f11_d = FLD killed renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) + ; RV64-NEXT: PseudoRET implicit $f10_d, implicit $f11_d + renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) + renamable $f11_d = FLD killed renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) + PseudoRET implicit $f10_d, implicit $f11_d + +... 
From 7af89a379cce98382418be5b5433be6163e037f4 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sat, 21 May 2022 18:26:29 +0200 Subject: [PATCH 439/908] [libc++] Implement ranges::fill{, _n} Reviewed By: var-const, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D123462 --- libcxx/docs/Status/RangesAlgorithms.csv | 4 +- libcxx/include/CMakeLists.txt | 2 + libcxx/include/__algorithm/ranges_fill.h | 59 ++++++++ libcxx/include/__algorithm/ranges_fill_n.h | 48 ++++++ libcxx/include/algorithm | 10 ++ libcxx/include/module.modulemap | 2 + libcxx/test/libcxx/private_headers.verify.cpp | 2 + .../alg.fill/ranges.fill.pass.cpp | 143 ++++++++++++++++++ .../alg.fill/ranges.fill_n.pass.cpp | 91 +++++++++++ libcxx/test/support/almost_satisfies_types.h | 38 +++++ 10 files changed, 397 insertions(+), 2 deletions(-) create mode 100644 libcxx/include/__algorithm/ranges_fill.h create mode 100644 libcxx/include/__algorithm/ranges_fill_n.h create mode 100644 libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp create mode 100644 libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp diff --git a/libcxx/docs/Status/RangesAlgorithms.csv b/libcxx/docs/Status/RangesAlgorithms.csv index 4395c91aa24b75..1a5989db42bf26 100644 --- a/libcxx/docs/Status/RangesAlgorithms.csv +++ b/libcxx/docs/Status/RangesAlgorithms.csv @@ -42,8 +42,8 @@ Write,copy_n,Nikolas Klauser,`D122982 `_,✅ Write,copy_backward,Nikolas Klauser,`D122982 `_,✅ Write,move,Not assigned,n/a,Not started Write,move_backward,Not assigned,n/a,Not started -Write,fill,Not assigned,n/a,Not started -Write,fill_n,Not assigned,n/a,Not started +Write,fill,Nikolas Klauser,`D123462 `_,✅ +Write,fill_n,Nikolas Klauser,`D123462 `_,✅ Write,transform,Nikolas Klauser,`D122173 `_,✅ Write,generate,Not assigned,n/a,Not started Write,generate_n,Not assigned,n/a,Not started diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 
b6e4f6d6cf5264..68482bf716f1fd 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -72,6 +72,8 @@ set(files __algorithm/ranges_copy_n.h __algorithm/ranges_count.h __algorithm/ranges_count_if.h + __algorithm/ranges_fill.h + __algorithm/ranges_fill_n.h __algorithm/ranges_find.h __algorithm/ranges_find_if.h __algorithm/ranges_find_if_not.h diff --git a/libcxx/include/__algorithm/ranges_fill.h b/libcxx/include/__algorithm/ranges_fill.h new file mode 100644 index 00000000000000..846e31885141ba --- /dev/null +++ b/libcxx/include/__algorithm/ranges_fill.h @@ -0,0 +1,59 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_FILL_H +#define _LIBCPP___ALGORITHM_RANGES_FILL_H + +#include <__algorithm/ranges_fill_n.h> +#include <__config> +#include <__iterator/concepts.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __fill { +struct __fn { + template _Iter, sentinel_for<_Iter> _Sent> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last, const _Type& __value) const { + if constexpr(random_access_iterator<_Iter>) { + return ranges::fill_n(__first, __last - __first, __value); + } else { + for (; __first != __last; ++__first) + *__first = __value; + return __first; + } + } + + template _Range> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __range, const _Type& 
__value) const { + return (*this)(ranges::begin(__range), ranges::end(__range), __value); + } +}; +} // namespace __fill + +inline namespace __cpo { + inline constexpr auto fill = __fill::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_FILL_H diff --git a/libcxx/include/__algorithm/ranges_fill_n.h b/libcxx/include/__algorithm/ranges_fill_n.h new file mode 100644 index 00000000000000..d93c573406ec38 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_fill_n.h @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_FILL_N_H +#define _LIBCPP___ALGORITHM_RANGES_FILL_N_H + +#include <__config> +#include <__iterator/concepts.h> +#include <__iterator/incrementable_traits.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __fill_n { +struct __fn { + template _Iter> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, iter_difference_t<_Iter> __n, const _Type& __value) const { + for (; __n != 0; --__n) { + *__first = __value; + ++__first; + } + return __first; + } +}; +} // namespace __fill_n + +inline namespace __cpo { + inline constexpr auto fill_n = __fill_n::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // 
_LIBCPP___ALGORITHM_RANGES_FILL_N_H diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 224c9e1ab84656..ba8ac23e4bddc0 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -283,6 +283,14 @@ namespace ranges { requires permutable> constexpr borrowed_iterator_t ranges::reverse(R&& r); // since C++20 + template O, sentinel_for S> + constexpr O ranges::fill(O first, S last, const T& value); // since C++20 + + template R> + constexpr borrowed_iterator_t ranges::fill(R&& r, const T& value); // since C++20 + + template O> + constexpr O ranges::fill_n(O first, iter_difference_t n, const T& value); // since C++20 } constexpr bool // constexpr in C++20 @@ -1005,6 +1013,8 @@ template #include <__algorithm/ranges_copy_n.h> #include <__algorithm/ranges_count.h> #include <__algorithm/ranges_count_if.h> +#include <__algorithm/ranges_fill.h> +#include <__algorithm/ranges_fill_n.h> #include <__algorithm/ranges_find.h> #include <__algorithm/ranges_find_if.h> #include <__algorithm/ranges_find_if_not.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 6a771ea69fe904..d3a4b45c4d96d5 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -304,6 +304,8 @@ module std [system] { module ranges_copy_n { private header "__algorithm/ranges_copy_n.h" } module ranges_count { private header "__algorithm/ranges_count.h" } module ranges_count_if { private header "__algorithm/ranges_count_if.h" } + module ranges_fill { private header "__algorithm/ranges_fill.h" } + module ranges_fill_n { private header "__algorithm/ranges_fill_n.h" } module ranges_find { private header "__algorithm/ranges_find.h" } module ranges_find_if { private header "__algorithm/ranges_find_if.h" } module ranges_find_if_not { private header "__algorithm/ranges_find_if_not.h" } diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index fe0f19baf6af9e..16186899f3810f 100644 
--- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -109,6 +109,8 @@ END-SCRIPT #include <__algorithm/ranges_copy_n.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_copy_n.h'}} #include <__algorithm/ranges_count.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_count.h'}} #include <__algorithm/ranges_count_if.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_count_if.h'}} +#include <__algorithm/ranges_fill.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_fill.h'}} +#include <__algorithm/ranges_fill_n.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_fill_n.h'}} #include <__algorithm/ranges_find.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_find.h'}} #include <__algorithm/ranges_find_if.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_find_if.h'}} #include <__algorithm/ranges_find_if_not.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_find_if_not.h'}} diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp new file mode 100644 index 00000000000000..b82d753e7ef608 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp @@ -0,0 +1,143 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// template O, sentinel_for S> +// constexpr O ranges::fill(O first, S last, const T& value); +// template R> +// constexpr borrowed_iterator_t ranges::fill(R&& r, const T& value); + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +template > +concept HasFillIt = requires(Iter iter, Sent sent) { std::ranges::fill(iter, sent, int{}); }; + +static_assert(HasFillIt); +static_assert(!HasFillIt); +static_assert(!HasFillIt); +static_assert(!HasFillIt); +static_assert(!HasFillIt); + +template +concept HasFillR = requires(Range range) { std::ranges::fill(range, int{}); }; + +static_assert(HasFillR>); +static_assert(!HasFillR); +static_assert(!HasFillR); +static_assert(!HasFillR); +static_assert(!HasFillR); + +template +constexpr void test_iterators() { + { // simple test + { + int a[3]; + std::same_as auto ret = std::ranges::fill(It(a), Sent(It(a + 3)), 1); + assert(std::all_of(a, a + 3, [](int i) { return i == 1; })); + assert(base(ret) == a + 3); + } + { + int a[3]; + auto range = std::ranges::subrange(It(a), Sent(It(a + 3))); + std::same_as auto ret = std::ranges::fill(range, 1); + assert(std::all_of(a, a + 3, [](int i) { return i == 1; })); + assert(base(ret) == a + 3); + } + } + + { // check that an empty range works + { + std::array a; + auto ret = std::ranges::fill(It(a.data()), Sent(It(a.data())), 1); + assert(base(ret) == a.data()); + } + { + std::array a; + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data()))); + auto ret = std::ranges::fill(range, 1); + assert(base(ret) == a.data()); + } + } +} + +constexpr bool test() { + test_iterators, sentinel_wrapper>>(); + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + 
test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + + { // check that every element is copied once + struct S { + bool copied = false; + constexpr S& operator=(const S&) { + copied = true; + return *this; + } + }; + { + S a[5]; + std::ranges::fill(a, a + 5, S {true}); + assert(std::all_of(a, a + 5, [](S& s) { return s.copied; })); + } + { + S a[5]; + std::ranges::fill(a, S {true}); + assert(std::all_of(a, a + 5, [](S& s) { return s.copied; })); + } + } + + { // check that std::ranges::dangling is returned + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::fill(std::array {}, 1); + } + + { // check that std::ranges::dangling isn't returned with a borrowing range + std::array a{}; + [[maybe_unused]] std::same_as::iterator> decltype(auto) ret = + std::ranges::fill(std::views::all(a), 1); + assert(std::all_of(a.begin(), a.end(), [](int i) { return i == 1; })); + } + + { // check that non-trivially copyable items are copied properly + { + std::array a; + auto ret = std::ranges::fill(a.begin(), a.end(), "long long string so no SSO"); + assert(ret == a.data() + a.size()); + assert(std::all_of(a.begin(), a.end(), [](auto& s) { return s == "long long string so no SSO"; })); + } + { + std::array a; + auto ret = std::ranges::fill(a, "long long string so no SSO"); + assert(ret == a.data() + a.size()); + assert(std::all_of(a.begin(), a.end(), [](auto& s) { return s == "long long string so no SSO"; })); + } + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp new file mode 100644 index 00000000000000..1e984a8367c241 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp @@ -0,0 +1,91 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// template O> +// constexpr O ranges::fill_n(O first, iter_difference_t n, const T& value); + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +template +concept HasFillN = requires(Iter iter) { std::ranges::fill_n(iter, int{}, int{}); }; + +struct WrongType {}; + +static_assert(HasFillN); +static_assert(!HasFillN); +static_assert(!HasFillN); +static_assert(!HasFillN); + +template +constexpr void test_iterators() { + { // simple test + int a[3]; + std::same_as decltype(auto) ret = std::ranges::fill_n(It(a), 3, 1); + assert(std::all_of(a, a + 3, [](int i) { return i == 1; })); + assert(base(ret) == a + 3); + } + + { // check that an empty range works + std::array a; + auto ret = std::ranges::fill_n(It(a.data()), 0, 1); + assert(base(ret) == a.data()); + } +} + +constexpr bool test() { + test_iterators, sentinel_wrapper>>(); + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + + { // check that every element is copied once + struct S { + bool copied = false; + constexpr S& operator=(const S&) { + assert(!copied); + copied = true; + return *this; + } + }; + + S a[5]; + std::ranges::fill_n(a, 5, S {}); + assert(std::all_of(a, a + 5, [](S& s) { return s.copied; })); + } + + { // check that non-trivially copyable items are copied properly + std::array a; + auto ret = std::ranges::fill_n(a.data(), 10, "long long string so no SSO"); + 
assert(ret == a.data() + a.size()); + assert(std::all_of(a.begin(), a.end(), [](auto& s) { return s == "long long string so no SSO"; })); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/support/almost_satisfies_types.h b/libcxx/test/support/almost_satisfies_types.h index 49984a42c88e43..99ea852d2ff9a5 100644 --- a/libcxx/test/support/almost_satisfies_types.h +++ b/libcxx/test/support/almost_satisfies_types.h @@ -107,6 +107,7 @@ class SentinelForNotSemiregular { }; using InputRangeNotSentinelSemiregular = UncheckedRange, SentinelForNotSemiregular>; +using OutputRangeNotSentinelSemiregular = UncheckedRange, SentinelForNotSemiregular>; static_assert(std::input_or_output_iterator); static_assert(!std::semiregular); @@ -123,6 +124,8 @@ class SentinelForNotWeaklyEqualityComparableWith { using InputRangeNotSentinelEqualityComparableWith = UncheckedRange, SentinelForNotWeaklyEqualityComparableWith>; +using OutputRangeNotSentinelEqualityComparableWith = + UncheckedRange, SentinelForNotWeaklyEqualityComparableWith>; static_assert(std::input_or_output_iterator); static_assert(std::semiregular); @@ -220,4 +223,39 @@ static_assert(std::forward_iterator); static_assert(!std::permutable); static_assert(!std::indirectly_swappable); +class OutputIteratorNotInputOrOutputIterator { +public: + using difference_type = long; + using value_type = int; + using iterator_category = std::input_iterator_tag; + + int& operator++(); + void operator++(int); + int& operator*(); +}; + +using OutputRangeNotInputOrOutputIterator = UncheckedRange; + +static_assert(!std::input_or_output_iterator); +static_assert(std::indirectly_writable); +static_assert(!std::output_iterator); +static_assert(!std::ranges::input_range); + +class OutputIteratorNotIndirectlyWritable { +public: + using difference_type = long; + using iterator_category = std::input_iterator_tag; + + OutputIteratorNotIndirectlyWritable& operator++(); + void 
operator++(int); + const int& operator*() const; +}; + +using OutputRangeNotIndirectlyWritable = UncheckedRange; + +static_assert(std::input_or_output_iterator); +static_assert(!std::indirectly_writable); +static_assert(!std::output_iterator); +static_assert(!std::ranges::output_range); + #endif // ALMOST_SATISFIES_TYPES_H From 1ba42dd04b8d9310f75d488334526383490e1fa1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 25 May 2022 09:29:52 +0100 Subject: [PATCH 440/908] [VPlan] Use MapVector for LiveOuts for deterministic iteration. During code-gen, we iterate over the LiveOuts and the differences in iteration order can cause slightly different outputs. --- llvm/lib/Transforms/Vectorize/VPlan.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 7f4a5a98d824da..7072deb6a7860e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -30,6 +30,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -2513,7 +2514,7 @@ class VPlan { bool Value2VPValueEnabled = true; /// Values used outside the plan. 
- DenseMap LiveOuts; + MapVector LiveOuts; public: VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) { @@ -2705,7 +2706,7 @@ class VPlan { LiveOuts.erase(PN); } - const DenseMap &getLiveOuts() const { + const MapVector &getLiveOuts() const { return LiveOuts; } From b81171ce19d1001645e6e04db011a7dc4d42d564 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 25 May 2022 08:31:44 +0000 Subject: [PATCH 441/908] [gn build] Port 29a5a7c6d47a --- llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index b19a237796681d..ca2f4cc1a34a8c 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -75,6 +75,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVLegalizerInfo.cpp", "RISCVMCInstLower.cpp", "RISCVMachineFunctionInfo.cpp", + "RISCVMakeCompressible.cpp", "RISCVMergeBaseOffset.cpp", "RISCVRedundantCopyElimination.cpp", "RISCVRegisterBankInfo.cpp", From 9ffb5944a699b6a0d69c169ceff97636395ee30f Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 25 May 2022 08:31:45 +0000 Subject: [PATCH 442/908] [gn build] Port 7af89a379cce --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 335397c3f974af..4e7861d3a8b2bc 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -128,6 +128,8 @@ if (current_toolchain == default_toolchain) { "__algorithm/ranges_copy_n.h", "__algorithm/ranges_count.h", "__algorithm/ranges_count_if.h", + "__algorithm/ranges_fill.h", + "__algorithm/ranges_fill_n.h", "__algorithm/ranges_find.h", "__algorithm/ranges_find_if.h", "__algorithm/ranges_find_if_not.h", From 
f75bc5bfc8f898de10e14163998c63eeb2b9f705 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Tue, 24 May 2022 10:01:18 +0200 Subject: [PATCH 443/908] [analyzer] Fix symbol simplification assertion failure Fixes https://github.com/llvm/llvm-project/issues/55546 The assertion mentioned in the issue is triggered because an inconsistency is formed in the Sym->Class and Class->Sym relations. A simpler but similar inconsistency is demonstrated here: https://reviews.llvm.org/D114887 . Previously in `removeMember`, we didn't remove the old symbol's Sym->Class relation. Back then, we explained it with the following two bullet points: > 1) This way constraints for the old symbol can still be found via it's > equivalence class that it used to be the member of. > 2) Performance and resource reasons. We can spare one removal and thus one > additional tree in the forest of `ClassMap`. This patch does remove the old symbol's Sym->Class relation in order to keep the Sym->Class relation consistent with the Class->Sym relations. Point 2) above has negligible performance impact: empirical measurements do not show any noticeable difference in the run-time. Point 1) above does not seem to be a well-justified statement. This is because we cannot create a new symbol that would be equal to the old symbol after the simplification has happened. The reason for this is that the SValBuilder uses the available constant constraints for each sub-symbol. 
Differential Revision: https://reviews.llvm.org/D126281 --- .../Core/RangeConstraintManager.cpp | 12 ++++----- .../symbol-simplification-assertion.c | 25 +++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 clang/test/Analysis/symbol-simplification-assertion.c diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 1c17d48844ea30..e0ceac51d14f34 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -2508,12 +2508,6 @@ EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { SymbolSet ClsMembers = getClassMembers(State); assert(ClsMembers.contains(Old)); - // We don't remove `Old`'s Sym->Class relation for two reasons: - // 1) This way constraints for the old symbol can still be found via it's - // equivalence class that it used to be the member of. - // 2) Performance and resource reasons. We can spare one removal and thus one - // additional tree in the forest of `ClassMap`. - // Remove `Old`'s Class->Sym relation. SymbolSet::Factory &F = getMembersFactory(State); ClassMembersTy::Factory &EMFactory = State->get_context(); @@ -2527,6 +2521,12 @@ EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers); State = State->set(ClassMembersMap); + // Remove `Old`'s Sym->Class relation. 
+ ClassMapTy Classes = State->get(); + ClassMapTy::Factory &CMF = State->get_context(); + Classes = CMF.remove(Classes, Old); + State = State->set(Classes); + return State; } diff --git a/clang/test/Analysis/symbol-simplification-assertion.c b/clang/test/Analysis/symbol-simplification-assertion.c new file mode 100644 index 00000000000000..e17f5a619158ab --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-assertion.c @@ -0,0 +1,25 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=true \ +// RUN: -verify + +// Here we test that no assertion is fired during symbol simplification. +// Related issue: https://github.com/llvm/llvm-project/issues/55546 + +extern void abort() __attribute__((__noreturn__)); +#define assert(expr) ((expr) ? (void)(0) : abort()) + +void clang_analyzer_warnIfReached(); + +int a, b, c; +long L, L1; +void test() { + assert(a + L + 1 + b != c); + assert(L == a); + assert(c == 0); + L1 = 0; + assert(a + L1 + 1 + b != c); + assert(a == 0); // no-assertion + clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} +} From 0d7f8d42fd170a434006a928a7106b894bdbdd16 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 25 May 2022 10:05:25 +0100 Subject: [PATCH 444/908] [OpenCL] Remove argument names from async copy builtins This simplifies completeness comparisons against OpenCLBuiltins.td and also makes the header no longer "claim" the argument name identifiers. Continues the direction set out in D119560. 
--- clang/lib/Headers/opencl-c.h | 662 +++++++++++++++++------------------ 1 file changed, 331 insertions(+), 331 deletions(-) diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 69e2c85610bae2..1942da77f5e50d 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -12515,141 +12515,141 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); * synchronization of source data such as using a * barrier before performing the copy. */ -event_t __ovld async_work_group_copy(__local char *dst, const __global char *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local short *dst, const __global short *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local int *dst, const __global int *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uint *dst, const __global uint *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local long *dst, const __global long *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local float *dst, const __global float *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ushort2 *dst, 
const __global ushort2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uchar4 *dst, const __global uchar4 *src, 
size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, event_t 
event); -event_t __ovld async_work_group_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global char *dst, const __local char *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global short *dst, const __local short *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global int *dst, const __local int *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uint *dst, const __local uint *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global long *dst, const __local long *src, size_t num_elements, event_t event); -event_t __ovld 
async_work_group_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global float *dst, const __local float *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global 
uint3 *dst, const __local uint3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ushort8 *dst, const __local 
ushort8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event); +event_t __ovld async_work_group_copy(__local char *, const __global char *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uchar *, const __global uchar *, size_t, event_t); +event_t __ovld 
async_work_group_copy(__local short *, const __global short *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ushort *, const __global ushort *, size_t, event_t); +event_t __ovld async_work_group_copy(__local int *, const __global int *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uint *, const __global uint *, size_t, event_t); +event_t __ovld async_work_group_copy(__local long *, const __global long *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ulong *, const __global ulong *, size_t, event_t); +event_t __ovld async_work_group_copy(__local float *, const __global float *, size_t, event_t); +event_t __ovld async_work_group_copy(__local char2 *, const __global char2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uchar2 *, const __global uchar2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local short2 *, const __global short2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ushort2 *, const __global ushort2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local int2 *, const __global int2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uint2 *, const __global uint2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local long2 *, const __global long2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ulong2 *, const __global ulong2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local float2 *, const __global float2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local char3 *, const __global char3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uchar3 *, const __global uchar3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local short3 *, const __global short3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ushort3 *, const __global ushort3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local int3 *, const 
__global int3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uint3 *, const __global uint3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local long3 *, const __global long3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ulong3 *, const __global ulong3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local float3 *, const __global float3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local char4 *, const __global char4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uchar4 *, const __global uchar4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local short4 *, const __global short4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ushort4 *, const __global ushort4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local int4 *, const __global int4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uint4 *, const __global uint4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local long4 *, const __global long4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ulong4 *, const __global ulong4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local float4 *, const __global float4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local char8 *, const __global char8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uchar8 *, const __global uchar8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local short8 *, const __global short8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ushort8 *, const __global ushort8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local int8 *, const __global int8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uint8 *, const __global uint8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local long8 *, const __global long8 *, size_t, event_t); 
+event_t __ovld async_work_group_copy(__local ulong8 *, const __global ulong8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local float8 *, const __global float8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local char16 *, const __global char16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uchar16 *, const __global uchar16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local short16 *, const __global short16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ushort16 *, const __global ushort16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local int16 *, const __global int16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local uint16 *, const __global uint16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local long16 *, const __global long16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local ulong16 *, const __global ulong16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local float16 *, const __global float16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global char *, const __local char *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uchar *, const __local uchar *, size_t, event_t); +event_t __ovld async_work_group_copy(__global short *, const __local short *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ushort *, const __local ushort *, size_t, event_t); +event_t __ovld async_work_group_copy(__global int *, const __local int *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uint *, const __local uint *, size_t, event_t); +event_t __ovld async_work_group_copy(__global long *, const __local long *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ulong *, const __local ulong *, size_t, event_t); +event_t __ovld async_work_group_copy(__global float *, const __local float *, size_t, event_t); +event_t __ovld 
async_work_group_copy(__global char2 *, const __local char2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uchar2 *, const __local uchar2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global short2 *, const __local short2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ushort2 *, const __local ushort2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global int2 *, const __local int2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uint2 *, const __local uint2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global long2 *, const __local long2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ulong2 *, const __local ulong2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global float2 *, const __local float2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global char3 *, const __local char3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uchar3 *, const __local uchar3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global short3 *, const __local short3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ushort3 *, const __local ushort3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global int3 *, const __local int3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uint3 *, const __local uint3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global long3 *, const __local long3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ulong3 *, const __local ulong3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global float3 *, const __local float3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global char4 *, const __local char4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uchar4 *, const __local uchar4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global short4 
*, const __local short4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ushort4 *, const __local ushort4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global int4 *, const __local int4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uint4 *, const __local uint4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global long4 *, const __local long4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ulong4 *, const __local ulong4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global float4 *, const __local float4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global char8 *, const __local char8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uchar8 *, const __local uchar8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global short8 *, const __local short8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ushort8 *, const __local ushort8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global int8 *, const __local int8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uint8 *, const __local uint8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global long8 *, const __local long8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ulong8 *, const __local ulong8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global float8 *, const __local float8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global char16 *, const __local char16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global uchar16 *, const __local uchar16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global short16 *, const __local short16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ushort16 *, const __local ushort16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global int16 *, const __local int16 *, 
size_t, event_t); +event_t __ovld async_work_group_copy(__global uint16 *, const __local uint16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global long16 *, const __local long16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global ulong16 *, const __local ulong16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global float16 *, const __local float16 *, size_t, event_t); #ifdef cl_khr_fp64 -event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global double *dst, const __local double *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event); +event_t __ovld async_work_group_copy(__local 
double *, const __global double *, size_t, event_t); +event_t __ovld async_work_group_copy(__local double2 *, const __global double2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local double3 *, const __global double3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local double4 *, const __global double4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local double8 *, const __global double8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local double16 *, const __global double16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global double *, const __local double *, size_t, event_t); +event_t __ovld async_work_group_copy(__global double2 *, const __local double2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global double3 *, const __local double3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global double4 *, const __local double4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global double8 *, const __local double8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global double16 *, const __local double16 *, size_t, event_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 -event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global half *dst, 
const __local half *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, event_t event); -event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, event_t event); +event_t __ovld async_work_group_copy(__local half *, const __global half *, size_t, event_t); +event_t __ovld async_work_group_copy(__local half2 *, const __global half2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local half3 *, const __global half3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local half4 *, const __global half4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local half8 *, const __global half8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__local half16 *, const __global half16 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global half *, const __local half *, size_t, event_t); +event_t __ovld async_work_group_copy(__global half2 *, const __local half2 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global half3 *, const __local half3 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global half4 *, const __local half4 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global half8 *, const __local half8 *, size_t, event_t); +event_t __ovld async_work_group_copy(__global half16 *, const __local half16 *, size_t, event_t); #endif //cl_khr_fp16 /** @@ -12678,141 +12678,141 @@ event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 * synchronization of source 
data such as using a * barrier before performing the copy. */ -event_t __ovld async_work_group_strided_copy(__local char *dst, const __global char *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local short *dst, const __global short *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local int *dst, const __global int *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uint *dst, const __global uint *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local long *dst, const __global long *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local float *dst, const __global float *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, size_t src_stride, event_t event); 
-event_t __ovld async_work_group_strided_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld 
async_work_group_strided_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ushort8 
*dst, const __global ushort8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local ulong16 *dst, const __global ulong16 *src, 
size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global char *dst, const __local char *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global short *dst, const __local short *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global int *dst, const __local int *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uint *dst, const __local uint *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global long *dst, const __local long *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global float *dst, const __local float *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, size_t dst_stride, event_t event); 
-event_t __ovld async_work_group_strided_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld 
async_work_group_strided_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global short8 
*dst, const __local short8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, 
size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event); +event_t __ovld async_work_group_strided_copy(__local char *, const __global char *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uchar *, const __global uchar *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local short *, const __global short *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ushort *, const __global ushort *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local int *, const __global int *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uint *, const __global uint *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local long *, const __global long *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ulong *, const __global ulong *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local float *, const __global float *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local char2 *, const __global char2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uchar2 *, const __global uchar2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local short2 *, const __global short2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ushort2 *, const __global ushort2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local int2 *, const __global int2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local 
uint2 *, const __global uint2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local long2 *, const __global long2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ulong2 *, const __global ulong2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local float2 *, const __global float2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local char3 *, const __global char3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uchar3 *, const __global uchar3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local short3 *, const __global short3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ushort3 *, const __global ushort3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local int3 *, const __global int3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uint3 *, const __global uint3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local long3 *, const __global long3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ulong3 *, const __global ulong3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local float3 *, const __global float3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local char4 *, const __global char4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uchar4 *, const __global uchar4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local short4 *, const __global short4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ushort4 *, const __global ushort4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local int4 *, const __global int4 *, size_t, size_t, event_t); 
+event_t __ovld async_work_group_strided_copy(__local uint4 *, const __global uint4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local long4 *, const __global long4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ulong4 *, const __global ulong4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local float4 *, const __global float4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local char8 *, const __global char8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uchar8 *, const __global uchar8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local short8 *, const __global short8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ushort8 *, const __global ushort8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local int8 *, const __global int8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uint8 *, const __global uint8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local long8 *, const __global long8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ulong8 *, const __global ulong8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local float8 *, const __global float8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local char16 *, const __global char16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uchar16 *, const __global uchar16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local short16 *, const __global short16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ushort16 *, const __global ushort16 *, size_t, size_t, event_t); +event_t __ovld 
async_work_group_strided_copy(__local int16 *, const __global int16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local uint16 *, const __global uint16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local long16 *, const __global long16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local ulong16 *, const __global ulong16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local float16 *, const __global float16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global char *, const __local char *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uchar *, const __local uchar *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global short *, const __local short *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ushort *, const __local ushort *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global int *, const __local int *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uint *, const __local uint *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global long *, const __local long *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ulong *, const __local ulong *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global float *, const __local float *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global char2 *, const __local char2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uchar2 *, const __local uchar2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global short2 *, const __local short2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ushort2 *, const __local 
ushort2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global int2 *, const __local int2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uint2 *, const __local uint2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global long2 *, const __local long2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ulong2 *, const __local ulong2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global float2 *, const __local float2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global char3 *, const __local char3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uchar3 *, const __local uchar3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global short3 *, const __local short3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ushort3 *, const __local ushort3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global int3 *, const __local int3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uint3 *, const __local uint3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global long3 *, const __local long3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ulong3 *, const __local ulong3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global float3 *, const __local float3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global char4 *, const __local char4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uchar4 *, const __local uchar4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global short4 *, const __local short4 *, size_t, size_t, event_t); +event_t __ovld 
async_work_group_strided_copy(__global ushort4 *, const __local ushort4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global int4 *, const __local int4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uint4 *, const __local uint4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global long4 *, const __local long4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ulong4 *, const __local ulong4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global float4 *, const __local float4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global char8 *, const __local char8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uchar8 *, const __local uchar8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global short8 *, const __local short8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ushort8 *, const __local ushort8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global int8 *, const __local int8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uint8 *, const __local uint8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global long8 *, const __local long8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ulong8 *, const __local ulong8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global float8 *, const __local float8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global char16 *, const __local char16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uchar16 *, const __local uchar16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global short16 *, const 
__local short16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ushort16 *, const __local ushort16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global int16 *, const __local int16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global uint16 *, const __local uint16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global long16 *, const __local long16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global ulong16 *, const __local ulong16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global float16 *, const __local float16 *, size_t, size_t, event_t); #ifdef cl_khr_fp64 -event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global double *dst, const __local double *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld 
async_work_group_strided_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event); +event_t __ovld async_work_group_strided_copy(__local double *, const __global double *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local double2 *, const __global double2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local double3 *, const __global double3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local double4 *, const __global double4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local double8 *, const __global double8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local double16 *, const __global double16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global double *, const __local double *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global double2 *, const __local double2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global double3 *, const __local double3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global double4 *, const __local double4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global double8 *, const __local double8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global double16 *, const __local double16 *, size_t, size_t, 
event_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 -event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, size_t src_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global half *dst, const __local half *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, size_t dst_stride, event_t event); -event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, size_t dst_stride, event_t event); +event_t __ovld async_work_group_strided_copy(__local half *, const __global half *, size_t, size_t, event_t); +event_t __ovld 
async_work_group_strided_copy(__local half2 *, const __global half2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local half3 *, const __global half3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local half4 *, const __global half4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local half8 *, const __global half8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__local half16 *, const __global half16 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global half *, const __local half *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global half2 *, const __local half2 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global half3 *, const __local half3 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global half4 *, const __local half4 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global half8 *, const __local half8 *, size_t, size_t, event_t); +event_t __ovld async_work_group_strided_copy(__global half16 *, const __local half16 *, size_t, size_t, event_t); #endif //cl_khr_fp16 /** @@ -12826,7 +12826,7 @@ event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local * the same num_events and event objects specified * in event_list; otherwise the results are undefined. */ -void __ovld wait_group_events(int num_events, event_t *event_list); +void __ovld wait_group_events(int, event_t *); /** * Prefetch num_elements * sizeof(gentype) @@ -12835,75 +12835,75 @@ void __ovld wait_group_events(int num_events, event_t *event_list); * and does not affect the functional * behavior of the kernel. 
*/ -void __ovld prefetch(const __global char *p, size_t num_elements); -void __ovld prefetch(const __global uchar *p, size_t num_elements); -void __ovld prefetch(const __global short *p, size_t num_elements); -void __ovld prefetch(const __global ushort *p, size_t num_elements); -void __ovld prefetch(const __global int *p, size_t num_elements); -void __ovld prefetch(const __global uint *p, size_t num_elements); -void __ovld prefetch(const __global long *p, size_t num_elements); -void __ovld prefetch(const __global ulong *p, size_t num_elements); -void __ovld prefetch(const __global float *p, size_t num_elements); -void __ovld prefetch(const __global char2 *p, size_t num_elements); -void __ovld prefetch(const __global uchar2 *p, size_t num_elements); -void __ovld prefetch(const __global short2 *p, size_t num_elements); -void __ovld prefetch(const __global ushort2 *p, size_t num_elements); -void __ovld prefetch(const __global int2 *p, size_t num_elements); -void __ovld prefetch(const __global uint2 *p, size_t num_elements); -void __ovld prefetch(const __global long2 *p, size_t num_elements); -void __ovld prefetch(const __global ulong2 *p, size_t num_elements); -void __ovld prefetch(const __global float2 *p, size_t num_elements); -void __ovld prefetch(const __global char3 *p, size_t num_elements); -void __ovld prefetch(const __global uchar3 *p, size_t num_elements); -void __ovld prefetch(const __global short3 *p, size_t num_elements); -void __ovld prefetch(const __global ushort3 *p, size_t num_elements); -void __ovld prefetch(const __global int3 *p, size_t num_elements); -void __ovld prefetch(const __global uint3 *p, size_t num_elements); -void __ovld prefetch(const __global long3 *p, size_t num_elements); -void __ovld prefetch(const __global ulong3 *p, size_t num_elements); -void __ovld prefetch(const __global float3 *p, size_t num_elements); -void __ovld prefetch(const __global char4 *p, size_t num_elements); -void __ovld prefetch(const __global uchar4 *p, size_t 
num_elements); -void __ovld prefetch(const __global short4 *p, size_t num_elements); -void __ovld prefetch(const __global ushort4 *p, size_t num_elements); -void __ovld prefetch(const __global int4 *p, size_t num_elements); -void __ovld prefetch(const __global uint4 *p, size_t num_elements); -void __ovld prefetch(const __global long4 *p, size_t num_elements); -void __ovld prefetch(const __global ulong4 *p, size_t num_elements); -void __ovld prefetch(const __global float4 *p, size_t num_elements); -void __ovld prefetch(const __global char8 *p, size_t num_elements); -void __ovld prefetch(const __global uchar8 *p, size_t num_elements); -void __ovld prefetch(const __global short8 *p, size_t num_elements); -void __ovld prefetch(const __global ushort8 *p, size_t num_elements); -void __ovld prefetch(const __global int8 *p, size_t num_elements); -void __ovld prefetch(const __global uint8 *p, size_t num_elements); -void __ovld prefetch(const __global long8 *p, size_t num_elements); -void __ovld prefetch(const __global ulong8 *p, size_t num_elements); -void __ovld prefetch(const __global float8 *p, size_t num_elements); -void __ovld prefetch(const __global char16 *p, size_t num_elements); -void __ovld prefetch(const __global uchar16 *p, size_t num_elements); -void __ovld prefetch(const __global short16 *p, size_t num_elements); -void __ovld prefetch(const __global ushort16 *p, size_t num_elements); -void __ovld prefetch(const __global int16 *p, size_t num_elements); -void __ovld prefetch(const __global uint16 *p, size_t num_elements); -void __ovld prefetch(const __global long16 *p, size_t num_elements); -void __ovld prefetch(const __global ulong16 *p, size_t num_elements); -void __ovld prefetch(const __global float16 *p, size_t num_elements); +void __ovld prefetch(const __global char *, size_t); +void __ovld prefetch(const __global uchar *, size_t); +void __ovld prefetch(const __global short *, size_t); +void __ovld prefetch(const __global ushort *, size_t); +void __ovld 
prefetch(const __global int *, size_t); +void __ovld prefetch(const __global uint *, size_t); +void __ovld prefetch(const __global long *, size_t); +void __ovld prefetch(const __global ulong *, size_t); +void __ovld prefetch(const __global float *, size_t); +void __ovld prefetch(const __global char2 *, size_t); +void __ovld prefetch(const __global uchar2 *, size_t); +void __ovld prefetch(const __global short2 *, size_t); +void __ovld prefetch(const __global ushort2 *, size_t); +void __ovld prefetch(const __global int2 *, size_t); +void __ovld prefetch(const __global uint2 *, size_t); +void __ovld prefetch(const __global long2 *, size_t); +void __ovld prefetch(const __global ulong2 *, size_t); +void __ovld prefetch(const __global float2 *, size_t); +void __ovld prefetch(const __global char3 *, size_t); +void __ovld prefetch(const __global uchar3 *, size_t); +void __ovld prefetch(const __global short3 *, size_t); +void __ovld prefetch(const __global ushort3 *, size_t); +void __ovld prefetch(const __global int3 *, size_t); +void __ovld prefetch(const __global uint3 *, size_t); +void __ovld prefetch(const __global long3 *, size_t); +void __ovld prefetch(const __global ulong3 *, size_t); +void __ovld prefetch(const __global float3 *, size_t); +void __ovld prefetch(const __global char4 *, size_t); +void __ovld prefetch(const __global uchar4 *, size_t); +void __ovld prefetch(const __global short4 *, size_t); +void __ovld prefetch(const __global ushort4 *, size_t); +void __ovld prefetch(const __global int4 *, size_t); +void __ovld prefetch(const __global uint4 *, size_t); +void __ovld prefetch(const __global long4 *, size_t); +void __ovld prefetch(const __global ulong4 *, size_t); +void __ovld prefetch(const __global float4 *, size_t); +void __ovld prefetch(const __global char8 *, size_t); +void __ovld prefetch(const __global uchar8 *, size_t); +void __ovld prefetch(const __global short8 *, size_t); +void __ovld prefetch(const __global ushort8 *, size_t); +void __ovld 
prefetch(const __global int8 *, size_t); +void __ovld prefetch(const __global uint8 *, size_t); +void __ovld prefetch(const __global long8 *, size_t); +void __ovld prefetch(const __global ulong8 *, size_t); +void __ovld prefetch(const __global float8 *, size_t); +void __ovld prefetch(const __global char16 *, size_t); +void __ovld prefetch(const __global uchar16 *, size_t); +void __ovld prefetch(const __global short16 *, size_t); +void __ovld prefetch(const __global ushort16 *, size_t); +void __ovld prefetch(const __global int16 *, size_t); +void __ovld prefetch(const __global uint16 *, size_t); +void __ovld prefetch(const __global long16 *, size_t); +void __ovld prefetch(const __global ulong16 *, size_t); +void __ovld prefetch(const __global float16 *, size_t); #ifdef cl_khr_fp64 -void __ovld prefetch(const __global double *p, size_t num_elements); -void __ovld prefetch(const __global double2 *p, size_t num_elements); -void __ovld prefetch(const __global double3 *p, size_t num_elements); -void __ovld prefetch(const __global double4 *p, size_t num_elements); -void __ovld prefetch(const __global double8 *p, size_t num_elements); -void __ovld prefetch(const __global double16 *p, size_t num_elements); +void __ovld prefetch(const __global double *, size_t); +void __ovld prefetch(const __global double2 *, size_t); +void __ovld prefetch(const __global double3 *, size_t); +void __ovld prefetch(const __global double4 *, size_t); +void __ovld prefetch(const __global double8 *, size_t); +void __ovld prefetch(const __global double16 *, size_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 -void __ovld prefetch(const __global half *p, size_t num_elements); -void __ovld prefetch(const __global half2 *p, size_t num_elements); -void __ovld prefetch(const __global half3 *p, size_t num_elements); -void __ovld prefetch(const __global half4 *p, size_t num_elements); -void __ovld prefetch(const __global half8 *p, size_t num_elements); -void __ovld prefetch(const __global half16 *p, size_t 
num_elements); +void __ovld prefetch(const __global half *, size_t); +void __ovld prefetch(const __global half2 *, size_t); +void __ovld prefetch(const __global half3 *, size_t); +void __ovld prefetch(const __global half4 *, size_t); +void __ovld prefetch(const __global half8 *, size_t); +void __ovld prefetch(const __global half16 *, size_t); #endif // cl_khr_fp16 // OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions From 089036444eb56a5c559ab1bad96da4ea592b35a2 Mon Sep 17 00:00:00 2001 From: Nigel Perks Date: Wed, 25 May 2022 10:09:59 +0100 Subject: [PATCH 445/908] [XCORE][CodeGen][NFC] Revert: Use ArrayRef in TargetLowering functions Revert 6365bde6585651b7813010dc63df7984a5b8ad41 Restore more readable version in line with other targets, which did not apply the change. Differential Revision: https://reviews.llvm.org/D123661 --- llvm/lib/Target/XCore/XCoreISelLowering.cpp | 50 +++++++++++++-------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 0ed5a0eae22895..81e7dd9ccec091 100644 --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -88,17 +88,24 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // XCore does not have the NodeTypes below. 
- setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); // 64bit - setOperationAction({ISD::ADD, ISD::SUB}, MVT::i64, Custom); - setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom); - setOperationAction( - {ISD::MULHS, ISD::MULHU, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, - MVT::i32, Expand); + setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Custom); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); // Bit Manipulation - setOperationAction({ISD::CTPOP, ISD::ROTL, ISD::ROTR}, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::ROTL , MVT::i32, Expand); + setOperationAction(ISD::ROTR , MVT::i32, Expand); setOperationAction(ISD::BITREVERSE , MVT::i32, Legal); setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -106,29 +113,35 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, // Jump tables. 
setOperationAction(ISD::BR_JT, MVT::Other, Custom); - setOperationAction({ISD::GlobalAddress, ISD::BlockAddress}, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); // Conversion of i64 -> double produces constantpool nodes - setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); // Loads for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, MVT::i1, - Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Expand); } // Custom expand misaligned loads / stores. - setOperationAction({ISD::LOAD, ISD::STORE}, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); // Varargs - setOperationAction({ISD::VAEND, ISD::VACOPY}, MVT::Other, Expand); - setOperationAction({ISD::VAARG, ISD::VASTART}, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VASTART, MVT::Other, Custom); // Dynamic stack - setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // Exception handling @@ -139,11 +152,12 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic. // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a no OP. 
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); // TRAMPOLINE is custom lowered. - setOperationAction({ISD::INIT_TRAMPOLINE, ISD::ADJUST_TRAMPOLINE}, MVT::Other, - Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); From 8919447c71ab5940965fc2211c96f1f8b51d5da3 Mon Sep 17 00:00:00 2001 From: Javed Absar Date: Mon, 23 May 2022 14:48:57 +0100 Subject: [PATCH 446/908] [mlir] Fix warning `missing base in copy ctor` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This suppresse annoying warning when building mlir. ``` warning: base class ‘class mlir::PassWrapper<{anonymous}::TestStatisticPass, mlir::OperationPass >’ should be explicitly initialized in the copy constructor [-Wextra] ``` Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D126209 --- mlir/test/lib/Pass/TestPassManager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/test/lib/Pass/TestPassManager.cpp b/mlir/test/lib/Pass/TestPassManager.cpp index c19221d923e3f8..a61cfacb7cb090 100644 --- a/mlir/test/lib/Pass/TestPassManager.cpp +++ b/mlir/test/lib/Pass/TestPassManager.cpp @@ -62,7 +62,7 @@ struct TestOptionsPass llvm::cl::desc("Example string option")}; }; TestOptionsPass() = default; - TestOptionsPass(const TestOptionsPass &) {} + TestOptionsPass(const TestOptionsPass &) : PassWrapper() {} TestOptionsPass(const Options &options) { listOption = options.listOption; stringOption = options.stringOption; @@ -168,7 +168,7 @@ struct TestStatisticPass MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestStatisticPass) 
TestStatisticPass() = default; - TestStatisticPass(const TestStatisticPass &) {} + TestStatisticPass(const TestStatisticPass &) : PassWrapper() {} StringRef getArgument() const final { return "test-stats-pass"; } StringRef getDescription() const final { return "Test pass statistics"; } From cd2292ef824591cc34cc299910a3098545c840c7 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 24 May 2022 20:21:45 +0200 Subject: [PATCH 447/908] [pseudo] A basic implementation of compiling cxx grammar at build time. The main idea is to compile the cxx grammar at build time, and construct the core pieces (Grammar, LRTable) of the pseudoparse based on the compiled data sources. This is a tiny implementation, which is good for start: - defines how the public API should look like; - integrates the cxx grammar compilation workflow with the cmake system. - onlynonterminal symbols of the C++ grammar are compiled, anything else are still doing the real compilation work at runtime, we can opt-in more bits in the future; - splits the monolithic clangPsuedo library for better layering; Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D125667 --- clang-tools-extra/pseudo/CMakeLists.txt | 2 + clang-tools-extra/pseudo/gen/CMakeLists.txt | 10 +++ clang-tools-extra/pseudo/gen/Main.cpp | 89 +++++++++++++++++++ .../pseudo/include/CMakeLists.txt | 29 ++++++ .../pseudo/include/clang-pseudo/cxx/CXX.h | 51 +++++++++++ clang-tools-extra/pseudo/lib/CMakeLists.txt | 9 +- .../pseudo/lib/cxx/CMakeLists.txt | 9 ++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp | 34 +++++++ .../pseudo/lib/grammar/CMakeLists.txt | 18 ++++ .../pseudo/lib/{ => grammar}/Grammar.cpp | 0 .../pseudo/lib/{ => grammar}/GrammarBNF.cpp | 0 .../pseudo/lib/{ => grammar}/LRGraph.cpp | 0 .../pseudo/lib/{ => grammar}/LRTable.cpp | 0 .../pseudo/lib/{ => grammar}/LRTableBuild.cpp | 0 14 files changed, 246 insertions(+), 5 deletions(-) create mode 100644 clang-tools-extra/pseudo/gen/CMakeLists.txt create mode 100644 
clang-tools-extra/pseudo/gen/Main.cpp create mode 100644 clang-tools-extra/pseudo/include/CMakeLists.txt create mode 100644 clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h create mode 100644 clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt create mode 100644 clang-tools-extra/pseudo/lib/cxx/CXX.cpp create mode 100644 clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt rename clang-tools-extra/pseudo/lib/{ => grammar}/Grammar.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/GrammarBNF.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/LRGraph.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/LRTable.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/LRTableBuild.cpp (100%) diff --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt index 0891cc0a0f8857..24bc1530bb7d6f 100644 --- a/clang-tools-extra/pseudo/CMakeLists.txt +++ b/clang-tools-extra/pseudo/CMakeLists.txt @@ -1,5 +1,7 @@ include_directories(include) include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) +add_subdirectory(include) +add_subdirectory(gen) add_subdirectory(lib) add_subdirectory(tool) add_subdirectory(fuzzer) diff --git a/clang-tools-extra/pseudo/gen/CMakeLists.txt b/clang-tools-extra/pseudo/gen/CMakeLists.txt new file mode 100644 index 00000000000000..a104e05b53da99 --- /dev/null +++ b/clang-tools-extra/pseudo/gen/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS Support) + +add_clang_executable(pseudo-gen + Main.cpp + ) + +target_link_libraries(pseudo-gen + PRIVATE + clangPseudoGrammar + ) diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp new file mode 100644 index 00000000000000..535f863268df13 --- /dev/null +++ b/clang-tools-extra/pseudo/gen/Main.cpp @@ -0,0 +1,89 @@ +//===--- Main.cpp - Compile BNF grammar -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a tool to compile a BNF grammar, it is used by the build system to +// generate a necessary data bits to statically construct core pieces (Grammar, +// LRTable etc) of the LR parser. +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/Grammar.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" +#include + +using llvm::cl::desc; +using llvm::cl::init; +using llvm::cl::opt; +using llvm::cl::values; + +namespace { +enum EmitType { + EmitSymbolList, + EmitGrammarContent, +}; + +opt Grammar("grammar", desc("Parse a BNF grammar file."), + init("")); +opt + Emit(desc("which information to emit:"), + values(clEnumValN(EmitSymbolList, "emit-symbol-list", + "Print nonterminal symbols (default)"), + clEnumValN(EmitGrammarContent, "emit-grammar-content", + "Print the BNF grammar content as a string"))); +std::string readOrDie(llvm::StringRef Path) { + llvm::ErrorOr> Text = + llvm::MemoryBuffer::getFile(Path); + if (std::error_code EC = Text.getError()) { + llvm::errs() << "Error: can't read grammar file '" << Path + << "': " << EC.message() << "\n"; + ::exit(1); + } + return Text.get()->getBuffer().str(); +} +} // namespace + +int main(int argc, char *argv[]) { + llvm::cl::ParseCommandLineOptions(argc, argv, ""); + if (!Grammar.getNumOccurrences()) { + llvm::errs() << "Grammar file must be provided!\n"; + return 1; + } + + std::string GrammarText = readOrDie(Grammar); + std::vector Diags; + auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags); + + if (!Diags.empty()) { + llvm::errs() << llvm::join(Diags, "\n"); + return 1; + } + switch (Emit) { + + case EmitSymbolList: + for 
(clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size(); + ++ID) { + std::string Name = G->symbolName(ID).str(); + // translation-unit -> translation_unit + std::replace(Name.begin(), Name.end(), '-', '_'); + llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID)); + } + break; + case EmitGrammarContent: + for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) { + llvm::outs() << '"'; + llvm::outs().write_escaped((Line + "\n").str()); + llvm::outs() << "\"\n"; + } + break; + } + + return 0; +} diff --git a/clang-tools-extra/pseudo/include/CMakeLists.txt b/clang-tools-extra/pseudo/include/CMakeLists.txt new file mode 100644 index 00000000000000..e2a6f0efc0a32c --- /dev/null +++ b/clang-tools-extra/pseudo/include/CMakeLists.txt @@ -0,0 +1,29 @@ +# The cxx.bnf grammar file +set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf) + +# Generate inc files. +set(cxx_symbols_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXSymbols.inc) +add_custom_command(OUTPUT ${cxx_symbols_inc} + COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + --grammar ${cxx_bnf} + --emit-symbol-list + > ${cxx_symbols_inc} + COMMENT "Generating nonterminal symbol file for cxx grammar..." + DEPENDS pseudo-gen + VERBATIM) + +set(cxx_bnf_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXBNF.inc) +add_custom_command(OUTPUT ${cxx_bnf_inc} + COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + --grammar ${cxx_bnf} + --emit-grammar-content + > ${cxx_bnf_inc} + COMMENT "Generating bnf string file for cxx grammar..." + DEPENDS pseudo-gen + VERBATIM) + +# add_custom_command does not create a new target, we need to deine a target +# explicitly, so that other targets can depend on it. 
+add_custom_target(cxx_gen + DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc} + VERBATIM) diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h new file mode 100644 index 00000000000000..edeeb636d83ed8 --- /dev/null +++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -0,0 +1,51 @@ +//===--- CXX.h - Public interfaces for the C++ grammar -----------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines public interfaces for the C++ grammar +// (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces +// of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar +// file at the runtime. +// +// We do a compilation of the C++ BNF grammar at build time, and generate +// critical data sources. The implementation of the interfaces are based on the +// generated data sources. +// +// FIXME: not everything is fully compiled yet. The implementation of the +// interfaces are still parsing the grammar file at the runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_PSEUDO_CXX_CXX_H +#define CLANG_PSEUDO_CXX_CXX_H + +#include "clang-pseudo/Grammar.h" + +namespace clang { +namespace pseudo { +class LRTable; + +namespace cxx { +// Symbol represents nonterminal symbols in the C++ grammar. +// It provides a simple uniform way to access a particular nonterminal. +enum class Symbol : SymbolID { +#define NONTERMINAL(X, Y) X = Y, +#include "CXXSymbols.inc" +#undef NONTERMINAL +}; + +// Returns the C++ grammar. +const Grammar &getGrammar(); +// Returns the corresponding LRTable for the C++ grammar. 
+const LRTable &getLRTable(); + +} // namespace cxx + +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_CXX_CXX_H diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt index 6dc8ed5b5e7a26..f312b10f7d8b47 100644 --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -1,3 +1,6 @@ +add_subdirectory(cxx) +add_subdirectory(grammar) + set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangPseudo @@ -5,15 +8,11 @@ add_clang_library(clangPseudo DirectiveTree.cpp Forest.cpp GLR.cpp - Grammar.cpp - GrammarBNF.cpp Lex.cpp - LRGraph.cpp - LRTable.cpp - LRTableBuild.cpp Token.cpp LINK_LIBS clangBasic clangLex + clangPseudoGrammar ) diff --git a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt new file mode 100644 index 00000000000000..9e10f2ba5388ed --- /dev/null +++ b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt @@ -0,0 +1,9 @@ +add_clang_library(clangPseudoCXX + CXX.cpp + + DEPENDS + cxx_gen + + LINK_LIBS + clangPseudoGrammar + ) diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp new file mode 100644 index 00000000000000..3d594b722f1ca1 --- /dev/null +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -0,0 +1,34 @@ +//===--- CXX.cpp - Define public interfaces for C++ grammar ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/cxx/CXX.h" +#include "clang-pseudo/LRTable.h" + +namespace clang { +namespace pseudo { +namespace cxx { + +static const char *CXXBNF = +#include "CXXBNF.inc" + ; + +const Grammar &getGrammar() { + static std::vector Diags; + static Grammar *G = Grammar::parseBNF(CXXBNF, Diags).release(); + assert(Diags.empty()); + return *G; +} + +const LRTable &getLRTable() { + static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar())); + return *Table; +} + +} // namespace cxx +} // namespace pseudo +} // namespace clang diff --git a/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt new file mode 100644 index 00000000000000..d50cb7df2a2cc5 --- /dev/null +++ b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_LINK_COMPONENTS Support) + +# This library intends to keep as minimal dependencies as possible, it is a base +# library of the cxx generator, to avoid creating long dep paths in the build +# graph. +add_clang_library(clangPseudoGrammar + Grammar.cpp + GrammarBNF.cpp + LRGraph.cpp + LRTable.cpp + LRTableBuild.cpp + + # FIXME: can we get rid of the clangBasic dependency? We need it for the + # clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions, we + # could consider reimplementing these functions. 
+ LINK_LIBS + clangBasic + ) diff --git a/clang-tools-extra/pseudo/lib/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/Grammar.cpp rename to clang-tools-extra/pseudo/lib/grammar/Grammar.cpp diff --git a/clang-tools-extra/pseudo/lib/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/GrammarBNF.cpp rename to clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp diff --git a/clang-tools-extra/pseudo/lib/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/LRGraph.cpp rename to clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp diff --git a/clang-tools-extra/pseudo/lib/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/LRTable.cpp rename to clang-tools-extra/pseudo/lib/grammar/LRTable.cpp diff --git a/clang-tools-extra/pseudo/lib/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/LRTableBuild.cpp rename to clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp From c6e45ea0743b763e8e997f8a9a9236986bea6a5f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 25 May 2022 11:05:23 +0100 Subject: [PATCH 448/908] [VPlan] Exit earlier when trying to widen with scalar VFs. This simplifies the code a bit, suggested in D124718. 
Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D125029 --- .../Transforms/Vectorize/LoopVectorize.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cc9e7425ee55d1..22993f2ec76821 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8137,8 +8137,6 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, "Must be called with either a load or store"); auto willWiden = [&](ElementCount VF) -> bool { - if (VF.isScalar()) - return false; LoopVectorizationCostModel::InstWidening Decision = CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && @@ -8335,8 +8333,6 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, return nullptr; auto willWiden = [&](ElementCount VF) -> bool { - if (VF.isScalar()) - return false; Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); // The following case may be scalarized depending on the VF. // The flag shows whether we use Intrinsic or a usual Call for vectorized @@ -8550,14 +8546,8 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, ArrayRef Operands, VFRange &Range, VPlanPtr &Plan) { - // First, check for specific widening recipes that deal with calls, memory - // operations, inductions and Phi nodes. - if (auto *CI = dyn_cast(Instr)) - return toVPRecipeResult(tryToWidenCall(CI, Operands, Range)); - - if (isa(Instr) || isa(Instr)) - return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan)); - + // First, check for specific widening recipes that deal with inductions, Phi + // nodes, calls and memory operations. 
VPRecipeBase *Recipe; if (auto Phi = dyn_cast(Instr)) { if (Phi->getParent() != OrigLoop->getHeader()) @@ -8595,6 +8585,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, Range, *Plan))) return toVPRecipeResult(Recipe); + // All widen recipes below deal only with VF > 1. + if (LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { return VF.isScalar(); }, Range)) + return nullptr; + + if (auto *CI = dyn_cast(Instr)) + return toVPRecipeResult(tryToWidenCall(CI, Operands, Range)); + + if (isa(Instr) || isa(Instr)) + return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan)); + if (!shouldWiden(Instr, Range)) return nullptr; From f1df6515e3fe54f688739927c57cb6adec01c307 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 25 May 2022 12:38:23 +0200 Subject: [PATCH 449/908] [pseudo] Add missing dependency, fix shared library build. --- clang-tools-extra/pseudo/benchmarks/CMakeLists.txt | 1 + clang-tools-extra/pseudo/fuzzer/CMakeLists.txt | 1 + clang-tools-extra/pseudo/tool/CMakeLists.txt | 1 + clang-tools-extra/pseudo/unittests/CMakeLists.txt | 1 + 4 files changed, 4 insertions(+) diff --git a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt index 20804061ffdb5e..b088f8f7c68b83 100644 --- a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt +++ b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt @@ -3,5 +3,6 @@ add_benchmark(ClangPseudoBenchmark Benchmark.cpp) target_link_libraries(ClangPseudoBenchmark PRIVATE clangPseudo + clangPseudoGrammar LLVMSupport ) diff --git a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt index 556c2f438d388a..e5061cee3bf9db 100644 --- a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt +++ b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt @@ -11,4 +11,5 @@ add_llvm_fuzzer(clang-pseudo-fuzzer target_link_libraries(clang-pseudo-fuzzer PRIVATE clangPseudo + clangPseudoGrammar ) diff --git 
a/clang-tools-extra/pseudo/tool/CMakeLists.txt b/clang-tools-extra/pseudo/tool/CMakeLists.txt index bf73f89d4e1dd3..8b47d54e766c20 100644 --- a/clang-tools-extra/pseudo/tool/CMakeLists.txt +++ b/clang-tools-extra/pseudo/tool/CMakeLists.txt @@ -12,5 +12,6 @@ clang_target_link_libraries(clang-pseudo target_link_libraries(clang-pseudo PRIVATE clangPseudo + clangPseudoGrammar ) diff --git a/clang-tools-extra/pseudo/unittests/CMakeLists.txt b/clang-tools-extra/pseudo/unittests/CMakeLists.txt index 73b13984d93e65..2185e4e4c146b8 100644 --- a/clang-tools-extra/pseudo/unittests/CMakeLists.txt +++ b/clang-tools-extra/pseudo/unittests/CMakeLists.txt @@ -23,4 +23,5 @@ clang_target_link_libraries(ClangPseudoTests target_link_libraries(ClangPseudoTests PRIVATE clangPseudo + clangPseudoGrammar ) From 87936c7b131ee141a1309d5535f149ac48ff694e Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 24 May 2022 13:24:20 +0100 Subject: [PATCH 450/908] [LoopVectorize] Fix assertion failure in fixReduction when tail-folding When compiling the attached new test in scalable-reductions-tf.ll we were hitting this assertion in fixReduction: Assertion `isa(U) && "Reduction exit must feed Phi's or select" The loop contains a reduction and an intermediate store of the reduction value. When vectorising with tail-folding the contents of 'U' in the assertion above happened to be a scatter_store. It turns out that we were still creating a widen recipe for the invariant store, despite knowing that we can actually sink it. The simplest fix is to change buildVPlanWithVPRecipes so that we look for invariant stores before attempting to widen them. 
Differential Revision: https://reviews.llvm.org/D126295 --- .../Transforms/Vectorize/LoopVectorize.cpp | 15 +++---- .../AArch64/scalable-reductions-tf.ll | 40 +++++++++++++++++++ .../AArch64/scalable-reductions.ll | 2 - 3 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 22993f2ec76821..beddc3ee251738 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8829,6 +8829,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( auto OpRange = Plan->mapToVPValues(Instr->operands()); Operands = {OpRange.begin(), OpRange.end()}; } + + // Invariant stores inside loop will be deleted and a single store + // with the final reduction value will be added to the exit block + StoreInst *SI; + if ((SI = dyn_cast(&I)) && + Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) + continue; + if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( Instr, Operands, Range, Plan)) { // If Instr can be simplified to an existing VPValue, use it. @@ -8864,13 +8872,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( continue; } - // Invariant stores inside loop will be deleted and a single store - // with the final reduction value will be added to the exit block - StoreInst *SI; - if ((SI = dyn_cast(&I)) && - Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) - continue; - // Otherwise, if all widening options failed, Instruction is to be // replicated. This may create a successor for VPBB. 
VPBasicBlock *NextVPBB = diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll new file mode 100644 index 00000000000000..1749519883ead4 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S | FileCheck %s + +define void @invariant_store_red_exit_is_phi(i32* %dst, i32* readonly %src, i64 %n) { +; CHECK-LABEL: @invariant_store_red_exit_is_phi( +; CHECK: vector.body: +; CHECK: %[[VEC_PHI:.*]] = phi [ zeroinitializer, %vector.ph ], [ %[[PREDPHI:.*]], %vector.body ] +; CHECK: %[[ACTIVE_LANE_MASK:.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 {{%.*}}, i64 %n) +; CHECK: %[[LOAD:.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32 +; CHECK-NEXT: %[[ADD:.*]] = add %[[VEC_PHI]], %[[LOAD]] +; CHECK-NEXT: %[[SELECT:.*]] = select %[[ACTIVE_LANE_MASK]], %[[ADD]], %[[VEC_PHI]] +; CHECK: middle.block: +; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( %[[SELECT]]) +; CHECK-NEXT: store i32 %[[SUM]], i32* %dst, align 4 +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc + %red = phi i32 [ 0, %entry ], [ %storemerge, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx6 = getelementptr inbounds i32, i32* %src, i64 %indvars.iv + %load = load i32, i32* %arrayidx6, align 4 + %storemerge = add i32 %red, %load + store i32 %storemerge, i32* %dst, align 4 + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0 + +for.end.loopexit: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %for.end.loopexit + ret void +} + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = 
!{!"llvm.loop.vectorize.width", i32 4} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!3 = !{!"llvm.loop.interleave.count", i32 1} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll index 56d8a076a11424..86f1f553d82175 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll @@ -328,8 +328,6 @@ define void @invariant_store(i32* %dst, i32* readonly %src) { ; CHECK: %[[LOAD2:.*]] = load ; CHECK: %[[ADD1:.*]] = add %{{.*}}, %[[LOAD1]] ; CHECK: %[[ADD2:.*]] = add %{{.*}}, %[[LOAD2]] -; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %[[ADD1]] -; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %[[ADD2]] ; CHECK: middle.block: ; CHECK: %[[ADD:.*]] = add %[[ADD2]], %[[ADD1]] ; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( %[[ADD]]) From 730dc4e9bce8189c037a32e520d18b141250d265 Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Wed, 25 May 2022 12:01:42 +0100 Subject: [PATCH 451/908] [Clang] Added options for integrated backend. Following the new flow for external object code emission, provide flags to switch between integrated and external backend similar to the integrated assembler options. SPIR-V target is the only user of this functionality at this point. This patch also updated SPIR-V documentation to clarify that integrated object code emission for SPIR-V is an experimental feature. 
Differential Revision: https://reviews.llvm.org/D125679 --- clang/docs/UsersManual.rst | 24 ++++++++++++++++++---- clang/include/clang/Driver/Options.td | 7 +++++++ clang/include/clang/Driver/ToolChain.h | 14 ++++++++++++- clang/lib/Driver/ToolChain.cpp | 28 ++++++++++++++++++++++++++ clang/lib/Driver/ToolChains/SPIRV.h | 3 ++- clang/test/Driver/clang_f_opts.c | 5 +++++ clang/test/Driver/spirv-toolchain.cl | 8 ++++++++ 7 files changed, 83 insertions(+), 6 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index aaba6dffd2e5bf..45de85d342c800 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3180,8 +3180,8 @@ Generic Targets .. code-block:: console - $ clang -target spirv32 test.cl - $ clang -target spirv64 test.cl + $ clang -target spirv32 -c test.cl + $ clang -target spirv64 -c test.cl More details can be found in :ref:`the SPIR-V support section `. @@ -3657,8 +3657,8 @@ Example usage for OpenCL kernel compilation: .. code-block:: console - $ clang -target spirv32 test.cl - $ clang -target spirv64 test.cl + $ clang -target spirv32 -c test.cl + $ clang -target spirv64 -c test.cl Both invocations of Clang will result in the generation of a SPIR-V binary file `test.o` for 32 bit and 64 bit respectively. This file can be imported @@ -3669,6 +3669,18 @@ Converting to SPIR-V produced with the optimization levels other than `-O0` is currently available as an experimental feature and it is not guaranteed to work in all cases. +Clang also supports integrated generation of SPIR-V without use of ``llvm-spirv`` +tool as an experimental feature when ``-fintegrated-objemitter`` flag is passed in +the command line. + + .. code-block:: console + + $ clang -target spirv32 -fintegrated-objemitter -c test.cl + +Note that only very basic functionality is supported at this point and therefore +it is not suitable for arbitrary use cases. 
This feature is only enabled when clang +build is configured with ``-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=SPIRV`` option. + Linking is done using ``spirv-link`` from `the SPIRV-Tools project `_. Similar to other external linkers, Clang will expect ``spirv-link`` to be installed separately and to be @@ -3676,6 +3688,10 @@ present in the ``PATH`` environment variable. Please refer to `the build and installation instructions `_. + .. code-block:: console + + $ clang -target spirv64 test1.cl test2.cl + .. _clang-cl: clang-cl diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 55f4f0999c403a..d34cec766a2e2b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4170,6 +4170,13 @@ def fno_integrated_cc1 : Flag<["-"], "fno-integrated-cc1">, Flags<[CoreOption, NoXarchOption]>, Group, HelpText<"Spawn a separate process for each cc1">; +def fintegrated_objemitter : Flag<["-"], "fintegrated-objemitter">, + Flags<[CoreOption, NoXarchOption]>, Group, + HelpText<"Use internal machine object code emitter.">; +def fno_integrated_objemitter : Flag<["-"], "fno-integrated-objemitter">, + Flags<[CoreOption, NoXarchOption]>, Group, + HelpText<"Use external machine object code emitter.">; + def : Flag<["-"], "integrated-as">, Alias, Flags<[NoXarchOption]>; def : Flag<["-"], "no-integrated-as">, Alias, Flags<[CC1Option, FlangOption, NoXarchOption]>; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 269b24dc83c398..0bc345a970dfb4 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -385,11 +385,23 @@ class ToolChain { /// by default. virtual bool IsIntegratedAssemblerDefault() const { return false; } + /// IsIntegratedBackendDefault - Does this tool chain enable + /// -fintegrated-objemitter by default. 
+ virtual bool IsIntegratedBackendDefault() const { return true; } + + /// IsIntegratedBackendSupported - Does this tool chain support + /// -fintegrated-objemitter. + virtual bool IsIntegratedBackendSupported() const { return true; } + + /// IsNonIntegratedBackendSupported - Does this tool chain support + /// -fno-integrated-objemitter. + virtual bool IsNonIntegratedBackendSupported() const { return false; } + /// Check if the toolchain should use the integrated assembler. virtual bool useIntegratedAs() const; /// Check if the toolchain should use the integrated backend. - virtual bool useIntegratedBackend() const { return true; } + virtual bool useIntegratedBackend() const; /// Check if the toolchain should use AsmParser to parse inlineAsm when /// integrated assembler is not default. diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index b10c7de96d7885..efbf2442acfc49 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -106,6 +106,34 @@ bool ToolChain::useIntegratedAs() const { IsIntegratedAssemblerDefault()); } +bool ToolChain::useIntegratedBackend() const { + assert( + ((IsIntegratedBackendDefault() && IsIntegratedBackendSupported()) || + (!IsIntegratedBackendDefault() || IsNonIntegratedBackendSupported())) && + "(Non-)integrated backend set incorrectly!"); + + bool IBackend = Args.hasFlag(options::OPT_fintegrated_objemitter, + options::OPT_fno_integrated_objemitter, + IsIntegratedBackendDefault()); + + // Diagnose when integrated-objemitter options are not supported by this + // toolchain. 
+ unsigned DiagID; + if ((IBackend && !IsIntegratedBackendSupported()) || + (!IBackend && !IsNonIntegratedBackendSupported())) + DiagID = clang::diag::err_drv_unsupported_opt_for_target; + else + DiagID = clang::diag::warn_drv_unsupported_opt_for_target; + Arg *A = Args.getLastArg(options::OPT_fno_integrated_objemitter); + if (A && !IsNonIntegratedBackendSupported()) + D.Diag(DiagID) << A->getAsString(Args) << Triple.getTriple(); + A = Args.getLastArg(options::OPT_fintegrated_objemitter); + if (A && !IsIntegratedBackendSupported()) + D.Diag(DiagID) << A->getAsString(Args) << Triple.getTriple(); + + return IBackend; +} + bool ToolChain::useRelaxRelocations() const { return ENABLE_X86_RELAX_RELOCATIONS; } diff --git a/clang/lib/Driver/ToolChains/SPIRV.h b/clang/lib/Driver/ToolChains/SPIRV.h index a16ae3ca51facf..bb2904f761289c 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.h +++ b/clang/lib/Driver/ToolChains/SPIRV.h @@ -64,8 +64,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRVToolChain final : public ToolChain { : ToolChain(D, Triple, Args) {} bool useIntegratedAs() const override { return true; } - bool useIntegratedBackend() const override { return false; } + bool IsIntegratedBackendDefault() const override { return false; } + bool IsNonIntegratedBackendSupported() const override { return true; } bool IsMathErrnoDefault() const override { return false; } bool isCrossCompiling() const override { return true; } bool isPICDefault() const override { return false; } diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 1b2a71e811baff..d529f9ad5c4855 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -605,3 +605,8 @@ // CHECK_JMC_WARN_NOT_ELF: -fjmc works only for ELF; option ignored // CHECK_NOJMC-NOT: -fjmc // CHECK_JMC: -fjmc + +// RUN: %clang -### -fintegrated-objemitter -target x86_64 %s 2>&1 | FileCheck -check-prefix=CHECK-INT-OBJEMITTER %s +// CHECK-INT-OBJEMITTER-NOT: unsupported option 
'-fintegrated-objemitter' for target +// RUN: %clang -### -fno-integrated-objemitter -target x86_64 %s 2>&1 | FileCheck -check-prefix=CHECK-NOINT-OBJEMITTER %s +// CHECK-NOINT-OBJEMITTER: unsupported option '-fno-integrated-objemitter' for target diff --git a/clang/test/Driver/spirv-toolchain.cl b/clang/test/Driver/spirv-toolchain.cl index 7f50ed89cb1271..db3ee4d3fe02f8 100644 --- a/clang/test/Driver/spirv-toolchain.cl +++ b/clang/test/Driver/spirv-toolchain.cl @@ -69,3 +69,11 @@ // SPLINK-SAME: "-o" [[BC:".*bc"]] // SPLINK: {{llvm-spirv.*"}} [[BC]] "-o" [[SPV2:".*o"]] // SPLINK: {{spirv-link.*"}} [[SPV1]] [[SPV2]] "-o" "a.out" + +//----------------------------------------------------------------------------- +// Check external vs internal object emission. +// RUN: %clang -### --target=spirv64 -fno-integrated-objemitter %s 2>&1 | FileCheck --check-prefix=XTOR %s +// RUN: %clang -### --target=spirv64 -fintegrated-objemitter %s 2>&1 | FileCheck --check-prefix=BACKEND %s + +// XTOR: {{llvm-spirv.*"}} +// BACKEND-NOT: {{llvm-spirv.*"}} From 18cb3b35066e5f385fa08d373c60447a3aa88ce3 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 25 May 2022 12:16:26 +0100 Subject: [PATCH 452/908] [ARM] Fix vcvtb/t.f16 input liveness The `vcvtb.f16.f32 Sd, Sn` (and vcvtt.f16.f32) instructions convert an f32 into an f16, writing either the top or bottom half of the register. That means that half of the input register Sd is used in the output. This wasn't being modelled in the instructions, leading later analyses to believe that the registers were dead where they were not, generating invalid scheduling. Fix that by specifying the input Sda register for the instructions too, allowing them to be set for cases like vector inserts. Most of the changes are plumbing through the constraint string, cstr. 
Differential Revision: https://reviews.llvm.org/D126118 --- llvm/lib/Target/ARM/ARMInstrFormats.td | 26 +-- llvm/lib/Target/ARM/ARMInstrVFP.td | 96 +++++----- llvm/test/CodeGen/ARM/aes-erratum-fix.ll | 88 ++++----- llvm/test/CodeGen/Thumb2/mve-div-expand.ll | 24 +-- llvm/test/CodeGen/Thumb2/mve-fmath.ll | 192 +++++++++---------- llvm/test/CodeGen/Thumb2/mve-masked-store.ll | 12 +- llvm/test/CodeGen/Thumb2/mve-vcvt.ll | 21 +- llvm/test/CodeGen/Thumb2/mve-vcvt16.ll | 8 +- 8 files changed, 238 insertions(+), 229 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index ff5afd787c82df..c9a2d21bec532c 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -1589,9 +1589,9 @@ class VFPXI pattern> + string opc, string asm, string cstr, list pattern> : VFPI { + opc, asm, cstr, pattern> { let PostEncoderMethod = "VFPThumb2PostEncoder"; } @@ -1751,8 +1751,8 @@ class AXSI4 opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : VFPAI { + string asm, string cstr, list pattern> + : VFPAI { // Instruction operands. bits<5> Dd; bits<5> Dm; @@ -1804,7 +1804,7 @@ class ADuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { // Instruction operands. bits<5> Dd; bits<5> Dn; @@ -1862,8 +1862,8 @@ class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, // Single precision, unary, predicated class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list pattern> - : VFPAI { + string asm, string cstr, list pattern> + : VFPAI { // Instruction operands. 
bits<5> Sd; bits<5> Sm; @@ -1916,14 +1916,14 @@ class ASuIn opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> : ASuI { + "", pattern> { list Predicates = [HasVFP2,DontUseNEONForFP]; } // Single precision, binary class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { // Instruction operands. bits<5> Sd; bits<5> Sn; @@ -2000,7 +2000,7 @@ class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, class AHuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { list Predicates = [HasFullFP16]; // Instruction operands. @@ -2056,7 +2056,7 @@ class AHuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, // Half precision, binary class AHbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { list Predicates = [HasFullFP16]; // Instruction operands. 
@@ -2116,7 +2116,7 @@ class AHbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, class AVConv1I opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{19-16} = opcod3; @@ -2149,7 +2149,7 @@ class AVConv1In opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list pattern> - : VFPAI { + : VFPAI { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{4} = 1; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index dc5f1b92a6c2bd..b233555d522552 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -584,12 +584,12 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), - IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "", [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), - IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "", [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
@@ -603,12 +603,12 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), - IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", + IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "", [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), - IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", + IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "", [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -627,7 +627,7 @@ def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", + IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>; def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, @@ -647,7 +647,7 @@ def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0, let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), - IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "", [(arm_cmpfpe0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -655,7 +655,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), - IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "", [(arm_cmpfpe0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -675,7 +675,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), - IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", + IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "", [(arm_cmpfp0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -683,7 +683,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), - IIC_fpCMP32, 
"vcmp", ".f32\t$Sd, #0", + IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "", [(arm_cmpfp0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -704,7 +704,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), - IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", + IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "", [(set DPR:$Dd, (fpextend SPR:$Sm))]>, Sched<[WriteFPCVT]> { // Instruction operands. @@ -723,7 +723,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, // Special case encoding: bits 11-8 is 0b1011. def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, - IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", + IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "", [(set SPR:$Sd, (fpround DPR:$Dm))]>, Sched<[WriteFPCVT]> { // Instruction operands. @@ -749,7 +749,7 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half, single and double-precision. let hasSideEffects = 0 in def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", "", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; @@ -760,26 +760,30 @@ def : FP16Pat<(f16_to_fp GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; let hasSideEffects = 0 in -def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", +def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; def : FP16Pat<(f16 (fpround SPR:$Sm)), - (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>; + (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>; def : FP16Pat<(fp_to_f16 SPR:$a), - (i32 
(COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>; def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), - (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH SPR:$src2), + (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), + (VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), - (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2), + (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), + (VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; let hasSideEffects = 0 in def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", "", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; @@ -792,22 +796,26 @@ def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))), (SSubReg_f16_reg imm_odd:$lane)))>; let hasSideEffects = 0 in -def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", +def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), - (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2), + (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), + (VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def : FP16Pat<(insertelt 
(v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), - (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH SPR:$src2), + (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), + (VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), - NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", "", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFPARMv8, HasDPVFP]>, Sched<[WriteFPCVT]> { @@ -829,8 +837,8 @@ def : FP16Pat<(f64 (f16_to_fp GPR:$a)), Requires<[HasFPARMv8, HasDPVFP]>; def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, - (outs SPR:$Sd), (ins DPR:$Dm), - NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. @@ -847,15 +855,15 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, } def : FullFP16Pat<(f16 (fpround DPR:$Dm)), - (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>, + (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>, Requires<[HasFPARMv8, HasDPVFP]>; def : FP16Pat<(fp_to_f16 (f64 DPR:$a)), - (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>, + (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>, Requires<[HasFPARMv8, HasDPVFP]>; def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), - NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", "", []>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. 
bits<5> Sm; @@ -868,8 +876,8 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, } def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, - (outs SPR:$Sd), (ins DPR:$Dm), - NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda", []>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. bits<5> Sd; @@ -990,7 +998,7 @@ defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", + IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>; def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, @@ -1019,7 +1027,7 @@ multiclass vrint_inst_zrx { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", "", [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, Requires<[HasFPARMv8]> { let Inst{7} = op2; @@ -1027,7 +1035,7 @@ multiclass vrint_inst_zrx { } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", "", [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, Requires<[HasFPARMv8, HasDPVFP]> { let Inst{7} = op2; @@ -1094,13 +1102,13 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", + IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>, Sched<[WriteFPSQRT64]>; def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", + IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>, Sched<[WriteFPSQRT32]>; @@ -1113,12 +1121,12 @@ let hasSideEffects = 0 in { let 
isMoveReg = 1 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>, + IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", "", []>, Requires<[HasFPRegs64]>; def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>, + IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", "", []>, Requires<[HasFPRegs]>; } // isMoveReg @@ -1984,7 +1992,7 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, class BF16_VCVT op7_6> : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm), VFPUnaryFrm, NoItinerary, - opc, ".bf16.f32\t$Sd, $Sm", []>, + opc, ".bf16.f32\t$Sd, $Sm", "", []>, RegConstraint<"$dst = $Sd">, Requires<[HasBF16]>, Sched<[]> { @@ -2440,7 +2448,7 @@ def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p), class MovFromVFP opc19_16, dag oops, dag iops, string opc, string asm, list pattern>: - VFPAI { + VFPAI { // Instruction operand. bits<4> Rt; @@ -2525,7 +2533,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { class MovToVFP opc19_16, dag oops, dag iops, string opc, string asm, list pattern>: - VFPAI { + VFPAI { // Instruction operand. 
bits<4> Rt; @@ -2598,7 +2606,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, - "vmov", ".f64\t$Dd, $imm", + "vmov", ".f64\t$Dd, $imm", "", [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3,HasDPVFP]> { bits<5> Dd; @@ -2617,7 +2625,7 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_fpUNA32, - "vmov", ".f32\t$Sd, $imm", + "vmov", ".f32\t$Sd, $imm", "", [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { bits<5> Sd; bits<8> imm; @@ -2635,7 +2643,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm), VFPMiscFrm, IIC_fpUNA16, - "vmov", ".f16\t$Sd, $imm", + "vmov", ".f16\t$Sd, $imm", "", [(set (f16 HPR:$Sd), vfp_f16imm:$imm)]>, Requires<[HasFullFP16]> { bits<5> Sd; diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll index 4f606407a3fef1..32bc6bc67d9556 100644 --- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll +++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll @@ -1418,12 +1418,11 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r1, d0 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s5 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d1 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s14, s14 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8 ; CHECK-FIX-NOSCHED-NEXT: vmov s1, r1 ; CHECK-FIX-NOSCHED-NEXT: lsr r1, r1, #16 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s1 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s1 ; CHECK-FIX-NOSCHED-NEXT: vmov s1, r0 ; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16 ; CHECK-FIX-NOSCHED-NEXT: vmov s13, r1 @@ -1436,31 +1435,32 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED-NEXT: 
vmov s2, r3 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s15 -; CHECK-FIX-NOSCHED-NEXT: vmov s9, r3 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 +; CHECK-FIX-NOSCHED-NEXT: vmov s3, r3 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s5 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s9 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s2 ; CHECK-FIX-NOSCHED-NEXT: vmov s0, r7 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-FIX-NOSCHED-NEXT: vmov s1, r7 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r1, s11 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r1, s12 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s3 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, s14 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov r1, s12 +; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov r1, s5 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1 @@ -1469,7 +1469,7 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: 
vmov r1, s2 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s3 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s2 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s4 @@ -1746,13 +1746,12 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r2, d2 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s1 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d3 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s14, s14 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0 ; CHECK-FIX-NOSCHED-NEXT: vmov s5, r2 ; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s5 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s5 ; CHECK-FIX-NOSCHED-NEXT: vmov s5, r0 ; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16 ; CHECK-FIX-NOSCHED-NEXT: vmov s13, r2 @@ -1765,31 +1764,32 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s15 -; CHECK-FIX-NOSCHED-NEXT: vmov s9, r3 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 +; CHECK-FIX-NOSCHED-NEXT: vmov s7, r3 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s1 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s9 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s6 ; CHECK-FIX-NOSCHED-NEXT: vmov s4, r7 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4 ; CHECK-FIX-NOSCHED-NEXT: vmov s5, r7 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s4, s4 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 
s5, s5 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r2, s11 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r2, s12 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s7 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, s14 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov r2, s12 +; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov r2, s1 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r2, r2, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r2 @@ -1798,7 +1798,7 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r2, s6 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s7 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s6 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16 @@ -3788,12 +3788,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r1, d0 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s5 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d1 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s14, s14 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8 ; CHECK-FIX-NOSCHED-NEXT: vmov s1, r1 ; CHECK-FIX-NOSCHED-NEXT: lsr r1, r1, #16 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s1 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s1 ; CHECK-FIX-NOSCHED-NEXT: vmov s1, r0 ; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16 ; CHECK-FIX-NOSCHED-NEXT: 
vmov s13, r1 @@ -3806,31 +3805,32 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED-NEXT: vmov s2, r3 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s15 -; CHECK-FIX-NOSCHED-NEXT: vmov s9, r3 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 +; CHECK-FIX-NOSCHED-NEXT: vmov s3, r3 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s5 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s9 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s2 ; CHECK-FIX-NOSCHED-NEXT: vmov s0, r7 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-FIX-NOSCHED-NEXT: vmov s1, r7 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r1, s11 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r1, s12 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s3 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, s14 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov r1, s12 +; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov r1, s5 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1 @@ -3839,7 +3839,7 @@ define arm_aapcs_vfpcc void 
@aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r1, s2 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s3 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s2 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s4 @@ -4116,13 +4116,12 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r2, d2 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s1 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d3 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s14, s14 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0 ; CHECK-FIX-NOSCHED-NEXT: vmov s5, r2 ; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s5 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s5 ; CHECK-FIX-NOSCHED-NEXT: vmov s5, r0 ; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16 ; CHECK-FIX-NOSCHED-NEXT: vmov s13, r2 @@ -4135,31 +4134,32 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s15 -; CHECK-FIX-NOSCHED-NEXT: vmov s9, r3 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 +; CHECK-FIX-NOSCHED-NEXT: vmov s7, r3 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s1 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s9 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s6 ; CHECK-FIX-NOSCHED-NEXT: vmov s4, r7 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4 ; 
CHECK-FIX-NOSCHED-NEXT: vmov s5, r7 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s4, s4 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r2, s11 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r2, s12 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s7 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, s14 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov r2, s12 +; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12 ; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov r2, s1 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r2, r2, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r2 @@ -4168,7 +4168,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov r2, s6 -; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s9 +; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s7 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, s6 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16 diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll index bb853f698cdfdf..939ab71a8061c5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -804,8 +804,8 @@ define arm_aapcs_vfpcc <8 x half> @frem_f16(<8 x half> %in1, <8 x half> %in2) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush 
{d8, d9, d10, d11} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vmov q5, q0 ; CHECK-NEXT: vmov q4, q1 ; CHECK-NEXT: vcvtb.f32.f16 s0, s20 @@ -821,52 +821,52 @@ define arm_aapcs_vfpcc <8 x half> @frem_f16(<8 x half> %in1, <8 x half> %in2) { ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s24, s16 +; CHECK-NEXT: vcvtt.f16.f32 s24, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s21 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s25, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s21 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s25, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s22 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s26, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s22 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s26, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s23 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s27, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s23 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, 
q4 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vcvtt.f16.f32 s27, s0 +; CHECK-NEXT: vmov q0, q6 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: pop {r7, pc} entry: %out = frem <8 x half> %in1, %in2 diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll index b9f06644b38aad..844da3baa42ba8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -99,8 +99,8 @@ define arm_aapcs_vfpcc <8 x half> @cos_float16_t(<8 x half> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16 ; CHECK-NEXT: vmov r0, s0 @@ -111,40 +111,40 @@ define arm_aapcs_vfpcc <8 x half> @cos_float16_t(<8 x half> %src) { ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vmov s0, 
r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.cos.v8f16(<8 x half> %src) @@ -210,8 +210,8 @@ define arm_aapcs_vfpcc <8 x half> @sin_float16_t(<8 x half> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16 ; CHECK-NEXT: vmov r0, s0 @@ -222,40 +222,40 @@ define arm_aapcs_vfpcc <8 x half> @sin_float16_t(<8 x half> %src) { ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: 
bl sinf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.sin.v8f16(<8 x half> %src) @@ -321,8 +321,8 @@ define arm_aapcs_vfpcc <8 x half> @exp_float16_t(<8 x half> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16 ; CHECK-NEXT: vmov r0, s0 @@ -333,40 +333,40 @@ define arm_aapcs_vfpcc <8 x half> @exp_float16_t(<8 x half> %src) { ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; 
CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.exp.v8f16(<8 x half> %src) @@ -432,8 +432,8 @@ define arm_aapcs_vfpcc <8 x half> @exp2_float16_t(<8 x half> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16 ; CHECK-NEXT: vmov r0, s0 @@ -444,40 +444,40 @@ define arm_aapcs_vfpcc <8 x half> @exp2_float16_t(<8 x half> %src) { ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 
; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.exp2.v8f16(<8 x half> %src) @@ -543,8 +543,8 @@ define arm_aapcs_vfpcc <8 x half> @log_float16_t(<8 x half> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16 ; CHECK-NEXT: vmov r0, s0 @@ -555,40 +555,40 @@ define arm_aapcs_vfpcc <8 x half> @log_float16_t(<8 x half> %src) { ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; 
CHECK-NEXT: vcvtt.f16.f32 s22, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.log.v8f16(<8 x half> %src) @@ -654,8 +654,8 @@ define arm_aapcs_vfpcc <8 x half> @log2_float16_t(<8 x half> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16 ; CHECK-NEXT: vmov r0, s0 @@ -666,40 +666,40 @@ define arm_aapcs_vfpcc <8 x half> @log2_float16_t(<8 x half> %src) { ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vmov s0, r0 -; 
CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.log2.v8f16(<8 x half> %src) @@ -765,8 +765,8 @@ define arm_aapcs_vfpcc <8 x half> @log10_float16_t(<8 x half> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16 ; CHECK-NEXT: vmov r0, s0 @@ -777,40 +777,40 @@ define arm_aapcs_vfpcc <8 x half> @log10_float16_t(<8 x half> %src) { ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r0, s0 ; 
CHECK-NEXT: bl log10f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.log10.v8f16(<8 x half> %src) @@ -881,8 +881,8 @@ define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %s ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vmov q5, q0 ; CHECK-NEXT: vmov q4, q1 ; CHECK-NEXT: vcvtb.f32.f16 s0, s20 @@ -898,52 +898,52 @@ define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %s ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s16 -; CHECK-NEXT: vcvtt.f16.f32 s16, s0 +; CHECK-NEXT: vcvtb.f16.f32 s24, s16 +; CHECK-NEXT: vcvtt.f16.f32 s24, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s21 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s25, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s21 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s17, s0 +; CHECK-NEXT: vcvtt.f16.f32 s25, s0 ; CHECK-NEXT: 
vcvtb.f32.f16 s0, s22 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s26, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s22 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s18, s0 +; CHECK-NEXT: vcvtt.f16.f32 s26, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s23 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vcvtb.f16.f32 s27, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s23 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvtt.f16.f32 s19, s0 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vcvtt.f16.f32 s27, s0 +; CHECK-NEXT: vmov q0, q6 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: pop {r7, pc} entry: %0 = call fast <8 x half> @llvm.pow.v8f16(<8 x half> %src1, <8 x half> %src2) diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll index 29b29859e86290..c212f7c5fbe372 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll @@ -1204,7 +1204,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 ; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: csetm r2, gt @@ -1263,8 +1263,8 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: 
vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: vcmp.f32 s6, #0 +; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 -; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ -1328,7 +1328,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 ; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: csetm r2, gt @@ -1387,8 +1387,8 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: vcmp.f32 s6, #0 +; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 -; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ -1452,7 +1452,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 ; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: csetm r2, gt @@ -1519,8 +1519,8 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: vcmp.f32 s6, #0 +; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 -; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr diff --git 
a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll index 84a9e0145f0c75..ad7a09fa50acba 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -354,10 +354,10 @@ define arm_aapcs_vfpcc <8 x half> @vmovn32_trunc1(<4 x float> %src1, <4 x float> ; CHECK-MVE-LABEL: vmovn32_trunc1: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s1, s1 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s2, s2 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s3, s3 ; CHECK-MVE-NEXT: vcvtt.f16.f32 s0, s4 -; CHECK-MVE-NEXT: vcvtb.f16.f32 s4, s1 -; CHECK-MVE-NEXT: vcvtb.f16.f32 s4, s3 ; CHECK-MVE-NEXT: vcvtt.f16.f32 s1, s5 ; CHECK-MVE-NEXT: vcvtt.f16.f32 s2, s6 ; CHECK-MVE-NEXT: vcvtt.f16.f32 s3, s7 @@ -377,14 +377,15 @@ entry: define arm_aapcs_vfpcc <8 x half> @vmovn32_trunc2(<4 x float> %src1, <4 x float> %src2) { ; CHECK-MVE-LABEL: vmovn32_trunc2: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcvtt.f16.f32 s0, s0 -; CHECK-MVE-NEXT: vcvtt.f16.f32 s1, s1 -; CHECK-MVE-NEXT: vcvtt.f16.f32 s2, s2 -; CHECK-MVE-NEXT: vcvtt.f16.f32 s3, s3 -; CHECK-MVE-NEXT: vcvtb.f16.f32 s4, s4 -; CHECK-MVE-NEXT: vcvtb.f16.f32 s4, s5 -; CHECK-MVE-NEXT: vcvtb.f16.f32 s4, s6 -; CHECK-MVE-NEXT: vcvtb.f16.f32 s4, s7 +; CHECK-MVE-NEXT: vmov q2, q0 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s0, s4 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s1, s5 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s2, s6 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s3, s7 +; CHECK-MVE-NEXT: vcvtt.f16.f32 s0, s8 +; CHECK-MVE-NEXT: vcvtt.f16.f32 s1, s9 +; CHECK-MVE-NEXT: vcvtt.f16.f32 s2, s10 +; CHECK-MVE-NEXT: vcvtt.f16.f32 s3, s11 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: vmovn32_trunc2: diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll index 844e39e2964bb8..6ab9c80aba3ec9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll @@ -37,8 +37,8 @@ define arm_aapcs_vfpcc <4 x half> @fptrunc_4(<4 x float> 
%src1) { ; CHECK-LABEL: fptrunc_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s2 ; CHECK-NEXT: vcvtt.f16.f32 s0, s1 +; CHECK-NEXT: vcvtb.f16.f32 s1, s2 ; CHECK-NEXT: vcvtt.f16.f32 s1, s3 ; CHECK-NEXT: bx lr entry: @@ -50,13 +50,13 @@ define arm_aapcs_vfpcc <8 x half> @fptrunc_8(<8 x float> %src1) { ; CHECK-LABEL: fptrunc_8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-NEXT: vcvtb.f16.f32 s2, s2 -; CHECK-NEXT: vcvtb.f16.f32 s2, s4 ; CHECK-NEXT: vcvtt.f16.f32 s0, s1 +; CHECK-NEXT: vcvtb.f16.f32 s1, s2 +; CHECK-NEXT: vcvtb.f16.f32 s2, s4 ; CHECK-NEXT: vcvtt.f16.f32 s1, s3 +; CHECK-NEXT: vcvtb.f16.f32 s3, s6 ; CHECK-NEXT: vcvtt.f16.f32 s2, s5 ; CHECK-NEXT: vcvtt.f16.f32 s3, s7 -; CHECK-NEXT: vcvtb.f16.f32 s4, s6 ; CHECK-NEXT: bx lr entry: %out = fptrunc <8 x float> %src1 to <8 x half> From c85688a22ba71afcf8105ac1c79865e6a347ddac Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 25 May 2022 07:16:50 -0400 Subject: [PATCH 453/908] [gn build] (manually) port some of cd2292ef8245 --- .../clang-tools-extra/pseudo/cxx/BUILD.gn | 7 +++++++ .../clang-tools-extra/pseudo/gen/BUILD.gn | 9 +++++++++ .../clang-tools-extra/pseudo/lib/BUILD.gn | 6 +----- .../pseudo/lib/grammar/BUILD.gn | 17 +++++++++++++++++ 4 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 llvm/utils/gn/secondary/clang-tools-extra/pseudo/cxx/BUILD.gn create mode 100644 llvm/utils/gn/secondary/clang-tools-extra/pseudo/gen/BUILD.gn create mode 100644 llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/grammar/BUILD.gn diff --git a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/cxx/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/cxx/BUILD.gn new file mode 100644 index 00000000000000..c2a8d61ad5fc27 --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/cxx/BUILD.gn @@ -0,0 +1,7 @@ +# FIXME: Nothing depends on this yet. 
+static_library("cxx") { + output_name = "clangPseudoCXX" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ "//clang-tools-extra/pseudo/lib/grammar" ] + sources = [ "CXX.cpp" ] +} diff --git a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/gen/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/gen/BUILD.gn new file mode 100644 index 00000000000000..274b6537b099bc --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/gen/BUILD.gn @@ -0,0 +1,9 @@ +executable("pseudo-gen") { + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang-tools-extra/pseudo/lib/grammar", + "//llvm/lib/Support", + ] + include_dirs = [ "../include" ] + sources = [ "Main.cpp" ] +} diff --git a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn index 670b5c3263910b..04f5cbb8405913 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/BUILD.gn @@ -4,6 +4,7 @@ static_library("lib") { deps = [ "//clang/lib/Basic", "//clang/lib/Lex", + "//clang-tools-extra/pseudo/lib/grammar", "//llvm/lib/Support", ] include_dirs = [ "../include" ] @@ -12,11 +13,6 @@ static_library("lib") { "DirectiveTree.cpp", "Forest.cpp", "GLR.cpp", - "Grammar.cpp", - "GrammarBNF.cpp", - "LRGraph.cpp", - "LRTable.cpp", - "LRTableBuild.cpp", "Lex.cpp", "Token.cpp", ] diff --git a/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/grammar/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/grammar/BUILD.gn new file mode 100644 index 00000000000000..623e85f60fb595 --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/pseudo/lib/grammar/BUILD.gn @@ -0,0 +1,17 @@ +static_library("grammar") { + output_name = "clangPseudoGrammar" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang/lib/Basic", + "//llvm/lib/Support", + ] + include_dirs = [ "../../include" ] + sources = [ + "Grammar.cpp", 
+ "GrammarBNF.cpp", + "LRGraph.cpp", + "LRTable.cpp", + "LRTableBuild.cpp", + ] +} + From 512d06b04513dc8c859567e215a13ba8d312095d Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Wed, 25 May 2022 17:19:09 +0530 Subject: [PATCH 454/908] [mlir][openmp] Add check for types of operands in omp.atomic.write This patch makes sure that the address dereferences to value in omp.atomic.write operation. Reviewed By: kiranchandramohan, peixin Differential Revision: https://reviews.llvm.org/D126272 --- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 3 +++ mlir/test/Dialect/OpenMP/invalid.mlir | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3c86243fa3a83b..23a25fc4260b8d 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -838,6 +838,9 @@ LogicalResult AtomicWriteOp::verify() { "memory-order must not be acq_rel or acquire for atomic writes"); } } + if (address().getType().cast().getElementType() != + value().getType()) + return emitError("address must dereference to value type"); return verifySynchronizationHint(*this, hint_val()); } diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 3459ee795c83a7..ed5bc00cd7cbec 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -600,6 +600,14 @@ func.func @omp_atomic_write6(%addr : memref, %val : i32) { // ----- +func.func @omp_atomic_write(%addr : memref>, %val : i32) { + // expected-error @below {{address must dereference to value type}} + omp.atomic.write %addr = %val : memref>, i32 + return +} + +// ----- + func.func @omp_atomic_update1(%x: memref, %expr: f32) { // expected-error @below {{the type of the operand must be a pointer type whose element type is the same as that of the region argument}} omp.atomic.update %x : memref { From 1ad9b2662273d8c4bed9377fb3f0cba05c81b296 
Mon Sep 17 00:00:00 2001 From: lorenzo chelini Date: Wed, 25 May 2022 12:59:20 +0200 Subject: [PATCH 455/908] [MLIR][Linalg] Adjust documentation (NFC) Adjust docs for tensor.pad, tensor.collapse_shape and tensor.expand_shape. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126370 --- mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h | 4 ++-- .../lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp | 8 ++++---- mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 6 +++--- mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 188f2b436a3d3c..6d058e03fafed1 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1205,7 +1205,7 @@ struct GeneralizePadOpPattern : public OpRewritePattern { const SmallVector &dynSizes) const; }; -/// Populates `patterns` with patterns that vectorize linalg.pad_tensor. +/// Populates `patterns` with patterns that vectorize tensor.pad. /// These patterns are meant to apply in a complementary fashion. Benefits /// are used to encode a certain ordering of pattern application. To avoid /// scattering magic constants throughout the code base, the patterns must be @@ -1290,7 +1290,7 @@ LogicalResult applyStagedPatterns( const FrozenRewritePatternSet &stage2Patterns, function_ref stage3Lambda = nullptr); -/// Rewrite extract_slice(pad_tensor(x)) into pad_tensor(extract_slice(x)). +/// Rewrite extract_slice(tensor.pad(x)) into tensor.pad(extract_slice(x)). struct ExtractSliceOfPadTensorSwapPattern : public OpRewritePattern { /// A function to control pattern application and rewrite logic. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp index a24fdbe3f2512f..2d35deb6799714 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -412,7 +412,7 @@ class FuseElementwiseOps : public OpRewritePattern { /// : tensor into tensor /// /// The reshape can be folded into the `genericOp` if its loop dimensionality -/// is increased to match the result (operand) of the tensor_expand_shape. +/// is increased to match the result (operand) of the tensor.expand_shape. /// The indexing_map of the fused tensor in the `genericOp` and the /// reassociation map helps compute the indexing maps of the modified op. /// For the above example, based on the reassociation map it @@ -677,7 +677,7 @@ static void updateExpandedGenericOpRegion(PatternRewriter &rewriter, } } -/// Implements the fusion of a tensor_collapse_shape or a tensor_expand_shape op +/// Implements the fusion of a tensor.collapse_shape or a tensor.expand_shape op /// and a generic op as explained in `isFusableWithReshapeByExpansion`. Assumes /// that those conditions have been satisfied. static Optional> @@ -811,7 +811,7 @@ fuseWithReshapeByExpansion(GenericOp genericOp, Operation *reshapeOp, namespace { -/// Pattern to fuse a tensor_collapse_shape op with its consumer generic op, +/// Pattern to fuse a tensor.collapse_shape op with its consumer generic op, /// when the reshape op is collapsing dimensions. The dimensionality of the loop /// in the consumer is expanded. class FoldWithProducerReshapeOpByExpansion @@ -851,7 +851,7 @@ class FoldWithProducerReshapeOpByExpansion ControlFusionFn controlFoldingReshapes; }; -/// Pattern to fold a tensor_expand_shape op with its producer generic op +/// Pattern to fold a tensor.expand_shape op with its producer generic op /// by expanding the dimensionality of the loop in the producer op. 
struct FoldReshapeWithGenericOpByExpansion : public OpRewritePattern { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 6e1f4e27213d11..00c08af780201e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -814,7 +814,7 @@ struct VectorizePadOpUserPattern : public OpRewritePattern { /// Rewrite use of tensor::PadOp result in TransferReadOp. E.g.: /// ``` -/// %0 = linalg.pad_tensor %src ... : tensor to tensor<17x5xf32> +/// %0 = tensor.pad %src ... : tensor to tensor<17x5xf32> /// %r = vector.transfer_read %0[%c0, %c0], %cst /// {in_bounds = [true, true]} : tensor<17x5xf32>, vector<17x5xf32> /// ``` @@ -869,7 +869,7 @@ struct PadOpVectorizationWithTransferReadPattern /// ``` /// %0 = tensor.extract_slice ...[...] [%s0, %s1] [1, 1] /// : tensor<...> to tensor -/// %1 = linalg.pad_tensor %0 ... : tensor to tensor<17x5xf32> +/// %1 = tensor.pad %0 ... : tensor to tensor<17x5xf32> /// %2 = vector.transfer_write %vec, %1[...] /// : vector<17x5xf32>, tensor<17x5xf32> /// %r = tensor.extract_slice %2[0, 0] [%s0, %s1] [1, 1] @@ -1026,7 +1026,7 @@ struct PadOpVectorizationWithTransferWritePattern /// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.: /// ``` -/// %0 = linalg.pad_tensor %src ... : tensor to tensor<17x5xf32> +/// %0 = tensor.pad %src ... 
: tensor to tensor<17x5xf32> /// %r = tensor.insert_slice %0 /// into %dest[%a, %b, 0, 0] [1, 1, 17, 5] [1, 1, 1, 1] /// : tensor<17x5xf32> into tensor diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index a332402ef033d7..b82381625a7802 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -110,8 +110,8 @@ struct TestLinalgTransforms llvm::cl::init(false)}; Option testSwapSubTensorPadTensor{ *this, "test-swap-subtensor-padtensor", - llvm::cl::desc("Test rewrite of subtensor(pad_tensor) into " - "pad_tensor(subtensor)"), + llvm::cl::desc("Test rewrite of subtensor(tensor.pad) into " + "tensor.pad(subtensor)"), llvm::cl::init(false)}; Option testSplitReduction{ *this, "test-split-reduction", From f3c1d281767a5849b9f3e712e3c3406bd9410c81 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Wed, 25 May 2022 13:09:00 +0100 Subject: [PATCH 456/908] [clang-tidy] Extend SimplifyBooleanExpr demorgan support. Adds an option SimplifyDemorganRelaxed which, when enabled, will transform negated conjunctions or disjunctions when neither operand is a negation. Default value is `false`. 
Reviewed By: LegalizeAdulthood Differential Revision: https://reviews.llvm.org/D126162 --- .../readability/SimplifyBooleanExprCheck.cpp | 12 +++++++++-- .../readability/SimplifyBooleanExprCheck.h | 1 + .../readability-simplify-boolean-expr.rst | 20 ++++++++++++++++++ ...eadability-simplify-bool-expr-demorgan.cpp | 21 +++++++++++++++++++ 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp index f35f7839dc257a..8ae990a929dfdd 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -558,7 +558,8 @@ class SimplifyBooleanExprCheck::Visitor : public RecursiveASTVisitor { if (!BinaryOp || !BinaryOp->isLogicalOp() || !BinaryOp->getType()->isBooleanType()) return Base::TraverseUnaryOperator(Op); - if (checkEitherSide(BinaryOp, isUnaryLNot) || + if (Check->SimplifyDeMorganRelaxed || + checkEitherSide(BinaryOp, isUnaryLNot) || checkEitherSide(BinaryOp, [](const Expr *E) { return nestedDemorgan(E, 1); })) { if (Check->reportDeMorgan(Context, Op, BinaryOp, !IsProcessing, parent(), @@ -584,7 +585,13 @@ SimplifyBooleanExprCheck::SimplifyBooleanExprCheck(StringRef Name, ChainedConditionalReturn(Options.get("ChainedConditionalReturn", false)), ChainedConditionalAssignment( Options.get("ChainedConditionalAssignment", false)), - SimplifyDeMorgan(Options.get("SimplifyDeMorgan", true)) {} + SimplifyDeMorgan(Options.get("SimplifyDeMorgan", true)), + SimplifyDeMorganRelaxed(Options.get("SimplifyDeMorganRelaxed", false)) { + if (SimplifyDeMorganRelaxed && !SimplifyDeMorgan) + configurationDiag("%0: 'SimplifyDeMorganRelaxed' cannot be enabled " + "without 'SimplifyDeMorgan' enabled") + << Name; +} static bool containsBoolLiteral(const Expr *E) { if (!E) @@ -667,6 +674,7 @@ void 
SimplifyBooleanExprCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "ChainedConditionalAssignment", ChainedConditionalAssignment); Options.store(Opts, "SimplifyDeMorgan", SimplifyDeMorgan); + Options.store(Opts, "SimplifyDeMorganRelaxed", SimplifyDeMorganRelaxed); } void SimplifyBooleanExprCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h index 72d8f3354b890c..8da20f37b975e1 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h @@ -69,6 +69,7 @@ class SimplifyBooleanExprCheck : public ClangTidyCheck { const bool ChainedConditionalReturn; const bool ChainedConditionalAssignment; const bool SimplifyDeMorgan; + const bool SimplifyDeMorganRelaxed; }; } // namespace readability diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst index 240e7be64ad45c..f25d4c418695f5 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst @@ -96,3 +96,23 @@ Options If `true`, DeMorgan's Theorem will be applied to simplify negated conjunctions and disjunctions. Default is `true`. + +.. option:: SimplifyDeMorganRelaxed + + If `true`, :option:`SimplifyDeMorgan` will also transform negated + conjunctions and disjunctions where there is no negation on either operand. + Default is `false`. + + When Enabled: + + .. code-block:: + + bool X = !(A && B) + bool Y = !(A || B) + + Would be transformed to: + + .. 
code-block:: + + bool X = !A || !B + bool Y = !A && !B diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp index 12a9eb65352d86..6787273daa8c5c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-simplify-bool-expr-demorgan.cpp @@ -1,9 +1,30 @@ // RUN: %check_clang_tidy %s readability-simplify-boolean-expr %t +// Check when we can convert !(A Op B) -> !A InvOp !B. +// RUN: %check_clang_tidy -check-suffixes=",RELAXED" %s \ +// RUN: readability-simplify-boolean-expr %t -- -config="{CheckOptions: [{ \ +// RUN: key: "readability-simplify-boolean-expr.SimplifyDeMorganRelaxed", value: true}]}" -- + +// Verify warning issued when invalid options are specified. +// RUN: clang-tidy %s -checks=-*,readability-simplify-boolean-expr -config="{CheckOptions: [ \ +// RUN: {key: readability-simplify-boolean-expr.SimplifyDeMorgan, value: false}, \ +// RUN: {key: readability-simplify-boolean-expr.SimplifyDeMorganRelaxed, value: true}]}" \ +// RUN: -- 2>&1 | FileCheck %s -check-prefix=CHECK-BAD-CONFIG \ +// RUN: -implicit-check-not="{{warning|error}}:" + +// CHECK-BAD-CONFIG: warning: readability-simplify-boolean-expr: 'SimplifyDeMorganRelaxed' cannot be enabled without 'SimplifyDeMorgan' enabled void eat(bool); void foo(bool A1, bool A2, bool A3, bool A4) { bool X; + + X = !(A1 && A2); + X = !(A1 || A2); + // CHECK-MESSAGES-RELAXED: :[[@LINE-2]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-MESSAGES-RELAXED: :[[@LINE-2]]:7: warning: boolean expression can be simplified by DeMorgan's theorem + // CHECK-FIXES-RELAXED: X = !A1 || !A2; + // CHECK-FIXES-NEXT-RELAXED: X = !A1 && !A2; + X = !(!A1 || A2); X = !(A1 || !A2); X = !(!A1 || !A2); From 09ef6da8dcd848258c9f876ba9298c7db0d589a9 Mon Sep 17 00:00:00 2001 
From: Nathan James Date: Wed, 25 May 2022 13:12:20 +0100 Subject: [PATCH 457/908] [clang-tidy] Update docs for SimplifyDeMorganRelaxed Forgot to add this to f3c1d281767 --- .../docs/clang-tidy/checks/readability-simplify-boolean-expr.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst index f25d4c418695f5..18ab84b26a2595 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-simplify-boolean-expr.rst @@ -101,6 +101,7 @@ Options If `true`, :option:`SimplifyDeMorgan` will also transform negated conjunctions and disjunctions where there is no negation on either operand. + This option has no effect if :option:`SimplifyDeMorgan` is `false`. Default is `false`. When Enabled: From f96aa834d7d77bb875d5bb727b89ab4af7940a0c Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 25 May 2022 08:17:10 -0400 Subject: [PATCH 458/908] Test C DR conformance (part three of many) This adds more of the tests for the first 100 DRs in C and updates their status on the status page. 
--- clang/test/C/drs/dr094.c | 29 ++++++ clang/test/C/drs/dr0xx.c | 175 +++++++++++++++++++++++++++++++++++++ clang/www/c_dr_status.html | 52 ++++++----- 3 files changed, 232 insertions(+), 24 deletions(-) create mode 100644 clang/test/C/drs/dr094.c diff --git a/clang/test/C/drs/dr094.c b/clang/test/C/drs/dr094.c new file mode 100644 index 00000000000000..132193ba05cbd5 --- /dev/null +++ b/clang/test/C/drs/dr094.c @@ -0,0 +1,29 @@ +/* RUN: %clang_cc1 -std=c89 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c99 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c11 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c17 -emit-llvm -o - %s | FileCheck %s + RUN: %clang_cc1 -std=c2x -emit-llvm -o - %s | FileCheck %s + */ + +/* WG14 DR094: yes + * Are constraints on function return the same as assignment? + */ + +float func(void) { return 1.0f; } +void other_func(void) { + int i; + float f; + + /* Test that there's been a conversion from float to int. */ + i = func(); + // CHECK: %call = call float @func() + // CHECK-NEXT: %conv = fptosi float %call to i32 + // CHECK-NEXT: store i32 %conv, ptr %i, align 4 + + /* Test that the conversion looks the same as an assignment. 
*/ + i = f; + // CHECK: %0 = load float, ptr %f, align 4 + // CHECK-NEXT: %conv1 = fptosi float %0 to i32 + // CHECK-NEXT: store i32 %conv1, ptr %i, align 4 +} + diff --git a/clang/test/C/drs/dr0xx.c b/clang/test/C/drs/dr0xx.c index b0b57fa2891497..22b38de1a88763 100644 --- a/clang/test/C/drs/dr0xx.c +++ b/clang/test/C/drs/dr0xx.c @@ -57,6 +57,27 @@ * * WG14 DR067: yes * Integer and integral type confusion + * + * WG14 DR069: yes + * Questions about the representation of integer types + * + * WG14 DR077: yes + * Stability of addresses + * + * WG14 DR080: yes + * Merging of string constants + * + * WG14 DR086: yes + * Object-like macros in system headers + * + * WG14 DR091: yes + * Multibyte encodings + * + * WG14 DR092: dup 060 + * Partial initialization of strings + * + * WG14 DR093: yes + * Reservation of identifiers */ @@ -238,6 +259,17 @@ _Static_assert(DR038(DR038_X + DR038_Y) == DR038_X + DR038_Y, "fail"); */ _Static_assert(sizeof('a') == sizeof(int), "fail"); +/* WG14 DR040: partial + * 9 unrelated questions about C89 + * + * Question 6 + */ +struct dr040 { /* expected-note {{definition of 'struct dr040' is not complete until the closing '}'}} */ + char c; + short s; + int i[__builtin_offsetof(struct dr040, s)]; /* expected-error {{offsetof of incomplete type 'struct dr040'}} */ +}; + /* WG14 DR043: yes * On the definition of the NULL macro */ @@ -345,3 +377,146 @@ void dr068(void) { _Static_assert('\xFF' == 0xFF, "fail"); #endif } + +#if __STDC_VERSION__ < 202000L +/* WG14: DR070: yes + * Interchangeability of function arguments + * + * Note: we could issue a pedantic warning in this case. We are claiming + * conformance not because we diagnose the UB when we could but because we're + * not obligated to do anything about it and we make it "just work" via the + * usual conversion rules. + * + * This behavior is specific to functions without prototypes. 
A function with + * a prototype causes implicit conversions rather than relying on default + * argument promotion and warm thoughts. + */ +void dr070_1(c) /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} */ + int c; { +} + +void dr070_2(void) { + dr070_1(6); + dr070_1(6U); /* Pedantically UB */ +} +#endif /* __STDC_VERSION__ < 202000L */ + +/* WG14 DR071: yes + * Enumerated types + */ +enum dr071_t { foo_A = 0, foo_B = 1, foo_C = 8 }; +void dr071(void) { + /* Test that in-range values not present in the enumeration still round-trip + * to the original value. + */ + _Static_assert(100 == (int)(enum dr071_t)100, "fail"); +} + +/* WG14 DR081: yes + * Left shift operator + */ +void dr081(void) { + /* Demonstrate that we don't crash when left shifting a signed value; that's + * implementation defined behavior. + */ + _Static_assert(-1 << 1 == -2, "fail"); /* Didn't shift a zero into the "sign bit". */ + _Static_assert(1 << 3 == 1u << 3u, "fail"); /* Shift of a positive signed value does sensible things. */ +} + +/* WG14 DR084: yes + * Incomplete type in function declaration + * + * Note: because the situation is UB, we're free to do what we want. We elect + * to accept and require the incomplete type to be completed before the + * function definition. + */ +struct dr084_t; /* expected-note {{forward declaration of 'struct dr084_t'}} */ +extern void (*dr084_1)(struct dr084_t); +void dr084_2(struct dr084_t); +void dr084_2(struct dr084_t val) {} /* expected-error {{variable has incomplete type 'struct dr084_t'}} */ + +/* WG14 DR088: yes + * Compatibility of incomplete types + */ +struct dr088_t_1; + +void dr088_f(struct dr088_t_1 *); /* expected-note {{passing argument to parameter here}} */ +void dr088_1(void) { + /* Distinct type from the file scope forward declaration. */ + struct dr088_t_1; + /* FIXME: this diagnostic could be improved to not be utterly baffling. 
*/ + dr088_f((struct dr088_t_1 *)0); /* expected-warning {{incompatible pointer types passing 'struct dr088_t_1 *' to parameter of type 'struct dr088_t_1 *'}} */ +} + +void dr088_2(struct dr088_t_1 *p) { /* Pointer to incomplete type. */ } +struct dr088_t_1 { int i; }; /* Type is completed. */ +void dr088_3(struct dr088_t_1 s) { + /* When passing a pointer to the completed type, is it the same type as the + * incomplete type used in the call declaration? + */ + dr088_2(&s); +} + +/* WG14 DR089: yes + * Multiple definitions of macros + */ +#define DR089 object_like /* expected-note {{previous definition is here}} */ +#define DR089(argument) function_like /* expected-warning {{'DR089' macro redefined}} */ + +/* WG14 DR095: yes + * Is initialization as constrained as assignment? + */ +void dr095(void) { + /* Ensure that type compatibility constraints on assignment are also honored + * for initializations. + */ + struct One { + int a; + } one; + struct Two { + float f; + } two = one; /* expected-error {{initializing 'struct Two' with an expression of incompatible type 'struct One'}} */ + + two = one; /* expected-error {{assigning to 'struct Two' from incompatible type 'struct One'}} */ +} + +/* WG14 DR096: yes + * Arrays of incomplete types + */ +void dr096(void) { + typedef void func_type(void); + func_type array_funcs[10]; /* expected-error {{'array_funcs' declared as array of functions of type 'func_type' (aka 'void (void)')}} */ + + void array_void[10]; /* expected-error {{array has incomplete element type 'void'}} */ + + struct S; /* expected-note {{forward declaration of 'struct S'}} */ + struct S s[10]; /* expected-error {{array has incomplete element type 'struct S'}} */ + + union U; /* expected-note {{forward declaration of 'union U'}} */ + union U u[10]; /* expected-error {{array has incomplete element type 'union U'}} */ + union U { int i; }; + + int never_completed_incomplete_array[][]; /* expected-error {{array has incomplete element type 'int[]'}} */ + + 
extern int completed_later[][]; /* expected-error {{array has incomplete element type 'int[]'}} */ + extern int completed_later[10][10]; +} + +/* WG14 DR098: yes + * Pre/post increment/decrement of function or incomplete types + */ +void dr098(void) { + typedef void func_type(void); + func_type fp; + struct incomplete *incomplete_ptr; + + ++fp; /* expected-error {{cannot increment value of type 'func_type' (aka 'void (void)')}} */ + fp++; /* expected-error {{cannot increment value of type 'func_type' (aka 'void (void)')}} */ + --fp; /* expected-error {{cannot decrement value of type 'func_type' (aka 'void (void)')}} */ + fp--; /* expected-error {{cannot decrement value of type 'func_type' (aka 'void (void)')}} */ + + (*incomplete_ptr)++; /* expected-error {{cannot increment value of type 'struct incomplete'}} */ + ++(*incomplete_ptr); /* expected-error {{cannot increment value of type 'struct incomplete'}} */ + (*incomplete_ptr)--; /* expected-error {{cannot decrement value of type 'struct incomplete'}} */ + --(*incomplete_ptr); /* expected-error {{cannot decrement value of type 'struct incomplete'}} */ +} diff --git a/clang/www/c_dr_status.html b/clang/www/c_dr_status.html index 49783ed4b4eac3..d0ae947f4189ff 100644 --- a/clang/www/c_dr_status.html +++ b/clang/www/c_dr_status.html @@ -291,7 +291,11 @@

C defect report implementation status

40 NAD 9 unrelated questions about C89 - Unknown + +
Partial + Question 6 has full support, the rest of the questions are currently unknown. +
+ 41 @@ -465,19 +469,19 @@

C defect report implementation status

69 NAD Questions about the representation of integer types - Unknown + Yes 70 NAD Interchangeability of function arguments - Unknown + Yes 71 C89 Enumerated types - Unknown + Yes 72 @@ -513,7 +517,7 @@

C defect report implementation status

77 NAD Stability of addresses - Unknown + Yes 78 @@ -531,13 +535,13 @@

C defect report implementation status

80 C89 Merging of string constants - Unknown + Yes 81 NAD Left shift operator - Unknown + Yes 82 @@ -555,7 +559,7 @@

C defect report implementation status

84 NAD Incomplete type in function declaration - Unknown + Yes 85 @@ -567,7 +571,7 @@

C defect report implementation status

86 NAD Object-like macros in system headers - Unknown + Yes 87 @@ -579,13 +583,13 @@

C defect report implementation status

88 NAD Compatibility of incomplete types - Unknown + Yes 89 C89 Multiple definitions of macros - Unknown + Yes 90 @@ -597,49 +601,49 @@

C defect report implementation status

91 NAD Multibyte encodings - Unknown + Yes 92 Dup Partial initialization of strings - Duplicate of 60 + Duplicate of 60 93 C89 Reservation of identifiers - Unknown + Yes 94 NAD - ANSI/ISO C Defect report #rfg1 - Unknown + Are constraints on function return the same as assignment? + Yes 95 NAD - ANSI/ISO C Defect report #rfg2 - Unknown + Is initialization as constrained as assignment? + Yes 96 NAD - ANSI/ISO C Defect report #rfg3 - Unknown + Arrays of incomplete types + Yes 97 Dup - ANSI/ISO C Defect report #rfg4 - Duplicate of 40 + Use of offsetof with an incomplete type + Duplicate of 40 98 NAD - ANSI/ISO C Defect report #rfg5 - Unknown + Pre/post increment/decrement of function or incomplete types + Yes 99 From d5ebba2aa68347eca8f3b920a61f4810d89c0f68 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 25 May 2022 08:13:43 -0400 Subject: [PATCH 459/908] [x86] add test with volatile load; NFC Test for D126353 --- llvm/test/CodeGen/X86/extractelement-load.ll | 35 ++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll index 78ad6d95963b50..5e1ff78a31c109 100644 --- a/llvm/test/CodeGen/X86/extractelement-load.ll +++ b/llvm/test/CodeGen/X86/extractelement-load.ll @@ -338,6 +338,41 @@ define i32 @multi_use_load_scalarization(<4 x i32>* %p) nounwind { ret i32 %r } +define i32 @multi_use_volatile_load_scalarization(<4 x i32>* %p) nounwind { +; X32-SSE2-LABEL: multi_use_volatile_load_scalarization: +; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-SSE2-NEXT: movl (%ecx), %eax +; X32-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; X32-SSE2-NEXT: psubd %xmm1, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, (%ecx) +; X32-SSE2-NEXT: retl +; +; X64-SSSE3-LABEL: multi_use_volatile_load_scalarization: +; X64-SSSE3: # %bb.0: +; X64-SSSE3-NEXT: movl (%rdi), %eax +; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0 +; X64-SSSE3-NEXT: pcmpeqd %xmm1, 
%xmm1 +; X64-SSSE3-NEXT: psubd %xmm1, %xmm0 +; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi) +; X64-SSSE3-NEXT: retq +; +; X64-AVX-LABEL: multi_use_volatile_load_scalarization: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movl (%rdi), %eax +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovdqa %xmm0, (%rdi) +; X64-AVX-NEXT: retq + %v = load volatile <4 x i32>, <4 x i32>* %p, align 1 + %v1 = add <4 x i32> %v, + store <4 x i32> %v1, <4 x i32>* %p + %r = extractelement <4 x i32> %v, i64 0 + ret i32 %r +} + ; This test is reduced from a C source example that showed a miscompile: ; https://github.com/llvm/llvm-project/issues/53695 ; The scalarized loads from 'zero' in the AVX asm must occur before From fae6bd7563ca54139d6f7f66683c12b88783501f Mon Sep 17 00:00:00 2001 From: Vy Nguyen Date: Fri, 20 May 2022 14:45:26 +0700 Subject: [PATCH 460/908] [lld-macho] Support -non_global_symbols_strip_list, -non_global_symbols_no_strip_list, -x PR/55600 Differential Revision: https://reviews.llvm.org/D126046 --- lld/MachO/Config.h | 10 ++ lld/MachO/Driver.cpp | 79 ++++++++++++--- lld/MachO/Options.td | 3 - lld/MachO/SyntheticSections.cpp | 45 +++++++-- lld/test/MachO/local-symbol-output.s | 141 +++++++++++++++++++++++++++ 5 files changed, 249 insertions(+), 29 deletions(-) create mode 100644 lld/test/MachO/local-symbol-output.s diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 46b3dca0c03ddc..de64dc47591d11 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -94,6 +94,13 @@ class SymbolPatterns { bool match(llvm::StringRef symbolName) const; }; +enum class SymtabPresence { + All, + None, + SelectivelyIncluded, + SelectivelyExcluded, +}; + struct Configuration { Symbol *entry = nullptr; bool hasReexports = false; @@ -179,6 +186,9 @@ struct Configuration { SymbolPatterns unexportedSymbols; SymbolPatterns whyLive; + SymtabPresence localSymbolsPresence = SymtabPresence::All; + 
SymbolPatterns localSymbolPatterns; + bool zeroModTime = false; llvm::StringRef osoPrefix; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index bfbfb26bb6b549..c04c4fb1899bb2 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -938,26 +938,29 @@ bool SymbolPatterns::match(StringRef symbolName) const { return matchLiteral(symbolName) || matchGlob(symbolName); } +static void handleSymbolPatternsListHelper(const Arg *arg, + SymbolPatterns &symbolPatterns) { + StringRef path = arg->getValue(); + Optional buffer = readFile(path); + if (!buffer) { + error("Could not read symbol file: " + path); + return; + } + MemoryBufferRef mbref = *buffer; + for (StringRef line : args::getLines(mbref)) { + line = line.take_until([](char c) { return c == '#'; }).trim(); + if (!line.empty()) + symbolPatterns.insert(line); + } +} static void handleSymbolPatterns(InputArgList &args, SymbolPatterns &symbolPatterns, unsigned singleOptionCode, unsigned listFileOptionCode) { for (const Arg *arg : args.filtered(singleOptionCode)) symbolPatterns.insert(arg->getValue()); - for (const Arg *arg : args.filtered(listFileOptionCode)) { - StringRef path = arg->getValue(); - Optional buffer = readFile(path); - if (!buffer) { - error("Could not read symbol file: " + path); - continue; - } - MemoryBufferRef mbref = *buffer; - for (StringRef line : args::getLines(mbref)) { - line = line.take_until([](char c) { return c == '#'; }).trim(); - if (!line.empty()) - symbolPatterns.insert(line); - } - } + for (const Arg *arg : args.filtered(listFileOptionCode)) + handleSymbolPatternsListHelper(arg, symbolPatterns); } static void createFiles(const InputArgList &args) { @@ -1406,9 +1409,53 @@ bool macho::link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, ">>> ignoring unexports"); config->unexportedSymbols.clear(); } + + // Imitating LD64's: + // -non_global_symbols_no_strip_list and -non_global_symbols_strip_list can't + // both be present. 
+ // But -x can be used with either of these two, in which case, the last arg + // takes effect. + // (TODO: This is kind of confusing - considering disallowing using them + // together for a more straightforward behaviour) + { + bool includeLocal = false; + bool excludeLocal = false; + for (const Arg *arg : + args.filtered(OPT_x, OPT_non_global_symbols_no_strip_list, + OPT_non_global_symbols_strip_list)) { + switch (arg->getOption().getID()) { + case OPT_x: + config->localSymbolsPresence = SymtabPresence::None; + + break; + case OPT_non_global_symbols_no_strip_list: + if (excludeLocal) { + error("cannot use both -non_global_symbols_no_strip_list and " + "-non_global_symbols_strip_list"); + } else { + includeLocal = true; + config->localSymbolsPresence = SymtabPresence::SelectivelyIncluded; + handleSymbolPatternsListHelper(arg, config->localSymbolPatterns); + } + break; + case OPT_non_global_symbols_strip_list: + if (includeLocal) { + error("cannot use both -non_global_symbols_no_strip_list and " + "-non_global_symbols_strip_list"); + } else { + excludeLocal = true; + config->localSymbolsPresence = SymtabPresence::SelectivelyExcluded; + handleSymbolPatternsListHelper(arg, config->localSymbolPatterns); + } + break; + default: + llvm_unreachable("unexpected option"); + } + } + } // Explicitly-exported literal symbols must be defined, but might - // languish in an archive if unreferenced elsewhere. Light a fire - // under those lazy symbols! + // languish in an archive if unreferenced elsewhere or if they are in the + // non-global strip list. Light a fire under those lazy symbols! 
for (const CachedHashStringRef &cachedName : config->exportedSymbols.literals) symtab->addUndefined(cachedName.val(), /*file=*/nullptr, /*isWeakRef=*/false); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 5678ae567f935e..21d9dc00b5bb9e 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -581,17 +581,14 @@ def S : Flag<["-"], "S">, Group; def x : Flag<["-"], "x">, HelpText<"Exclude non-global symbols from the output symbol table">, - Flags<[HelpHidden]>, Group; def non_global_symbols_strip_list : Separate<["-"], "non_global_symbols_strip_list">, MetaVarName<"">, HelpText<"Specify in the non-global symbols that should be removed from the output symbol table">, - Flags<[HelpHidden]>, Group; def non_global_symbols_no_strip_list : Separate<["-"], "non_global_symbols_no_strip_list">, MetaVarName<"">, HelpText<"Specify in the non-global symbols that should remain in the output symbol table">, - Flags<[HelpHidden]>, Group; def oso_prefix : Separate<["-"], "oso_prefix">, MetaVarName<"">, diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 61597ccf9ebf58..3a5e9daad72913 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -963,16 +963,41 @@ void SymtabSection::finalizeContents() { symbols.push_back({sym, strx}); }; + std::function localSymbolsHandler; + switch (config->localSymbolsPresence) { + case SymtabPresence::All: + localSymbolsHandler = [&](Symbol *sym) { addSymbol(localSymbols, sym); }; + break; + case SymtabPresence::None: + localSymbolsHandler = [&](Symbol *) { /* Do nothing*/ }; + break; + case SymtabPresence::SelectivelyIncluded: + localSymbolsHandler = [&](Symbol *sym) { + if (config->localSymbolPatterns.match(sym->getName())) + addSymbol(localSymbols, sym); + }; + break; + case SymtabPresence::SelectivelyExcluded: + localSymbolsHandler = [&](Symbol *sym) { + if (!config->localSymbolPatterns.match(sym->getName())) + addSymbol(localSymbols, sym); + }; + break; + } + // 
Local symbols aren't in the SymbolTable, so we walk the list of object // files to gather them. - for (const InputFile *file : inputFiles) { - if (auto *objFile = dyn_cast(file)) { - for (Symbol *sym : objFile->symbols) { - if (auto *defined = dyn_cast_or_null(sym)) { - if (defined->isExternal() || !defined->isLive() || - !defined->includeInSymtab) - continue; - addSymbol(localSymbols, sym); + // But if `-x` is set, then we don't need to. + if (config->localSymbolsPresence != SymtabPresence::None) { + for (const InputFile *file : inputFiles) { + if (auto *objFile = dyn_cast(file)) { + for (Symbol *sym : objFile->symbols) { + if (auto *defined = dyn_cast_or_null(sym)) { + if (defined->isExternal() || !defined->isLive() || + !defined->includeInSymtab) + continue; + localSymbolsHandler(sym); + } } } } @@ -981,7 +1006,7 @@ void SymtabSection::finalizeContents() { // __dyld_private is a local symbol too. It's linker-created and doesn't // exist in any object file. if (Defined *dyldPrivate = in.stubHelper->dyldPrivate) - addSymbol(localSymbols, dyldPrivate); + localSymbolsHandler(dyldPrivate); for (Symbol *sym : symtab->getSymbols()) { if (!sym->isLive()) @@ -991,7 +1016,7 @@ void SymtabSection::finalizeContents() { continue; assert(defined->isExternal()); if (defined->privateExtern) - addSymbol(localSymbols, defined); + localSymbolsHandler(defined); else addSymbol(externalSymbols, defined); } else if (auto *dysym = dyn_cast(sym)) { diff --git a/lld/test/MachO/local-symbol-output.s b/lld/test/MachO/local-symbol-output.s new file mode 100644 index 00000000000000..3fd9458c00a54b --- /dev/null +++ b/lld/test/MachO/local-symbol-output.s @@ -0,0 +1,141 @@ +# REQUIRES: x86 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/main.s -o %t/main.o + +## Check that -non_global_symbols_no_strip_list and -non_global_symbols_strip_list +## can't be used at the same time. 
+# RUN: not %lld %t/main.o -o /dev/null \ +# RUN: -non_global_symbols_no_strip_list %t/foo.txt \ +# RUN: -non_global_symbols_strip_list %t/foo.txt 2>&1 | \ +# RUN: FileCheck --check-prefix=CONFLICT %s + +# CONFLICT: error: cannot use both -non_global_symbols_no_strip_list and -non_global_symbols_strip_list + +## Check that -x causes none of the local symbols to be emitted. +# RUN: %lld %t/main.o -x -o %t/no_local.out +# RUN: llvm-nm %t/no_local.out | FileCheck --check-prefix NO_LOCAL %s + +# NO_LOCAL-NOT: t _foo +# NO_LOCAL-NOT: t _bar +# NO_LOCAL-NOT: t _baz +# NO_LOCAL: T _main + +## Check that when using -x with -non_global_symbols_no_strip_list, whichever appears +## last in the command line arg list will take precedence. +# RUN: %lld %t/main.o -x -non_global_symbols_no_strip_list %t/foo.txt -o %t/x_then_no_strip.out +# RUN: llvm-nm %t/x_then_no_strip.out | FileCheck --check-prefix X-NO-STRIP %s + +# RUN: %lld %t/main.o -non_global_symbols_no_strip_list %t/foo.txt -x -o %t/no_strip_then_x.out +# RUN: llvm-nm %t/no_strip_then_x.out | FileCheck --check-prefix NO_LOCAL %s + +# X-NO-STRIP-NOT: t _bar +# X-NO-STRIP-DAG: t _foo +# X-NO-STRIP-DAG: T _main + +## Check that -non_global_symbols_no_strip_list can be specified more than once +## (The final no-strip list is the union of all these) +# RUN: %lld %t/main.o -o %t/no_strip_multi.out \ +# RUN: -non_global_symbols_no_strip_list %t/foo.txt \ +# RUN: -non_global_symbols_no_strip_list %t/bar.txt +# RUN: llvm-nm %t/no_strip_multi.out | FileCheck --check-prefix NO-STRIP-MULTI %s + +# NO-STRIP-MULTI-NOT: t _baz +# NO-STRIP-MULTI-DAG: t _foo +# NO-STRIP-MULTI-DAG: t _bar +# NO-STRIP-MULTI-DAG: T _main + +## Check that when using -x with -non_global_symbols_strip_list, whichever appears +## last in the command line arg list will take precedence. 
+# RUN: %lld %t/main.o -x -non_global_symbols_strip_list %t/foo.txt -o %t/x_then_strip.out +# RUN: llvm-nm %t/x_then_strip.out | FileCheck --check-prefix X-STRIP %s + +# RUN: %lld %t/main.o -non_global_symbols_strip_list %t/foo.txt -x -o %t/strip_then_x.out +# RUN: llvm-nm %t/no_strip_then_x.out | FileCheck --check-prefix NO_LOCAL %s + +# X-STRIP-NOT: t _foo +# X-STRIP-DAG: t _bar +# X-STRIP-DAG: t _baz +# X-STRIP-DAG: T _main + +## Check that -non_global_symbols_strip_list can be specified more than once +## (The final strip list is the union of all these) +# RUN: %lld %t/main.o -o %t/strip_multi.out \ +# RUN: -non_global_symbols_strip_list %t/foo.txt \ +# RUN: -non_global_symbols_strip_list %t/bar.txt +# RUN: llvm-nm %t/strip_multi.out | FileCheck --check-prefix STRIP-MULTI %s + +# STRIP-MULTI-NOT: t _foo +# STRIP-MULTI-NOT: t _bar +# STRIP-MULTI-DAG: t _baz +# STRIP-MULTI-DAG: T _main + +## Test interactions with exported_symbol. +# RUN: %lld %t/main.o -o %t/strip_all_export_one.out \ +# RUN: -x -exported_symbol _foo \ +# RUN: -undefined dynamic_lookup +# RUN: llvm-nm %t/strip_all_export_one.out | FileCheck --check-prefix STRIP-EXP %s + +# STRIP-EXP: U _foo +# STRIP-EXP-EMPTY: + +## Test interactions of -x and -non_global_symbols_strip_list with unexported_symbol. 
+# RUN: %lld %t/main.o -o %t/strip_x_unexport_one.out \ +# RUN: -x -unexported_symbol _globby \ +# RUN: -undefined dynamic_lookup + +# RUN: %lld %t/main.o -o %t/strip_all_unexport_one.out \ +# RUN: -non_global_symbols_strip_list %t/globby.txt \ +# RUN: -non_global_symbols_strip_list %t/foo.txt \ +# RUN: -non_global_symbols_strip_list %t/bar.txt \ +# RUN: -unexported_symbol _globby \ +# RUN: -undefined dynamic_lookup + +# RUN: llvm-nm %t/strip_x_unexport_one.out | FileCheck --check-prefix STRIP-UNEXP %s +# RUN: llvm-nm %t/strip_all_unexport_one.out | FileCheck --check-prefix STRIP-UNEXP %s + +## -unexported_symbol made _globby a local, therefore it should be stripped by -x too +# STRIP-UNEXP: T __mh_execute_header +# STRIP-UNEXP-DAG: T _main +# STRIP-UNEXP-EMPTY: + +## Test interactions of -non_global_symbols_no_strip_list and unexported_symbol. +# RUN: %lld %t/main.o -undefined dynamic_lookup -o %t/no_strip_unexport.out \ +# RUN: -non_global_symbols_no_strip_list %t/globby.txt \ +# RUN: -unexported_symbol _globby + +# RUN: llvm-nm %t/no_strip_unexport.out | FileCheck --check-prefix NOSTRIP-UNEXP %s + +# NOSTRIP-UNEXP: T __mh_execute_header +# NOSTRIP-UNEXP-DAG: T _main +# NOSTRIP-UNEXP-DAG: t _globby +# NOSTRIP-UNEXP-EMPTY: + +#--- foo.txt +_foo + +#--- bar.txt +_bar + +#--- globby.txt +_globby + +#--- main.s +.globl _main +.globl _globby + +_foo: + ret + +_bar: + ret + +_baz: + ret + +_main: + callq _foo + ret + + _globby: + ret \ No newline at end of file From 58b76492c1fea6b6c7ce2b3e7b9fb0d5ee00fe9d Mon Sep 17 00:00:00 2001 From: zhongyunde Date: Wed, 25 May 2022 20:37:32 +0800 Subject: [PATCH 461/908] [tests] precommit tests for D126040 Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D126356 --- .../Transforms/InstCombine/mul-masked-bits.ll | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll index 
ead179f855e0c5..553788561f759e 100644 --- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll +++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll @@ -163,3 +163,60 @@ define i33 @squared_demanded_3_low_bits(i33 %x) { %and = and i33 %mul, 7 ret i33 %and } + +; Instcombine should be able to simplify mul operator. + +; Scalar tests +define i64 @scalar_mul_bit_x0_y0(i64 %x, i64 %y) { +; CHECK-LABEL: @scalar_mul_bit_x0_y0( +; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1 +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 1 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[AND1]], [[AND2]] +; CHECK-NEXT: ret i64 [[MUL]] +; + %and1 = and i64 %x, 1 + %and2 = and i64 %y, 1 + %mul = mul i64 %and1, %and2 + ret i64 %mul +} + +; Negative test +define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) { +; CHECK-LABEL: @scalar_mul_bit_x0_y1( +; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1 +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 2 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[AND1]], [[AND2]] +; CHECK-NEXT: ret i64 [[MUL]] +; + %and1 = and i64 %x, 1 + %and2 = and i64 %y, 2 + %mul = mul i64 %and1, %and2 + ret i64 %mul +} + +define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) { +; CHECK-LABEL: @scalar_mul_bit_x0_yC( +; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1 +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[AND1]], [[AND2]] +; CHECK-NEXT: ret i64 [[MUL]] +; + %and1 = and i64 %x, 1 + %and2 = and i64 %y, %c + %mul = mul i64 %and1, %and2 + ret i64 %mul +} + +; Vector tests +define <2 x i64> @vector_mul_bit_x0_y0(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @vector_mul_bit_x0_y0( +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <2 x i64> [[AND1]], [[AND2]] +; CHECK-NEXT: ret <2 x i64> [[MUL]] +; + %and1 = and <2 x i64> %x, + %and2 = and <2 x i64> %y, + %mul = mul <2 x i64> %and1, %and2 + ret <2 x i64> %mul +} 
From b3c856d10ceeb0b6c8f2a44838bbad187cbfaa65 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Wed, 25 May 2022 13:57:52 +0100 Subject: [PATCH 462/908] [MLIR][NFC] Fix the Conversion/MemRefToSPIRV/alloc.mlir test. Caught with D125604. Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D126292 --- mlir/test/Conversion/MemRefToSPIRV/alloc.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir b/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir index 434eec1aaf30ce..498b0f99776479 100644 --- a/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir +++ b/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir @@ -41,7 +41,7 @@ module attributes { // CHECK: spv.GlobalVariable @__workgroup_mem__{{[0-9]+}} // CHECK-SAME: !spv.ptr)>, Workgroup> -// CHECK_LABEL: spv.func @alloc_dealloc_workgroup_mem +// CHECK: func @alloc_dealloc_workgroup_mem // CHECK: %[[VAR:.+]] = spv.mlir.addressof @__workgroup_mem__0 // CHECK: %[[LOC:.+]] = spv.SDiv // CHECK: %[[PTR:.+]] = spv.AccessChain %[[VAR]][%{{.+}}, %[[LOC]]] From 788463e72af847d11476793fdd5d57a104b18b3d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 25 May 2022 08:45:41 -0400 Subject: [PATCH 463/908] [pseudo-gen] Add -o flag, make --grammar required Virtually all LLVM tools accept a `-o` flag, so add one. This will make it possible to possibly add a --write-if-changed flag later. It also makes it so that the file isn't partially written if the tool doesn't run successfully. Marking --grammar as `Required` allows removing some manual verification code for it. 
Differential Revision: https://reviews.llvm.org/D126373 --- clang-tools-extra/pseudo/gen/Main.cpp | 33 +++++++++++++------ .../pseudo/include/CMakeLists.txt | 4 +-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp index 535f863268df13..47ba7f2e71b542 100644 --- a/clang-tools-extra/pseudo/gen/Main.cpp +++ b/clang-tools-extra/pseudo/gen/Main.cpp @@ -15,13 +15,17 @@ #include "clang-pseudo/Grammar.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/ToolOutputFile.h" #include using llvm::cl::desc; using llvm::cl::init; using llvm::cl::opt; +using llvm::cl::Required; +using llvm::cl::value_desc; using llvm::cl::values; namespace { @@ -31,13 +35,17 @@ enum EmitType { }; opt Grammar("grammar", desc("Parse a BNF grammar file."), - init("")); + Required); opt Emit(desc("which information to emit:"), values(clEnumValN(EmitSymbolList, "emit-symbol-list", "Print nonterminal symbols (default)"), clEnumValN(EmitGrammarContent, "emit-grammar-content", "Print the BNF grammar content as a string"))); + +opt OutputFilename("o", init("-"), desc("Output"), + value_desc("file")); + std::string readOrDie(llvm::StringRef Path) { llvm::ErrorOr> Text = llvm::MemoryBuffer::getFile(Path); @@ -52,10 +60,6 @@ std::string readOrDie(llvm::StringRef Path) { int main(int argc, char *argv[]) { llvm::cl::ParseCommandLineOptions(argc, argv, ""); - if (!Grammar.getNumOccurrences()) { - llvm::errs() << "Grammar file must be provided!\n"; - return 1; - } std::string GrammarText = readOrDie(Grammar); std::vector Diags; @@ -65,25 +69,34 @@ int main(int argc, char *argv[]) { llvm::errs() << llvm::join(Diags, "\n"); return 1; } - switch (Emit) { + std::error_code EC; + llvm::ToolOutputFile Out{OutputFilename, EC, llvm::sys::fs::OF_None}; + if (EC) { + 
 llvm::errs() << EC.message() << '\n'; + return 1; + } + + switch (Emit) { case EmitSymbolList: for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size(); ++ID) { std::string Name = G->symbolName(ID).str(); // translation-unit -> translation_unit std::replace(Name.begin(), Name.end(), '-', '_'); - llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID)); + Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID); } break; case EmitGrammarContent: for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) { - llvm::outs() << '"'; - llvm::outs().write_escaped((Line + "\n").str()); - llvm::outs() << "\"\n"; + Out.os() << '"'; + Out.os().write_escaped((Line + "\n").str()); + Out.os() << "\"\n"; } break; } + Out.keep(); + return 0; } diff --git a/clang-tools-extra/pseudo/include/CMakeLists.txt b/clang-tools-extra/pseudo/include/CMakeLists.txt index e2a6f0efc0a32c..a79dd0dab81840 100644 --- a/clang-tools-extra/pseudo/include/CMakeLists.txt +++ b/clang-tools-extra/pseudo/include/CMakeLists.txt @@ -7,7 +7,7 @@ add_custom_command(OUTPUT ${cxx_symbols_inc} COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" --grammar ${cxx_bnf} --emit-symbol-list - > ${cxx_symbols_inc} + -o ${cxx_symbols_inc} COMMENT "Generating nonterminal symbol file for cxx grammar..." DEPENDS pseudo-gen VERBATIM) @@ -17,7 +17,7 @@ add_custom_command(OUTPUT ${cxx_bnf_inc} COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" --grammar ${cxx_bnf} --emit-grammar-content - > ${cxx_bnf_inc} + -o ${cxx_bnf_inc} COMMENT "Generating bnf string file for cxx grammar..." DEPENDS pseudo-gen VERBATIM) From c673d67bc76bfa31b78e3eee23899b9f262158ea Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 25 May 2022 14:58:28 +0200 Subject: [PATCH 464/908] [AST] Don't invalidate a ref-type var decl if it has no initializer. 
This would allow more AST nodes being preserved for broken code, and have a more consistent valid bit for ref-type var decl (currently, a ref-type var decl with a broken initializer is valid). Per https://reviews.llvm.org/D76831#1973053, the initializer of a variable should play no part in its "invalid" bit. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D122935 --- clang/lib/Sema/SemaDecl.cpp | 1 - clang/test/AST/ast-dump-invalid-initialized.cpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index b150a88e0aa21a..c5d4075a49e2f7 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -13199,7 +13199,6 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { if (Type->isReferenceType()) { Diag(Var->getLocation(), diag::err_reference_var_requires_init) << Var << SourceRange(Var->getLocation(), Var->getLocation()); - Var->setInvalidDecl(); return; } diff --git a/clang/test/AST/ast-dump-invalid-initialized.cpp b/clang/test/AST/ast-dump-invalid-initialized.cpp index 1b3c8581a94fbd..1c374ae716a9db 100644 --- a/clang/test/AST/ast-dump-invalid-initialized.cpp +++ b/clang/test/AST/ast-dump-invalid-initialized.cpp @@ -12,10 +12,10 @@ void test() { const A a2; // CHECK: `-VarDecl {{.*}} a3 'A' A a3 = garbage(); + // CHECK: `-VarDecl {{.*}} a4 'const A &' + const A& a4; - // CHECK: `-VarDecl {{.*}} invalid b1 'const A &' - const A& b1; // CHECK: `-VarDecl {{.*}} invalid b2 'ForwardDecl' ForwardDecl b2; // CHECK: `-VarDecl {{.*}} invalid b3 'auto' From a17fc7fd865096f4be6b8b4a7e6e0834aae6c048 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 25 May 2022 15:25:03 +0200 Subject: [PATCH 465/908] Fix unused-variable warning, NFC. 
--- clang/tools/libclang/CIndex.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index cf31524bbc4352..7bc9453793298e 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1511,8 +1511,7 @@ bool CursorVisitor::VisitTemplateParameters( } if (const auto *E = Params->getRequiresClause()) { - if (Visit(MakeCXCursor(Params->getRequiresClause(), nullptr, TU, - RegionOfInterest))) + if (Visit(MakeCXCursor(E, nullptr, TU, RegionOfInterest))) return true; } From d8afac7a2b736b367cc817242b0c4a13a3ff0051 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 24 May 2022 11:14:23 -0400 Subject: [PATCH 466/908] [libc++] Use Python subprocess instead of libc++'s own utilities Once we move off entirely from the legacy testing framework, this will allow removing a bunch of code. Differential Revision: https://reviews.llvm.org/D126303 --- libcxx/utils/libcxx/sym_check/extract.py | 12 ++++-------- libcxx/utils/libcxx/sym_check/util.py | 13 ++++++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/libcxx/utils/libcxx/sym_check/extract.py b/libcxx/utils/libcxx/sym_check/extract.py index 07299891792e31..13769ec5527458 100644 --- a/libcxx/utils/libcxx/sym_check/extract.py +++ b/libcxx/utils/libcxx/sym_check/extract.py @@ -11,10 +11,10 @@ """ import distutils.spawn import os.path -import sys import re +import subprocess +import sys -import libcxx.util from libcxx.sym_check import util extract_ignore_names = ['_init', '_fini'] @@ -51,9 +51,7 @@ def extract(self, lib): parsed symbols. """ cmd = [self.nm_exe] + self.flags + [lib] - out, _, exit_code = libcxx.util.executeCommandVerbose(cmd) - if exit_code != 0: - raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib)) + out = subprocess.check_output(cmd).decode() fmt_syms = (self._extract_sym(l) for l in out.splitlines() if l.strip()) # Cast symbol to string. 
@@ -139,9 +137,7 @@ def extract(self, lib): parsed symbols. """ cmd = [self.tool] + self.flags + [lib] - out, _, exit_code = libcxx.util.executeCommandVerbose(cmd) - if exit_code != 0: - raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib)) + out = subprocess.check_output(cmd).decode() dyn_syms = self.get_dynsym_table(out) return self.process_syms(dyn_syms) diff --git a/libcxx/utils/libcxx/sym_check/util.py b/libcxx/utils/libcxx/sym_check/util.py index 13f09d4b214f41..41c6439eca77ea 100644 --- a/libcxx/utils/libcxx/sym_check/util.py +++ b/libcxx/utils/libcxx/sym_check/util.py @@ -6,12 +6,12 @@ # #===----------------------------------------------------------------------===## +from pprint import pformat import ast import distutils.spawn -import sys import re -import libcxx.util -from pprint import pformat +import subprocess +import sys def read_syms_from_list(slist): @@ -66,11 +66,10 @@ def write_syms(sym_list, out=None, names_only=False, filter=None): def demangle_symbol(symbol): if _cppfilt_exe is None: return symbol - out, _, exit_code = libcxx.util.executeCommandVerbose( - [_cppfilt_exe], input=symbol) - if exit_code != 0: + result = subprocess.run([_cppfilt_exe], input=symbol.encode(), capture_output=True) + if result.returncode != 0: return symbol - return out + return result.stdout.decode() def is_elf(filename): From d8dda57ae7798a052e52ef5979980e815d2e4bc5 Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Wed, 25 May 2022 06:46:12 -0700 Subject: [PATCH 467/908] [PS5] Default to -fno-rtti --- clang/lib/Driver/ToolChain.cpp | 4 ++-- clang/test/Driver/rtti-options.cpp | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index efbf2442acfc49..eb3117e689d8cc 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -67,8 +67,8 @@ static ToolChain::RTTIMode CalculateRTTIMode(const ArgList &Args, return ToolChain::RM_Disabled; } - // -frtti is 
default, except for the PS4 and DriverKit. - bool NoRTTI = Triple.isPS4() || Triple.isDriverKit(); + // -frtti is default, except for the PS4/PS5 and DriverKit. + bool NoRTTI = Triple.isPS() || Triple.isDriverKit(); return NoRTTI ? ToolChain::RM_Disabled : ToolChain::RM_Enabled; } diff --git a/clang/test/Driver/rtti-options.cpp b/clang/test/Driver/rtti-options.cpp index e1d37d35bb6871..5a8a9fe43c74d0 100644 --- a/clang/test/Driver/rtti-options.cpp +++ b/clang/test/Driver/rtti-options.cpp @@ -22,6 +22,7 @@ // RUN: %clang -### -c -target x86_64-unknown-linux -fsanitize=undefined %s 2>&1 | FileCheck -check-prefix=CHECK-OK %s // RUN: %clang -### -c -target x86_64-unknown-linux -fsanitize=undefined -frtti %s 2>&1 | FileCheck -check-prefix=CHECK-OK %s // RUN: %clang -### -c -target x86_64-scei-ps4 -fsanitize=vptr %s 2>&1 | FileCheck -check-prefix=CHECK-SAN-WARN %s +// RUN: %clang -### -c -target x86_64-sie-ps5 -fsanitize=vptr %s 2>&1 | FileCheck -check-prefix=CHECK-SAN-WARN %s // Exceptions + no/default rtti // RUN: %clang -### -c -target x86_64-unknown-unknown -fcxx-exceptions -fno-rtti %s 2>&1 | FileCheck -check-prefix=CHECK-OK %s @@ -35,6 +36,7 @@ // -f{no-,}rtti/default // RUN: %clang -### -c -target x86_64-scei-ps4 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RTTI %s +// RUN: %clang -### -c -target x86_64-sie-ps5 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RTTI %s // RUN: %clang -### -c -target x86_64-unknown-unknown -frtti %s 2>&1 | FileCheck -check-prefix=CHECK-RTTI %s // RUN: %clang -### -c -target x86_64-unknown-unknown -fno-rtti %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RTTI %s // RUN: %clang -### -c -target x86_64-unknown-unknown %s 2>&1 | FileCheck -check-prefix=CHECK-RTTI %s From 6e12711081d76fea8dc2f4d9579cfdf379497bbf Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 25 May 2022 15:08:31 +0200 Subject: [PATCH 468/908] [VP][fix] Don't discard masks in reductions When expanding VP reductions to non VP-code, the reduction pass was ignoring the mask before. 
Fix this by keeping the mask and selecting neutral elements where the mask is zero. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D126362 --- llvm/lib/CodeGen/ExpandVectorPredication.cpp | 23 +++-- llvm/test/CodeGen/Generic/expand-vp.ll | 96 ++++++++++++-------- 2 files changed, 72 insertions(+), 47 deletions(-) diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 088e683efdbe96..aa52914f25b054 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -113,6 +113,17 @@ static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) { OldOp.eraseFromParent(); } +static bool maySpeculateLanes(VPIntrinsic &VPI) { + // The result of VP reductions depends on the mask and evl. + if (isa(VPI)) + return false; + // Fallback to whether the intrinsic is speculatable. + // FIXME: Check whether the replacing non-VP code will be speculatable + // instead. VP intrinsics themselves are never speculatable because of + // UB if %evl is greater than the runtime vector length. 
+ return isSafeToSpeculativelyExecute(cast(&VPI)); +} + //// } Helpers namespace { @@ -216,8 +227,7 @@ Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder, Value * CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, VPIntrinsic &VPI) { - assert((isSafeToSpeculativelyExecute(&VPI) || - VPI.canIgnoreVectorLengthParam()) && + assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && "Implicitly dropping %evl in non-speculatable operator!"); auto OC = static_cast(*VPI.getFunctionalOpcode()); @@ -296,8 +306,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, Value * CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, VPReductionIntrinsic &VPI) { - assert((isSafeToSpeculativelyExecute(&VPI) || - VPI.canIgnoreVectorLengthParam()) && + assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && "Implicitly dropping %evl in non-speculatable operator!"); Value *Mask = VPI.getMaskParam(); @@ -471,9 +480,9 @@ struct TransformJob { bool isDone() const { return Strategy.shouldDoNothing(); } }; -void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) { +void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { // Speculatable instructions do not strictly need predication. - if (isSafeToSpeculativelyExecute(&I)) { + if (maySpeculateLanes(VPI)) { // Converting a speculatable VP intrinsic means dropping %mask and %evl. // No need to expand %evl into the %mask only to ignore that code. 
if (LegalizeStrat.OpStrategy == VPLegalization::Convert) @@ -518,7 +527,7 @@ bool CachingVPExpander::expandVectorPredication() { if (!VPI) continue; auto VPStrat = getVPLegalizationStrategy(*VPI); - sanitizeStrategy(I, VPStrat); + sanitizeStrategy(*VPI, VPStrat); if (!VPStrat.shouldDoNothing()) Worklist.emplace_back(VPI, VPStrat); } diff --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll index f4dfbdd2e16c56..6c8112ea60e234 100644 --- a/llvm/test/CodeGen/Generic/expand-vp.ll +++ b/llvm/test/CodeGen/Generic/expand-vp.ll @@ -166,62 +166,70 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n ; Check that reductions use the correct neutral element for masked-off elements ; ALL-CONVERT: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { -; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m +; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> [[NEWM]], <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[ADD]]) ; ALL-CONVERT-NEXT: %{{.+}} = add i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[MUL:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[MUL]]) ; ALL-CONVERT-NEXT: %{{.+}} = mul i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[AND:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; 
ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[AND]]) ; ALL-CONVERT-NEXT: %{{.+}} = and i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[OR:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT: [[OR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[OR]]) ; ALL-CONVERT-NEXT: %{{.+}} = or i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[XOR:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT: [[XOR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[XOR]]) ; ALL-CONVERT-NEXT: %{{.+}} = xor i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[SMIN:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smin.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT-NEXT: [[SMAX:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smax.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT-NEXT: [[UMIN:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[UMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umin.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT-NEXT: [[UMAX:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT: [[UMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> 
zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[UMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umax.i32(i32 [[RED]], i32 %start) ; ALL-CONVERT-NEXT: ret void ; Check that reductions use the correct neutral element for masked-off elements ; ALL-CONVERT: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) { -; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m +; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMIN_NNAN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMAX:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, 
<4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMAX_NNAN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FADD:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT-NEXT: [[FADD:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT-NEXT: [[FMUL:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) -; ALL-CONVERT-NEXT: [[FMUL:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; 
ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) ; ALL-CONVERT-NEXT: ret void @@ -332,29 +340,37 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n ; DISCARD_LEGAL: ret void ; DISCARD_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { -; DISCARD_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: ret void +; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m +; DISCARD_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> [[NEWMASK]], i32 4) +; DISCARD_LEGAL-NOT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; 
DISCARD_LEGAL-NOT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL: ret void ; DISCARD_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) { -; DISCARD_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r9 = call 
reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: ret void +; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m +; DISCARD_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> [[NEWMASK]], i32 4) +; DISCARD_LEGAL-NOT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL: ret void ; Convert %evl into %mask everywhere (%evl Convert, %mask Legal) ; From 8383351d45179ecb544f859919dbc798ac9a91ea Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 25 May 2022 10:04:47 -0400 Subject: [PATCH 469/908] [libc++] Remove conditional include --- libcxx/include/barrier | 6 ++---- 
libcxx/src/barrier.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/libcxx/include/barrier b/libcxx/include/barrier index f03c089a14b96c..9d91d255df9a06 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -51,9 +51,7 @@ namespace std #include <__thread/timed_backoff_policy.h> #include #include -#ifndef _LIBCPP_HAS_NO_TREE_BARRIER -# include -#endif +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -280,7 +278,7 @@ public: } }; -#endif //_LIBCPP_HAS_NO_TREE_BARRIER +#endif // !_LIBCPP_HAS_NO_TREE_BARRIER template class barrier { diff --git a/libcxx/src/barrier.cpp b/libcxx/src/barrier.cpp index 54890b2d797ef3..8ce2c043cf81db 100644 --- a/libcxx/src/barrier.cpp +++ b/libcxx/src/barrier.cpp @@ -90,7 +90,7 @@ void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) delete __barrier; } -#endif //!defined(_LIBCPP_HAS_NO_TREE_BARRIER) +#endif // !defined(_LIBCPP_HAS_NO_TREE_BARRIER) _LIBCPP_END_NAMESPACE_STD From 3c057ac2c28446ded80c3e91794c15d0c40a2d78 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Wed, 25 May 2022 19:32:21 +0530 Subject: [PATCH 470/908] [MLIR][Presburger] Add getDomainSet, getRangeSet to IntegerRelation This patch adds support for obtaining a set corresponding to the domain/range of the relation. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126326 --- .../Analysis/Presburger/IntegerRelation.h | 24 +++++++++++ .../Analysis/Presburger/IntegerRelation.cpp | 27 ++++++++++++ .../Analysis/Presburger/CMakeLists.txt | 1 + .../Presburger/IntegerRelationTest.cpp | 43 +++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index b6f127784b18b3..1eca9efd39fbf0 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -24,6 +24,9 @@ namespace mlir { namespace presburger { +class IntegerRelation; +class IntegerPolyhedron; + /// An IntegerRelation represents the set of points from a PresburgerSpace that /// satisfy a list of affine constraints. Affine constraints can be inequalities /// or equalities in the form: @@ -496,6 +499,12 @@ class IntegerRelation { space.setDimSymbolSeparation(newSymbolCount); } + /// Return a set corresponding to all points in the domain of the relation. + IntegerPolyhedron getDomainSet() const; + + /// Return a set corresponding to all points in the range of the relation. + IntegerPolyhedron getRangeSet() const; + void print(raw_ostream &os) const; void dump() const; @@ -643,6 +652,21 @@ class IntegerPolyhedron : public IntegerRelation { /*numReservedEqualities=*/0, /*numReservedCols=*/space.getNumIds() + 1, space) {} + /// Construct a set from an IntegerRelation. The relation should have + /// no domain ids. + explicit IntegerPolyhedron(const IntegerRelation &rel) + : IntegerRelation(rel) { + assert(space.getNumDomainIds() == 0 && + "Number of domain id's should be zero in Set kind space."); + } + + /// Construct a set from an IntegerRelation, but instead of creating a copy, + /// use move constructor. The relation should have no domain ids. 
+ explicit IntegerPolyhedron(IntegerRelation &&rel) : IntegerRelation(rel) { + assert(space.getNumDomainIds() == 0 && + "Number of domain id's should be zero in Set kind space."); + } + /// Return a system with no constraints, i.e., one which is satisfied by all /// points. static IntegerPolyhedron getUniverse(const PresburgerSpace &space) { diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index 73ae0ebd9873e3..4df84aaad906aa 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -2075,6 +2075,33 @@ void IntegerRelation::removeIndependentConstraints(unsigned pos, unsigned num) { removeEquality(nbIndex); } +IntegerPolyhedron IntegerRelation::getDomainSet() const { + IntegerRelation copyRel = *this; + + // Convert Range variables to Local variables. + copyRel.convertIdKind(IdKind::Range, 0, getNumIdKind(IdKind::Range), + IdKind::Local); + + // Convert Domain variables to SetDim(Range) variables. + copyRel.convertIdKind(IdKind::Domain, 0, getNumIdKind(IdKind::Domain), + IdKind::SetDim); + + return IntegerPolyhedron(std::move(copyRel)); +} + +IntegerPolyhedron IntegerRelation::getRangeSet() const { + IntegerRelation copyRel = *this; + + // Convert Domain variables to Local variables. + copyRel.convertIdKind(IdKind::Domain, 0, getNumIdKind(IdKind::Domain), + IdKind::Local); + + // We do not need to do anything to Range variables since they are already in + // SetDim position. 
+ + return IntegerPolyhedron(std::move(copyRel)); +} + void IntegerRelation::printSpace(raw_ostream &os) const { space.print(os); os << getNumConstraints() << " constraints\n"; diff --git a/mlir/unittests/Analysis/Presburger/CMakeLists.txt b/mlir/unittests/Analysis/Presburger/CMakeLists.txt index 11ab72d8c1f885..bf3ba08a8881ca 100644 --- a/mlir/unittests/Analysis/Presburger/CMakeLists.txt +++ b/mlir/unittests/Analysis/Presburger/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_unittest(MLIRPresburgerTests IntegerPolyhedronTest.cpp + IntegerRelationTest.cpp LinearTransformTest.cpp MatrixTest.cpp PresburgerSetTest.cpp diff --git a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp new file mode 100644 index 00000000000000..06dbee01896bf0 --- /dev/null +++ b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp @@ -0,0 +1,43 @@ +//===- IntegerRelationTest.cpp - Tests for IntegerRelation class ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/Presburger/IntegerRelation.h" +#include "./Utils.h" + +#include +#include + +using namespace mlir; +using namespace presburger; + +static IntegerRelation parseRelationFromSet(StringRef set, unsigned numDomain) { + IntegerRelation rel = parsePoly(set); + + rel.convertIdKind(IdKind::SetDim, 0, numDomain, IdKind::Domain); + + return rel; +} + +TEST(IntegerRelationTest, getDomainAndRangeSet) { + IntegerRelation rel = parseRelationFromSet( + "(x, xr)[N] : (xr - x - 10 == 0, xr >= 0, N - xr >= 0)", 1); + + IntegerPolyhedron domainSet = rel.getDomainSet(); + + IntegerPolyhedron expectedDomainSet = + parsePoly("(x)[N] : (x + 10 >= 0, N - x - 10 >= 0)"); + + EXPECT_TRUE(domainSet.isEqual(expectedDomainSet)); + + IntegerPolyhedron rangeSet = rel.getRangeSet(); + + IntegerPolyhedron expectedRangeSet = + parsePoly("(x)[N] : (x >= 0, N - x >= 0)"); + + EXPECT_TRUE(rangeSet.isEqual(expectedRangeSet)); +} From 6346a026af7984f3b20d0c0e0e767ef83fc93027 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 25 May 2022 16:05:11 +0200 Subject: [PATCH 471/908] [PhaseOrdering] Add test for unprofitable loop load PRE backedge splitting (NFC) --- .../X86/vector-reduction-known-first-value.ll | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll new file mode 100644 index 00000000000000..508a501d9e847f --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -O3 < %s | FileCheck %s + +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @use(i8) + +define i16 @test(ptr %ptr) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FIRST:%.*]] = load i8, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: tail call void @use(i8 [[FIRST]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[VAL_EXT2:%.*]] = zext i8 [[FIRST]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[VAL_EXT2]], i64 0 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ [[TMP0]], [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD6]] to <8 x i16> +; CHECK-NEXT: [[TMP5]] = add <8 x i16> [[VEC_PHI]], [[TMP3]] +; CHECK-NEXT: [[TMP6]] = add <8 x i16> [[VEC_PHI5]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008 +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: loop.loop_crit_edge: +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i16> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[BIN_RDX]]) +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT:%.*]] = 
getelementptr inbounds i8, ptr [[PTR]], i64 1009 +; CHECK-NEXT: [[VAL_PRE:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT]], align 1 +; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i8 [[VAL_PRE]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT:%.*]] = add i16 [[TMP8]], [[VAL_EXT]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1010 +; CHECK-NEXT: [[VAL_PRE_1:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_1]], align 1 +; CHECK-NEXT: [[VAL_EXT_1:%.*]] = zext i8 [[VAL_PRE_1]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_1:%.*]] = add i16 [[ACCUM_NEXT]], [[VAL_EXT_1]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1011 +; CHECK-NEXT: [[VAL_PRE_2:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_2]], align 1 +; CHECK-NEXT: [[VAL_EXT_2:%.*]] = zext i8 [[VAL_PRE_2]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_2:%.*]] = add i16 [[ACCUM_NEXT_1]], [[VAL_EXT_2]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1012 +; CHECK-NEXT: [[VAL_PRE_3:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_3]], align 1 +; CHECK-NEXT: [[VAL_EXT_3:%.*]] = zext i8 [[VAL_PRE_3]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_3:%.*]] = add i16 [[ACCUM_NEXT_2]], [[VAL_EXT_3]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1013 +; CHECK-NEXT: [[VAL_PRE_4:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_4]], align 1 +; CHECK-NEXT: [[VAL_EXT_4:%.*]] = zext i8 [[VAL_PRE_4]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_4:%.*]] = add i16 [[ACCUM_NEXT_3]], [[VAL_EXT_4]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1014 +; CHECK-NEXT: [[VAL_PRE_5:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_5]], align 1 +; CHECK-NEXT: [[VAL_EXT_5:%.*]] = zext i8 [[VAL_PRE_5]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_5:%.*]] = add i16 [[ACCUM_NEXT_4]], [[VAL_EXT_5]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1015 +; CHECK-NEXT: [[VAL_PRE_6:%.*]] = 
load i8, ptr [[GEP_PHI_TRANS_INSERT_6]], align 1 +; CHECK-NEXT: [[VAL_EXT_6:%.*]] = zext i8 [[VAL_PRE_6]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_6:%.*]] = add i16 [[ACCUM_NEXT_5]], [[VAL_EXT_6]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1016 +; CHECK-NEXT: [[VAL_PRE_7:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_7]], align 1 +; CHECK-NEXT: [[VAL_EXT_7:%.*]] = zext i8 [[VAL_PRE_7]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_7:%.*]] = add i16 [[ACCUM_NEXT_6]], [[VAL_EXT_7]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_8:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1017 +; CHECK-NEXT: [[VAL_PRE_8:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_8]], align 1 +; CHECK-NEXT: [[VAL_EXT_8:%.*]] = zext i8 [[VAL_PRE_8]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_8:%.*]] = add i16 [[ACCUM_NEXT_7]], [[VAL_EXT_8]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_9:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1018 +; CHECK-NEXT: [[VAL_PRE_9:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_9]], align 1 +; CHECK-NEXT: [[VAL_EXT_9:%.*]] = zext i8 [[VAL_PRE_9]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_9:%.*]] = add i16 [[ACCUM_NEXT_8]], [[VAL_EXT_9]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_10:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1019 +; CHECK-NEXT: [[VAL_PRE_10:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_10]], align 1 +; CHECK-NEXT: [[VAL_EXT_10:%.*]] = zext i8 [[VAL_PRE_10]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_10:%.*]] = add i16 [[ACCUM_NEXT_9]], [[VAL_EXT_10]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1020 +; CHECK-NEXT: [[VAL_PRE_11:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_11]], align 1 +; CHECK-NEXT: [[VAL_EXT_11:%.*]] = zext i8 [[VAL_PRE_11]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_11:%.*]] = add i16 [[ACCUM_NEXT_10]], [[VAL_EXT_11]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_12:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1021 +; CHECK-NEXT: [[VAL_PRE_12:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_12]], 
align 1 +; CHECK-NEXT: [[VAL_EXT_12:%.*]] = zext i8 [[VAL_PRE_12]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_12:%.*]] = add i16 [[ACCUM_NEXT_11]], [[VAL_EXT_12]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_13:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1022 +; CHECK-NEXT: [[VAL_PRE_13:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_13]], align 1 +; CHECK-NEXT: [[VAL_EXT_13:%.*]] = zext i8 [[VAL_PRE_13]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_13:%.*]] = add i16 [[ACCUM_NEXT_12]], [[VAL_EXT_13]] +; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_14:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1023 +; CHECK-NEXT: [[VAL_PRE_14:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_14]], align 1 +; CHECK-NEXT: [[VAL_EXT_14:%.*]] = zext i8 [[VAL_PRE_14]] to i16 +; CHECK-NEXT: [[ACCUM_NEXT_14:%.*]] = add i16 [[ACCUM_NEXT_13]], [[VAL_EXT_14]] +; CHECK-NEXT: ret i16 [[ACCUM_NEXT_14]] +; +entry: + %first = load i8, ptr %ptr + call void @use(i8 %first) readonly + br label %loop + +loop: + %accum = phi i16 [ 0, %entry ], [ %accum.next, %loop ] + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i8, ptr %ptr, i64 %iv + %val = load i8, ptr %gep, align 1 + %iv.next = add nuw i64 %iv, 1 + %val.ext = zext i8 %val to i16 + %accum.next = add i16 %accum, %val.ext + %exit.cond = icmp eq i64 %iv.next, 1024 + br i1 %exit.cond, label %exit, label %loop + +exit: + %lcssa.phi = phi i16 [ %accum.next, %loop ] + ret i16 %lcssa.phi +} From fb857ded70e108afec4f4f7e024b0315bbe00de4 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Wed, 25 May 2022 19:36:35 +0530 Subject: [PATCH 472/908] [MLIR][Presburger] Add inverse to IntegerRelation This patch adds support for obtaining inverse of a relation. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126327 --- .../mlir/Analysis/Presburger/IntegerRelation.h | 6 ++++++ mlir/lib/Analysis/Presburger/IntegerRelation.cpp | 6 ++++++ .../Analysis/Presburger/IntegerRelationTest.cpp | 16 ++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index 1eca9efd39fbf0..b74413b9ff0116 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -505,6 +505,12 @@ class IntegerRelation { /// Return a set corresponding to all points in the range of the relation. IntegerPolyhedron getRangeSet() const; + /// Invert the relation, i.e., swap its domain and range. + /// + /// Formally, let the relation `this` be R: A -> B, then this operation + /// modifies R to be B -> A. + void inverse(); + void print(raw_ostream &os) const; void dump() const; diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index 4df84aaad906aa..87132728103a7e 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -2102,6 +2102,12 @@ IntegerPolyhedron IntegerRelation::getRangeSet() const { return IntegerPolyhedron(std::move(copyRel)); } +void IntegerRelation::inverse() { + unsigned numRangeIds = getNumIdKind(IdKind::Range); + convertIdKind(IdKind::Domain, 0, getIdKindEnd(IdKind::Domain), IdKind::Range); + convertIdKind(IdKind::Range, 0, numRangeIds, IdKind::Domain); +} + void IntegerRelation::printSpace(raw_ostream &os) const { space.print(os); os << getNumConstraints() << " constraints\n"; diff --git a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp index 06dbee01896bf0..ee922584f564eb 100644 --- a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp 
+++ b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp @@ -41,3 +41,19 @@ TEST(IntegerRelationTest, getDomainAndRangeSet) { EXPECT_TRUE(rangeSet.isEqual(expectedRangeSet)); } + +TEST(IntegerRelationTest, inverse) { + IntegerRelation rel = + parseRelationFromSet("(x, y, z)[N, M] : (z - x - y == 0, x >= 0, N - x " + ">= 0, y >= 0, M - y >= 0)", + 2); + + IntegerRelation inverseRel = + parseRelationFromSet("(z, x, y)[N, M] : (x >= 0, N - x >= 0, y >= 0, M " + "- y >= 0, x + y - z == 0)", + 1); + + rel.inverse(); + + EXPECT_TRUE(rel.isEqual(inverseRel)); +} From e40c4dd66218393214ed7a817ebb82979adad01a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 25 May 2022 13:04:44 +0200 Subject: [PATCH 473/908] [llvm-rc] Avoid which(1) dependency in windres-prefix.test Rely on lit substitution to provide the path to llvm-windres instead of redundantly calling which(1) with the substituted absolute path. This fixes test failure on the happy systems without which(1) installed. Differential Revision: https://reviews.llvm.org/D126366 --- llvm/test/tools/llvm-rc/windres-prefix.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-rc/windres-prefix.test b/llvm/test/tools/llvm-rc/windres-prefix.test index ef85f681febd18..9d24bf9ee7b2ac 100644 --- a/llvm/test/tools/llvm-rc/windres-prefix.test +++ b/llvm/test/tools/llvm-rc/windres-prefix.test @@ -4,7 +4,7 @@ ; Check that a triple prefix on the executable gets picked up as target triple. 
-; RUN: ln -fs $(which llvm-windres) %t/aarch64-w64-mingw32-windres +; RUN: ln -fs llvm-windres %t/aarch64-w64-mingw32-windres ; RUN: %t/aarch64-w64-mingw32-windres -### %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK-PREPROC ; CHECK-PREPROC: "clang" "--driver-mode=gcc" "-target" "aarch64-w64-mingw32" From 046f90173567535397c97b2d00406c93ee4bc890 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Wed, 25 May 2022 15:15:06 +0100 Subject: [PATCH 474/908] [TableGen] Undeprecate 'field' when used with the CodeEmitterGen backend. Differential Revision: https://reviews.llvm.org/D126290 --- llvm/docs/TableGen/ProgRef.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index 9853a7c76307ed..4d02ceb98cddec 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -208,7 +208,9 @@ identifiers:: multiclass string then true .. warning:: - The ``field`` reserved word is deprecated. + The ``field`` reserved word is deprecated, except when used with the + CodeEmitterGen backend where it's used to distinguish normal record + fields from encoding fields. 
Bang operators -------------- From a4a438f05a5cd583e6a0680a1dafcb35aa2d1b53 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 25 May 2022 07:41:45 -0700 Subject: [PATCH 475/908] [riscv] Add coverage for fixed length vector loops using LMUL --- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 533 ++++++++++++++++++ 1 file changed, 533 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index a8063d98bf7a5b..cd0b600c9d5a66 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -3769,3 +3769,536 @@ vector.body: ; preds = %vector.body, %entry for.cond.cleanup: ; preds = %vector.body ret void } + + +define void @sink_splat_mul_lmul2(i64* nocapture %a, i64 signext %x) { +; CHECK-LABEL: sink_splat_mul_lmul2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB67_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vmul.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: bnez a2, .LBB67_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i64, i64* %a, i64 %index + %1 = bitcast i64* %0 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %2 = mul <4 x i64> %wide.load, %broadcast.splat + %3 = bitcast i64* %0 to <4 x i64>* + 
store <4 x i64> %2, <4 x i64>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_add_lmul2(i64* nocapture %a, i64 signext %x) { +; CHECK-LABEL: sink_splat_add_lmul2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB68_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vadd.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: bnez a2, .LBB68_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i64, i64* %a, i64 %index + %1 = bitcast i64* %0 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %2 = add <4 x i64> %wide.load, %broadcast.splat + %3 = bitcast i64* %0 to <4 x i64>* + store <4 x i64> %2, <4 x i64>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_sub_lmul2(i64* nocapture %a, i64 signext %x) { +; CHECK-LABEL: sink_splat_sub_lmul2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB69_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; 
CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: bnez a2, .LBB69_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i64, i64* %a, i64 %index + %1 = bitcast i64* %0 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %2 = sub <4 x i64> %wide.load, %broadcast.splat + %3 = bitcast i64* %0 to <4 x i64>* + store <4 x i64> %2, <4 x i64>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_rsub_lmul2(i64* nocapture %a, i64 signext %x) { +; CHECK-LABEL: sink_splat_rsub_lmul2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB70_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vrsub.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: bnez a2, .LBB70_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 + %broadcast.splat = shufflevector <4 x 
i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i64, i64* %a, i64 %index + %1 = bitcast i64* %0 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %2 = sub <4 x i64> %broadcast.splat, %wide.load + %3 = bitcast i64* %0 to <4 x i64>* + store <4 x i64> %2, <4 x i64>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_and_lmul2(i64* nocapture %a, i64 signext %x) { +; CHECK-LABEL: sink_splat_and_lmul2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB71_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: bnez a2, .LBB71_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i64, i64* %a, i64 %index + %1 = bitcast i64* %0 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %2 = and <4 x i64> %wide.load, %broadcast.splat + %3 = bitcast i64* %0 to <4 x i64>* + store <4 x i64> %2, <4 x i64>* %3, align 4 + %index.next = add nuw i64 %index, 
4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_or_lmul2(i64* nocapture %a, i64 signext %x) { +; CHECK-LABEL: sink_splat_or_lmul2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB72_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vor.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: bnez a2, .LBB72_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i64, i64* %a, i64 %index + %1 = bitcast i64* %0 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %2 = or <4 x i64> %wide.load, %broadcast.splat + %3 = bitcast i64* %0 to <4 x i64>* + store <4 x i64> %2, <4 x i64>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_xor_lmul2(i64* nocapture %a, i64 signext %x) { +; CHECK-LABEL: sink_splat_xor_lmul2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB73_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; 
CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vxor.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: bnez a2, .LBB73_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0 + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i64, i64* %a, i64 %index + %1 = bitcast i64* %0 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %2 = xor <4 x i64> %wide.load, %broadcast.splat + %3 = bitcast i64* %0 to <4 x i64>* + store <4 x i64> %2, <4 x i64>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_mul_lmul8(i32* nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_mul_lmul8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB74_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmul.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a2, .LBB74_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, 
%entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <32 x i32>* + %wide.load = load <32 x i32>, <32 x i32>* %1, align 4 + %2 = mul <32 x i32> %wide.load, %broadcast.splat + %3 = bitcast i32* %0 to <32 x i32>* + store <32 x i32> %2, <32 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_add_lmul8(i32* nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_add_lmul8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB75_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vadd.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a2, .LBB75_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <32 x i32>* + %wide.load = load <32 x i32>, <32 x i32>* %1, align 4 + %2 = add <32 x i32> %wide.load, %broadcast.splat + %3 = bitcast i32* %0 to <32 x i32>* + store <32 x i32> %2, <32 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_sub_lmul8(i32* nocapture %a, i32 signext %x) { +; CHECK-LABEL: 
sink_splat_sub_lmul8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB76_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a2, .LBB76_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <32 x i32>* + %wide.load = load <32 x i32>, <32 x i32>* %1, align 4 + %2 = sub <32 x i32> %wide.load, %broadcast.splat + %3 = bitcast i32* %0 to <32 x i32>* + store <32 x i32> %2, <32 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_rsub_lmul8(i32* nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_rsub_lmul8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB77_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vrsub.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a2, .LBB77_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0 + %broadcast.splat = 
shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <32 x i32>* + %wide.load = load <32 x i32>, <32 x i32>* %1, align 4 + %2 = sub <32 x i32> %broadcast.splat, %wide.load + %3 = bitcast i32* %0 to <32 x i32>* + store <32 x i32> %2, <32 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_and_lmul8(i32* nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_and_lmul8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB78_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a2, .LBB78_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <32 x i32>* + %wide.load = load <32 x i32>, <32 x i32>* %1, align 4 + %2 = and <32 x i32> %wide.load, %broadcast.splat + %3 = bitcast i32* %0 to <32 x i32>* + store <32 x i32> %2, <32 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, 
label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_or_lmul8(i32* nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_or_lmul8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB79_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vor.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a2, .LBB79_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <32 x i32>* + %wide.load = load <32 x i32>, <32 x i32>* %1, align 4 + %2 = or <32 x i32> %wide.load, %broadcast.splat + %3 = bitcast i32* %0 to <32 x i32>* + store <32 x i32> %2, <32 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_xor_lmul8(i32* nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_xor_lmul8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: .LBB80_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vxor.vx v8, v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: 
bnez a2, .LBB80_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <32 x i32>* + %wide.load = load <32 x i32>, <32 x i32>* %1, align 4 + %2 = xor <32 x i32> %wide.load, %broadcast.splat + %3 = bitcast i32* %0 to <32 x i32>* + store <32 x i32> %2, <32 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} From 5a2dbe49bedc15769319a2754a9c109bf3f5c678 Mon Sep 17 00:00:00 2001 From: Joe Loser Date: Tue, 24 May 2022 14:40:08 -0600 Subject: [PATCH 476/908] [libc++][test] Verify std::ranges::rbegin, crbegin, rend, crend are CPOs Uncomment the tests to ensure `std::ranges::rbegin` and friends are indeed customization points objects. 
Differential Revision: https://reviews.llvm.org/D126325 --- .../customization.point.object/cpo.compile.pass.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp index 45a3eb1dac3f12..225412fda2152f 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp @@ -66,12 +66,12 @@ static_assert(test(std::ranges::end, a)); static_assert(test(std::ranges::cbegin, a)); static_assert(test(std::ranges::cdata, a)); static_assert(test(std::ranges::cend, a)); -//static_assert(test(std::ranges::crbegin, a)); -//static_assert(test(std::ranges::crend, a)); +static_assert(test(std::ranges::crbegin, a)); +static_assert(test(std::ranges::crend, a)); static_assert(test(std::ranges::data, a)); static_assert(test(std::ranges::empty, a)); -//static_assert(test(std::ranges::rbegin, a)); -//static_assert(test(std::ranges::rend, a)); +static_assert(test(std::ranges::rbegin, a)); +static_assert(test(std::ranges::rend, a)); static_assert(test(std::ranges::size, a)); static_assert(test(std::ranges::ssize, a)); From 413fbb045d714bbb1f1f3887104ccbc4b7b395c2 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Wed, 25 May 2022 10:45:26 -0400 Subject: [PATCH 477/908] [mlir][scf] Retain existing attributes in scf.for transforms These attributes can carry useful information, e.g., pipelines might use them to organize and chain patterns. 
Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D126320 --- mlir/lib/Dialect/SCF/SCF.cpp | 2 ++ .../Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp | 1 + mlir/test/Dialect/SCF/bufferize.mlir | 4 ++-- mlir/test/Dialect/SCF/canonicalize.mlir | 7 ++++--- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 3236017a5986c5..9d3e61e69d9a5a 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -660,6 +660,7 @@ struct ForOpIterArgsFolder : public OpRewritePattern { scf::ForOp newForOp = rewriter.create( forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep(), newIterArgs); + newForOp->setAttrs(forOp->getAttrs()); Block &newBlock = newForOp.getRegion().front(); // Replace the null placeholders with newly constructed values. @@ -802,6 +803,7 @@ static ForOp replaceTensorCastForOpIterArg(PatternRewriter &rewriter, scf::ForOp newForOp = rewriter.create( forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep(), newIterOperands); + newForOp->setAttrs(forOp->getAttrs()); Block &newBlock = newForOp.getRegion().front(); SmallVector newBlockTransferArgs(newBlock.getArguments().begin(), newBlock.getArguments().end()); diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp index d0c3c381dbfa1d..2a6a95e5e0d1c4 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp @@ -491,6 +491,7 @@ struct ForOpInterface auto newForOp = rewriter.create( forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep(), initArgs); + newForOp->setAttrs(forOp->getAttrs()); ValueRange initArgsRange(initArgs); TypeRange initArgsTypes(initArgsRange); Block *loopBody = &newForOp.getLoopBody().front(); diff --git a/mlir/test/Dialect/SCF/bufferize.mlir 
b/mlir/test/Dialect/SCF/bufferize.mlir index eb795aeb5eb1fd..6193101e9264d7 100644 --- a/mlir/test/Dialect/SCF/bufferize.mlir +++ b/mlir/test/Dialect/SCF/bufferize.mlir @@ -30,14 +30,14 @@ func.func @if(%pred: i1, %true_val: tensor, %false_val: tensor) -> // CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref // CHECK: %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref) { // CHECK: scf.yield %[[ITER]] : memref -// CHECK: } +// CHECK: } {some_attr} // CHECK: %[[VAL_8:.*]] = bufferization.to_tensor %[[VAL_9:.*]] : memref // CHECK: return %[[VAL_8]] : tensor // CHECK: } func.func @for(%arg0: tensor, %lb: index, %ub: index, %step: index) -> tensor { %ret = scf.for %iv = %lb to %ub step %step iter_args(%iter = %arg0) -> tensor { scf.yield %iter : tensor - } + } {some_attr} return %ret : tensor } diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index cca2f439bd70fd..8e087fc0f38a41 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -372,7 +372,7 @@ func.func @for_yields_3(%lb : index, %ub : index, %step : index) -> (i32, i32, i %r:3 = scf.for %i = %lb to %ub step %step iter_args(%0 = %a, %1 = %a, %2 = %b) -> (i32, i32, i32) { %c = func.call @make_i32() : () -> (i32) scf.yield %0, %c, %2 : i32, i32, i32 - } + } {some_attr} return %r#0, %r#1, %r#2 : i32, i32, i32 } @@ -382,7 +382,7 @@ func.func @for_yields_3(%lb : index, %ub : index, %step : index) -> (i32, i32, i // CHECK-NEXT: %[[r1:.*]] = scf.for {{.*}} iter_args(%arg4 = %[[a]]) -> (i32) { // CHECK-NEXT: %[[c:.*]] = func.call @make_i32() : () -> i32 // CHECK-NEXT: scf.yield %[[c]] : i32 -// CHECK-NEXT: } +// CHECK-NEXT: } {some_attr} // CHECK-NEXT: return %[[a]], %[[r1]], %[[b]] : i32, i32, i32 // ----- @@ -846,11 +846,12 @@ func.func @matmul_on_tensors(%t0: tensor<32x1024xf32>, %t1: tensor<1024x1024xf32 // CHECK: %[[DONE:.*]] = 
func.call @do(%[[CAST]]) : (tensor) -> tensor // CHECK: %[[UNCAST:.*]] = tensor.cast %[[DONE]] : tensor to tensor<32x1024xf32> // CHECK: scf.yield %[[UNCAST]] : tensor<32x1024xf32> +// CHECK: } {some_attr} %0 = tensor.cast %t0 : tensor<32x1024xf32> to tensor %1 = scf.for %i = %c0 to %c1024 step %c32 iter_args(%iter_t0 = %0) -> (tensor) { %2 = func.call @do(%iter_t0) : (tensor) -> tensor scf.yield %2 : tensor - } + } {some_attr} // CHECK-NOT: tensor.cast // CHECK: %[[RES:.*]] = tensor.insert_slice %[[FOR_RES]] into %[[T1]][0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32> // CHECK: return %[[RES]] : tensor<1024x1024xf32> From e0ea1fc6f8aa5c51061dced0f86c4fd25e3e9333 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Wed, 25 May 2022 10:53:35 -0400 Subject: [PATCH 478/908] [mlir][spirv] Fix capability check for 64-bit element types Using 64-bit integer/float type in interface storage classes would require Int64/Float64 capability, per the Vulkan spec: ``` shaderInt64 specifies whether 64-bit integers (signed and unsigned) are supported in shader code. If this feature is not enabled, 64-bit integer types must not be used in shader code. This also specifies whether shader modules can declare the Int64 capability. Declaring and using 64-bit integers is enabled for all storage classes that SPIR-V allows with the Int64 capability. ``` This is different from, say, 16-bit element types, where: ``` shaderInt16 specifies whether 16-bit integers (signed and unsigned) are supported in shader code. If this feature is not enabled, 16-bit integer types must not be used in shader code. This also specifies whether shader modules can declare the Int16 capability. 
However, this only enables a subset of the storage classes that SPIR-V allows for the Int16 SPIR-V capability: Declaring and using 16-bit integers in the Private, Workgroup (for non-Block variables), and Function storage classes is enabled, while declaring them in the interface storage classes (e.g., UniformConstant, Uniform, StorageBuffer, Input, Output, and PushConstant) is not enabled. ``` Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D126256 --- mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp | 24 ++-- .../SPIRV/Transforms/SPIRVConversion.cpp | 11 +- .../FuncToSPIRV/types-to-spirv.mlir | 109 ++++++++++++++++-- mlir/test/Conversion/MemRefToSPIRV/alloc.mlir | 2 +- .../SPIRV/Transforms/vce-deduction.mlir | 4 +- 5 files changed, 116 insertions(+), 34 deletions(-) diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp index b66f569c352d98..494f32925315b9 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp @@ -549,14 +549,17 @@ void ScalarType::getCapabilities( static const Capability caps[] = {Capability::cap8}; \ ArrayRef ref(caps, llvm::array_lengthof(caps)); \ capabilities.push_back(ref); \ - } else if (bitwidth == 16) { \ + return; \ + } \ + if (bitwidth == 16) { \ static const Capability caps[] = {Capability::cap16}; \ ArrayRef ref(caps, llvm::array_lengthof(caps)); \ capabilities.push_back(ref); \ + return; \ } \ - /* No requirements for other bitwidths */ \ - return; \ - } + /* For 64-bit integers/floats, Int64/Float64 enables support for all */ \ + /* storage classes. Fall through to the next section. */ \ + } break // This part only handles the cases where special bitwidths appearing in // interface storage classes. 
@@ -573,8 +576,9 @@ void ScalarType::getCapabilities( static const Capability caps[] = {Capability::StorageInputOutput16}; ArrayRef ref(caps, llvm::array_lengthof(caps)); capabilities.push_back(ref); + return; } - return; + break; } default: break; @@ -594,22 +598,22 @@ void ScalarType::getCapabilities( if (auto intType = dyn_cast()) { switch (bitwidth) { - case 32: - case 1: - break; WIDTH_CASE(Int, 8); WIDTH_CASE(Int, 16); WIDTH_CASE(Int, 64); + case 1: + case 32: + break; default: llvm_unreachable("invalid bitwidth to getCapabilities"); } } else { assert(isa()); switch (bitwidth) { - case 32: - break; WIDTH_CASE(Float, 16); WIDTH_CASE(Float, 64); + case 32: + break; default: llvm_unreachable("invalid bitwidth to getCapabilities"); } diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index 68cb4ee225546d..bad8922a6fb76e 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -413,7 +413,7 @@ static Type convertBoolMemrefType(const spirv::TargetEnv &targetEnv, } int64_t memrefSize = (type.getNumElements() * numBoolBits + 7) / 8; - auto arrayElemCount = (memrefSize + *arrayElemSize - 1) / *arrayElemSize; + auto arrayElemCount = llvm::divideCeil(memrefSize, *arrayElemSize); int64_t stride = needsExplicitLayout(*storageClass) ? 
*arrayElemSize : 0; auto arrayType = spirv::ArrayType::get(arrayElemType, arrayElemCount, stride); @@ -455,13 +455,6 @@ static Type convertMemrefType(const spirv::TargetEnv &targetEnv, if (!arrayElemType) return nullptr; - Optional elementSize = getTypeNumBytes(options, elementType); - if (!elementSize) { - LLVM_DEBUG(llvm::dbgs() - << type << " illegal: cannot deduce element size\n"); - return nullptr; - } - Optional arrayElemSize = getTypeNumBytes(options, arrayElemType); if (!arrayElemSize) { LLVM_DEBUG(llvm::dbgs() @@ -482,7 +475,7 @@ static Type convertMemrefType(const spirv::TargetEnv &targetEnv, return nullptr; } - auto arrayElemCount = *memrefSize / *elementSize; + auto arrayElemCount = llvm::divideCeil(*memrefSize, *arrayElemSize); int64_t stride = needsExplicitLayout(*storageClass) ? *arrayElemSize : 0; auto arrayType = spirv::ArrayType::get(arrayElemType, arrayElemCount, stride); diff --git a/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir b/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir index cce9e2749de3df..f7a213b091928e 100644 --- a/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir +++ b/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir @@ -312,53 +312,83 @@ module attributes { func.func @memref_1bit_type(%arg0: memref<5xi1>) { return } // CHECK-LABEL: spv.func @memref_8bit_StorageBuffer -// CHECK-SAME: !spv.ptr [0])>, StorageBuffer> +// CHECK-SAME: !spv.ptr [0])>, StorageBuffer> // NOEMU-LABEL: func @memref_8bit_StorageBuffer // NOEMU-SAME: memref<16xi8> func.func @memref_8bit_StorageBuffer(%arg0: memref<16xi8, 0>) { return } // CHECK-LABEL: spv.func @memref_8bit_Uniform -// CHECK-SAME: !spv.ptr [0])>, Uniform> +// CHECK-SAME: !spv.ptr [0])>, Uniform> // NOEMU-LABEL: func @memref_8bit_Uniform // NOEMU-SAME: memref<16xsi8, 4> func.func @memref_8bit_Uniform(%arg0: memref<16xsi8, 4>) { return } // CHECK-LABEL: spv.func @memref_8bit_PushConstant -// CHECK-SAME: !spv.ptr [0])>, PushConstant> +// CHECK-SAME: !spv.ptr [0])>, PushConstant> 
// NOEMU-LABEL: func @memref_8bit_PushConstant // NOEMU-SAME: memref<16xui8, 7> func.func @memref_8bit_PushConstant(%arg0: memref<16xui8, 7>) { return } // CHECK-LABEL: spv.func @memref_16bit_StorageBuffer -// CHECK-SAME: !spv.ptr [0])>, StorageBuffer> +// CHECK-SAME: !spv.ptr [0])>, StorageBuffer> // NOEMU-LABEL: func @memref_16bit_StorageBuffer // NOEMU-SAME: memref<16xi16> func.func @memref_16bit_StorageBuffer(%arg0: memref<16xi16, 0>) { return } // CHECK-LABEL: spv.func @memref_16bit_Uniform -// CHECK-SAME: !spv.ptr [0])>, Uniform> +// CHECK-SAME: !spv.ptr [0])>, Uniform> // NOEMU-LABEL: func @memref_16bit_Uniform // NOEMU-SAME: memref<16xsi16, 4> func.func @memref_16bit_Uniform(%arg0: memref<16xsi16, 4>) { return } // CHECK-LABEL: spv.func @memref_16bit_PushConstant -// CHECK-SAME: !spv.ptr [0])>, PushConstant> +// CHECK-SAME: !spv.ptr [0])>, PushConstant> // NOEMU-LABEL: func @memref_16bit_PushConstant // NOEMU-SAME: memref<16xui16, 7> func.func @memref_16bit_PushConstant(%arg0: memref<16xui16, 7>) { return } // CHECK-LABEL: spv.func @memref_16bit_Input -// CHECK-SAME: !spv.ptr)>, Input> +// CHECK-SAME: !spv.ptr)>, Input> // NOEMU-LABEL: func @memref_16bit_Input // NOEMU-SAME: memref<16xf16, 9> func.func @memref_16bit_Input(%arg3: memref<16xf16, 9>) { return } // CHECK-LABEL: spv.func @memref_16bit_Output -// CHECK-SAME: !spv.ptr)>, Output> +// CHECK-SAME: !spv.ptr)>, Output> // NOEMU-LABEL: func @memref_16bit_Output // NOEMU-SAME: memref<16xf16, 10> func.func @memref_16bit_Output(%arg4: memref<16xf16, 10>) { return } +// CHECK-LABEL: spv.func @memref_64bit_StorageBuffer +// CHECK-SAME: !spv.ptr [0])>, StorageBuffer> +// NOEMU-LABEL: func @memref_64bit_StorageBuffer +// NOEMU-SAME: memref<16xi64> +func.func @memref_64bit_StorageBuffer(%arg0: memref<16xi64, 0>) { return } + +// CHECK-LABEL: spv.func @memref_64bit_Uniform +// CHECK-SAME: !spv.ptr [0])>, Uniform> +// NOEMU-LABEL: func @memref_64bit_Uniform +// NOEMU-SAME: memref<16xsi64, 4> +func.func 
@memref_64bit_Uniform(%arg0: memref<16xsi64, 4>) { return } + +// CHECK-LABEL: spv.func @memref_64bit_PushConstant +// CHECK-SAME: !spv.ptr [0])>, PushConstant> +// NOEMU-LABEL: func @memref_64bit_PushConstant +// NOEMU-SAME: memref<16xui64, 7> +func.func @memref_64bit_PushConstant(%arg0: memref<16xui64, 7>) { return } + +// CHECK-LABEL: spv.func @memref_64bit_Input +// CHECK-SAME: !spv.ptr)>, Input> +// NOEMU-LABEL: func @memref_64bit_Input +// NOEMU-SAME: memref<16xf64, 9> +func.func @memref_64bit_Input(%arg3: memref<16xf64, 9>) { return } + +// CHECK-LABEL: spv.func @memref_64bit_Output +// CHECK-SAME: !spv.ptr)>, Output> +// NOEMU-LABEL: func @memref_64bit_Output +// NOEMU-SAME: memref<16xf64, 10> +func.func @memref_64bit_Output(%arg4: memref<16xf64, 10>) { return } + } // end module // ----- @@ -368,7 +398,7 @@ func.func @memref_16bit_Output(%arg4: memref<16xf16, 10>) { return } // and extension is available. module attributes { spv.target_env = #spv.target_env< - #spv.vce, {}> } { @@ -389,6 +419,17 @@ func.func @memref_16bit_PushConstant( %arg1: memref<16xf16, 7> ) { return } +// CHECK-LABEL: spv.func @memref_64bit_PushConstant +// CHECK-SAME: !spv.ptr [0])>, PushConstant> +// CHECK-SAME: !spv.ptr [0])>, PushConstant> +// NOEMU-LABEL: spv.func @memref_64bit_PushConstant +// NOEMU-SAME: !spv.ptr [0])>, PushConstant> +// NOEMU-SAME: !spv.ptr [0])>, PushConstant> +func.func @memref_64bit_PushConstant( + %arg0: memref<16xi64, 7>, + %arg1: memref<16xf64, 7> +) { return } + } // end module // ----- @@ -398,7 +439,7 @@ func.func @memref_16bit_PushConstant( // and extension is available. 
module attributes { spv.target_env = #spv.target_env< - #spv.vce, {}> } { @@ -419,6 +460,17 @@ func.func @memref_16bit_StorageBuffer( %arg1: memref<16xf16, 0> ) { return } +// CHECK-LABEL: spv.func @memref_64bit_StorageBuffer +// CHECK-SAME: !spv.ptr [0])>, StorageBuffer> +// CHECK-SAME: !spv.ptr [0])>, StorageBuffer> +// NOEMU-LABEL: spv.func @memref_64bit_StorageBuffer +// NOEMU-SAME: !spv.ptr [0])>, StorageBuffer> +// NOEMU-SAME: !spv.ptr [0])>, StorageBuffer> +func.func @memref_64bit_StorageBuffer( + %arg0: memref<16xi64, 0>, + %arg1: memref<16xf64, 0> +) { return } + } // end module // ----- @@ -428,7 +480,7 @@ func.func @memref_16bit_StorageBuffer( // and extension is available. module attributes { spv.target_env = #spv.target_env< - #spv.vce, {}> } { @@ -449,6 +501,17 @@ func.func @memref_16bit_Uniform( %arg1: memref<16xf16, 4> ) { return } +// CHECK-LABEL: spv.func @memref_64bit_Uniform +// CHECK-SAME: !spv.ptr [0])>, Uniform> +// CHECK-SAME: !spv.ptr [0])>, Uniform> +// NOEMU-LABEL: spv.func @memref_64bit_Uniform +// NOEMU-SAME: !spv.ptr [0])>, Uniform> +// NOEMU-SAME: !spv.ptr [0])>, Uniform> +func.func @memref_64bit_Uniform( + %arg0: memref<16xi64, 4>, + %arg1: memref<16xf64, 4> +) { return } + } // end module // ----- @@ -458,7 +521,7 @@ func.func @memref_16bit_Uniform( // and extension is available. 
module attributes { spv.target_env = #spv.target_env< - #spv.vce, {}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @memref_16bit_Input @@ -473,6 +536,28 @@ func.func @memref_16bit_Input(%arg3: memref<16xf16, 9>) { return } // NOEMU-SAME: !spv.ptr)>, Output> func.func @memref_16bit_Output(%arg4: memref<16xi16, 10>) { return } +// CHECK-LABEL: spv.func @memref_64bit_Input +// CHECK-SAME: !spv.ptr)>, Input> +// CHECK-SAME: !spv.ptr)>, Input> +// NOEMU-LABEL: spv.func @memref_64bit_Input +// NOEMU-SAME: !spv.ptr)>, Input> +// NOEMU-SAME: !spv.ptr)>, Input> +func.func @memref_64bit_Input( + %arg0: memref<16xi64, 9>, + %arg1: memref<16xf64, 9> +) { return } + +// CHECK-LABEL: spv.func @memref_64bit_Output +// CHECK-SAME: !spv.ptr)>, Output> +// CHECK-SAME: !spv.ptr)>, Output> +// NOEMU-LABEL: spv.func @memref_64bit_Output +// NOEMU-SAME: !spv.ptr)>, Output> +// NOEMU-SAME: !spv.ptr)>, Output> +func.func @memref_64bit_Output( + %arg0: memref<16xi64, 10>, + %arg1: memref<16xf64, 10> +) { return } + } // end module // ----- diff --git a/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir b/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir index 498b0f99776479..692d70db83633c 100644 --- a/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir +++ b/mlir/test/Conversion/MemRefToSPIRV/alloc.mlir @@ -40,7 +40,7 @@ module attributes { } // CHECK: spv.GlobalVariable @__workgroup_mem__{{[0-9]+}} -// CHECK-SAME: !spv.ptr)>, Workgroup> +// CHECK-SAME: !spv.ptr)>, Workgroup> // CHECK: func @alloc_dealloc_workgroup_mem // CHECK: %[[VAR:.+]] = spv.mlir.addressof @__workgroup_mem__0 // CHECK: %[[LOC:.+]] = spv.SDiv diff --git a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir index fa1f33a8fad76f..7d09e1c507af52 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir @@ -186,10 +186,10 @@ spv.module Logical GLSL450 attributes { // Complicated nested types // * Buffer requires 
ImageBuffer or SampledBuffer. // * Rg32f requires StorageImageExtendedFormats. -// CHECK: requires #spv.vce +// CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, + #spv.vce, {}> } { spv.GlobalVariable @data : !spv.ptr, Uniform> From dd336b6891f1e4be1aae2b7e30c1ba7318564812 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 25 May 2022 07:58:11 -0700 Subject: [PATCH 479/908] [RISCV] Restructure comment and add clarifying assert to getFrameIndexReference [NFC] Differential Revision: https://reviews.llvm.org/D126088 --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 87 +++++++------------- 1 file changed, 31 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 5be78a5ccc86a4..5062d992c6cc41 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -691,66 +691,41 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // If the stack was realigned, the frame pointer is set in order to allow // SP to be restored, so we need another base register to record the stack // after realignment. 
+ // |--------------------------| -- <-- FP + // | callee-allocated save | | <----| + // | area for register varargs| | | + // |--------------------------| | | + // | callee-saved registers | | | + // |--------------------------| -- | + // | realignment (the size of | | | + // | this area is not counted | | | + // | in MFI.getStackSize()) | | | + // |--------------------------| -- |-- MFI.getStackSize() + // | RVV alignment padding | | | + // | (not counted in | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | + // |--------------------------| -- | + // | RVV objects | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | padding before RVV | | | + // | (not counted in | | | + // | MFI.getStackSize() or in | | | + // | RVFI.getRVVStackSize()) | | | + // |--------------------------| -- | + // | scalar local variables | | <----' + // |--------------------------| -- <-- BP (if var sized objects present) + // | VarSize objects | | + // |--------------------------| -- <-- SP if (hasBP(MF)) { FrameReg = RISCVABI::getBPReg(); - // |--------------------------| -- <-- FP - // | callee-allocated save | | <----| - // | area for register varargs| | | - // |--------------------------| | | - // | callee-saved registers | | | - // |--------------------------| -- | - // | realignment (the size of | | | - // | this area is not counted | | | - // | in MFI.getStackSize()) | | | - // |--------------------------| -- |-- MFI.getStackSize() - // | RVV alignment padding | | | - // | (not counted in | | | - // | MFI.getStackSize() but | | | - // | counted in | | | - // | RVFI.getRVVStackSize()) | | | - // |--------------------------| -- | - // | RVV objects | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | padding before RVV | | | - // | (not counted in | | | - // | MFI.getStackSize() or in | | | - // | 
RVFI.getRVVStackSize()) | | | - // |--------------------------| -- | - // | scalar local variables | | <----' - // |--------------------------| -- <-- BP - // | VarSize objects | | - // |--------------------------| -- <-- SP } else { + // VarSize objects must be empty in this case! + assert(!MFI.hasVarSizedObjects()); FrameReg = RISCV::X2; - // |--------------------------| -- <-- FP - // | callee-allocated save | | <----| - // | area for register varargs| | | - // |--------------------------| | | - // | callee-saved registers | | | - // |--------------------------| -- | - // | realignment (the size of | | | - // | this area is not counted | | | - // | in MFI.getStackSize()) | | | - // |--------------------------| -- | - // | RVV alignment padding | | | - // | (not counted in | | | - // | MFI.getStackSize() but | | | - // | counted in | | | - // | RVFI.getRVVStackSize()) | | | - // |--------------------------| -- |-- MFI.getStackSize() - // | RVV objects | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | padding before RVV | | | - // | (not counted in | | | - // | MFI.getStackSize() or in | | | - // | RVFI.getRVVStackSize()) | | | - // |--------------------------| -- | - // | scalar local variables | | <----' - // |--------------------------| -- <-- SP } // The total amount of padding surrounding RVV objects is described by // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV From 2a3b6f2cba92e884969a914917839242eb26809b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 25 May 2022 08:00:27 -0700 Subject: [PATCH 480/908] [RISCV] Hoist VSETVLI vlmax, vtype out of scalable loops This is a straight forward extension of the PRE transform introduced in D124869 to handle the VLMAX case. The test changes here look quite positive. This surprised me until I realized that all the tests are using @llvm.vscale to figure out the VLMAX, not the llvm.riscv.vsetvlmax intrinsic. 
If they'd used the later, these would have been full redundancy cases and fully handled by the data flow. I'm not really sure if use of vscale here is representative or not. If it is, we should probably look at using VSETVLI to lower vscale rather than a raw read of vlenb and some math. Differential Revision: https://reviews.llvm.org/D126338 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 7 +- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 240 +++++++++--------- 2 files changed, 124 insertions(+), 123 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 2a1cdeca4af090..1bfa8371a259f0 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1288,9 +1288,10 @@ void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { /// Return true if the VL value configured must be equal to the requested one. static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { if (!Info.hasAVLImm()) - // TODO: Could allow VLMAX (e.g. X0), and possibly other registers - // by looking at the associated vreg def placement. - return false; + // VLMAX is always the same value. + // TODO: Could extend to other registers by looking at the associated + // vreg def placement. 
+ return RISCV::X0 == Info.getAVLReg(); if (RISCVII::LMUL_1 != Info.getVLMUL()) // TODO: Generalize the code below to account for LMUL diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index cd0b600c9d5a66..fb69d3cca636e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -257,28 +257,28 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_mul_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_mul_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB7_5 ; CHECK-NEXT: .LBB7_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB7_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB7_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB7_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB7_7 ; CHECK-NEXT: .LBB7_5: # %for.body.preheader @@ -349,28 +349,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_add_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_add_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, 
a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB8_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB8_5 ; CHECK-NEXT: .LBB8_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB8_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vadd.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB8_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB8_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB8_7 ; CHECK-NEXT: .LBB8_5: # %for.body.preheader @@ -441,28 +441,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_sub_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB9_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB9_5 ; CHECK-NEXT: .LBB9_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB9_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsub.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, 
a3, .LBB9_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB9_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB9_7 ; CHECK-NEXT: .LBB9_5: # %for.body.preheader @@ -533,28 +533,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_rsub_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_rsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB10_5 ; CHECK-NEXT: .LBB10_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB10_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB10_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB10_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB10_7 ; CHECK-NEXT: .LBB10_5: # %for.body.preheader @@ -625,28 +625,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_and_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_and_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB11_5 ; CHECK-NEXT: .LBB11_2: # %vector.ph -; 
CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB11_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB11_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB11_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB11_7 ; CHECK-NEXT: .LBB11_5: # %for.body.preheader @@ -717,28 +717,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_or_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_or_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB12_5 ; CHECK-NEXT: .LBB12_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB12_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB12_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB12_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, 
.LBB12_7 ; CHECK-NEXT: .LBB12_5: # %for.body.preheader @@ -809,28 +809,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_xor_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_xor_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB13_5 ; CHECK-NEXT: .LBB13_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB13_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB13_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB13_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB13_7 ; CHECK-NEXT: .LBB13_5: # %for.body.preheader @@ -1009,28 +1009,28 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_shl_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_shl_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB17_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB17_5 ; CHECK-NEXT: .LBB17_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; 
CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB17_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsll.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB17_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB17_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB17_7 ; CHECK-NEXT: .LBB17_5: # %for.body.preheader @@ -1101,28 +1101,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_lshr_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_lshr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB18_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB18_5 ; CHECK-NEXT: .LBB18_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB18_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsrl.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB18_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB18_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB18_7 ; CHECK-NEXT: .LBB18_5: # %for.body.preheader @@ -1193,28 +1193,28 @@ for.body: ; preds = %for.body.preheader, define void 
@sink_splat_ashr_scalable(i32* nocapture %a) { ; CHECK-LABEL: sink_splat_ashr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: srli a1, a5, 1 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: srli a1, a4, 1 ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: bgeu a2, a1, .LBB19_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB19_5 ; CHECK-NEXT: .LBB19_2: # %vector.ph -; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: remu a3, a2, a1 ; CHECK-NEXT: sub a2, a2, a3 -; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: slli a4, a4, 1 +; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB19_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a6) -; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: add a4, a4, a1 -; CHECK-NEXT: add a6, a6, a5 -; CHECK-NEXT: bne a4, a2, .LBB19_3 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: add a6, a6, a4 +; CHECK-NEXT: bne a5, a2, .LBB19_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a3, .LBB19_7 ; CHECK-NEXT: .LBB19_5: # %for.body.preheader @@ -1512,11 +1512,11 @@ define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) { ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB26_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a2 @@ -1603,11 +1603,11 @@ define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) { ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB27_3: # %vector.body ; 
CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a2 @@ -1694,11 +1694,11 @@ define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) { ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB28_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a2 @@ -1785,11 +1785,11 @@ define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) { ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB29_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a2 @@ -1876,11 +1876,11 @@ define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) { ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB30_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a2 @@ -1967,11 +1967,11 @@ define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) { ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu ; 
CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB31_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) -; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a2 @@ -2141,13 +2141,13 @@ define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias ; CHECK-NEXT: li a7, 0 ; CHECK-NEXT: remu a5, a4, a3 ; CHECK-NEXT: sub a4, a4, a5 +; CHECK-NEXT: vsetvli t0, zero, e32, m1, ta, mu ; CHECK-NEXT: .LBB34_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add t0, a0, a6 ; CHECK-NEXT: vl1re32.v v8, (t0) ; CHECK-NEXT: add t1, a1, a6 ; CHECK-NEXT: vl1re32.v v9, (t1) -; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vs1r.v v9, (t0) ; CHECK-NEXT: add a7, a7, a3 @@ -2243,13 +2243,13 @@ define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float* ; CHECK-NEXT: li a7, 0 ; CHECK-NEXT: remu a5, a4, a3 ; CHECK-NEXT: sub a4, a4, a5 +; CHECK-NEXT: vsetvli t0, zero, e32, m1, ta, mu ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add t0, a0, a6 ; CHECK-NEXT: vl1re32.v v8, (t0) ; CHECK-NEXT: add t1, a1, a6 ; CHECK-NEXT: vl1re32.v v9, (t1) -; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vs1r.v v9, (t0) ; CHECK-NEXT: add a7, a7, a3 @@ -2558,28 +2558,28 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_udiv_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_udiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB42_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB42_5 ; CHECK-NEXT: .LBB42_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; 
CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB42_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vdivu.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB42_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB42_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB42_7 ; CHECK-NEXT: .LBB42_5: # %for.body.preheader @@ -2650,28 +2650,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_sdiv_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB43_5 ; CHECK-NEXT: .LBB43_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB43_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vdiv.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB43_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB43_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB43_7 ; 
CHECK-NEXT: .LBB43_5: # %for.body.preheader @@ -2742,28 +2742,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_urem_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_urem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB44_5 ; CHECK-NEXT: .LBB44_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB44_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vremu.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB44_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB44_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB44_7 ; CHECK-NEXT: .LBB44_5: # %for.body.preheader @@ -2834,28 +2834,28 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_srem_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_srem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: srli a2, a6, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB45_5 ; CHECK-NEXT: .LBB45_2: # %vector.ph -; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: remu a4, a3, a2 ; CHECK-NEXT: sub a3, a3, a4 -; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: slli a5, a5, 1 +; 
CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB45_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re32.v v8, (a7) -; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vrem.vx v8, v8, a1 ; CHECK-NEXT: vs2r.v v8, (a7) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: bne a5, a3, .LBB45_3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a7, a7, a5 +; CHECK-NEXT: bne a6, a3, .LBB45_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB45_7 ; CHECK-NEXT: .LBB45_5: # %for.body.preheader From 6f4644d194da594562027a5d458d9fb7a20ebc39 Mon Sep 17 00:00:00 2001 From: Martin Boehme Date: Fri, 20 May 2022 14:04:44 +0200 Subject: [PATCH 481/908] [clang] Don't parse MS attributes in `ParseExportDeclaration()`. As @rsmith commented on https://reviews.llvm.org/D111548: "That looks like it's simply a bug as far as I can tell, and that call can be removed. MS attributes will be parsed as part of the decl specifier sequence as needed and don't need to be parsed as declaration attributes." Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D126062 --- clang/lib/Parse/ParseDeclCXX.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 22ad9e030b0d81..5ed989f17c1bce 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -438,7 +438,6 @@ Decl *Parser::ParseExportDeclaration() { // FIXME: Factor out a ParseExternalDeclarationWithAttrs. 
ParsedAttributes Attrs(AttrFactory); MaybeParseCXX11Attributes(Attrs); - MaybeParseMicrosoftAttributes(Attrs); ParseExternalDeclaration(Attrs); return Actions.ActOnFinishExportDecl(getCurScope(), ExportDecl, SourceLocation()); @@ -458,7 +457,6 @@ Decl *Parser::ParseExportDeclaration() { Tok.isNot(tok::eof)) { ParsedAttributes Attrs(AttrFactory); MaybeParseCXX11Attributes(Attrs); - MaybeParseMicrosoftAttributes(Attrs); ParseExternalDeclaration(Attrs); } From 9da89651a8d551970c6a97d1b4f70675ba484b2f Mon Sep 17 00:00:00 2001 From: Anubhab Ghosh Date: Wed, 25 May 2022 09:01:12 -0700 Subject: [PATCH 482/908] [llvm-objcopy][ObjectYAML][mips] Add MIPS specific ELF section indexes This fixes https://github.com/llvm/llvm-project/issues/53998 and displays correct information in obj2yaml for SHN_MIPS_* sections according to https://refspecs.linuxfoundation.org/elf/mipsabi.pdf Reviewed By: jhenderson, MaskRay Differential Revision: https://reviews.llvm.org/D123902 --- llvm/include/llvm/BinaryFormat/ELF.h | 9 +++ llvm/lib/ObjCopy/ELF/ELFObject.cpp | 9 +++ llvm/lib/ObjCopy/ELF/ELFObject.h | 5 ++ llvm/lib/ObjectYAML/ELFYAML.cpp | 11 ++++ .../ELF/hexagon-unsupported-on-x86.test | 15 ----- .../tools/llvm-objcopy/ELF/mips-symbol.test | 62 +++++++++++++++++++ .../unsupported-machine-specific-shndx.test | 17 +++++ .../obj2yaml/ELF/special-symbol-indices.yaml | 12 ++-- 8 files changed, 120 insertions(+), 20 deletions(-) delete mode 100644 llvm/test/tools/llvm-objcopy/ELF/hexagon-unsupported-on-x86.test create mode 100644 llvm/test/tools/llvm-objcopy/ELF/mips-symbol.test create mode 100644 llvm/test/tools/llvm-objcopy/ELF/unsupported-machine-specific-shndx.test diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 2e926856fda5c0..529953c59278bb 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -564,6 +564,15 @@ enum : unsigned { EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant }; 
+// MIPS-specific section indexes +enum { + SHN_MIPS_ACOMMON = 0xff00, // Common symbols which are defined and allocated + SHN_MIPS_TEXT = 0xff01, // Not ABI compliant + SHN_MIPS_DATA = 0xff02, // Not ABI compliant + SHN_MIPS_SCOMMON = 0xff03, // Common symbols for global data area + SHN_MIPS_SUNDEFINED = 0xff04 // Undefined symbols for global data area +}; + // ELF Relocation types for Mips enum { #include "ELFRelocs/Mips.def" diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp index b0bf1402e4485a..a5d7884985a4d6 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp @@ -655,6 +655,15 @@ static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { return Index == SHN_AMDGPU_LDS; } + if (Machine == EM_MIPS) { + switch (Index) { + case SHN_MIPS_ACOMMON: + case SHN_MIPS_SCOMMON: + case SHN_MIPS_SUNDEFINED: + return true; + } + } + if (Machine == EM_HEXAGON) { switch (Index) { case SHN_HEXAGON_SCOMMON: diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h index b72133a0c5d7ce..896dae03f7398e 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.h +++ b/llvm/lib/ObjCopy/ELF/ELFObject.h @@ -621,6 +621,11 @@ enum SymbolShndxType { SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8, + SYMBOL_MIPS_ACOMMON = ELF::SHN_MIPS_ACOMMON, + SYMBOL_MIPS_TEXT = ELF::SHN_MIPS_TEXT, + SYMBOL_MIPS_DATA = ELF::SHN_MIPS_DATA, + SYMBOL_MIPS_SCOMMON = ELF::SHN_MIPS_SCOMMON, + SYMBOL_MIPS_SUNDEFINED = ELF::SHN_MIPS_SUNDEFINED, SYMBOL_HIPROC = ELF::SHN_HIPROC, SYMBOL_LOOS = ELF::SHN_LOOS, SYMBOL_HIOS = ELF::SHN_HIOS, diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index f2d1b793f138cc..6f429540713e9b 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -755,6 +755,8 @@ void ScalarBitSetTraits::bitset(IO &IO, void 
ScalarEnumerationTraits::enumeration( IO &IO, ELFYAML::ELF_SHN &Value) { + const auto *Object = static_cast(IO.getContext()); + assert(Object && "The IO context is not initialized"); #define ECase(X) IO.enumCase(Value, #X, ELF::X) ECase(SHN_UNDEF); ECase(SHN_LORESERVE); @@ -767,6 +769,15 @@ void ScalarEnumerationTraits::enumeration( ECase(SHN_XINDEX); ECase(SHN_HIRESERVE); ECase(SHN_AMDGPU_LDS); + + if (!IO.outputting() || Object->getMachine() == ELF::EM_MIPS) { + ECase(SHN_MIPS_ACOMMON); + ECase(SHN_MIPS_TEXT); + ECase(SHN_MIPS_DATA); + ECase(SHN_MIPS_SCOMMON); + ECase(SHN_MIPS_SUNDEFINED); + } + ECase(SHN_HEXAGON_SCOMMON); ECase(SHN_HEXAGON_SCOMMON_1); ECase(SHN_HEXAGON_SCOMMON_2); diff --git a/llvm/test/tools/llvm-objcopy/ELF/hexagon-unsupported-on-x86.test b/llvm/test/tools/llvm-objcopy/ELF/hexagon-unsupported-on-x86.test deleted file mode 100644 index 749f6b10b825dd..00000000000000 --- a/llvm/test/tools/llvm-objcopy/ELF/hexagon-unsupported-on-x86.test +++ /dev/null @@ -1,15 +0,0 @@ -# RUN: yaml2obj %s -o %t -# RUN: not llvm-objcopy %t %t2 2>&1 >/dev/null | FileCheck %s - -!ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 -Symbols: - - Name: test - Index: SHN_HEXAGON_SCOMMON - Binding: STB_GLOBAL - -# CHECK: symbol 'test' has unsupported value greater than or equal to SHN_LORESERVE: 65280 diff --git a/llvm/test/tools/llvm-objcopy/ELF/mips-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/mips-symbol.test new file mode 100644 index 00000000000000..0e85df135323e3 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/mips-symbol.test @@ -0,0 +1,62 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objcopy %t %t2 +# RUN: llvm-readobj --symbols %t2 | FileCheck %s + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_MIPS +Symbols: + - Name: test_mips_acommon + Index: SHN_MIPS_ACOMMON + Value: 0x1234 + Binding: STB_GLOBAL + - Name: test_mips_scommon + Index: SHN_MIPS_SCOMMON + Value: 0x1237 + 
Binding: STB_GLOBAL + - Name: test_mips_sundefined + Index: SHN_MIPS_SUNDEFINED + Value: 0x1238 + Binding: STB_GLOBAL + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Local (0x0) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined (0x0) +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: test_mips_acommon +# CHECK-NEXT: Value: 0x1234 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Processor Specific (0xFF00) +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: test_mips_scommon +# CHECK-NEXT: Value: 0x1237 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Processor Specific (0xFF03) +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: test_mips_sundefined +# CHECK-NEXT: Value: 0x1238 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Processor Specific (0xFF04) +# CHECK-NEXT: } +# CHECK-NEXT:] diff --git a/llvm/test/tools/llvm-objcopy/ELF/unsupported-machine-specific-shndx.test b/llvm/test/tools/llvm-objcopy/ELF/unsupported-machine-specific-shndx.test new file mode 100644 index 00000000000000..78494e8b3c30a2 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/unsupported-machine-specific-shndx.test @@ -0,0 +1,17 @@ +# RUN: yaml2obj %s -o %t -DMACHINE=HEXAGON +# RUN: not llvm-objcopy %t %t2 2>&1 >/dev/null | FileCheck %s -DINDEX=65280 +# RUN: yaml2obj %s -o %t -DMACHINE=MIPS +# RUN: not llvm-objcopy %t %t2 2>&1 >/dev/null | FileCheck %s -DINDEX=65283 + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Symbols: + - Name: test + Index: SHN_[[MACHINE]]_SCOMMON + Binding: STB_GLOBAL + +# CHECK: symbol 
'test' has unsupported value greater than or equal to SHN_LORESERVE: [[INDEX]] diff --git a/llvm/test/tools/obj2yaml/ELF/special-symbol-indices.yaml b/llvm/test/tools/obj2yaml/ELF/special-symbol-indices.yaml index dab050b90045a3..52b7f4ef881803 100644 --- a/llvm/test/tools/obj2yaml/ELF/special-symbol-indices.yaml +++ b/llvm/test/tools/obj2yaml/ELF/special-symbol-indices.yaml @@ -1,5 +1,7 @@ -# RUN: yaml2obj %s -o %t -# RUN: obj2yaml %t | FileCheck %s +# RUN: yaml2obj %s -o %t -DMACHINE=HEXAGON +# RUN: obj2yaml %t | FileCheck %s -DMACHINE=HEXAGON -DNAME=SCOMMON_1 +# RUN: yaml2obj %s -o %t -DMACHINE=MIPS +# RUN: obj2yaml %t | FileCheck %s -DMACHINE=MIPS -DNAME=TEXT ## Test checks that we are able to handle symbols with special/reserved indices. @@ -15,7 +17,7 @@ # CHECK-NEXT: Section: .text # CHECK-NEXT: Binding: STB_GLOBAL # CHECK-NEXT: - Name: processor_specific_index -# CHECK-NEXT: Index: SHN_HEXAGON_SCOMMON_1 +# CHECK-NEXT: Index: SHN_[[MACHINE]]_[[NAME]] # CHECK-NEXT: Binding: STB_GLOBAL # CHECK-NEXT: - Name: unknown_index # CHECK-NEXT: Index: 0xFFFE @@ -26,7 +28,7 @@ FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB Type: ET_EXEC - Machine: EM_HEXAGON + Machine: EM_[[MACHINE]] Sections: - Name: .text Type: SHT_PROGBITS @@ -42,7 +44,7 @@ Symbols: Index: 0x1 Binding: STB_GLOBAL - Name: processor_specific_index - Index: SHN_HEXAGON_SCOMMON_1 + Index: 0xff01 Binding: STB_GLOBAL - Name: unknown_index Index: 0xfffe From 06fee478d217a9fbd2ba31f92bc595ed327635a5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 May 2022 09:11:11 -0700 Subject: [PATCH 483/908] [X86] Add isSimple check to the load combine in combineExtractVectorElt. I think we need to be sure the load isn't volatile before we duplicate and shrink it. 
Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D126353 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/extractelement-load.ll | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f5c3147ecad431..92f4e012ef8ffa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -43520,7 +43520,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, auto *LoadVec = dyn_cast(InputVector); if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() && SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() && - !LikelyUsedAsVector) { + !LikelyUsedAsVector && LoadVec->isSimple()) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue NewPtr = TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx); diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll index 5e1ff78a31c109..738489b3060f56 100644 --- a/llvm/test/CodeGen/X86/extractelement-load.ll +++ b/llvm/test/CodeGen/X86/extractelement-load.ll @@ -342,29 +342,29 @@ define i32 @multi_use_volatile_load_scalarization(<4 x i32>* %p) nounwind { ; X32-SSE2-LABEL: multi_use_volatile_load_scalarization: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: movl (%ecx), %eax ; X32-SSE2-NEXT: movdqu (%ecx), %xmm0 ; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; X32-SSE2-NEXT: movd %xmm0, %eax ; X32-SSE2-NEXT: psubd %xmm1, %xmm0 ; X32-SSE2-NEXT: movdqa %xmm0, (%ecx) ; X32-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: multi_use_volatile_load_scalarization: ; X64-SSSE3: # %bb.0: -; X64-SSSE3-NEXT: movl (%rdi), %eax ; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0 ; X64-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-SSSE3-NEXT: movd %xmm0, %eax ; X64-SSSE3-NEXT: psubd %xmm1, %xmm0 ; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi) ; X64-SSSE3-NEXT: 
retq ; ; X64-AVX-LABEL: multi_use_volatile_load_scalarization: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl (%rdi), %eax ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 ; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vmovdqa %xmm0, (%rdi) +; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1 +; X64-AVX-NEXT: vmovdqa %xmm1, (%rdi) +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: retq %v = load volatile <4 x i32>, <4 x i32>* %p, align 1 %v1 = add <4 x i32> %v, From 172149e98c2aed38e9fca115309e10dc870d09e1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 May 2022 09:16:07 -0700 Subject: [PATCH 484/908] [RISCV] Preserve fast math flags in lowerVPOp. Update test to check MIR after finalize-isel instead of debug output. This is of course not the only place we should preserve FMF, but it's the most obvious one. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D126306 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++-- .../RISCV/pass-fast-math-flags-sdnode.ll | 9 ------- .../RISCV/rvv/pass-fast-math-flags-sdnode.ll | 24 +++++++++++++++++++ 3 files changed, 26 insertions(+), 11 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/pass-fast-math-flags-sdnode.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 527753a6e22cbe..49c553af1435c3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6055,11 +6055,11 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, } if (!VT.isFixedLengthVector()) - return DAG.getNode(RISCVISDOpc, DL, VT, Ops); + return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops); + SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, 
Op->getFlags()); return convertFromScalableVector(VT, VPOp, DAG, Subtarget); } diff --git a/llvm/test/CodeGen/RISCV/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/pass-fast-math-flags-sdnode.ll deleted file mode 100644 index c057aba4f26a18..00000000000000 --- a/llvm/test/CodeGen/RISCV/pass-fast-math-flags-sdnode.ll +++ /dev/null @@ -1,9 +0,0 @@ -; REQUIRES: asserts -; RUN: llc < %s -mtriple=riscv64 -mattr=+v -debug-only=isel -o /dev/null 2>&1 | FileCheck %s -declare @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) - -define @foo( %x, %y, %z, %m, i32 %vl) { -; CHECK: t14: nxv1f64 = vp_fmul nnan ninf nsz arcp contract afn reassoc t2, t4, t8, t13 - %1 = call fast @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) - ret %1 -} diff --git a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll new file mode 100644 index 00000000000000..0ccbf2d8266728 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=finalize-isel | FileCheck %s + +declare @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) + +define @foo( %x, %y, %z, %m, i32 %vl) { + ; CHECK-LABEL: name: foo + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $v8, $v9, $v0, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 32 + ; CHECK-NEXT: [[SRLI:%[0-9]+]]:gprnox0 = SRLI killed [[SLLI]], 32 + ; CHECK-NEXT: $v0 = COPY [[COPY1]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrnov0 = IMPLICIT_DEF + ; CHECK-NEXT: %7:vrnov0 = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFMUL_VV_M1_MASK [[DEF]], [[COPY3]], [[COPY2]], $v0, killed [[SRLI]], 6 /* e64 */, 1, implicit $frm 
+ ; CHECK-NEXT: $v8 = COPY %7 + ; CHECK-NEXT: PseudoRET implicit $v8 + %1 = call fast @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) + ret %1 +} From 18e6b8234a0d0b085ead20d9c26777c824bb4a3d Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Wed, 25 May 2022 16:12:11 +0000 Subject: [PATCH 485/908] Allow pointer types for atomicrmw xchg This adds support for pointer types for `atomicrmw xchg` and lets us write instructions such as `atomicrmw xchg i64** %0, i64* %1 seq_cst`. This is similar to the patch for allowing atomicrmw xchg on floating point types: https://reviews.llvm.org/D52416. Differential Revision: https://reviews.llvm.org/D124728 --- llvm/docs/LangRef.rst | 4 ++-- llvm/include/llvm/CodeGen/TargetLowering.h | 3 ++- llvm/lib/AsmParser/LLParser.cpp | 14 +++++++++----- llvm/lib/CodeGen/AtomicExpandPass.cpp | 9 +++++++-- llvm/lib/IR/Verifier.cpp | 3 ++- ...-atomicrmw-xchg-must-be-integer-or-fp-type.ll | 7 ------- llvm/test/Bitcode/compatibility.ll | 6 ++++++ llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll | 9 +++++++++ llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll | 11 +++++++++++ llvm/test/CodeGen/AMDGPU/local-atomics64.ll | 13 +++++++++++++ llvm/test/CodeGen/X86/atomic64.ll | 16 ++++++++++++++++ 11 files changed, 77 insertions(+), 18 deletions(-) delete mode 100644 llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index f5ba612d2bfdab..46549e1175ae64 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -10263,8 +10263,8 @@ operation. The operation must be one of the following keywords: For most of these operations, the type of '' must be an integer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. For xchg, this -may also be a floating point type with the same size constraints as -integers. For fadd/fsub, this must be a floating point type.
The +may also be a floating point or a pointer type with the same size constraints +as integers. For fadd/fsub, this must be a floating point type. The type of the '````' operand must be a pointer to that type. If the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not allowed to modify the number or order of execution of this diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a582838ff663e0..8ad6c4d58817fe 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2092,7 +2092,8 @@ class TargetLoweringBase { virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const { if (RMWI->getOperation() == AtomicRMWInst::Xchg && - RMWI->getValOperand()->getType()->isFloatingPointTy()) + (RMWI->getValOperand()->getType()->isFloatingPointTy() || + RMWI->getValOperand()->getType()->isPointerTy())) return AtomicExpansionKind::CastToInteger; return AtomicExpansionKind::None; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 5076c0bff44dd5..1f8d68cca3dacb 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7436,10 +7436,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { if (Operation == AtomicRMWInst::Xchg) { if (!Val->getType()->isIntegerTy() && - !Val->getType()->isFloatingPointTy()) { - return error(ValLoc, - "atomicrmw " + AtomicRMWInst::getOperationName(Operation) + - " operand must be an integer or floating point type"); + !Val->getType()->isFloatingPointTy() && + !Val->getType()->isPointerTy()) { + return error( + ValLoc, + "atomicrmw " + AtomicRMWInst::getOperationName(Operation) + + " operand must be an integer, floating point, or pointer type"); } } else if (IsFP) { if (!Val->getType()->isFloatingPointTy()) { @@ -7455,7 +7457,9 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { } } - unsigned Size = 
Val->getType()->getPrimitiveSizeInBits(); + unsigned Size = + PFS.getFunction().getParent()->getDataLayout().getTypeStoreSizeInBits( + Val->getType()); if (Size < 8 || (Size & (Size - 1))) return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized" " integer"); diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 2f4eb1f21fbf21..5ce6fbb5f64719 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -387,7 +387,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { Value *Val = RMWI->getValOperand(); Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); - Value *NewVal = Builder.CreateBitCast(Val, NewTy); + Value *NewVal = Val->getType()->isPointerTy() + ? Builder.CreatePtrToInt(Val, NewTy) + : Builder.CreateBitCast(Val, NewTy); auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, @@ -395,7 +397,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { NewRMWI->setVolatile(RMWI->isVolatile()); LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); - Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); + Value *NewRVal = RMWI->getType()->isPointerTy() + ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType()) + : Builder.CreateBitCast(NewRMWI, RMWI->getType()); RMWI->replaceAllUsesWith(NewRVal); RMWI->eraseFromParent(); return NewRMWI; @@ -527,6 +531,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, Type *OrigTy = NewVal->getType(); // This code can go away when cmpxchg supports FP types. 
+ assert(!OrigTy->isPointerTy()); bool NeedBitcast = OrigTy->isFloatingPointTy(); if (NeedBitcast) { IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 09d47954c05e74..93063a50b39b7e 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3925,7 +3925,8 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { auto Op = RMWI.getOperation(); Type *ElTy = RMWI.getOperand(1)->getType(); if (Op == AtomicRMWInst::Xchg) { - Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), + Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy() || + ElTy->isPointerTy(), "atomicrmw " + AtomicRMWInst::getOperationName(Op) + " operand must have integer or floating point type!", &RMWI, ElTy); diff --git a/llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll b/llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll deleted file mode 100644 index d0a794bd0244a0..00000000000000 --- a/llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s - -; CHECK: error: atomicrmw xchg operand must be an integer or floating point type -define void @f(i32** %ptr) { - atomicrmw xchg i32** %ptr, i32* null seq_cst - ret void -} diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index ab9c3b1c61ab78..a87e11e0fe0d6e 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -854,6 +854,12 @@ define void @fp_atomics(float* %word) { ret void } +define void @pointer_atomics(i8** %word) { +; CHECK: %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic + %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic + ret void +} + ;; Fast Math Flags define void @fastmathflags_unop(float %op1) { %f.nnan = fneg nnan float %op1 diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll 
b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll index 29c8a0b1c695cd..de5f806b919406 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -659,6 +659,15 @@ entry: ret void } +; GCN-LABEL: {{^}}atomic_xchg_pointer_offset: +; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @atomic_xchg_pointer_offset(i8** %out, i8* %in) { +entry: + %gep = getelementptr i8*, i8** %out, i32 4 + %val = atomicrmw volatile xchg i8** %gep, i8* %in seq_cst + ret void +} + ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll index a27361e54cd180..a2bd77fe40f8a4 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -794,6 +794,17 @@ entry: ret void } +; GCN-LABEL: {{^}}atomic_xchg_pointer_offset: +; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} + +; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} +define amdgpu_kernel void @atomic_xchg_pointer_offset(i8* addrspace(1)* %out, i8* %in) { +entry: + %gep = getelementptr i8*, i8* addrspace(1)* %out, i64 4 + %tmp0 = atomicrmw volatile xchg i8* addrspace(1)* %gep, i8* %in seq_cst + ret void +} + ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: ; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; CIVI: buffer_store_dwordx2 [[RET]] diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics64.ll b/llvm/test/CodeGen/AMDGPU/local-atomics64.ll index 027c441a0e3726..5c67b299cf134d 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomics64.ll +++ 
b/llvm/test/CodeGen/AMDGPU/local-atomics64.ll @@ -40,6 +40,19 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* % ret void } +; GCN-LABEL: {{^}}lds_atomic_xchg_ret_pointer_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_xchg_ret_pointer_offset(i8* addrspace(1)* %out, i8* addrspace(3)* %ptr) nounwind { + %gep = getelementptr i8*, i8* addrspace(3)* %ptr, i32 4 + %result = atomicrmw xchg i8* addrspace(3)* %gep, i8* null seq_cst + store i8* %result, i8* addrspace(1)* %out, align 8 + ret void +} + ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64: ; SICIVI: s_mov_b32 m0 ; GFX9-NOT: m0 diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll index 040845322857c9..19899bf191f52d 100644 --- a/llvm/test/CodeGen/X86/atomic64.ll +++ b/llvm/test/CodeGen/X86/atomic64.ll @@ -4,6 +4,7 @@ @sc64 = external dso_local global i64 @fsc64 = external dso_local global double +@psc64 = external dso_local global i8* define void @atomic_fetch_add64() nounwind { ; X64-LABEL: atomic_fetch_add64: @@ -780,3 +781,18 @@ define void @atomic_fetch_swapf64(double %x) nounwind { %t1 = atomicrmw xchg double* @fsc64, double %x acquire ret void } + +define void @atomic_fetch_swapptr(i8* %x) nounwind { +; X64-LABEL: atomic_fetch_swapptr: +; X64: # %bb.0: +; X64-NEXT: xchgq %rdi, psc64(%rip) +; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_swapptr: +; I486: # %bb.0: +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: xchgl %eax, psc64 +; I486-NEXT: retl + %t1 = atomicrmw xchg i8** @psc64, i8* %x acquire + ret void +} From 2a28467e5389c4d741d1825fadd39ae84ecaa5dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Tue, 10 May 2022 10:10:36 -0500 Subject: [PATCH 486/908] AMDGPU/GISel: Factor out AMDGPURegisterBankInfo::buildReadFirstLane A later change will add a 3rd user, so factoring out the common code seems useful. 
Reorganizing the executeInWaterfallLoop causes some more COPYs to be generated, but those all fold away during instruction selection. Generating the comparisons uses generic instructions over machine instructions now which admittedly shouldn't make a difference (though it should make it easier to move the waterfall loop generation to another place). Differential Revision: https://reviews.llvm.org/D125324 --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 262 +-- .../Target/AMDGPU/AMDGPURegisterBankInfo.h | 3 + .../GlobalISel/image-waterfall-loop-O0.ll | 190 +- .../GlobalISel/llvm.amdgcn.intersect_ray.ll | 32 +- .../llvm.amdgcn.raw.buffer.atomic.add.ll | 68 +- .../llvm.amdgcn.raw.buffer.atomic.cmpswap.ll | 78 +- .../llvm.amdgcn.raw.buffer.atomic.fadd.ll | 136 +- .../llvm.amdgcn.raw.buffer.load.format.f16.ll | 68 +- .../llvm.amdgcn.raw.buffer.load.format.ll | 34 +- .../GlobalISel/llvm.amdgcn.raw.buffer.load.ll | 186 +- ...llvm.amdgcn.raw.buffer.store.format.f16.ll | 124 +- ...llvm.amdgcn.raw.buffer.store.format.f32.ll | 62 +- .../llvm.amdgcn.raw.buffer.store.ll | 163 +- .../llvm.amdgcn.raw.tbuffer.load.f16.ll | 68 +- .../llvm.amdgcn.raw.tbuffer.load.ll | 34 +- .../llvm.amdgcn.raw.tbuffer.store.f16.ll | 196 +- .../llvm.amdgcn.raw.tbuffer.store.i8.ll | 196 +- .../llvm.amdgcn.raw.tbuffer.store.ll | 158 +- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 1836 +++++++++-------- .../llvm.amdgcn.struct.buffer.atomic.add.ll | 72 +- ...lvm.amdgcn.struct.buffer.atomic.cmpswap.ll | 82 +- .../llvm.amdgcn.struct.buffer.atomic.fadd.ll | 140 +- ...vm.amdgcn.struct.buffer.load.format.f16.ll | 112 +- .../llvm.amdgcn.struct.buffer.load.format.ll | 53 +- .../llvm.amdgcn.struct.buffer.load.ll | 37 +- ...m.amdgcn.struct.buffer.store.format.f16.ll | 72 +- ...m.amdgcn.struct.buffer.store.format.f32.ll | 36 +- .../llvm.amdgcn.struct.buffer.store.ll | 37 +- .../llvm.amdgcn.struct.tbuffer.load.f16.ll | 120 +- .../llvm.amdgcn.struct.tbuffer.load.ll | 58 +- 
.../regbankselect-amdgcn-s-buffer-load.mir | 46 +- .../regbankselect-amdgcn.image.load.1d.ll | 164 +- .../regbankselect-amdgcn.image.sample.1d.ll | 254 ++- .../regbankselect-amdgcn.mfma.gfx90a.mir | 154 +- .../regbankselect-amdgcn.mfma.gfx940.mir | 244 ++- .../regbankselect-amdgcn.raw.buffer.load.ll | 59 +- .../regbankselect-amdgcn.s.buffer.load.ll | 804 ++++---- .../regbankselect-amdgcn.s.buffer.load.mir | 78 +- ...regbankselect-amdgcn.struct.buffer.load.ll | 59 +- ...egbankselect-amdgcn.struct.buffer.store.ll | 59 +- .../regbankselect-amdgcn.update.dpp.mir | 36 +- .../regbankselect-waterfall-agpr.mir | 48 +- 42 files changed, 3566 insertions(+), 3152 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 4f44827e112e08..6cac8d98ec7aa4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -678,6 +678,62 @@ static LLT getHalfSizedType(LLT Ty) { return LLT::scalar(Ty.getScalarSizeInBits() / 2); } +// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector +// source value into a scalar register. 
+Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Src) const { + LLT Ty = MRI.getType(Src); + const RegisterBank *Bank = getRegBank(Src, MRI, *TRI); + + if (Bank == &AMDGPU::SGPRRegBank) + return Src; + + unsigned Bits = Ty.getSizeInBits(); + assert(Bits % 32 == 0); + + if (Bank != &AMDGPU::VGPRRegBank) { + // We need to copy from AGPR to VGPR + Src = B.buildCopy(Ty, Src).getReg(0); + MRI.setRegBank(Src, AMDGPU::VGPRRegBank); + } + + LLT S32 = LLT::scalar(32); + unsigned NumParts = Bits / 32; + SmallVector SrcParts; + SmallVector DstParts; + + if (Bits == 32) { + SrcParts.push_back(Src); + } else { + auto Unmerge = B.buildUnmerge(S32, Src); + for (unsigned i = 0; i < NumParts; ++i) + SrcParts.push_back(Unmerge.getReg(i)); + } + + for (unsigned i = 0; i < NumParts; ++i) { + Register SrcPart = SrcParts[i]; + Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + MRI.setType(DstPart, NumParts == 1 ? Ty : S32); + + const TargetRegisterClass *Constrained = + constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI); + (void)Constrained; + assert(Constrained && "Failed to constrain readfirstlane src reg"); + + B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart}); + + DstParts.push_back(DstPart); + } + + if (Bits == 32) + return DstParts[0]; + + Register Dst = B.buildMerge(Ty, DstParts).getReg(0); + MRI.setRegBank(Dst, AMDGPU::SGPRRegBank); + return Dst; +} + /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If /// any of the required SGPR operands are VGPRs, perform a waterfall loop to /// execute the instruction for each unique combination of values in all lanes @@ -710,8 +766,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineFunction *MF = &B.getMF(); const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); - const unsigned WaveAndOpc = Subtarget.isWave32() ? 
- AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; const unsigned MovExecOpc = Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; const unsigned MovExecTermOpc = @@ -783,9 +837,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineBasicBlock::iterator NewBegin = FirstInst.getIterator(); auto NewEnd = BodyBB->end(); - MachineBasicBlock::iterator I = LoopBB->end(); B.setMBB(*LoopBB); + LLT S1 = LLT::scalar(1); Register CondReg; assert(std::distance(NewBegin, NewEnd) == OrigRangeSize); @@ -819,161 +873,59 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( B.setMBB(*LoopBB); } - unsigned OpSize = OpTy.getSizeInBits(); + Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg); - // Can only do a readlane of 32-bit pieces. - if (OpSize == 32) { - // Avoid extra copies in the simple case of one 32-bit register. - Register CurrentLaneOpReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - MRI.setType(CurrentLaneOpReg, OpTy); - - constrainGenericRegister(OpReg, AMDGPU::VGPR_32RegClass, MRI); - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(OpReg); - - Register NewCondReg = MRI.createVirtualRegister(WaveRC); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - // Compare the just read M0 value to all possible Idx values. - B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(OpReg); - Op.setReg(CurrentLaneOpReg); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(WaveRC); - - // If there are multiple operands to consider, and the conditions. - B.buildInstr(WaveAndOpc) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } + // Build the comparison(s). + unsigned OpSize = OpTy.getSizeInBits(); + bool Is64 = OpSize % 64 == 0; + unsigned PartSize = Is64 ? 
64 : 32; + LLT PartTy = LLT::scalar(PartSize); + unsigned NumParts = OpSize / PartSize; + SmallVector OpParts; + SmallVector CurrentLaneParts; + + if (NumParts == 1) { + OpParts.push_back(OpReg); + CurrentLaneParts.push_back(CurrentLaneReg); } else { - LLT S32 = LLT::scalar(32); - SmallVector ReadlanePieces; - - // The compares can be done as 64-bit, but the extract needs to be done - // in 32-bit pieces. - - bool Is64 = OpSize % 64 == 0; - - unsigned UnmergeTySize = Is64 ? 64 : 32; - unsigned CmpOp = - Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64; - - // Insert the unmerge before the loop. - - B.setMBB(MBB); - unsigned NumPieces = OpSize / UnmergeTySize; - SmallVector UnmergePieces; - if (NumPieces == 1) { - UnmergePieces.push_back(OpReg); - } else { - LLT UnmergeTy = LLT::scalar(UnmergeTySize); - MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg); - for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) - UnmergePieces.push_back(Unmerge.getReg(PieceIdx)); + auto UnmergeOp = B.buildUnmerge(PartTy, OpReg); + auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg); + for (unsigned i = 0; i < NumParts; ++i) { + OpParts.push_back(UnmergeOp.getReg(i)); + CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i)); + MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank); + MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank); } - B.setMBB(*LoopBB); - - for (Register UnmergePiece : UnmergePieces) { - Register CurrentLaneOpReg; - if (Is64) { - Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); - Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); - - MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); - MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); - MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. 
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegLo) - .addReg(UnmergePiece, 0, AMDGPU::sub0); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegHi) - .addReg(UnmergePiece, 0, AMDGPU::sub1); - - CurrentLaneOpReg = - B.buildMerge(LLT::scalar(64), - {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) - .getReg(0); - - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); - - if (OpTy.getScalarSizeInBits() == 64) { - // If we need to produce a 64-bit element vector, so use the - // merged pieces - ReadlanePieces.push_back(CurrentLaneOpReg); - } else { - // 32-bit element type. - ReadlanePieces.push_back(CurrentLaneOpRegLo); - ReadlanePieces.push_back(CurrentLaneOpRegHi); - } - } else { - CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32); - MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(UnmergePiece); - ReadlanePieces.push_back(CurrentLaneOpReg); - } + } - Register NewCondReg = MRI.createVirtualRegister(WaveRC); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - B.buildInstr(CmpOp) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(UnmergePiece); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(WaveRC); - - // If there are multiple operands to consider, and the conditions. 
- B.buildInstr(WaveAndOpc) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } - } + for (unsigned i = 0; i < NumParts; ++i) { + auto CmpReg = B.buildICmp(CmpInst::ICMP_EQ, S1, CurrentLaneParts[i], + OpParts[i]).getReg(0); + MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank); - // FIXME: Build merge seems to switch to CONCAT_VECTORS but not - // BUILD_VECTOR - if (OpTy.isVector()) { - auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); - } else if (ReadlanePieces.size() > 1) { - auto Merge = B.buildMerge(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); + if (!CondReg) { + CondReg = CmpReg; } else { - Op.setReg(ReadlanePieces[0]); + CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0); + MRI.setRegBank(CondReg, AMDGPU::VCCRegBank); } } + Op.setReg(CurrentLaneReg); + // Make sure we don't re-process this register again. WaterfalledRegMap.insert(std::make_pair(OldReg, Op.getReg())); } } + // The ballot becomes a no-op during instruction selection. + CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot, + {LLT::scalar(Subtarget.isWave32() ? 32 : 64)}, + false) + .addReg(CondReg) + .getReg(0); + MRI.setRegClass(CondReg, WaveRC); + // Update EXEC, save the original EXEC value to VCC. 
B.buildInstr(AndSaveExecOpc) .addDef(NewExec) @@ -1061,28 +1013,10 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( if (Bank == &AMDGPU::SGPRRegBank) return; - LLT Ty = MRI.getType(Reg); MachineIRBuilder B(MI); - if (Bank != &AMDGPU::VGPRRegBank) { - // We need to copy from AGPR to VGPR - Reg = B.buildCopy(Ty, Reg).getReg(0); - MRI.setRegBank(Reg, AMDGPU::VGPRRegBank); - } - - Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - B.buildInstr(AMDGPU::V_READFIRSTLANE_B32) - .addDef(SGPR) - .addReg(Reg); - - MRI.setType(SGPR, Ty); - - const TargetRegisterClass *Constrained = - constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI); - (void)Constrained; - assert(Constrained && "Failed to constrain readfirstlane src reg"); - - MI.getOperand(OpIdx).setReg(SGPR); + Reg = buildReadFirstLane(B, MRI, Reg); + MI.getOperand(OpIdx).setReg(Reg); } /// Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 736b77a5e9be9d..4079f34d18b391 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -59,6 +59,9 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo { SmallSet &SGPROperandRegs, MachineRegisterInfo &MRI) const; + Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Src) const; + bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index b4bc8da9c00934..a6be8956dbcd71 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -8,7 +8,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: v_mov_b32_e32 v14, v1 ; CHECK-NEXT: v_mov_b32_e32 v13, v2 @@ -25,132 +25,128 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_mov_b32_e32 v5, v10 ; CHECK-NEXT: v_mov_b32_e32 v6, v9 ; CHECK-NEXT: v_mov_b32_e32 v7, v8 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s8, 0 ; 
CHECK-NEXT: s_mov_b32 s4, s8 ; CHECK-NEXT: s_mov_b32 s5, s8 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 -; CHECK-NEXT: v_writelane_b32 v15, s4, 0 -; CHECK-NEXT: v_writelane_b32 v15, s5, 1 -; CHECK-NEXT: v_writelane_b32 v15, s6, 2 -; CHECK-NEXT: v_writelane_b32 v15, s7, 3 +; CHECK-NEXT: v_writelane_b32 v16, s4, 0 +; CHECK-NEXT: v_writelane_b32 v16, s5, 1 +; CHECK-NEXT: v_writelane_b32 v16, s6, 2 +; CHECK-NEXT: v_writelane_b32 v16, s7, 3 ; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, s6 ; CHECK-NEXT: s_mov_b32 s5, s6 -; CHECK-NEXT: v_mov_b32_e32 v9, s5 -; CHECK-NEXT: v_mov_b32_e32 v8, s4 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v9, v1 -; CHECK-NEXT: v_mov_b32_e32 v8, v0 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v9, v3 -; CHECK-NEXT: v_mov_b32_e32 v8, v2 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v9, v5 -; CHECK-NEXT: v_mov_b32_e32 v8, v4 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v0, v6 -; CHECK-NEXT: v_mov_b32_e32 v1, v7 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s4, exec_lo -; CHECK-NEXT: v_writelane_b32 v15, s4, 4 +; CHECK-NEXT: v_writelane_b32 v16, s4, 4 ; CHECK-NEXT: .LBB0_1: ; =>This Inner 
Loop Header: Depth=1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s8, v6 -; CHECK-NEXT: v_readfirstlane_b32 s19, v7 -; CHECK-NEXT: s_mov_b32 s4, s8 -; CHECK-NEXT: s_mov_b32 s5, s19 -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[4:5], v[6:7] -; CHECK-NEXT: v_readfirstlane_b32 s18, v4 -; CHECK-NEXT: v_readfirstlane_b32 s17, v5 -; CHECK-NEXT: s_mov_b32 s6, s18 -; CHECK-NEXT: s_mov_b32 s7, s17 -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[6:7], v[4:5] -; CHECK-NEXT: s_and_b32 s5, s4, s5 -; CHECK-NEXT: v_readfirstlane_b32 s16, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, v8 +; CHECK-NEXT: v_mov_b32_e32 v6, v9 +; 
CHECK-NEXT: v_mov_b32_e32 v5, v10 +; CHECK-NEXT: v_mov_b32_e32 v4, v11 +; CHECK-NEXT: v_mov_b32_e32 v3, v12 +; CHECK-NEXT: v_mov_b32_e32 v2, v13 +; CHECK-NEXT: v_mov_b32_e32 v1, v14 +; CHECK-NEXT: v_mov_b32_e32 v0, v15 +; CHECK-NEXT: v_readfirstlane_b32 s12, v7 +; CHECK-NEXT: v_readfirstlane_b32 s10, v6 +; CHECK-NEXT: v_readfirstlane_b32 s9, v5 +; CHECK-NEXT: v_readfirstlane_b32 s8, v4 ; CHECK-NEXT: v_readfirstlane_b32 s7, v3 -; CHECK-NEXT: s_mov_b32 s10, s16 -; CHECK-NEXT: s_mov_b32 s11, s7 -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; CHECK-NEXT: s_and_b32 s9, s4, s5 -; CHECK-NEXT: v_readfirstlane_b32 s6, v0 +; CHECK-NEXT: v_readfirstlane_b32 s6, v2 ; CHECK-NEXT: v_readfirstlane_b32 s5, v1 -; CHECK-NEXT: s_mov_b32 s10, s6 -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[0:1] -; CHECK-NEXT: s_and_b32 s4, s4, s9 -; CHECK-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 -; CHECK-NEXT: s_mov_b32 s9, s19 -; CHECK-NEXT: s_mov_b32 s10, s18 -; CHECK-NEXT: s_mov_b32 s11, s17 -; CHECK-NEXT: s_mov_b32 s12, s16 -; CHECK-NEXT: s_mov_b32 s13, s7 -; CHECK-NEXT: s_mov_b32 s14, s6 -; CHECK-NEXT: s_mov_b32 s15, s5 -; CHECK-NEXT: v_writelane_b32 v15, s8, 5 -; CHECK-NEXT: v_writelane_b32 v15, s9, 6 -; CHECK-NEXT: v_writelane_b32 v15, s10, 7 -; CHECK-NEXT: v_writelane_b32 v15, s11, 8 -; CHECK-NEXT: v_writelane_b32 v15, s12, 9 -; CHECK-NEXT: v_writelane_b32 v15, s13, 10 -; CHECK-NEXT: v_writelane_b32 v15, s14, 11 -; CHECK-NEXT: v_writelane_b32 v15, s15, 12 +; CHECK-NEXT: v_readfirstlane_b32 s4, v0 +; CHECK-NEXT: ; kill: def $sgpr12 killed $sgpr12 def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 +; CHECK-NEXT: s_mov_b32 s13, s10 +; CHECK-NEXT: s_mov_b32 s14, s9 +; CHECK-NEXT: s_mov_b32 s15, s8 +; CHECK-NEXT: s_mov_b32 s16, s7 +; CHECK-NEXT: s_mov_b32 s17, s6 +; CHECK-NEXT: s_mov_b32 s18, s5 +; CHECK-NEXT: s_mov_b32 s19, s4 +; CHECK-NEXT: v_writelane_b32 v16, s12, 5 +; CHECK-NEXT: 
v_writelane_b32 v16, s13, 6 +; CHECK-NEXT: v_writelane_b32 v16, s14, 7 +; CHECK-NEXT: v_writelane_b32 v16, s15, 8 +; CHECK-NEXT: v_writelane_b32 v16, s16, 9 +; CHECK-NEXT: v_writelane_b32 v16, s17, 10 +; CHECK-NEXT: v_writelane_b32 v16, s18, 11 +; CHECK-NEXT: v_writelane_b32 v16, s19, 12 +; CHECK-NEXT: v_mov_b32_e32 v6, v8 +; CHECK-NEXT: v_mov_b32_e32 v7, v9 +; CHECK-NEXT: v_mov_b32_e32 v4, v10 +; CHECK-NEXT: v_mov_b32_e32 v5, v11 +; CHECK-NEXT: v_mov_b32_e32 v2, v12 +; CHECK-NEXT: v_mov_b32_e32 v3, v13 +; CHECK-NEXT: v_mov_b32_e32 v0, v14 +; CHECK-NEXT: v_mov_b32_e32 v1, v15 +; CHECK-NEXT: s_mov_b64 s[4:5], s[12:13] +; CHECK-NEXT: s_mov_b64 s[10:11], s[14:15] +; CHECK-NEXT: s_mov_b64 s[8:9], s[16:17] +; CHECK-NEXT: s_mov_b64 s[6:7], s[18:19] +; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[4:5], v[6:7] +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[4:5] +; CHECK-NEXT: s_and_b32 s4, s4, s5 +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[2:3] +; CHECK-NEXT: s_and_b32 s4, s4, s5 +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1] +; CHECK-NEXT: s_and_b32 s4, s4, s5 ; CHECK-NEXT: s_and_saveexec_b32 s4, s4 -; CHECK-NEXT: v_writelane_b32 v15, s4, 13 +; CHECK-NEXT: v_writelane_b32 v16, s4, 13 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: v_readlane_b32 s4, v15, 13 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s8, v15, 5 -; CHECK-NEXT: v_readlane_b32 s9, v15, 6 -; CHECK-NEXT: v_readlane_b32 s10, v15, 7 -; CHECK-NEXT: v_readlane_b32 s11, v15, 8 -; CHECK-NEXT: v_readlane_b32 s12, v15, 9 -; CHECK-NEXT: v_readlane_b32 s13, v15, 10 -; CHECK-NEXT: v_readlane_b32 s14, v15, 11 -; CHECK-NEXT: v_readlane_b32 s15, v15, 12 -; CHECK-NEXT: v_readlane_b32 s16, v15, 0 -; CHECK-NEXT: v_readlane_b32 s17, v15, 1 -; CHECK-NEXT: v_readlane_b32 s18, v15, 2 -; CHECK-NEXT: v_readlane_b32 s19, v15, 3 +; CHECK-NEXT: v_readlane_b32 
s4, v16, 13 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s8, v16, 5 +; CHECK-NEXT: v_readlane_b32 s9, v16, 6 +; CHECK-NEXT: v_readlane_b32 s10, v16, 7 +; CHECK-NEXT: v_readlane_b32 s11, v16, 8 +; CHECK-NEXT: v_readlane_b32 s12, v16, 9 +; CHECK-NEXT: v_readlane_b32 s13, v16, 10 +; CHECK-NEXT: v_readlane_b32 s14, v16, 11 +; CHECK-NEXT: v_readlane_b32 s15, v16, 12 +; CHECK-NEXT: v_readlane_b32 s16, v16, 0 +; CHECK-NEXT: v_readlane_b32 s17, v16, 1 +; CHECK-NEXT: v_readlane_b32 s18, v16, 2 +; CHECK-NEXT: v_readlane_b32 s19, v16, 3 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; CHECK-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: v_readlane_b32 s4, v15, 4 +; CHECK-NEXT: v_readlane_b32 s4, v16, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index 80bc1114de8846..a4fdbd908f4693 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -138,10 +138,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; GFX1030-NEXT: v_readfirstlane_b32 s7, v14 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:30], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12 +; GFX1030-NEXT: ; implicit-def: $vgpr11 ; GFX1030-NEXT: ; implicit-def: $vgpr15 ; GFX1030-NEXT: ; implicit-def: $vgpr16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 @@ -175,10 +175,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] -; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 +; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -221,10 +221,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1030-NEXT: v_readfirstlane_b32 s7, v12 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; 
GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10 +; GFX1030-NEXT: ; implicit-def: $vgpr9 ; GFX1030-NEXT: ; implicit-def: $vgpr13 ; GFX1030-NEXT: ; implicit-def: $vgpr14 ; GFX1030-NEXT: ; implicit-def: $vgpr15 @@ -259,10 +259,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1013-NEXT: v_readfirstlane_b32 s7, v12 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] -; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10 +; GFX1013-NEXT: ; implicit-def: $vgpr9 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -304,10 +304,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX1030-NEXT: v_readfirstlane_b32 s7, v15 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:31], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13 +; GFX1030-NEXT: ; implicit-def: $vgpr12 ; GFX1030-NEXT: ; implicit-def: $vgpr16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 ; GFX1030-NEXT: ; implicit-def: $vgpr18 @@ -342,10 +342,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] 
-; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 +; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -389,10 +389,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1030-NEXT: v_readfirstlane_b32 s7, v13 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:29], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX1030-NEXT: ; implicit-def: $vgpr10 ; GFX1030-NEXT: ; implicit-def: $vgpr14 ; GFX1030-NEXT: ; implicit-def: $vgpr15 ; GFX1030-NEXT: ; implicit-def: $vgpr16 @@ -432,10 +432,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] -; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 +; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll index 2f9a6e6a17ed94..b1f7c98205f08a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -108,32 +108,36 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -167,32 +171,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = 
S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - 
; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll index c008e0a5bb9977..50fb684a590a7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -68,34 +68,38 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -105,7 +109,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY13]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY19]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -131,34 +135,38 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit 
$exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile 
dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll index afc01b3378bde6..9dca872cdea603 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -150,32 +150,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; 
GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 
0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -201,32 +205,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], 
[[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -256,32 +264,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: 
[[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def 
$scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -305,32 +317,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; GFX90A-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; 
GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll index 14c05ec94ce743..cae772b032424b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll @@ -158,32 +158,36 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -208,32 +212,36 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll index 0441368d9905e9..5ab03c10cbd14e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll @@ -108,32 +108,36 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 
= V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll index 93d807a8d269d3..4cc3e555c7d28a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll @@ -57,29 +57,33 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -109,32 +113,36 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: 
%bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], 
implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN 
[[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -466,29 +474,33 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -518,29 +530,33 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -797,29 +813,33 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 
(s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -852,30 +872,34 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], 
implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index 0e996101cbb7ea..93ce8bdacf9e74 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -167,29 +167,33 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; 
UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -215,29 +219,33 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: 
%bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -465,36 +473,40 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: %13:vgpr_32, dead %49:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; UNPACKED-NEXT: %13:vgpr_32, dead %54:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], 
implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -522,30 +534,34 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: %13:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec - ; PACKED-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index b496be3c576395..d27818afd62bdf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -123,29 +123,33 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY13]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -293,30 +297,34 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 
4096 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %15:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: %15:vgpr_32, dead %40:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; 
CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], 
[[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 5dfc64fa1b2306..939b932f5b2776 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -59,29 +59,33 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -116,9 +120,10 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 
[[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -153,32 +158,36 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; 
CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -507,29 +516,33 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -763,30 +776,34 @@ define amdgpu_ps void 
@raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: 
successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -818,29 +835,33 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll index 75bdd845394bf2..9fe1fbd91a2a2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -156,32 +156,36 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; 
UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -206,32 +210,36 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, 
implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll index 
00e31dbe0622a7..1ffbf31ae52b94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -107,32 +107,36 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll index 36fa2bf0255d76..ce4e7fd5951928 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -135,29 +135,33 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; 
UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 
1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -181,29 +185,33 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; 
PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -233,32 +241,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], 
[[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -282,32 +294,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], 
implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; 
PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -338,32 +354,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; 
UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -388,32 +408,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = 
COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll index 12a4396f62cfc9..9b4656b46dabd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll @@ -50,29 +50,33 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -96,29 +100,33 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 
= COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -148,32 +156,36 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; 
UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + 
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, 
implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -197,32 +209,36 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; 
PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -253,32 +269,36 @@ define amdgpu_ps void 
@raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit 
$exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -303,32 +323,36 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 
94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll index 026c600b2a8bb8..bab3c26716b21f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -127,29 +127,33 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = 
V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -179,32 +183,36 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, 
addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -235,32 +243,36 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit 
$exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: 
[[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -597,29 +609,33 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit 
$exec ; CHECK-NEXT: {{ $}} @@ -653,29 +669,33 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index fb4d8a8faf3b92..d7e8d2a2730fe1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -2696,27 +2696,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2738,27 +2742,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2780,27 +2788,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2826,27 +2838,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX6-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2866,27 +2882,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], 
implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2906,27 +2926,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2957,27 +2981,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: 
[[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3001,27 +3029,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3045,27 +3077,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX8-NEXT: 
[[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], 
[[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3092,27 +3128,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, 
implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3132,27 +3172,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3172,27 +3216,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3220,27 +3268,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; 
GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; 
GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit 
$exec ; GFX6-NEXT: {{ $}} @@ -3262,27 +3314,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + 
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3302,27 +3358,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3349,28 +3409,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3378,23 +3442,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], 
%subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr7 = COPY 
[[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): @@ -3406,28 +3470,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, 
implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3435,23 +3503,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; 
GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): @@ -3463,28 +3531,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + 
; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3492,23 +3564,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], 
%subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3532,28 +3604,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3561,23 +3637,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: 
[[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): @@ -3593,28 +3669,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, 
implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; 
GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3622,23 +3702,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit 
$vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): @@ -3654,28 +3734,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3683,23 +3767,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY 
[[COPY8]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3721,28 +3805,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - 
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + 
; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN 
[[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3750,23 +3838,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: 
[[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): @@ -3782,28 +3870,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; 
GFX7-NEXT: {{ $}} @@ -3811,23 +3903,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: 
[[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): @@ -3843,28 +3935,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3872,23 +3968,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; 
GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 
%soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3907,28 +4003,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; 
GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3936,23 +4036,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = 
COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): @@ -3965,28 +4065,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3994,23 +4098,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): @@ -4023,28 +4127,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term 
$exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4052,23 +4160,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4087,28 +4195,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4116,23 +4228,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; 
GFX6-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): @@ -4145,28 +4257,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed 
[[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4174,23 +4290,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 
- ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): @@ -4203,28 +4319,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4232,23 +4352,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4267,28 +4387,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: 
bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4296,23 +4420,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): @@ -4325,28 +4449,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: 
[[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4354,23 +4482,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: 
[[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: 
$vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): @@ -4383,28 +4511,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], 
implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4412,23 +4544,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4446,28 +4578,32 @@ 
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4475,23 +4611,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY7]] - ; GFX6-NEXT: 
$vgpr2 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY19]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): @@ -4503,28 +4639,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4532,23 +4672,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], 
%subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY19]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): @@ -4560,28 +4700,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) 
from unknown-address + 4064, align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4589,23 +4733,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY19]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll index c9884689e30c0d..6b6d95d1c6b015 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -118,33 +118,37 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 
1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -180,33 +184,37 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP 
%bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll index 65ca08b45d6784..0079a880ba6eb8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -75,35 +75,39 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], 
[[COPY18]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile 
dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -113,7 +117,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY15]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY21]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -141,35 +145,39 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll index 8ee34c3da49459..53a9c831e5e291 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -164,33 +164,37 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -218,33 +222,37 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], 
implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ 
$}} @@ -276,32 +284,36 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; 
GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; 
GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -327,32 +339,36 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; GFX90A-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: 
[[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll index 849523e11c50fc..19bff0f4d614dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -171,33 +171,37 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = 
COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: 
[[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -207,25 +211,25 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 
= V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec + ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY18]], implicit $exec - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY19]], implicit $exec - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY20]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec + ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec + ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 
[[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] @@ -245,33 +249,37 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -281,10 +289,10 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY11]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY12]] + ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] + ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index a7fab9821eebf0..54bd85b95f2a7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -118,33 +117,37 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def 
$exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -154,14 +157,14 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY11]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY12]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY13]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY14]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll index 93927318961f2b..9623fa14eccebe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; Natural mapping define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { @@ -189,33 +188,37 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll index 54fc5dcd476d89..5f0087912add31 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll @@ -150,33 +150,37 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit 
$exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], 
%subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -204,33 +208,37 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; 
PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: 
[[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term 
$exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll index 72ebb811fc8501..31b8977e1513f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll @@ -112,33 +112,37 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit 
$exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll 
index bd6fad5e071de1..dd5265a7b68e4c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; Natural mapping @@ -118,33 +117,37 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]].sub1, implicit $exec - ; CHECK-NEXT: 
[[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; 
CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY21]], [[COPY19]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll index a75d2928465f3b..bba5cebc0ca320 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=UNPACKED %s define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 
%vindex, i32 %voffset, i32 inreg %soffset) { ; PACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -211,33 +211,37 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; 
PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -247,10 +251,10 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; 
PACKED-NEXT: $vgpr0 = COPY [[COPY11]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY12]] + ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] + ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): @@ -267,33 +271,37 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec 
= REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; 
UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = 
S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -303,25 +311,25 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec + ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec ; UNPACKED-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY18]], implicit $exec - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY19]], implicit $exec - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY20]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec + ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec + ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll index e3cea6fa10169d..3f6a03426aeaf1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -140,33 +140,37 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def 
$exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -176,14 +180,14 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY11]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY12]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY13]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY14]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir index 80383c7b2280b4..484c1beebea858 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir @@ -62,24 +62,25 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -117,24 +118,25 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %8, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) diff 
--git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 0a1a0438e6770a..3e4b0d4fa1e1d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -113,34 +113,33 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: 
[[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -175,34 +174,33 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: 
[[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -245,34 +243,33 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = 
G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -308,34 +305,33 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), 
[[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), 
[[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index a0ed8d7ddecb31..8f62f75b97c6fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -134,34 +134,33 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: 
[[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), 
[[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -200,34 +199,33 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, 
implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -273,24 +271,25 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -329,24 +328,25 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) + ; GREEDY-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -392,46 +392,45 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), 
[[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec + ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) + ; FAST-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] + ; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] + ; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] + ; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -470,46 +469,45 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; GREEDY-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def 
$scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), 
[[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; GREEDY-NEXT: 
[[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] + ; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] + ; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] + ; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir index 8ea58fac3aba06..1e05a4b8460999 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir @@ -12,18 +12,20 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: 
[[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 @@ -41,18 +43,20 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -70,18 +74,20 @@ body: | ; FAST-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -99,18 +105,20 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x 
s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + 
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 @@ -128,18 +136,20 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x16bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x16bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -157,18 +167,20 @@ body: | ; FAST-LABEL: name: mfma_f64_16x16x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FAST: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) ; GREEDY-LABEL: name: mfma_f64_16x16x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -186,18 +198,20 @@ body: | ; FAST-LABEL: name: mfma_f64_4x4x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; FAST: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) ; GREEDY-LABEL: name: mfma_f64_4x4x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $agpr0_agpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir index 5f9822cf58b92a..22d5fe9911e393 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir @@ -12,18 +12,20 @@ body: | ; FAST-LABEL: name: mfma_i32_16x16x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_i32_16x16x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -41,18 +43,20 @@ body: | ; FAST-LABEL: name: mfma_i32_32x32x16_i8_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: 
[[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_i32_32x32x16_i8_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; 
GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -70,18 +74,20 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x8_xf32_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x8_xf32_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; 
GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -99,18 +105,20 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4_xf32_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x4_xf32_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -128,20 +136,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_f32_16x16x32_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + 
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -160,20 +170,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_f32_32x32x16_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -192,20 +204,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ 
$}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -224,20 +238,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) 
= G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -256,20 +272,22 @@ body: | ; FAST-LABEL: name: smfmac_i32_16x16x64_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_i32_16x16x64_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; 
GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -288,20 +306,22 @@ body: | ; FAST-LABEL: name: smfmac_i32_32x32x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_i32_32x32x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll index e14262c416406f..5fca16e473a4eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -64,24 +64,25 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - 
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: 
{{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -125,9 +126,10 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -165,27 +167,28 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, 
%bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index 57e4d40bb9c814..b941a250004ebb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -1563,22 +1563,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1607,22 +1608,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; 
GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1658,22 +1660,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1703,22 +1706,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: 
[[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1756,22 +1760,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1802,22 +1807,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GREEDY-NEXT: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 
[[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1853,22 +1859,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: 
[[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1897,22 +1904,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1947,22 +1955,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), 
implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1991,22 +2000,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2043,22 +2053,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2073,15 +2084,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = 
COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GREEDY: bb.1 (%ir-block.0): @@ -2098,22 +2109,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2128,15 +2140,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = 
COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2162,22 +2174,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2192,15 +2205,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GREEDY: bb.1 (%ir-block.0): @@ -2218,22 +2231,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 
[[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2248,15 +2262,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2280,22 +2294,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], 
implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2310,15 +2325,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), 
[[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GREEDY: bb.1 (%ir-block.0): @@ -2336,22 +2351,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), 
[[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2366,15 +2382,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 
%soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2397,22 +2413,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2427,15 +2444,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: 
[[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: bb.1 (%ir-block.0): @@ -2452,22 +2469,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2482,15 +2500,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) 
- ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2513,22 +2531,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2543,15 +2562,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; 
CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: bb.1 (%ir-block.0): @@ -2568,22 +2587,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), 
implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2598,15 +2618,15 @@ 
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2629,22 +2649,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = 
G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2659,15 +2680,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY 
[[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: bb.1 (%ir-block.0): @@ -2684,22 +2705,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; 
GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2714,15 +2736,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2744,22 +2766,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; 
CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2774,15 +2797,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: 
s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GREEDY: bb.1 (%ir-block.0): @@ -2798,22 +2821,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; 
GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2828,15 +2852,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), 
[[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir index bcd55e225f4728..45a2ab5b774c2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir @@ -17,27 +17,29 @@ body: | ; FAST-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; FAST: 
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 - ; FAST: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 - ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] - ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] - ; GREEDY: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $sgpr0 %2:vgpr(s32) = G_CONSTANT i32 256 @@ -57,26 +59,28 @@ body: | ; FAST-LABEL: name: s_buffer_load_negative_offset ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - ; FAST: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: 
[[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_negative_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = 
G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 -60 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll index 56cff7cde87868..49e352858f2254 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -62,24 +62,25 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: 
[[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -122,9 +123,10 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -161,27 +163,28 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll index 42ee4da9fdaf31..94cad1624b5621 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -64,24 +64,25 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), 
[[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ 
-124,9 +125,10 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -163,27 +165,28 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; 
CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir index bee5f3ea1be245..46184e8b4192a5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir @@ -11,11 +11,12 @@ body: | ; CHECK-LABEL: name: update_dpp_ss ; CHECK: liveins: 
$sgpr0, $sgpr1 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -32,10 +33,11 @@ body: | ; CHECK-LABEL: name: update_dpp_sv ; CHECK: liveins: $sgpr0, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -52,10 +54,11 @@ body: | ; CHECK-LABEL: name: update_dpp_vs ; CHECK: liveins: $vgpr0, $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY 
[[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -72,9 +75,10 @@ body: | ; CHECK-LABEL: name: update_dpp_vv ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir index 07c2d682fcacd4..c0f72eccf52498 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir @@ -31,9 +31,10 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -75,34 +76,33 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s32>) = COPY [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %6, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), 
[[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), 
[[UV12]](s64), [[UV8]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) From 0c8fdd72309e7b6f542acd8213a23917e0fd2e9e Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Mon, 23 May 2022 18:04:30 -0700 Subject: [PATCH 487/908] [mlir] Fix Tensor_InsertSliceOp description This commit fixes `Tensor_InsertSliceOp` `sizes` inputs/attributes description. Before this commit, the description says the `sizes` inputs/attributes denote the size of the return type. But according to the `InsertSliceOpConstantArgumentFolder` in `lib/Dialect/Tensor/IR/TensorOps.cpp`, the `sizes` inputs/attributes actually denote the size of the source type. I had an off-line discussion with the authors of `TensorOps.td` and `TensorOps.cpp`. We concluded that it was a typo in the Op description. This commit updates the Op description to match the actual usage. 
Differential Revision: https://reviews.llvm.org/D126264 --- mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index edb1f20b3cd8a2..e32a8d65fec9c4 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -489,7 +489,7 @@ def Tensor_InsertSliceOp : Tensor_OpWithOffsetSizesAndStrides<"insert_slice", [ * dest: the tensor into which the source tensor is inserted. * offsets: tensor-rank number of offsets into the `dest` tensor into which the slice is inserted. - * sizes: tensor-rank number of sizes which specify the sizes of the result + * sizes: tensor-rank number of sizes which specify the sizes of the source tensor type. * strides: tensor-rank number of strides that specify subsampling in each dimension. From 4391625255c62074037d95a55232a87eae70c60b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 May 2022 09:49:05 -0700 Subject: [PATCH 488/908] [lldb] Fix an unused function warning This patch fixes: .../llvm-project/lldb/source/Host/common/PseudoTerminal.cpp:106:20: error: unused function 'use_ptsname' [-Werror,-Wunused-function] --- lldb/source/Host/common/PseudoTerminal.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/source/Host/common/PseudoTerminal.cpp b/lldb/source/Host/common/PseudoTerminal.cpp index 13c82e8b1ea91e..be4c3c7928dfd8 100644 --- a/lldb/source/Host/common/PseudoTerminal.cpp +++ b/lldb/source/Host/common/PseudoTerminal.cpp @@ -103,6 +103,7 @@ llvm::Error PseudoTerminal::OpenSecondary(int oflag) { std::error_code(errno, std::generic_category())); } +#if !HAVE_PTSNAME_R || defined(__APPLE__) static std::string use_ptsname(int fd) { static std::mutex mutex; std::lock_guard guard(mutex); @@ -110,6 +111,7 @@ static std::string use_ptsname(int fd) { assert(r != nullptr); return r; } +#endif std::string 
PseudoTerminal::GetSecondaryName() const { assert(m_primary_fd >= 0); From a648724921b3bf877a0f3b09ac3e32d9c8de3fe5 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Fri, 29 Apr 2022 16:38:56 -0700 Subject: [PATCH 489/908] Reland "[llvm-pdbutil] Add options to only dump symbol record at specified offset and its parents or children with spcified depth." This reverts commit cfb4e782520ce59602a34732386ebbdf58136cfb. --- .../llvm/DebugInfo/CodeView/CVSymbolVisitor.h | 8 + .../llvm/DebugInfo/PDB/Native/InputFile.h | 4 +- .../llvm/DebugInfo/PDB/Native/LinePrinter.h | 6 +- llvm/include/llvm/Support/BinaryStreamArray.h | 2 + .../DebugInfo/CodeView/CVSymbolVisitor.cpp | 72 +++++++ llvm/lib/DebugInfo/PDB/Native/InputFile.cpp | 2 +- .../llvm-pdbutil/Inputs/symbol-offset.yaml | 204 ++++++++++++++++++ .../tools/llvm-pdbutil/symbol-offset.test | 176 +++++++++++++++ llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp | 15 +- llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp | 35 ++- 10 files changed, 516 insertions(+), 8 deletions(-) create mode 100644 llvm/test/tools/llvm-pdbutil/Inputs/symbol-offset.yaml create mode 100644 llvm/test/tools/llvm-pdbutil/symbol-offset.test diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h index d1c06573b39801..ef44b622d955ab 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h @@ -18,12 +18,20 @@ class SymbolVisitorCallbacks; class CVSymbolVisitor { public: + struct FilterOptions { + llvm::Optional SymbolOffset; + llvm::Optional ParentRecursiveDepth; + llvm::Optional ChildRecursiveDepth; + }; + CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks); Error visitSymbolRecord(CVSymbol &Record); Error visitSymbolRecord(CVSymbol &Record, uint32_t Offset); Error visitSymbolStream(const CVSymbolArray &Symbols); Error visitSymbolStream(const CVSymbolArray &Symbols, uint32_t InitialOffset); + Error visitSymbolStreamFiltered(const 
CVSymbolArray &Symbols, + const FilterOptions &Filter); private: SymbolVisitorCallbacks &Callbacks; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h index 5b0f433f118af2..392f183f002f88 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h @@ -180,8 +180,8 @@ Error iterateSymbolGroups(InputFile &Input, const PrintScope &HeaderScope, AutoIndent Indent(HeaderScope); FilterOptions Filters = HeaderScope.P.getFilters(); - if (Filters.NumOccurrences) { - uint32_t Modi = Filters.DumpModi; + if (Filters.DumpModi) { + uint32_t Modi = Filters.DumpModi.getValue(); SymbolGroup SG(&Input, Modi); return iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(Modi)), SG, Modi, Callback); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h b/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h index e65bfc19fd2068..0db21309f593d3 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h @@ -30,8 +30,10 @@ struct FilterOptions { std::list IncludeCompilands; uint32_t PaddingThreshold; uint32_t SizeThreshold; - uint32_t DumpModi; - uint32_t NumOccurrences; + llvm::Optional DumpModi; + llvm::Optional ParentRecurseDepth; + llvm::Optional ChildrenRecurseDepth; + llvm::Optional SymbolOffset; bool JustMyCode; }; diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h index c3e0db4dcff095..ef2233c53ec2cc 100644 --- a/llvm/include/llvm/Support/BinaryStreamArray.h +++ b/llvm/include/llvm/Support/BinaryStreamArray.h @@ -111,6 +111,8 @@ class VarStreamArray { bool valid() const { return Stream.valid(); } + bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); } + uint32_t skew() const { return Skew; } Iterator end() const { return Iterator(E); } diff --git a/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp 
b/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp index 31af9a1bd848d7..162f1db0189bdc 100644 --- a/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp +++ b/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h" #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/ErrorHandling.h" @@ -83,3 +84,74 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols, } return Error::success(); } + +Error CVSymbolVisitor::visitSymbolStreamFiltered(const CVSymbolArray &Symbols, + const FilterOptions &Filter) { + if (!Filter.SymbolOffset) + return visitSymbolStream(Symbols); + uint32_t SymbolOffset = *Filter.SymbolOffset; + uint32_t ParentRecurseDepth = + Filter.ParentRecursiveDepth ? *Filter.ParentRecursiveDepth : 0; + uint32_t ChildrenRecurseDepth = + Filter.ChildRecursiveDepth ? *Filter.ChildRecursiveDepth : 0; + if (!Symbols.isOffsetValid(SymbolOffset)) + return createStringError(inconvertibleErrorCode(), "Invalid symbol offset"); + CVSymbol Sym = *Symbols.at(SymbolOffset); + uint32_t SymEndOffset = + symbolOpensScope(Sym.kind()) ? getScopeEndOffset(Sym) : 0; + + std::vector ParentOffsets; + std::vector ParentEndOffsets; + uint32_t ChildrenDepth = 0; + for (auto Begin = Symbols.begin(), End = Symbols.end(); Begin != End; + ++Begin) { + uint32_t BeginOffset = Begin.offset(); + CVSymbol BeginSym = *Begin; + if (BeginOffset < SymbolOffset) { + if (symbolOpensScope(Begin->kind())) { + uint32_t EndOffset = getScopeEndOffset(BeginSym); + if (SymbolOffset < EndOffset) { + ParentOffsets.push_back(BeginOffset); + ParentEndOffsets.push_back(EndOffset); + } + } + } else if (BeginOffset == SymbolOffset) { + // Found symbol at offset. Visit its parent up to ParentRecurseDepth. 
+ if (ParentRecurseDepth >= ParentOffsets.size()) + ParentRecurseDepth = ParentOffsets.size(); + uint32_t StartIndex = ParentOffsets.size() - ParentRecurseDepth; + while (StartIndex < ParentOffsets.size()) { + if (!Symbols.isOffsetValid(ParentOffsets[StartIndex])) + break; + CVSymbol Parent = *Symbols.at(ParentOffsets[StartIndex]); + if (auto EC = visitSymbolRecord(Parent, ParentOffsets[StartIndex])) + return EC; + ++StartIndex; + } + if (auto EC = visitSymbolRecord(Sym, SymbolOffset)) + return EC; + } else if (BeginOffset <= SymEndOffset) { + if (ChildrenRecurseDepth) { + // Visit children. + if (symbolEndsScope(Begin->kind())) + --ChildrenDepth; + if (ChildrenDepth < ChildrenRecurseDepth || + BeginOffset == SymEndOffset) { + if (auto EC = visitSymbolRecord(BeginSym, BeginOffset)) + return EC; + } + if (symbolOpensScope(Begin->kind())) + ++ChildrenDepth; + } + } else { + // Visit parents' ends. + if (ParentRecurseDepth && BeginOffset == ParentEndOffsets.back()) { + if (auto EC = visitSymbolRecord(BeginSym, BeginOffset)) + return EC; + ParentEndOffsets.pop_back(); + --ParentRecurseDepth; + } + } + } + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp index 99dd1316dd8fe5..ec7c4e1e3353c0 100644 --- a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp @@ -579,7 +579,7 @@ bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group, return false; // If the arg was not specified on the command line, always dump all modules. - if (Filters.NumOccurrences == 0) + if (!Filters.DumpModi) return true; // Otherwise, only dump if this is the same module specified. 
diff --git a/llvm/test/tools/llvm-pdbutil/Inputs/symbol-offset.yaml b/llvm/test/tools/llvm-pdbutil/Inputs/symbol-offset.yaml new file mode 100644 index 00000000000000..ca574347b07ccc --- /dev/null +++ b/llvm/test/tools/llvm-pdbutil/Inputs/symbol-offset.yaml @@ -0,0 +1,204 @@ +--- +DbiStream: + VerHeader: V70 + Age: 1 + BuildNumber: 36363 + PdbDllVersion: 0 + PdbDllRbld: 0 + Flags: 0 + MachineType: Amd64 + Modules: + - Module: '/tmp/test.obj' + Modi: + Signature: 4 + Records: + - Kind: S_GPROC32 + ProcSym: + PtrParent: 0 + PtrEnd: 468 + PtrNext: 0 + CodeSize: 137 + DbgStart: 0 + DbgEnd: 0 + FunctionType: 4104 + Offset: 176 + Segment: 1 + Flags: [ ] + DisplayName: main + - Kind: S_FRAMEPROC + FrameProcSym: + TotalFrameBytes: 56 + PaddingFrameBytes: 0 + OffsetToPadding: 0 + BytesOfCalleeSavedRegisters: 0 + OffsetOfExceptionHandler: 0 + SectionIdOfExceptionHandler: 0 + Flags: [ ] + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: argc + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 4 + Range: + OffsetStart: 197 + ISectStart: 1 + Range: 116 + Gaps: [] + - Kind: S_LOCAL + LocalSym: + Type: 4102 + Flags: [ IsParameter ] + VarName: argv + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 8 + Range: + OffsetStart: 197 + ISectStart: 1 + Range: 116 + Gaps: [] + - Kind: S_INLINESITE + InlineSiteSym: + PtrParent: 4 + PtrEnd: 464 + Inlinee: 4098 + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: x + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 24 + Range: + OffsetStart: 221 + ISectStart: 1 + Range: 87 + Gaps: [] + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: y + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 28 + Range: + OffsetStart: 221 + ISectStart: 1 + Range: 87 + Gaps: [] + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: z + - Kind: 
S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 32 + Range: + OffsetStart: 221 + ISectStart: 1 + Range: 87 + Gaps: [] + - Kind: S_INLINESITE + InlineSiteSym: + PtrParent: 144 + PtrEnd: 288 + Inlinee: 4096 + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: x + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 40 + Range: + OffsetStart: 229 + ISectStart: 1 + Range: 7 + Gaps: [] + - Kind: S_INLINESITE_END + ScopeEndSym: {} + - Kind: S_INLINESITE + InlineSiteSym: + PtrParent: 144 + PtrEnd: 412 + Inlinee: 4097 + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: x + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 44 + Range: + OffsetStart: 260 + ISectStart: 1 + Range: 19 + Gaps: [] + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: y + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 48 + Range: + OffsetStart: 260 + ISectStart: 1 + Range: 19 + Gaps: [] + - Kind: S_INLINESITE + InlineSiteSym: + PtrParent: 292 + PtrEnd: 408 + Inlinee: 4096 + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: x + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 52 + Range: + OffsetStart: 272 + ISectStart: 1 + Range: 7 + Gaps: [] + - Kind: S_INLINESITE_END + ScopeEndSym: {} + - Kind: S_INLINESITE_END + ScopeEndSym: {} + - Kind: S_INLINESITE + InlineSiteSym: + PtrParent: 144 + PtrEnd: 460 + Inlinee: 4096 + - Kind: S_LOCAL + LocalSym: + Type: 116 + Flags: [ IsParameter ] + VarName: x + - Kind: S_DEFRANGE_FRAMEPOINTER_REL + DefRangeFramePointerRelSym: + Offset: 36 + Range: + OffsetStart: 299 + ISectStart: 1 + Range: 7 + Gaps: [] + - Kind: S_INLINESITE_END + ScopeEndSym: {} + - Kind: S_INLINESITE_END + ScopeEndSym: {} + - Kind: S_END + ScopeEndSym: {} +... 
diff --git a/llvm/test/tools/llvm-pdbutil/symbol-offset.test b/llvm/test/tools/llvm-pdbutil/symbol-offset.test new file mode 100644 index 00000000000000..175aaaca5874db --- /dev/null +++ b/llvm/test/tools/llvm-pdbutil/symbol-offset.test @@ -0,0 +1,176 @@ +; RUN: llvm-pdbutil yaml2pdb %p/Inputs/symbol-offset.yaml --pdb=%t.pdb + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=80 %t.pdb \ +; RUN: | FileCheck --check-prefix=OFFSET %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=4 --show-parents \ +; RUN: %t.pdb | FileCheck --check-prefix=SHOW-PARENT1 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=80 --show-parents \ +; RUN: %t.pdb | FileCheck --check-prefix=SHOW-PARENT2 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=308 --show-parents \ +; RUN: %t.pdb | FileCheck --check-prefix=SHOW-PARENT3 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=260 --show-parents \ +; RUN: --parent-recurse-depth=1 %t.pdb \ +; RUN: | FileCheck --check-prefix=SHOW-PARENT-DEPTH1 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=260 --show-parents \ +; RUN: --parent-recurse-depth=2 %t.pdb \ +; RUN: | FileCheck --check-prefix=SHOW-PARENT-DEPTH2 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=416 --show-children\ +; RUN: %t.pdb | FileCheck --check-prefix=SHOW-CHILDREN1 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=292 --show-children\ +; RUN: %t.pdb | FileCheck --check-prefix=SHOW-CHILDREN2 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=4 --show-children \ +; RUN: --children-recurse-depth=1 %t.pdb \ +; RUN: | FileCheck --check-prefix=SHOW-CHILDREN-DEPTH1 %s + +; RUN: llvm-pdbutil dump --symbols --modi=0 --symbol-offset=292 --show-children\ +; RUN: --children-recurse-depth=2 %t.pdb \ +; RUN: | FileCheck --check-prefix=SHOW-CHILDREN-DEPTH2 %s + +The pdb file is generate from following cpp file and use llvm-pdbutil's pdb2yaml +functionality to 
convert pdb to yaml. Unrelated information was removed from the +yaml test file. +; [[clang::always_inline]] int func1(int x) { +; return x * 2; +; } +; +; [[clang::always_inline]] int func2(int x, int y) { +; return func1(x + y); +; } +; +; [[clang::always_inline]] int func3(int x, int y, int z) { +; y += func1(x); +; x += func2(y, z); +; return x + func1(x); +; } +; +; int main(int argc, char** argv) { +; return func3(argc, 1, 2); +; } + + +OFFSET: 80 | S_LOCAL [size = 16] `argc` +OFFSET-NEXT: type=0x0074 (int), flags = param + +SHOW-PARENT1: 4 | S_GPROC32 [size = 44] `main` +SHOW-PARENT1-NEXT: parent = 0, end = 468, addr = 0001:0176, code size = 137 +SHOW-PARENT1-NEXT: type = `0x1008 ()`, debug start = 0, debug end = 0, flags = none + +SHOW-PARENT2: 4 | S_GPROC32 [size = 44] `main` +SHOW-PARENT2-NEXT: parent = 0, end = 468, addr = 0001:0176, code size = 137 +SHOW-PARENT2-NEXT: type = `0x1008 ()`, debug start = 0, debug end = 0, flags = none +SHOW-PARENT2-NEXT: 80 | S_LOCAL [size = 16] `argc` +SHOW-PARENT2-NEXT: type=0x0074 (int), flags = param +SHOW-PARENT2-NEXT: 468 | S_END [size = 4] + +SHOW-PARENT3: 4 | S_GPROC32 [size = 44] `main` +SHOW-PARENT3-NEXT: parent = 0, end = 468, addr = 0001:0176, code size = 137 +SHOW-PARENT3-NEXT: type = `0x1008 ()`, debug start = 0, debug end = 0, flags = none +SHOW-PARENT3-NEXT: 144 | S_INLINESITE [size = 16] +SHOW-PARENT3-NEXT: inlinee = 0x1002 (), parent = 4, end = 464 +SHOW-PARENT3-NEXT: 292 | S_INLINESITE [size = 16] +SHOW-PARENT3-NEXT: inlinee = 0x1001 (), parent = 144, end = 412 +SHOW-PARENT3-NEXT: 308 | S_LOCAL [size = 12] `x` +SHOW-PARENT3-NEXT: type=0x0074 (int), flags = param +SHOW-PARENT3-NEXT: 412 | S_INLINESITE_END [size = 4] +SHOW-PARENT3-NEXT: 464 | S_INLINESITE_END [size = 4] +SHOW-PARENT3-NEXT: 468 | S_END [size = 4] + + +SHOW-PARENT-DEPTH1: 244 | S_INLINESITE [size = 16] +SHOW-PARENT-DEPTH1-NEXT: inlinee = 0x1000 (), parent = 144, end = 288 +SHOW-PARENT-DEPTH1-NEXT: 260 | S_LOCAL [size = 12] `x` 
+SHOW-PARENT-DEPTH1-NEXT: type=0x0074 (int), flags = param +SHOW-PARENT-DEPTH1-NEXT: 288 | S_INLINESITE_END [size = 4] + +SHOW-PARENT-DEPTH2: 144 | S_INLINESITE [size = 16] +SHOW-PARENT-DEPTH2-NEXT: inlinee = 0x1002 (), parent = 4, end = 464 +SHOW-PARENT-DEPTH2-NEXT: 244 | S_INLINESITE [size = 16] +SHOW-PARENT-DEPTH2-NEXT: inlinee = 0x1000 (), parent = 144, end = 288 +SHOW-PARENT-DEPTH2-NEXT: 260 | S_LOCAL [size = 12] `x` +SHOW-PARENT-DEPTH2-NEXT: type=0x0074 (int), flags = param +SHOW-PARENT-DEPTH2-NEXT: 288 | S_INLINESITE_END [size = 4] +SHOW-PARENT-DEPTH2-NEXT: 464 | S_INLINESITE_END [size = 4] + +SHOW-CHILDREN1: 416 | S_INLINESITE [size = 16] +SHOW-CHILDREN1-NEXT: inlinee = 0x1000 (), parent = 144, end = 460 +SHOW-CHILDREN1-NEXT: 432 | S_LOCAL [size = 12] `x` +SHOW-CHILDREN1-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN1-NEXT: 444 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN1-NEXT: offset = 36, range = [0001:0299,+7) +SHOW-CHILDREN1-NEXT: gaps = [] +SHOW-CHILDREN1-NEXT: 460 | S_INLINESITE_END [size = 4] + +SHOW-CHILDREN2: 292 | S_INLINESITE [size = 16] +SHOW-CHILDREN2-NEXT: inlinee = 0x1001 (), parent = 144, end = 412 +SHOW-CHILDREN2-NEXT: 308 | S_LOCAL [size = 12] `x` +SHOW-CHILDREN2-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN2-NEXT: 320 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN2-NEXT: offset = 44, range = [0001:0260,+19) +SHOW-CHILDREN2-NEXT: gaps = [] +SHOW-CHILDREN2-NEXT: 336 | S_LOCAL [size = 12] `y` +SHOW-CHILDREN2-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN2-NEXT: 348 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN2-NEXT: offset = 48, range = [0001:0260,+19) +SHOW-CHILDREN2-NEXT: gaps = [] +SHOW-CHILDREN2-NEXT: 364 | S_INLINESITE [size = 16] +SHOW-CHILDREN2-NEXT: inlinee = 0x1000 (), parent = 292, end = 408 +SHOW-CHILDREN2-NEXT: 380 | S_LOCAL [size = 12] `x` +SHOW-CHILDREN2-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN2-NEXT: 392 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] 
+SHOW-CHILDREN2-NEXT: offset = 52, range = [0001:0272,+7) +SHOW-CHILDREN2-NEXT: gaps = [] +SHOW-CHILDREN2-NEXT: 408 | S_INLINESITE_END [size = 4] +SHOW-CHILDREN2-NEXT: 412 | S_INLINESITE_END [size = 4] + +SHOW-CHILDREN-DEPTH1: 4 | S_GPROC32 [size = 44] `main` +SHOW-CHILDREN-DEPTH1-NEXT: parent = 0, end = 468, addr = 0001:0176, code size = 137 +SHOW-CHILDREN-DEPTH1-NEXT: type = `0x1008 ()`, debug start = 0, debug end = 0, flags = none +SHOW-CHILDREN-DEPTH1-NEXT: 48 | S_FRAMEPROC [size = 32] +SHOW-CHILDREN-DEPTH1-NEXT: size = 56, padding size = 0, offset to padding = 0 +SHOW-CHILDREN-DEPTH1-NEXT: bytes of callee saved registers = 0, exception handler addr = 0000:0000 +SHOW-CHILDREN-DEPTH1-NEXT: local fp reg = NONE, param fp reg = NONE +SHOW-CHILDREN-DEPTH1-NEXT: flags = none +SHOW-CHILDREN-DEPTH1-NEXT: 80 | S_LOCAL [size = 16] `argc` +SHOW-CHILDREN-DEPTH1-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN-DEPTH1-NEXT: 96 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN-DEPTH1-NEXT: offset = 4, range = [0001:0197,+116) +SHOW-CHILDREN-DEPTH1-NEXT: gaps = [] +SHOW-CHILDREN-DEPTH1-NEXT: 112 | S_LOCAL [size = 16] `argv` +SHOW-CHILDREN-DEPTH1-NEXT: type=0x1006 (), flags = param +SHOW-CHILDREN-DEPTH1-NEXT: 128 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN-DEPTH1-NEXT: offset = 8, range = [0001:0197,+116) +SHOW-CHILDREN-DEPTH1-NEXT: gaps = [] +SHOW-CHILDREN-DEPTH1-NEXT: 144 | S_INLINESITE [size = 16] +SHOW-CHILDREN-DEPTH1-NEXT: inlinee = 0x1002 (), parent = 4, end = 464 +SHOW-CHILDREN-DEPTH1-NEXT: 464 | S_INLINESITE_END [size = 4] +SHOW-CHILDREN-DEPTH1-NEXT: 468 | S_END [size = 4] + +SHOW-CHILDREN-DEPTH2: 292 | S_INLINESITE [size = 16] +SHOW-CHILDREN-DEPTH2-NEXT: inlinee = 0x1001 (), parent = 144, end = 412 +SHOW-CHILDREN-DEPTH2-NEXT: 308 | S_LOCAL [size = 12] `x` +SHOW-CHILDREN-DEPTH2-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN-DEPTH2-NEXT: 320 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN-DEPTH2-NEXT: offset = 44, range = 
[0001:0260,+19) +SHOW-CHILDREN-DEPTH2-NEXT: gaps = [] +SHOW-CHILDREN-DEPTH2-NEXT: 336 | S_LOCAL [size = 12] `y` +SHOW-CHILDREN-DEPTH2-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN-DEPTH2-NEXT: 348 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN-DEPTH2-NEXT: offset = 48, range = [0001:0260,+19) +SHOW-CHILDREN-DEPTH2-NEXT: gaps = [] +SHOW-CHILDREN-DEPTH2-NEXT: 364 | S_INLINESITE [size = 16] +SHOW-CHILDREN-DEPTH2-NEXT: inlinee = 0x1000 (), parent = 292, end = 408 +SHOW-CHILDREN-DEPTH2-NEXT: 380 | S_LOCAL [size = 12] `x` +SHOW-CHILDREN-DEPTH2-NEXT: type=0x0074 (int), flags = param +SHOW-CHILDREN-DEPTH2-NEXT: 392 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16] +SHOW-CHILDREN-DEPTH2-NEXT: offset = 52, range = [0001:0272,+7) +SHOW-CHILDREN-DEPTH2-NEXT: gaps = [] +SHOW-CHILDREN-DEPTH2-NEXT: 408 | S_INLINESITE_END [size = 4] +SHOW-CHILDREN-DEPTH2-NEXT: 412 | S_INLINESITE_END [size = 4] \ No newline at end of file diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp index 12cda106c036d7..a173eb1faa6274 100644 --- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -1485,8 +1485,19 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() { Pipeline.addCallbackToPipeline(Dumper); CVSymbolVisitor Visitor(Pipeline); auto SS = ModS.getSymbolsSubstream(); - if (auto EC = - Visitor.visitSymbolStream(ModS.getSymbolArray(), SS.Offset)) { + if (opts::Filters.SymbolOffset) { + CVSymbolVisitor::FilterOptions Filter; + Filter.SymbolOffset = opts::Filters.SymbolOffset; + Filter.ParentRecursiveDepth = opts::Filters.ParentRecurseDepth; + Filter.ChildRecursiveDepth = opts::Filters.ChildrenRecurseDepth; + if (auto EC = Visitor.visitSymbolStreamFiltered(ModS.getSymbolArray(), + Filter)) { + P.formatLine("Error while processing symbol records. 
{0}", + toString(std::move(EC))); + return EC; + } + } else if (auto EC = Visitor.visitSymbolStream(ModS.getSymbolArray(), + SS.Offset)) { P.formatLine("Error while processing symbol records. {0}", toString(std::move(EC))); return EC; diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp index 7258a7f0ecd8d0..c995c7975737b5 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -562,6 +562,27 @@ cl::opt cl::opt DumpFpo("fpo", cl::desc("dump FPO records"), cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt DumpSymbolOffset( + "symbol-offset", cl::Optional, + cl::desc("only dump symbol record with the specified symbol offset"), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt DumpParents("show-parents", + cl::desc("dump the symbols record's all parents."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt + DumpParentDepth("parent-recurse-depth", cl::Optional, cl::init(-1U), + cl::desc("only recurse to a depth of N when displaying " + "parents of a symbol record."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt DumpChildren("show-children", + cl::desc("dump the symbols record's all children."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::opt + DumpChildrenDepth("children-recurse-depth", cl::Optional, cl::init(-1U), + cl::desc("only recurse to a depth of N when displaying " + "children of a symbol record."), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); + // MODULE & FILE OPTIONS cl::opt DumpModules("modules", cl::desc("dump compiland information"), cl::cat(FileOptions), cl::sub(DumpSubcommand)); @@ -1533,9 +1554,21 @@ int main(int Argc, const char **Argv) { errs().flush(); exit(1); } - opts::Filters.NumOccurrences = opts::dump::DumpModi.getNumOccurrences(); opts::Filters.DumpModi = opts::dump::DumpModi; } + if (opts::dump::DumpSymbolOffset) { + if (opts::dump::DumpModi.getNumOccurrences() != 1) { + errs() + << 
"need to specify argument '-modi' when using '-symbol-offset'.\n"; + errs().flush(); + exit(1); + } + opts::Filters.SymbolOffset = opts::dump::DumpSymbolOffset; + if (opts::dump::DumpParents) + opts::Filters.ParentRecurseDepth = opts::dump::DumpParentDepth; + if (opts::dump::DumpChildren) + opts::Filters.ChildrenRecurseDepth = opts::dump::DumpChildrenDepth; + } if (opts::PdbToYamlSubcommand) { pdb2Yaml(opts::pdb2yaml::InputFilename.front()); From afc90101a5b09d1bcc295e8c97f20b19fe80683f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 25 May 2022 12:03:23 -0500 Subject: [PATCH 490/908] Revert "AMDGPU/GISel: Factor out AMDGPURegisterBankInfo::buildReadFirstLane" This reverts commit 2a28467e5389c4d741d1825fadd39ae84ecaa5dc. --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 262 ++- .../Target/AMDGPU/AMDGPURegisterBankInfo.h | 3 - .../GlobalISel/image-waterfall-loop-O0.ll | 190 +- .../GlobalISel/llvm.amdgcn.intersect_ray.ll | 32 +- .../llvm.amdgcn.raw.buffer.atomic.add.ll | 68 +- .../llvm.amdgcn.raw.buffer.atomic.cmpswap.ll | 78 +- .../llvm.amdgcn.raw.buffer.atomic.fadd.ll | 136 +- .../llvm.amdgcn.raw.buffer.load.format.f16.ll | 68 +- .../llvm.amdgcn.raw.buffer.load.format.ll | 34 +- .../GlobalISel/llvm.amdgcn.raw.buffer.load.ll | 186 +- ...llvm.amdgcn.raw.buffer.store.format.f16.ll | 124 +- ...llvm.amdgcn.raw.buffer.store.format.f32.ll | 62 +- .../llvm.amdgcn.raw.buffer.store.ll | 163 +- .../llvm.amdgcn.raw.tbuffer.load.f16.ll | 68 +- .../llvm.amdgcn.raw.tbuffer.load.ll | 34 +- .../llvm.amdgcn.raw.tbuffer.store.f16.ll | 196 +- .../llvm.amdgcn.raw.tbuffer.store.i8.ll | 196 +- .../llvm.amdgcn.raw.tbuffer.store.ll | 158 +- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 1836 ++++++++--------- .../llvm.amdgcn.struct.buffer.atomic.add.ll | 72 +- ...lvm.amdgcn.struct.buffer.atomic.cmpswap.ll | 82 +- .../llvm.amdgcn.struct.buffer.atomic.fadd.ll | 140 +- ...vm.amdgcn.struct.buffer.load.format.f16.ll | 112 +- 
.../llvm.amdgcn.struct.buffer.load.format.ll | 53 +- .../llvm.amdgcn.struct.buffer.load.ll | 37 +- ...m.amdgcn.struct.buffer.store.format.f16.ll | 72 +- ...m.amdgcn.struct.buffer.store.format.f32.ll | 36 +- .../llvm.amdgcn.struct.buffer.store.ll | 37 +- .../llvm.amdgcn.struct.tbuffer.load.f16.ll | 120 +- .../llvm.amdgcn.struct.tbuffer.load.ll | 58 +- .../regbankselect-amdgcn-s-buffer-load.mir | 46 +- .../regbankselect-amdgcn.image.load.1d.ll | 164 +- .../regbankselect-amdgcn.image.sample.1d.ll | 254 +-- .../regbankselect-amdgcn.mfma.gfx90a.mir | 154 +- .../regbankselect-amdgcn.mfma.gfx940.mir | 244 +-- .../regbankselect-amdgcn.raw.buffer.load.ll | 59 +- .../regbankselect-amdgcn.s.buffer.load.ll | 804 ++++---- .../regbankselect-amdgcn.s.buffer.load.mir | 78 +- ...regbankselect-amdgcn.struct.buffer.load.ll | 59 +- ...egbankselect-amdgcn.struct.buffer.store.ll | 59 +- .../regbankselect-amdgcn.update.dpp.mir | 36 +- .../regbankselect-waterfall-agpr.mir | 48 +- 42 files changed, 3152 insertions(+), 3566 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 6cac8d98ec7aa4..4f44827e112e08 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -678,62 +678,6 @@ static LLT getHalfSizedType(LLT Ty) { return LLT::scalar(Ty.getScalarSizeInBits() / 2); } -// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector -// source value into a scalar register. 
-Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B, - MachineRegisterInfo &MRI, - Register Src) const { - LLT Ty = MRI.getType(Src); - const RegisterBank *Bank = getRegBank(Src, MRI, *TRI); - - if (Bank == &AMDGPU::SGPRRegBank) - return Src; - - unsigned Bits = Ty.getSizeInBits(); - assert(Bits % 32 == 0); - - if (Bank != &AMDGPU::VGPRRegBank) { - // We need to copy from AGPR to VGPR - Src = B.buildCopy(Ty, Src).getReg(0); - MRI.setRegBank(Src, AMDGPU::VGPRRegBank); - } - - LLT S32 = LLT::scalar(32); - unsigned NumParts = Bits / 32; - SmallVector SrcParts; - SmallVector DstParts; - - if (Bits == 32) { - SrcParts.push_back(Src); - } else { - auto Unmerge = B.buildUnmerge(S32, Src); - for (unsigned i = 0; i < NumParts; ++i) - SrcParts.push_back(Unmerge.getReg(i)); - } - - for (unsigned i = 0; i < NumParts; ++i) { - Register SrcPart = SrcParts[i]; - Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - MRI.setType(DstPart, NumParts == 1 ? Ty : S32); - - const TargetRegisterClass *Constrained = - constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI); - (void)Constrained; - assert(Constrained && "Failed to constrain readfirstlane src reg"); - - B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart}); - - DstParts.push_back(DstPart); - } - - if (Bits == 32) - return DstParts[0]; - - Register Dst = B.buildMerge(Ty, DstParts).getReg(0); - MRI.setRegBank(Dst, AMDGPU::SGPRRegBank); - return Dst; -} - /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If /// any of the required SGPR operands are VGPRs, perform a waterfall loop to /// execute the instruction for each unique combination of values in all lanes @@ -766,6 +710,8 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineFunction *MF = &B.getMF(); const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); + const unsigned WaveAndOpc = Subtarget.isWave32() ? 
+ AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; const unsigned MovExecOpc = Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; const unsigned MovExecTermOpc = @@ -837,9 +783,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineBasicBlock::iterator NewBegin = FirstInst.getIterator(); auto NewEnd = BodyBB->end(); + MachineBasicBlock::iterator I = LoopBB->end(); B.setMBB(*LoopBB); - LLT S1 = LLT::scalar(1); Register CondReg; assert(std::distance(NewBegin, NewEnd) == OrigRangeSize); @@ -873,59 +819,161 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( B.setMBB(*LoopBB); } - Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg); - - // Build the comparison(s). unsigned OpSize = OpTy.getSizeInBits(); - bool Is64 = OpSize % 64 == 0; - unsigned PartSize = Is64 ? 64 : 32; - LLT PartTy = LLT::scalar(PartSize); - unsigned NumParts = OpSize / PartSize; - SmallVector OpParts; - SmallVector CurrentLaneParts; - - if (NumParts == 1) { - OpParts.push_back(OpReg); - CurrentLaneParts.push_back(CurrentLaneReg); + + // Can only do a readlane of 32-bit pieces. + if (OpSize == 32) { + // Avoid extra copies in the simple case of one 32-bit register. + Register CurrentLaneOpReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + MRI.setType(CurrentLaneOpReg, OpTy); + + constrainGenericRegister(OpReg, AMDGPU::VGPR_32RegClass, MRI); + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpReg) + .addReg(OpReg); + + Register NewCondReg = MRI.createVirtualRegister(WaveRC); + bool First = CondReg == AMDGPU::NoRegister; + if (First) + CondReg = NewCondReg; + + // Compare the just read M0 value to all possible Idx values. 
+ B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) + .addDef(NewCondReg) + .addReg(CurrentLaneOpReg) + .addReg(OpReg); + Op.setReg(CurrentLaneOpReg); + + if (!First) { + Register AndReg = MRI.createVirtualRegister(WaveRC); + + // If there are multiple operands to consider, and the conditions. + B.buildInstr(WaveAndOpc) + .addDef(AndReg) + .addReg(NewCondReg) + .addReg(CondReg); + CondReg = AndReg; + } } else { - auto UnmergeOp = B.buildUnmerge(PartTy, OpReg); - auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg); - for (unsigned i = 0; i < NumParts; ++i) { - OpParts.push_back(UnmergeOp.getReg(i)); - CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i)); - MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank); - MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank); + LLT S32 = LLT::scalar(32); + SmallVector ReadlanePieces; + + // The compares can be done as 64-bit, but the extract needs to be done + // in 32-bit pieces. + + bool Is64 = OpSize % 64 == 0; + + unsigned UnmergeTySize = Is64 ? 64 : 32; + unsigned CmpOp = + Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64; + + // Insert the unmerge before the loop. 
+ + B.setMBB(MBB); + unsigned NumPieces = OpSize / UnmergeTySize; + SmallVector UnmergePieces; + if (NumPieces == 1) { + UnmergePieces.push_back(OpReg); + } else { + LLT UnmergeTy = LLT::scalar(UnmergeTySize); + MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg); + for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) + UnmergePieces.push_back(Unmerge.getReg(PieceIdx)); } - } + B.setMBB(*LoopBB); + + for (Register UnmergePiece : UnmergePieces) { + Register CurrentLaneOpReg; + if (Is64) { + Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); + Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); + + MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); + MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); + MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); + + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpRegLo) + .addReg(UnmergePiece, 0, AMDGPU::sub0); + + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpRegHi) + .addReg(UnmergePiece, 0, AMDGPU::sub1); + + CurrentLaneOpReg = + B.buildMerge(LLT::scalar(64), + {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) + .getReg(0); + + MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); + + if (OpTy.getScalarSizeInBits() == 64) { + // If we need to produce a 64-bit element vector, so use the + // merged pieces + ReadlanePieces.push_back(CurrentLaneOpReg); + } else { + // 32-bit element type. + ReadlanePieces.push_back(CurrentLaneOpRegLo); + ReadlanePieces.push_back(CurrentLaneOpRegHi); + } + } else { + CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32); + MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); + MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); + + // Read the next variant <- also loop target. 
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpReg) + .addReg(UnmergePiece); + ReadlanePieces.push_back(CurrentLaneOpReg); + } - for (unsigned i = 0; i < NumParts; ++i) { - auto CmpReg = B.buildICmp(CmpInst::ICMP_EQ, S1, CurrentLaneParts[i], - OpParts[i]).getReg(0); - MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank); + Register NewCondReg = MRI.createVirtualRegister(WaveRC); + bool First = CondReg == AMDGPU::NoRegister; + if (First) + CondReg = NewCondReg; + + B.buildInstr(CmpOp) + .addDef(NewCondReg) + .addReg(CurrentLaneOpReg) + .addReg(UnmergePiece); + + if (!First) { + Register AndReg = MRI.createVirtualRegister(WaveRC); + + // If there are multiple operands to consider, and the conditions. + B.buildInstr(WaveAndOpc) + .addDef(AndReg) + .addReg(NewCondReg) + .addReg(CondReg); + CondReg = AndReg; + } + } - if (!CondReg) { - CondReg = CmpReg; + // FIXME: Build merge seems to switch to CONCAT_VECTORS but not + // BUILD_VECTOR + if (OpTy.isVector()) { + auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); + Op.setReg(Merge.getReg(0)); + MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); + } else if (ReadlanePieces.size() > 1) { + auto Merge = B.buildMerge(OpTy, ReadlanePieces); + Op.setReg(Merge.getReg(0)); + MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); } else { - CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0); - MRI.setRegBank(CondReg, AMDGPU::VCCRegBank); + Op.setReg(ReadlanePieces[0]); } } - Op.setReg(CurrentLaneReg); - // Make sure we don't re-process this register again. WaterfalledRegMap.insert(std::make_pair(OldReg, Op.getReg())); } } - // The ballot becomes a no-op during instruction selection. - CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot, - {LLT::scalar(Subtarget.isWave32() ? 32 : 64)}, - false) - .addReg(CondReg) - .getReg(0); - MRI.setRegClass(CondReg, WaveRC); - // Update EXEC, save the original EXEC value to VCC. 
B.buildInstr(AndSaveExecOpc) .addDef(NewExec) @@ -1013,10 +1061,28 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( if (Bank == &AMDGPU::SGPRRegBank) return; + LLT Ty = MRI.getType(Reg); MachineIRBuilder B(MI); - Reg = buildReadFirstLane(B, MRI, Reg); - MI.getOperand(OpIdx).setReg(Reg); + if (Bank != &AMDGPU::VGPRRegBank) { + // We need to copy from AGPR to VGPR + Reg = B.buildCopy(Ty, Reg).getReg(0); + MRI.setRegBank(Reg, AMDGPU::VGPRRegBank); + } + + Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + B.buildInstr(AMDGPU::V_READFIRSTLANE_B32) + .addDef(SGPR) + .addReg(Reg); + + MRI.setType(SGPR, Ty); + + const TargetRegisterClass *Constrained = + constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI); + (void)Constrained; + assert(Constrained && "Failed to constrain readfirstlane src reg"); + + MI.getOperand(OpIdx).setReg(SGPR); } /// Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 4079f34d18b391..736b77a5e9be9d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -59,9 +59,6 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo { SmallSet &SGPROperandRegs, MachineRegisterInfo &MRI) const; - Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, - Register Src) const; - bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index a6be8956dbcd71..b4bc8da9c00934 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -8,7 +8,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: v_mov_b32_e32 v14, v1 ; CHECK-NEXT: v_mov_b32_e32 v13, v2 @@ -25,128 +25,132 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_mov_b32_e32 v5, v10 ; CHECK-NEXT: v_mov_b32_e32 v6, v9 ; CHECK-NEXT: v_mov_b32_e32 v7, v8 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s8, 0 ; 
CHECK-NEXT: s_mov_b32 s4, s8 ; CHECK-NEXT: s_mov_b32 s5, s8 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 -; CHECK-NEXT: v_writelane_b32 v16, s4, 0 -; CHECK-NEXT: v_writelane_b32 v16, s5, 1 -; CHECK-NEXT: v_writelane_b32 v16, s6, 2 -; CHECK-NEXT: v_writelane_b32 v16, s7, 3 +; CHECK-NEXT: v_writelane_b32 v15, s4, 0 +; CHECK-NEXT: v_writelane_b32 v15, s5, 1 +; CHECK-NEXT: v_writelane_b32 v15, s6, 2 +; CHECK-NEXT: v_writelane_b32 v15, s7, 3 ; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, s6 ; CHECK-NEXT: s_mov_b32 s5, s6 -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: v_mov_b32_e32 v1, s5 +; CHECK-NEXT: v_mov_b32_e32 v9, s5 +; CHECK-NEXT: v_mov_b32_e32 v8, s4 +; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v9, v1 +; CHECK-NEXT: v_mov_b32_e32 v8, v0 +; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v9, v3 +; CHECK-NEXT: v_mov_b32_e32 v8, v2 +; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v9, v5 +; CHECK-NEXT: v_mov_b32_e32 v8, v4 +; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v0, v6 +; CHECK-NEXT: v_mov_b32_e32 v1, v7 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s4, exec_lo -; CHECK-NEXT: v_writelane_b32 v16, s4, 4 +; CHECK-NEXT: v_writelane_b32 v15, s4, 4 ; CHECK-NEXT: .LBB0_1: ; =>This Inner 
Loop Header: Depth=1 -; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v7, v8 -; CHECK-NEXT: v_mov_b32_e32 v6, v9 -; CHECK-NEXT: v_mov_b32_e32 v5, v10 -; CHECK-NEXT: v_mov_b32_e32 v4, v11 -; CHECK-NEXT: v_mov_b32_e32 v3, v12 -; CHECK-NEXT: v_mov_b32_e32 v2, v13 -; CHECK-NEXT: v_mov_b32_e32 v1, v14 -; CHECK-NEXT: v_mov_b32_e32 v0, v15 -; CHECK-NEXT: v_readfirstlane_b32 s12, v7 -; CHECK-NEXT: v_readfirstlane_b32 s10, v6 -; CHECK-NEXT: v_readfirstlane_b32 s9, v5 -; CHECK-NEXT: v_readfirstlane_b32 s8, v4 +; CHECK-NEXT: v_readfirstlane_b32 s8, v6 +; CHECK-NEXT: v_readfirstlane_b32 s19, v7 +; 
CHECK-NEXT: s_mov_b32 s4, s8 +; CHECK-NEXT: s_mov_b32 s5, s19 +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[4:5], v[6:7] +; CHECK-NEXT: v_readfirstlane_b32 s18, v4 +; CHECK-NEXT: v_readfirstlane_b32 s17, v5 +; CHECK-NEXT: s_mov_b32 s6, s18 +; CHECK-NEXT: s_mov_b32 s7, s17 +; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[6:7], v[4:5] +; CHECK-NEXT: s_and_b32 s5, s4, s5 +; CHECK-NEXT: v_readfirstlane_b32 s16, v2 ; CHECK-NEXT: v_readfirstlane_b32 s7, v3 -; CHECK-NEXT: v_readfirstlane_b32 s6, v2 +; CHECK-NEXT: s_mov_b32 s10, s16 +; CHECK-NEXT: s_mov_b32 s11, s7 +; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] +; CHECK-NEXT: s_and_b32 s9, s4, s5 +; CHECK-NEXT: v_readfirstlane_b32 s6, v0 ; CHECK-NEXT: v_readfirstlane_b32 s5, v1 -; CHECK-NEXT: v_readfirstlane_b32 s4, v0 -; CHECK-NEXT: ; kill: def $sgpr12 killed $sgpr12 def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 -; CHECK-NEXT: s_mov_b32 s13, s10 -; CHECK-NEXT: s_mov_b32 s14, s9 -; CHECK-NEXT: s_mov_b32 s15, s8 -; CHECK-NEXT: s_mov_b32 s16, s7 -; CHECK-NEXT: s_mov_b32 s17, s6 -; CHECK-NEXT: s_mov_b32 s18, s5 -; CHECK-NEXT: s_mov_b32 s19, s4 -; CHECK-NEXT: v_writelane_b32 v16, s12, 5 -; CHECK-NEXT: v_writelane_b32 v16, s13, 6 -; CHECK-NEXT: v_writelane_b32 v16, s14, 7 -; CHECK-NEXT: v_writelane_b32 v16, s15, 8 -; CHECK-NEXT: v_writelane_b32 v16, s16, 9 -; CHECK-NEXT: v_writelane_b32 v16, s17, 10 -; CHECK-NEXT: v_writelane_b32 v16, s18, 11 -; CHECK-NEXT: v_writelane_b32 v16, s19, 12 -; CHECK-NEXT: v_mov_b32_e32 v6, v8 -; CHECK-NEXT: v_mov_b32_e32 v7, v9 -; CHECK-NEXT: v_mov_b32_e32 v4, v10 -; CHECK-NEXT: v_mov_b32_e32 v5, v11 -; CHECK-NEXT: v_mov_b32_e32 v2, v12 -; CHECK-NEXT: v_mov_b32_e32 v3, v13 -; CHECK-NEXT: v_mov_b32_e32 v0, v14 -; CHECK-NEXT: v_mov_b32_e32 v1, v15 -; CHECK-NEXT: s_mov_b64 s[4:5], s[12:13] -; CHECK-NEXT: s_mov_b64 s[10:11], s[14:15] -; CHECK-NEXT: s_mov_b64 s[8:9], s[16:17] -; CHECK-NEXT: s_mov_b64 s[6:7], s[18:19] -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[4:5], v[6:7] -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, 
s[10:11], v[4:5] -; CHECK-NEXT: s_and_b32 s4, s4, s5 -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[2:3] -; CHECK-NEXT: s_and_b32 s4, s4, s5 -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1] -; CHECK-NEXT: s_and_b32 s4, s4, s5 +; CHECK-NEXT: s_mov_b32 s10, s6 +; CHECK-NEXT: s_mov_b32 s11, s5 +; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[0:1] +; CHECK-NEXT: s_and_b32 s4, s4, s9 +; CHECK-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 +; CHECK-NEXT: s_mov_b32 s9, s19 +; CHECK-NEXT: s_mov_b32 s10, s18 +; CHECK-NEXT: s_mov_b32 s11, s17 +; CHECK-NEXT: s_mov_b32 s12, s16 +; CHECK-NEXT: s_mov_b32 s13, s7 +; CHECK-NEXT: s_mov_b32 s14, s6 +; CHECK-NEXT: s_mov_b32 s15, s5 +; CHECK-NEXT: v_writelane_b32 v15, s8, 5 +; CHECK-NEXT: v_writelane_b32 v15, s9, 6 +; CHECK-NEXT: v_writelane_b32 v15, s10, 7 +; CHECK-NEXT: v_writelane_b32 v15, s11, 8 +; CHECK-NEXT: v_writelane_b32 v15, s12, 9 +; CHECK-NEXT: v_writelane_b32 v15, s13, 10 +; CHECK-NEXT: v_writelane_b32 v15, s14, 11 +; CHECK-NEXT: v_writelane_b32 v15, s15, 12 ; CHECK-NEXT: s_and_saveexec_b32 s4, s4 -; CHECK-NEXT: v_writelane_b32 v16, s4, 13 +; CHECK-NEXT: v_writelane_b32 v15, s4, 13 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: v_readlane_b32 s4, v16, 13 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s8, v16, 5 -; CHECK-NEXT: v_readlane_b32 s9, v16, 6 -; CHECK-NEXT: v_readlane_b32 s10, v16, 7 -; CHECK-NEXT: v_readlane_b32 s11, v16, 8 -; CHECK-NEXT: v_readlane_b32 s12, v16, 9 -; CHECK-NEXT: v_readlane_b32 s13, v16, 10 -; CHECK-NEXT: v_readlane_b32 s14, v16, 11 -; CHECK-NEXT: v_readlane_b32 s15, v16, 12 -; CHECK-NEXT: v_readlane_b32 s16, v16, 0 -; CHECK-NEXT: v_readlane_b32 s17, v16, 1 -; CHECK-NEXT: v_readlane_b32 s18, v16, 2 -; CHECK-NEXT: v_readlane_b32 s19, v16, 3 +; CHECK-NEXT: v_readlane_b32 s4, v15, 
13 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s8, v15, 5 +; CHECK-NEXT: v_readlane_b32 s9, v15, 6 +; CHECK-NEXT: v_readlane_b32 s10, v15, 7 +; CHECK-NEXT: v_readlane_b32 s11, v15, 8 +; CHECK-NEXT: v_readlane_b32 s12, v15, 9 +; CHECK-NEXT: v_readlane_b32 s13, v15, 10 +; CHECK-NEXT: v_readlane_b32 s14, v15, 11 +; CHECK-NEXT: v_readlane_b32 s15, v15, 12 +; CHECK-NEXT: v_readlane_b32 s16, v15, 0 +; CHECK-NEXT: v_readlane_b32 s17, v15, 1 +; CHECK-NEXT: v_readlane_b32 s18, v15, 2 +; CHECK-NEXT: v_readlane_b32 s19, v15, 3 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; CHECK-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: v_readlane_b32 s4, v16, 4 +; CHECK-NEXT: v_readlane_b32 s4, v15, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index a4fdbd908f4693..80bc1114de8846 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -138,10 +138,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; GFX1030-NEXT: v_readfirstlane_b32 s7, v14 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] -; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:30], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr11 +; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12 ; GFX1030-NEXT: ; implicit-def: $vgpr15 ; GFX1030-NEXT: ; implicit-def: $vgpr16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 @@ -175,10 +175,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] -; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr16 +; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -221,10 +221,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1030-NEXT: v_readfirstlane_b32 s7, v12 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] -; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 +; 
GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr9 +; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1030-NEXT: ; implicit-def: $vgpr13 ; GFX1030-NEXT: ; implicit-def: $vgpr14 ; GFX1030-NEXT: ; implicit-def: $vgpr15 @@ -259,10 +259,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1013-NEXT: v_readfirstlane_b32 s7, v12 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] -; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr9 +; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -304,10 +304,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX1030-NEXT: v_readfirstlane_b32 s7, v15 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] -; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:31], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr12 +; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13 ; GFX1030-NEXT: ; implicit-def: $vgpr16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 ; GFX1030-NEXT: ; implicit-def: $vgpr18 @@ -342,10 +342,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] 
-; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr16 +; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -389,10 +389,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1030-NEXT: v_readfirstlane_b32 s7, v13 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] -; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:29], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr10 +; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11 ; GFX1030-NEXT: ; implicit-def: $vgpr14 ; GFX1030-NEXT: ; implicit-def: $vgpr15 ; GFX1030-NEXT: ; implicit-def: $vgpr16 @@ -432,10 +432,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] -; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr16 +; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll index b1f7c98205f08a..2f9a6e6a17ed94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -108,36 +108,32 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: 
[[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -171,36 +167,32 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = 
S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll index 50fb684a590a7f..c008e0a5bb9977 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -68,38 +68,34 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -109,7 +105,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY19]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY13]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -135,38 +131,34 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit 
$exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll index 9dca872cdea603..afc01b3378bde6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -150,36 +150,32 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; 
GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], 
[[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -205,36 +201,32 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY 
[[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -264,36 +256,32 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: 
[[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX908-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def 
$scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -317,36 +305,32 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, 
implicit $exec ; GFX90A-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll index cae772b032424b..14c05ec94ce743 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll @@ -158,36 +158,32 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: 
[[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -212,36 +208,32 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; 
UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed 
[[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll index 5ab03c10cbd14e..0441368d9905e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll @@ -108,36 +108,32 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; 
CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll index 4cc3e555c7d28a..93d807a8d269d3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll @@ -57,33 +57,29 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -113,36 +109,32 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 
0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -474,33 +466,29 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -530,33 +518,29 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -813,33 +797,29 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -872,34 +852,30 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; CHECK-NEXT: %14:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index 93ce8bdacf9e74..0e996101cbb7ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -167,33 +167,29 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: 
successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; UNPACKED-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP 
%bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -219,33 +215,29 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; PACKED-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), 
%bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -473,40 +465,36 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: %13:vgpr_32, dead %54:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; UNPACKED-NEXT: %13:vgpr_32, dead %49:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY11]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -534,34 +522,30 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: %13:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; PACKED-NEXT: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: 
[[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; PACKED-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index d27818afd62bdf..b496be3c576395 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -123,33 +123,29 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -297,34 +293,30 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %15:vgpr_32, dead %40:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec + ; CHECK-NEXT: %15:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; 
CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, 
[[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 939b932f5b2776..5dfc64fa1b2306 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -59,33 +59,29 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -120,10 +116,9 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -158,36 +153,32 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; 
CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -516,33 +507,29 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; 
CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -776,34 +763,30 @@ define amdgpu_ps void 
@raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: %14:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 
= COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: 
successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -835,33 +818,29 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: 
[[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll index 9fe1fbd91a2a2e..75bdd845394bf2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -156,36 +156,32 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: 
{{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY7]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - 
; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -210,36 +206,32 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], 
implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll index 1ffbf31ae52b94..00e31dbe0622a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -107,36 +107,32 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = 
COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll index ce4e7fd5951928..36fa2bf0255d76 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -135,33 +135,29 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, 
[[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], 
[[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -185,33 +181,29 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: 
[[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -241,36 +233,32 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, 
addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -294,36 +282,32 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -354,36 +338,32 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; 
UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -408,36 +388,32 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; 
PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll index 9b4656b46dabd3..12a4396f62cfc9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll @@ -50,33 +50,29 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, 
[[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -100,33 +96,29 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; 
PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -156,36 +148,32 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; 
UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -209,36 +197,32 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 
; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -269,36 +253,32 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; 
UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; 
UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -323,36 +303,32 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; 
PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll index bab3c26716b21f..026c600b2a8bb8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -127,33 +127,29 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec 
= V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -183,36 +179,32 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], 
[[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -243,36 +235,32 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], 
implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -609,33 +597,29 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = 
S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -669,33 +653,29 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: 
[[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index d7e8d2a2730fe1..fb4d8a8faf3b92 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -2696,31 +2696,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2742,31 +2738,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2788,31 +2780,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ 
-2838,31 +2826,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2882,31 +2866,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2926,31 +2906,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], 
implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2981,31 +2957,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3029,31 +3001,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3077,31 +3045,27 @@ define amdgpu_ps float 
@s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit 
$exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3128,31 +3092,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: 
[[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec 
= S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3172,31 +3132,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3216,31 +3172,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; 
GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], 
[[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3268,31 +3220,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX6-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, 
implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3314,31 +3262,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3358,31 +3302,27 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: 
[[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, 
[[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3409,32 +3349,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], 
implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3442,23 +3378,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE 
[[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = 
COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): @@ -3470,32 +3406,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: 
[[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3503,23 +3435,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: 
[[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): @@ -3531,32 +3463,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = 
COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec 
= S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET 
[[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3564,23 +3492,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: 
[[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3604,32 +3532,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3637,23 +3561,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): @@ -3669,32 +3593,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; 
GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit 
$exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3702,23 +3622,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], 
%subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY 
[[COPY15]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): @@ -3734,32 +3654,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; 
GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3767,23 +3683,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3805,32 +3721,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 
; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable 
invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3838,23 +3750,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], 
%subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): @@ -3870,32 +3782,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 
= COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3903,23 +3811,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: 
[[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): @@ -3935,32 +3843,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX8-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3968,23 +3872,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4003,32 +3907,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; 
GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4036,23 +3936,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): @@ -4065,32 +3965,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; 
GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4098,23 +3994,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE 
[[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): @@ -4127,32 +4023,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], 
implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; 
GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4160,23 +4052,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4195,32 +4087,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4228,23 +4116,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = 
REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: 
$vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): @@ -4257,32 +4145,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4290,23 +4174,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): @@ -4319,32 +4203,28 @@ define amdgpu_ps <8 x float> 
@s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant 
load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4352,23 +4232,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; 
GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4387,32 +4267,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant 
load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4420,23 +4296,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): @@ -4449,32 +4325,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4482,23 +4354,23 @@ define amdgpu_ps 
<8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; 
GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): @@ -4511,32 +4383,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: 
[[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4544,23 +4412,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit 
$vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4578,32 +4446,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; 
GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4611,23 +4475,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY13]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): @@ -4639,32 +4503,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4672,23 +4532,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr7 = COPY 
[[COPY19]] + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY13]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): @@ -4700,32 +4560,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; 
GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4733,23 +4589,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY13]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll index 6b6d95d1c6b015..c9884689e30c0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -118,37 +118,33 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 
= COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: 
[[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -184,37 +180,33 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 
(s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll index 0079a880ba6eb8..65ca08b45d6784 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -75,39 +75,35 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = 
BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -117,7 +113,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY21]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY15]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -145,39 +141,35 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), 
align 1, addrspace 4) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll index 53a9c831e5e291..8ee34c3da49459 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -164,37 +164,33 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], 
[[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -222,37 +218,33 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX90A-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; 
GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -284,36 +276,32 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 
= COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], 
implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -339,36 +327,32 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE]].sub1 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, 
implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll index 19bff0f4d614dc..849523e11c50fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -171,37 +171,33 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; 
UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: 
[[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -211,25 +207,25 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec - ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec + ; 
UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[COPY16]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec - ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec - ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY18]], implicit $exec + ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY19]], implicit $exec + ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY20]], 
[[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] @@ -249,37 +245,33 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: 
[[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -289,10 +281,10 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = COPY [[COPY11]] + ; PACKED-NEXT: $vgpr1 = COPY [[COPY12]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index 54bd85b95f2a7b..a7fab9821eebf0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -117,37 +118,33 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -157,14 +154,14 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY11]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY12]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY13]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY14]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll index 9623fa14eccebe..93927318961f2b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; Natural mapping define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { @@ -188,37 +189,33 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: 
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll index 5f0087912add31..54fc5dcd476d89 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll @@ -150,37 +150,33 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec 
+ ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -208,37 +204,33 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; PACKED-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 
implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll index 31b8977e1513f3..72ebb811fc8501 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll @@ -112,37 +112,33 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], 
[[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll index dd5265a7b68e4c..bd6fad5e071de1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; Natural mapping @@ -117,37 +118,33 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY21]], [[COPY19]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll index bba5cebc0ca320..a75d2928465f3b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s 
define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; PACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -211,37 +211,33 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; 
PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, 
[[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -251,10 +247,10 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = 
COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = COPY [[COPY11]] + ; PACKED-NEXT: $vgpr1 = COPY [[COPY12]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): @@ -271,37 +267,33 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN 
[[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -311,25 +303,25 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec - ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: 
[[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[COPY16]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec - ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec - ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY18]], implicit $exec + ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY19]], implicit $exec + ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY20]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: $vgpr0 = 
COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll index 3f6a03426aeaf1..e3cea6fa10169d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -140,37 +140,33 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -180,14 +176,14 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] + ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY11]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY12]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY13]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY14]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir index 484c1beebea858..80383c7b2280b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir @@ -62,25 +62,24 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: 
[[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -118,25 +117,24 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %8, %bb.2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 
killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 3e4b0d4fa1e1d1..0a1a0438e6770a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -113,33 +113,34 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; 
FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = 
G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -174,33 +175,34 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), 
[[V_READFIRSTLANE_B32_5]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; 
GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -243,33 +245,34 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; FAST-NEXT: 
[[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: 
[[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -305,33 +308,34 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; 
GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: 
[[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index 8f62f75b97c6fb..a0ed8d7ddecb31 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -134,33 +134,34 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: 
[[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), 
[[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -199,33 +200,34 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: 
[[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), 
[[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -271,25 +273,24 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: 
successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -328,25 +329,24 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) - ; GREEDY-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -392,45 +392,46 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; 
FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV2]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; 
FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; FAST-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc + ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec + ; FAST-NEXT: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), 
[[V_READFIRSTLANE_B32_11]](s32) + ; FAST-NEXT: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec + ; FAST-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc ; FAST-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; FAST-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; FAST-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) - ; FAST-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] - ; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] - ; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] - ; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]] - ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -469,45 +470,46 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; GREEDY-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec + ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; GREEDY-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] - ; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] - ; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] - ; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], 
[[ICMP5]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir index 1e05a4b8460999..8ea58fac3aba06 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir @@ -12,20 +12,18 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 @@ -43,20 +41,18 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), 
[[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -74,20 +70,18 @@ body: | ; FAST-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; 
GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -105,20 +99,18 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 @@ -136,20 +128,18 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x16bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: 
mfma_f32_16x16x16bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -167,20 +157,18 @@ body: | ; FAST-LABEL: name: mfma_f64_16x16x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FAST: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) ; GREEDY-LABEL: name: mfma_f64_16x16x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -198,20 +186,18 @@ body: | ; FAST-LABEL: name: mfma_f64_4x4x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 + ; FAST: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) ; GREEDY-LABEL: name: mfma_f64_4x4x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $agpr0_agpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir index 22d5fe9911e393..5f9822cf58b92a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir @@ -12,20 +12,18 @@ body: | ; FAST-LABEL: 
name: mfma_i32_16x16x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_i32_16x16x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) 
%0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -43,20 +41,18 @@ body: | ; FAST-LABEL: name: mfma_i32_32x32x16_i8_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_i32_32x32x16_i8_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -74,20 +70,18 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x8_xf32_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; 
FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x8_xf32_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -105,20 +99,18 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4_xf32_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), 
[[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x4_xf32_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), 
[[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -136,22 +128,20 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_f32_16x16x32_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -170,22 +160,20 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) 
= COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_f32_32x32x16_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), 
[[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -204,22 +192,20 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -238,22 +224,20 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: 
[[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), 
[[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -272,22 +256,20 @@ body: | ; FAST-LABEL: name: smfmac_i32_16x16x64_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_i32_16x16x64_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -306,22 +288,20 @@ body: | ; FAST-LABEL: name: smfmac_i32_32x32x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST: 
[[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_i32_32x32x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), 
[[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll index 5fca16e473a4eb..e14262c416406f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -64,25 +64,24 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) 
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -126,10 +125,9 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -167,28 +165,27 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff 
--git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index b941a250004ebb..57e4d40bb9c814 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -1563,23 +1563,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1608,23 +1607,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> 
%rsrc, i32 inreg % ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1660,23 +1658,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; 
CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1706,23 +1703,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 
[[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - 
; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1760,23 +1756,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, 
implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1807,23 +1802,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1859,23 +1853,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1904,23 +1897,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), 
[[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1955,23 +1947,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) 
- ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2000,23 +1991,22 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2053,23 +2043,22 
@@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = 
G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2084,15 +2073,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), 
[[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GREEDY: bb.1 (%ir-block.0): @@ -2109,23 +2098,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), 
[[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2140,15 +2128,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), 
[[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2174,23 +2162,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2205,15 +2192,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GREEDY: bb.1 (%ir-block.0): @@ -2231,23 +2218,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) 
= V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2262,15 +2248,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS 
[[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2294,23 +2280,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), 
[[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2325,15 +2310,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = 
COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GREEDY: bb.1 (%ir-block.0): @@ -2351,23 +2336,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), 
[[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2382,15 +2366,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY 
[[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2413,23 +2397,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: 
[[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2444,15 +2427,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: bb.1 (%ir-block.0): @@ -2469,23 +2452,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: 
[[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2500,15 +2482,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), 
[[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2531,23 +2513,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), 
[[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2562,15 +2543,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), 
[[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: bb.1 (%ir-block.0): @@ -2587,23 +2568,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 
[[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2618,15 +2598,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2649,23 +2629,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2680,15 +2659,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 
x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: bb.1 (%ir-block.0): @@ -2705,23 +2684,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], 
implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2736,15 +2714,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY 
[[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2766,23 +2744,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: 
[[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2797,15 +2774,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: 
$vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GREEDY: bb.1 (%ir-block.0): @@ -2821,23 +2798,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: 
[[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; 
GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2852,15 +2828,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir index 45a2ab5b774c2e..bcd55e225f4728 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir @@ -17,29 +17,27 @@ body: | ; FAST-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] - ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 + ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] + ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST: S_ENDPGM 0, implicit 
[[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] - ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 + ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] + ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $sgpr0 %2:vgpr(s32) = G_CONSTANT i32 256 @@ -59,28 +57,26 @@ body: | ; FAST-LABEL: name: s_buffer_load_negative_offset ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 -60 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 + ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_negative_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; 
GREEDY: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 + ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 -60 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll index 49e352858f2254..56cff7cde87868 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -62,25 +62,24 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; 
CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -123,10 +122,9 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: 
%bb.4(0x40000000), %bb.2(0x40000000) @@ -163,28 +161,27 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: 
[[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll index 94cad1624b5621..42ee4da9fdaf31 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -64,25 +64,24 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) - ; CHECK-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -125,10 +124,9 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -165,28 +163,27 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), 
[[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff 
--git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir index 46184e8b4192a5..bee5f3ea1be245 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir @@ -11,12 +11,11 @@ body: | ; CHECK-LABEL: name: update_dpp_ss ; CHECK: liveins: $sgpr0, $sgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -33,11 +32,10 @@ body: | ; CHECK-LABEL: name: update_dpp_sv ; CHECK: liveins: $sgpr0, $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -54,11 +52,10 @@ body: | ; CHECK-LABEL: name: update_dpp_vs ; CHECK: liveins: $vgpr0, $sgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -75,10 +72,9 @@ body: | ; CHECK-LABEL: name: update_dpp_vv ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir index c0f72eccf52498..07c2d682fcacd4 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir @@ -31,10 +31,9 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -76,33 +75,34 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s32>) = COPY [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %6, %bb.2 - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), 
[[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] - ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) From affa1b1cc52dfa3997c006ae828e7e5d93bcd72e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Tue, 10 May 2022 10:10:36 -0500 Subject: [PATCH 491/908] AMDGPU/GISel: Factor out AMDGPURegisterBankInfo::buildReadFirstLane A later change will add a 3rd user, so factoring out the common code seems useful. Reorganizing the executeInWaterfallLoop causes some more COPYs to be generated, but those all fold away during instruction selection. 
Generating the comparisons uses generic instructions over machine instructions now which admittedly shouldn't make a difference (though it should make it easier to move the waterfall loop generation to another place). (Resubmit with missing test added.) Differential Revision: https://reviews.llvm.org/D125324 --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 262 +-- .../Target/AMDGPU/AMDGPURegisterBankInfo.h | 3 + .../GlobalISel/image-waterfall-loop-O0.ll | 190 +- .../GlobalISel/llvm.amdgcn.intersect_ray.ll | 32 +- .../llvm.amdgcn.raw.buffer.atomic.add.ll | 68 +- .../llvm.amdgcn.raw.buffer.atomic.cmpswap.ll | 78 +- .../llvm.amdgcn.raw.buffer.atomic.fadd.ll | 136 +- .../llvm.amdgcn.raw.buffer.load.format.f16.ll | 68 +- .../llvm.amdgcn.raw.buffer.load.format.ll | 34 +- .../GlobalISel/llvm.amdgcn.raw.buffer.load.ll | 186 +- ...llvm.amdgcn.raw.buffer.store.format.f16.ll | 124 +- ...llvm.amdgcn.raw.buffer.store.format.f32.ll | 62 +- .../llvm.amdgcn.raw.buffer.store.ll | 163 +- .../llvm.amdgcn.raw.tbuffer.load.f16.ll | 68 +- .../llvm.amdgcn.raw.tbuffer.load.ll | 34 +- .../llvm.amdgcn.raw.tbuffer.store.f16.ll | 196 +- .../llvm.amdgcn.raw.tbuffer.store.i8.ll | 196 +- .../llvm.amdgcn.raw.tbuffer.store.ll | 158 +- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 1836 +++++++++-------- .../llvm.amdgcn.struct.buffer.atomic.add.ll | 72 +- ...lvm.amdgcn.struct.buffer.atomic.cmpswap.ll | 82 +- .../llvm.amdgcn.struct.buffer.atomic.fadd.ll | 140 +- ...vm.amdgcn.struct.buffer.load.format.f16.ll | 112 +- .../llvm.amdgcn.struct.buffer.load.format.ll | 53 +- .../llvm.amdgcn.struct.buffer.load.ll | 37 +- ...m.amdgcn.struct.buffer.store.format.f16.ll | 72 +- ...m.amdgcn.struct.buffer.store.format.f32.ll | 36 +- .../llvm.amdgcn.struct.buffer.store.ll | 37 +- .../llvm.amdgcn.struct.tbuffer.load.f16.ll | 120 +- .../llvm.amdgcn.struct.tbuffer.load.ll | 58 +- .../regbankselect-amdgcn-s-buffer-load.mir | 46 +- .../regbankselect-amdgcn.image.load.1d.ll | 164 +- 
.../regbankselect-amdgcn.image.sample.1d.ll | 254 ++- .../regbankselect-amdgcn.mfma.gfx90a.mir | 154 +- .../regbankselect-amdgcn.mfma.gfx940.mir | 244 ++- .../regbankselect-amdgcn.raw.buffer.load.ll | 59 +- .../regbankselect-amdgcn.s.buffer.load.ll | 804 ++++---- .../regbankselect-amdgcn.s.buffer.load.mir | 78 +- ...regbankselect-amdgcn.struct.buffer.load.ll | 59 +- ...egbankselect-amdgcn.struct.buffer.store.ll | 59 +- .../regbankselect-amdgcn.update.dpp.mir | 36 +- .../regbankselect-waterfall-agpr.mir | 48 +- llvm/test/CodeGen/AMDGPU/indirect-call.ll | 36 +- 43 files changed, 3584 insertions(+), 3170 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 4f44827e112e08..6cac8d98ec7aa4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -678,6 +678,62 @@ static LLT getHalfSizedType(LLT Ty) { return LLT::scalar(Ty.getScalarSizeInBits() / 2); } +// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector +// source value into a scalar register. 
+Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Src) const { + LLT Ty = MRI.getType(Src); + const RegisterBank *Bank = getRegBank(Src, MRI, *TRI); + + if (Bank == &AMDGPU::SGPRRegBank) + return Src; + + unsigned Bits = Ty.getSizeInBits(); + assert(Bits % 32 == 0); + + if (Bank != &AMDGPU::VGPRRegBank) { + // We need to copy from AGPR to VGPR + Src = B.buildCopy(Ty, Src).getReg(0); + MRI.setRegBank(Src, AMDGPU::VGPRRegBank); + } + + LLT S32 = LLT::scalar(32); + unsigned NumParts = Bits / 32; + SmallVector SrcParts; + SmallVector DstParts; + + if (Bits == 32) { + SrcParts.push_back(Src); + } else { + auto Unmerge = B.buildUnmerge(S32, Src); + for (unsigned i = 0; i < NumParts; ++i) + SrcParts.push_back(Unmerge.getReg(i)); + } + + for (unsigned i = 0; i < NumParts; ++i) { + Register SrcPart = SrcParts[i]; + Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + MRI.setType(DstPart, NumParts == 1 ? Ty : S32); + + const TargetRegisterClass *Constrained = + constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI); + (void)Constrained; + assert(Constrained && "Failed to constrain readfirstlane src reg"); + + B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart}); + + DstParts.push_back(DstPart); + } + + if (Bits == 32) + return DstParts[0]; + + Register Dst = B.buildMerge(Ty, DstParts).getReg(0); + MRI.setRegBank(Dst, AMDGPU::SGPRRegBank); + return Dst; +} + /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If /// any of the required SGPR operands are VGPRs, perform a waterfall loop to /// execute the instruction for each unique combination of values in all lanes @@ -710,8 +766,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineFunction *MF = &B.getMF(); const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); - const unsigned WaveAndOpc = Subtarget.isWave32() ? 
- AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; const unsigned MovExecOpc = Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; const unsigned MovExecTermOpc = @@ -783,9 +837,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( MachineBasicBlock::iterator NewBegin = FirstInst.getIterator(); auto NewEnd = BodyBB->end(); - MachineBasicBlock::iterator I = LoopBB->end(); B.setMBB(*LoopBB); + LLT S1 = LLT::scalar(1); Register CondReg; assert(std::distance(NewBegin, NewEnd) == OrigRangeSize); @@ -819,161 +873,59 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop( B.setMBB(*LoopBB); } - unsigned OpSize = OpTy.getSizeInBits(); + Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg); - // Can only do a readlane of 32-bit pieces. - if (OpSize == 32) { - // Avoid extra copies in the simple case of one 32-bit register. - Register CurrentLaneOpReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - MRI.setType(CurrentLaneOpReg, OpTy); - - constrainGenericRegister(OpReg, AMDGPU::VGPR_32RegClass, MRI); - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(OpReg); - - Register NewCondReg = MRI.createVirtualRegister(WaveRC); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - // Compare the just read M0 value to all possible Idx values. - B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(OpReg); - Op.setReg(CurrentLaneOpReg); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(WaveRC); - - // If there are multiple operands to consider, and the conditions. - B.buildInstr(WaveAndOpc) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } + // Build the comparison(s). + unsigned OpSize = OpTy.getSizeInBits(); + bool Is64 = OpSize % 64 == 0; + unsigned PartSize = Is64 ? 
64 : 32; + LLT PartTy = LLT::scalar(PartSize); + unsigned NumParts = OpSize / PartSize; + SmallVector OpParts; + SmallVector CurrentLaneParts; + + if (NumParts == 1) { + OpParts.push_back(OpReg); + CurrentLaneParts.push_back(CurrentLaneReg); } else { - LLT S32 = LLT::scalar(32); - SmallVector ReadlanePieces; - - // The compares can be done as 64-bit, but the extract needs to be done - // in 32-bit pieces. - - bool Is64 = OpSize % 64 == 0; - - unsigned UnmergeTySize = Is64 ? 64 : 32; - unsigned CmpOp = - Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64; - - // Insert the unmerge before the loop. - - B.setMBB(MBB); - unsigned NumPieces = OpSize / UnmergeTySize; - SmallVector UnmergePieces; - if (NumPieces == 1) { - UnmergePieces.push_back(OpReg); - } else { - LLT UnmergeTy = LLT::scalar(UnmergeTySize); - MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg); - for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) - UnmergePieces.push_back(Unmerge.getReg(PieceIdx)); + auto UnmergeOp = B.buildUnmerge(PartTy, OpReg); + auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg); + for (unsigned i = 0; i < NumParts; ++i) { + OpParts.push_back(UnmergeOp.getReg(i)); + CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i)); + MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank); + MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank); } - B.setMBB(*LoopBB); - - for (Register UnmergePiece : UnmergePieces) { - Register CurrentLaneOpReg; - if (Is64) { - Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); - Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); - - MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); - MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); - MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. 
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegLo) - .addReg(UnmergePiece, 0, AMDGPU::sub0); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegHi) - .addReg(UnmergePiece, 0, AMDGPU::sub1); - - CurrentLaneOpReg = - B.buildMerge(LLT::scalar(64), - {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) - .getReg(0); - - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); - - if (OpTy.getScalarSizeInBits() == 64) { - // If we need to produce a 64-bit element vector, so use the - // merged pieces - ReadlanePieces.push_back(CurrentLaneOpReg); - } else { - // 32-bit element type. - ReadlanePieces.push_back(CurrentLaneOpRegLo); - ReadlanePieces.push_back(CurrentLaneOpRegHi); - } - } else { - CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32); - MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(UnmergePiece); - ReadlanePieces.push_back(CurrentLaneOpReg); - } + } - Register NewCondReg = MRI.createVirtualRegister(WaveRC); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - B.buildInstr(CmpOp) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(UnmergePiece); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(WaveRC); - - // If there are multiple operands to consider, and the conditions. 
- B.buildInstr(WaveAndOpc) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } - } + for (unsigned i = 0; i < NumParts; ++i) { + auto CmpReg = B.buildICmp(CmpInst::ICMP_EQ, S1, CurrentLaneParts[i], + OpParts[i]).getReg(0); + MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank); - // FIXME: Build merge seems to switch to CONCAT_VECTORS but not - // BUILD_VECTOR - if (OpTy.isVector()) { - auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); - } else if (ReadlanePieces.size() > 1) { - auto Merge = B.buildMerge(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank); + if (!CondReg) { + CondReg = CmpReg; } else { - Op.setReg(ReadlanePieces[0]); + CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0); + MRI.setRegBank(CondReg, AMDGPU::VCCRegBank); } } + Op.setReg(CurrentLaneReg); + // Make sure we don't re-process this register again. WaterfalledRegMap.insert(std::make_pair(OldReg, Op.getReg())); } } + // The ballot becomes a no-op during instruction selection. + CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot, + {LLT::scalar(Subtarget.isWave32() ? 32 : 64)}, + false) + .addReg(CondReg) + .getReg(0); + MRI.setRegClass(CondReg, WaveRC); + // Update EXEC, save the original EXEC value to VCC. 
B.buildInstr(AndSaveExecOpc) .addDef(NewExec) @@ -1061,28 +1013,10 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( if (Bank == &AMDGPU::SGPRRegBank) return; - LLT Ty = MRI.getType(Reg); MachineIRBuilder B(MI); - if (Bank != &AMDGPU::VGPRRegBank) { - // We need to copy from AGPR to VGPR - Reg = B.buildCopy(Ty, Reg).getReg(0); - MRI.setRegBank(Reg, AMDGPU::VGPRRegBank); - } - - Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - B.buildInstr(AMDGPU::V_READFIRSTLANE_B32) - .addDef(SGPR) - .addReg(Reg); - - MRI.setType(SGPR, Ty); - - const TargetRegisterClass *Constrained = - constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI); - (void)Constrained; - assert(Constrained && "Failed to constrain readfirstlane src reg"); - - MI.getOperand(OpIdx).setReg(SGPR); + Reg = buildReadFirstLane(B, MRI, Reg); + MI.getOperand(OpIdx).setReg(Reg); } /// Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 736b77a5e9be9d..4079f34d18b391 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -59,6 +59,9 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo { SmallSet &SGPROperandRegs, MachineRegisterInfo &MRI) const; + Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Src) const; + bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index b4bc8da9c00934..a6be8956dbcd71 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -8,7 +8,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: v_mov_b32_e32 v14, v1 ; CHECK-NEXT: v_mov_b32_e32 v13, v2 @@ -25,132 +25,128 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_mov_b32_e32 v5, v10 ; CHECK-NEXT: v_mov_b32_e32 v6, v9 ; CHECK-NEXT: v_mov_b32_e32 v7, v8 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s8, 0 ; 
CHECK-NEXT: s_mov_b32 s4, s8 ; CHECK-NEXT: s_mov_b32 s5, s8 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 -; CHECK-NEXT: v_writelane_b32 v15, s4, 0 -; CHECK-NEXT: v_writelane_b32 v15, s5, 1 -; CHECK-NEXT: v_writelane_b32 v15, s6, 2 -; CHECK-NEXT: v_writelane_b32 v15, s7, 3 +; CHECK-NEXT: v_writelane_b32 v16, s4, 0 +; CHECK-NEXT: v_writelane_b32 v16, s5, 1 +; CHECK-NEXT: v_writelane_b32 v16, s6, 2 +; CHECK-NEXT: v_writelane_b32 v16, s7, 3 ; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, s6 ; CHECK-NEXT: s_mov_b32 s5, s6 -; CHECK-NEXT: v_mov_b32_e32 v9, s5 -; CHECK-NEXT: v_mov_b32_e32 v8, s4 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v9, v1 -; CHECK-NEXT: v_mov_b32_e32 v8, v0 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v9, v3 -; CHECK-NEXT: v_mov_b32_e32 v8, v2 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v9, v5 -; CHECK-NEXT: v_mov_b32_e32 v8, v4 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v0, v6 -; CHECK-NEXT: v_mov_b32_e32 v1, v7 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s4, exec_lo -; CHECK-NEXT: v_writelane_b32 v15, s4, 4 +; CHECK-NEXT: v_writelane_b32 v16, s4, 4 ; CHECK-NEXT: .LBB0_1: ; =>This Inner 
Loop Header: Depth=1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s8, v6 -; CHECK-NEXT: v_readfirstlane_b32 s19, v7 -; CHECK-NEXT: s_mov_b32 s4, s8 -; CHECK-NEXT: s_mov_b32 s5, s19 -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[4:5], v[6:7] -; CHECK-NEXT: v_readfirstlane_b32 s18, v4 -; CHECK-NEXT: v_readfirstlane_b32 s17, v5 -; CHECK-NEXT: s_mov_b32 s6, s18 -; CHECK-NEXT: s_mov_b32 s7, s17 -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[6:7], v[4:5] -; CHECK-NEXT: s_and_b32 s5, s4, s5 -; CHECK-NEXT: v_readfirstlane_b32 s16, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, v8 +; CHECK-NEXT: v_mov_b32_e32 v6, v9 +; 
CHECK-NEXT: v_mov_b32_e32 v5, v10 +; CHECK-NEXT: v_mov_b32_e32 v4, v11 +; CHECK-NEXT: v_mov_b32_e32 v3, v12 +; CHECK-NEXT: v_mov_b32_e32 v2, v13 +; CHECK-NEXT: v_mov_b32_e32 v1, v14 +; CHECK-NEXT: v_mov_b32_e32 v0, v15 +; CHECK-NEXT: v_readfirstlane_b32 s12, v7 +; CHECK-NEXT: v_readfirstlane_b32 s10, v6 +; CHECK-NEXT: v_readfirstlane_b32 s9, v5 +; CHECK-NEXT: v_readfirstlane_b32 s8, v4 ; CHECK-NEXT: v_readfirstlane_b32 s7, v3 -; CHECK-NEXT: s_mov_b32 s10, s16 -; CHECK-NEXT: s_mov_b32 s11, s7 -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; CHECK-NEXT: s_and_b32 s9, s4, s5 -; CHECK-NEXT: v_readfirstlane_b32 s6, v0 +; CHECK-NEXT: v_readfirstlane_b32 s6, v2 ; CHECK-NEXT: v_readfirstlane_b32 s5, v1 -; CHECK-NEXT: s_mov_b32 s10, s6 -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[0:1] -; CHECK-NEXT: s_and_b32 s4, s4, s9 -; CHECK-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 -; CHECK-NEXT: s_mov_b32 s9, s19 -; CHECK-NEXT: s_mov_b32 s10, s18 -; CHECK-NEXT: s_mov_b32 s11, s17 -; CHECK-NEXT: s_mov_b32 s12, s16 -; CHECK-NEXT: s_mov_b32 s13, s7 -; CHECK-NEXT: s_mov_b32 s14, s6 -; CHECK-NEXT: s_mov_b32 s15, s5 -; CHECK-NEXT: v_writelane_b32 v15, s8, 5 -; CHECK-NEXT: v_writelane_b32 v15, s9, 6 -; CHECK-NEXT: v_writelane_b32 v15, s10, 7 -; CHECK-NEXT: v_writelane_b32 v15, s11, 8 -; CHECK-NEXT: v_writelane_b32 v15, s12, 9 -; CHECK-NEXT: v_writelane_b32 v15, s13, 10 -; CHECK-NEXT: v_writelane_b32 v15, s14, 11 -; CHECK-NEXT: v_writelane_b32 v15, s15, 12 +; CHECK-NEXT: v_readfirstlane_b32 s4, v0 +; CHECK-NEXT: ; kill: def $sgpr12 killed $sgpr12 def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 +; CHECK-NEXT: s_mov_b32 s13, s10 +; CHECK-NEXT: s_mov_b32 s14, s9 +; CHECK-NEXT: s_mov_b32 s15, s8 +; CHECK-NEXT: s_mov_b32 s16, s7 +; CHECK-NEXT: s_mov_b32 s17, s6 +; CHECK-NEXT: s_mov_b32 s18, s5 +; CHECK-NEXT: s_mov_b32 s19, s4 +; CHECK-NEXT: v_writelane_b32 v16, s12, 5 +; CHECK-NEXT: 
v_writelane_b32 v16, s13, 6 +; CHECK-NEXT: v_writelane_b32 v16, s14, 7 +; CHECK-NEXT: v_writelane_b32 v16, s15, 8 +; CHECK-NEXT: v_writelane_b32 v16, s16, 9 +; CHECK-NEXT: v_writelane_b32 v16, s17, 10 +; CHECK-NEXT: v_writelane_b32 v16, s18, 11 +; CHECK-NEXT: v_writelane_b32 v16, s19, 12 +; CHECK-NEXT: v_mov_b32_e32 v6, v8 +; CHECK-NEXT: v_mov_b32_e32 v7, v9 +; CHECK-NEXT: v_mov_b32_e32 v4, v10 +; CHECK-NEXT: v_mov_b32_e32 v5, v11 +; CHECK-NEXT: v_mov_b32_e32 v2, v12 +; CHECK-NEXT: v_mov_b32_e32 v3, v13 +; CHECK-NEXT: v_mov_b32_e32 v0, v14 +; CHECK-NEXT: v_mov_b32_e32 v1, v15 +; CHECK-NEXT: s_mov_b64 s[4:5], s[12:13] +; CHECK-NEXT: s_mov_b64 s[10:11], s[14:15] +; CHECK-NEXT: s_mov_b64 s[8:9], s[16:17] +; CHECK-NEXT: s_mov_b64 s[6:7], s[18:19] +; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[4:5], v[6:7] +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[4:5] +; CHECK-NEXT: s_and_b32 s4, s4, s5 +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[2:3] +; CHECK-NEXT: s_and_b32 s4, s4, s5 +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1] +; CHECK-NEXT: s_and_b32 s4, s4, s5 ; CHECK-NEXT: s_and_saveexec_b32 s4, s4 -; CHECK-NEXT: v_writelane_b32 v15, s4, 13 +; CHECK-NEXT: v_writelane_b32 v16, s4, 13 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: v_readlane_b32 s4, v15, 13 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s8, v15, 5 -; CHECK-NEXT: v_readlane_b32 s9, v15, 6 -; CHECK-NEXT: v_readlane_b32 s10, v15, 7 -; CHECK-NEXT: v_readlane_b32 s11, v15, 8 -; CHECK-NEXT: v_readlane_b32 s12, v15, 9 -; CHECK-NEXT: v_readlane_b32 s13, v15, 10 -; CHECK-NEXT: v_readlane_b32 s14, v15, 11 -; CHECK-NEXT: v_readlane_b32 s15, v15, 12 -; CHECK-NEXT: v_readlane_b32 s16, v15, 0 -; CHECK-NEXT: v_readlane_b32 s17, v15, 1 -; CHECK-NEXT: v_readlane_b32 s18, v15, 2 -; CHECK-NEXT: v_readlane_b32 s19, v15, 3 +; CHECK-NEXT: v_readlane_b32 
s4, v16, 13 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s8, v16, 5 +; CHECK-NEXT: v_readlane_b32 s9, v16, 6 +; CHECK-NEXT: v_readlane_b32 s10, v16, 7 +; CHECK-NEXT: v_readlane_b32 s11, v16, 8 +; CHECK-NEXT: v_readlane_b32 s12, v16, 9 +; CHECK-NEXT: v_readlane_b32 s13, v16, 10 +; CHECK-NEXT: v_readlane_b32 s14, v16, 11 +; CHECK-NEXT: v_readlane_b32 s15, v16, 12 +; CHECK-NEXT: v_readlane_b32 s16, v16, 0 +; CHECK-NEXT: v_readlane_b32 s17, v16, 1 +; CHECK-NEXT: v_readlane_b32 s18, v16, 2 +; CHECK-NEXT: v_readlane_b32 s19, v16, 3 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; CHECK-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: v_readlane_b32 s4, v15, 4 +; CHECK-NEXT: v_readlane_b32 s4, v16, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index 80bc1114de8846..a4fdbd908f4693 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -138,10 +138,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; GFX1030-NEXT: v_readfirstlane_b32 s7, v14 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:30], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12 +; GFX1030-NEXT: ; implicit-def: $vgpr11 ; GFX1030-NEXT: ; implicit-def: $vgpr15 ; GFX1030-NEXT: ; implicit-def: $vgpr16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 @@ -175,10 +175,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] -; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 +; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -221,10 +221,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1030-NEXT: v_readfirstlane_b32 s7, v12 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; 
GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10 +; GFX1030-NEXT: ; implicit-def: $vgpr9 ; GFX1030-NEXT: ; implicit-def: $vgpr13 ; GFX1030-NEXT: ; implicit-def: $vgpr14 ; GFX1030-NEXT: ; implicit-def: $vgpr15 @@ -259,10 +259,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1013-NEXT: v_readfirstlane_b32 s7, v12 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] -; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10 +; GFX1013-NEXT: ; implicit-def: $vgpr9 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -304,10 +304,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX1030-NEXT: v_readfirstlane_b32 s7, v15 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:31], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13 +; GFX1030-NEXT: ; implicit-def: $vgpr12 ; GFX1030-NEXT: ; implicit-def: $vgpr16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 ; GFX1030-NEXT: ; implicit-def: $vgpr18 @@ -342,10 +342,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] 
-; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 +; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -389,10 +389,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1030-NEXT: v_readfirstlane_b32 s7, v13 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] -; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:29], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX1030-NEXT: ; implicit-def: $vgpr10 ; GFX1030-NEXT: ; implicit-def: $vgpr14 ; GFX1030-NEXT: ; implicit-def: $vgpr15 ; GFX1030-NEXT: ; implicit-def: $vgpr16 @@ -432,10 +432,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] -; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo +; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17 +; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll index 2f9a6e6a17ed94..b1f7c98205f08a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -108,32 +108,36 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -167,32 +171,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = 
S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - 
; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll index c008e0a5bb9977..50fb684a590a7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -68,34 +68,38 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -105,7 +109,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY13]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY19]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -131,34 +135,38 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit 
$exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile 
dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll index afc01b3378bde6..9dca872cdea603 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -150,32 +150,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; 
GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 
0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -201,32 +205,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], 
[[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -256,32 +264,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: 
[[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def 
$scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -305,32 +317,36 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; GFX90A-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; 
GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll index 14c05ec94ce743..cae772b032424b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll @@ -158,32 +158,36 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -208,32 +212,36 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll index 0441368d9905e9..5ab03c10cbd14e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll @@ -108,32 +108,36 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 
= V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll index 93d807a8d269d3..4cc3e555c7d28a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll @@ -57,29 +57,33 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -109,32 +113,36 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: 
%bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], 
implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN 
[[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -466,29 +474,33 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -518,29 +530,33 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -797,29 +813,33 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 
(s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -852,30 +872,34 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], 
implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index 0e996101cbb7ea..93ce8bdacf9e74 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -167,29 +167,33 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; 
UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -215,29 +219,33 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: 
%bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -465,36 +473,40 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: %13:vgpr_32, dead %49:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; UNPACKED-NEXT: %13:vgpr_32, dead %54:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], 
implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -522,30 +534,34 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: %13:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec - ; PACKED-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index b496be3c576395..d27818afd62bdf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -123,29 +123,33 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY13]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -293,30 +297,34 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 
4096 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %15:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: %15:vgpr_32, dead %40:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; 
CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], 
[[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 5dfc64fa1b2306..939b932f5b2776 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -59,29 +59,33 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -116,9 +120,10 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 
[[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -153,32 +158,36 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; 
CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -507,29 +516,33 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -763,30 +776,34 @@ define amdgpu_ps void 
@raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: 
successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -818,29 +835,33 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll index 75bdd845394bf2..9fe1fbd91a2a2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -156,32 +156,36 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; 
UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -206,32 +210,36 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, 
implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll index 
00e31dbe0622a7..1ffbf31ae52b94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -107,32 +107,36 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll index 36fa2bf0255d76..ce4e7fd5951928 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -135,29 +135,33 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; 
UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 
1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -181,29 +185,33 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; 
PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -233,32 +241,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], 
[[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -282,32 +294,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], 
implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; 
PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -338,32 +354,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; 
UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -388,32 +408,36 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = 
COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll index 12a4396f62cfc9..9b4656b46dabd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll @@ -50,29 +50,33 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -96,29 +100,33 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 
= COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -148,32 +156,36 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; 
UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + 
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, 
implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -197,32 +209,36 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; 
PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -253,32 +269,36 @@ define amdgpu_ps void 
@raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit 
$exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -303,32 +323,36 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; PACKED-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 
94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll index 026c600b2a8bb8..bab3c26716b21f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -127,29 +127,33 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = 
V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -179,32 +183,36 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, 
addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -235,32 +243,36 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit 
$exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: 
[[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -597,29 +609,33 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit 
$exec ; CHECK-NEXT: {{ $}} @@ -653,29 +669,33 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index fb4d8a8faf3b92..d7e8d2a2730fe1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -2696,27 +2696,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2738,27 +2742,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2780,27 +2788,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2826,27 +2838,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX6-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2866,27 +2882,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], 
implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2906,27 +2926,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2957,27 +2981,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: 
[[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3001,27 +3029,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3045,27 +3077,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX8-NEXT: 
[[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], 
[[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3092,27 +3128,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, 
implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3132,27 +3172,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3172,27 +3216,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3220,27 +3268,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; 
GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; 
GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit 
$exec ; GFX6-NEXT: {{ $}} @@ -3262,27 +3314,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + 
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3302,27 +3358,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3349,28 +3409,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3378,23 +3442,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], 
%subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr7 = COPY 
[[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): @@ -3406,28 +3470,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, 
implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3435,23 +3503,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; 
GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): @@ -3463,28 +3531,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + 
; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3492,23 +3564,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], 
%subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3532,28 +3604,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3561,23 +3637,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: 
[[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): @@ -3593,28 +3669,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, 
implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; 
GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3622,23 +3702,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit 
$vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): @@ -3654,28 +3734,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3683,23 +3767,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY 
[[COPY8]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3721,28 +3805,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - 
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + 
; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN 
[[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3750,23 +3838,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: 
[[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): @@ -3782,28 +3870,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; 
GFX7-NEXT: {{ $}} @@ -3811,23 +3903,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: 
[[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): @@ -3843,28 +3935,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3872,23 +3968,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; 
GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 
%soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3907,28 +4003,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; 
GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3936,23 +4036,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = 
COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): @@ -3965,28 +4065,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3994,23 +4098,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): @@ -4023,28 +4127,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term 
$exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4052,23 +4160,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4087,28 +4195,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4116,23 +4228,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; 
GFX6-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): @@ -4145,28 +4257,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed 
[[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4174,23 +4290,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 
- ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): @@ -4203,28 +4319,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4232,23 +4352,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4267,28 +4387,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: 
bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4296,23 +4420,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): @@ -4325,28 +4449,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: 
[[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4354,23 +4482,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: 
[[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX7-NEXT: 
$vgpr4 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): @@ -4383,28 +4511,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], 
implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4412,23 +4544,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4446,28 +4578,32 @@ 
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4475,23 +4611,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY7]] - ; GFX6-NEXT: 
$vgpr2 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY19]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): @@ -4503,28 +4639,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4532,23 +4672,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], 
%subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY19]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): @@ -4560,28 +4700,32 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) 
from unknown-address + 4064, align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4589,23 +4733,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY19]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll index c9884689e30c0d..6b6d95d1c6b015 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -118,33 +118,37 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 
1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -180,33 +184,37 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP 
%bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll index 65ca08b45d6784..0079a880ba6eb8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -75,35 +75,39 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], 
[[COPY18]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile 
dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -113,7 +117,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY15]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY21]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -141,35 +145,39 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll index 8ee34c3da49459..53a9c831e5e291 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -164,33 +164,37 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -218,33 +222,37 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], 
implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ 
$}} @@ -276,32 +284,36 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; 
GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; 
GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -327,32 +339,36 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; GFX90A-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: 
[[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll index 849523e11c50fc..19bff0f4d614dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -171,33 +171,37 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = 
COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: 
[[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -207,25 +211,25 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 
= V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec + ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY18]], implicit $exec - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY19]], implicit $exec - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY20]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec + ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec + ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 
[[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] @@ -245,33 +249,37 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY6]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -281,10 +289,10 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY11]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY12]] + ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] + ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index a7fab9821eebf0..54bd85b95f2a7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -118,33 +117,37 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def 
$exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -154,14 +157,14 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY11]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY12]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY13]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY14]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll index 93927318961f2b..9623fa14eccebe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; Natural mapping define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { @@ -189,33 +188,37 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll index 54fc5dcd476d89..5f0087912add31 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll @@ -150,33 +150,37 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit 
$exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], 
%subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -204,33 +208,37 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; 
PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: 
[[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term 
$exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll index 72ebb811fc8501..31b8977e1513f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll @@ -112,33 +112,37 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit 
$exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll 
index bd6fad5e071de1..dd5265a7b68e4c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; Natural mapping @@ -118,33 +117,37 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]].sub1, implicit $exec - ; CHECK-NEXT: 
[[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; 
CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY21]], [[COPY19]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll index a75d2928465f3b..bba5cebc0ca320 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=UNPACKED %s define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 
%vindex, i32 %voffset, i32 inreg %soffset) { ; PACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -211,33 +211,37 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; 
PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -247,10 +251,10 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; 
PACKED-NEXT: $vgpr0 = COPY [[COPY11]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY12]] + ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] + ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): @@ -267,33 +271,37 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec 
= REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; 
UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) ; UNPACKED-NEXT: $exec = 
S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -303,25 +311,25 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec + ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec ; UNPACKED-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY18]], implicit $exec - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY19]], implicit $exec - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY20]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec + ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec + ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll index e3cea6fa10169d..3f6a03426aeaf1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK %s define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -140,33 +140,37 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def 
$exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -176,14 +180,14 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY11]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY12]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY13]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY14]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir index 80383c7b2280b4..484c1beebea858 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir @@ -62,24 +62,25 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -117,24 +118,25 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %8, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) diff 
--git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 0a1a0438e6770a..3e4b0d4fa1e1d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -113,34 +113,33 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: 
[[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -175,34 +174,33 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: 
[[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -245,34 +243,33 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = 
G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -308,34 +305,33 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), 
[[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), 
[[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index a0ed8d7ddecb31..8f62f75b97c6fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -134,34 +134,33 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: 
[[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), 
[[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -200,34 +199,33 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, 
implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -273,24 +271,25 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -329,24 +328,25 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<4 x s32>) + ; GREEDY-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -392,46 +392,45 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; FAST-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; FAST-NEXT: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: successors: %bb.3(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), 
[[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc - ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec - ; FAST-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec - ; FAST-NEXT: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; FAST-NEXT: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec - ; FAST-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc + ; FAST-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; FAST-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; FAST-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec + ; FAST-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec + ; FAST-NEXT: 
[[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec ; FAST-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec + ; FAST-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; FAST-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) + ; FAST-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] + ; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] + ; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] + ; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]] + ; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) + ; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -470,46 +469,45 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) ; GREEDY-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: successors: %bb.3(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def 
$scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), 
[[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; GREEDY-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<8 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; GREEDY-NEXT: 
[[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; GREEDY-NEXT: [[UV16:%[0-9]+]]:vgpr_32(s32), [[UV17:%[0-9]+]]:vgpr_32(s32), [[UV18:%[0-9]+]]:vgpr_32(s32), [[UV19:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV16]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV17]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV18]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV19]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV20:%[0-9]+]]:vgpr(s64), [[UV21:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[UV22:%[0-9]+]]:sgpr(s64), [[UV23:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV22]](s64), [[UV20]] + ; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]] + ; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]] + ; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir index 8ea58fac3aba06..1e05a4b8460999 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir @@ -12,18 +12,20 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: 
[[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 @@ -41,18 +43,20 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -70,18 +74,20 @@ body: | ; FAST-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_4x4x4bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -99,18 +105,20 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FAST: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x 
s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x8bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0 + 
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 @@ -128,18 +136,20 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x16bf16_1k_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x16bf16_1k_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -157,18 +167,20 @@ body: | ; FAST-LABEL: name: mfma_f64_16x16x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FAST: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) ; GREEDY-LABEL: name: mfma_f64_16x16x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -186,18 +198,20 @@ body: | ; FAST-LABEL: name: mfma_f64_4x4x4f64_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; FAST: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) ; GREEDY-LABEL: name: mfma_f64_4x4x4f64_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $agpr0_agpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir index 5f9822cf58b92a..22d5fe9911e393 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir @@ -12,18 +12,20 @@ body: | ; FAST-LABEL: name: mfma_i32_16x16x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_i32_16x16x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -41,18 +43,20 @@ body: | ; FAST-LABEL: name: mfma_i32_32x32x16_i8_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: 
[[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_i32_32x32x16_i8_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; 
GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -70,18 +74,20 @@ body: | ; FAST-LABEL: name: mfma_f32_16x16x8_xf32_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: mfma_f32_16x16x8_xf32_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; 
GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -99,18 +105,20 @@ body: | ; FAST-LABEL: name: mfma_f32_32x32x4_xf32_vva ; FAST: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: mfma_f32_32x32x4_xf32_vva ; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -128,20 +136,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_f32_16x16x32_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + 
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -160,20 +170,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_f16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_f32_32x32x16_f16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -192,20 +204,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_f32_16x16x32_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ 
$}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -224,20 +238,22 @@ body: | ; FAST-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) 
= G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_f32_32x32x16_bf16_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -256,20 +272,22 @@ body: | ; FAST-LABEL: name: smfmac_i32_16x16x64_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) ; GREEDY-LABEL: name: smfmac_i32_16x16x64_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + ; GREEDY-NEXT: {{ $}} + ; 
GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 @@ -288,20 +306,22 @@ body: | ; FAST-LABEL: name: smfmac_i32_32x32x32_i8_vva ; FAST: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; FAST: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; FAST: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; FAST: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; FAST: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) ; GREEDY-LABEL: name: smfmac_i32_32x32x32_i8_vva ; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GREEDY: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 - ; GREEDY: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 - ; GREEDY: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20 + ; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0 + ; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<16 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll index e14262c416406f..5fca16e473a4eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -64,24 +64,25 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - 
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: 
{{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -125,9 +126,10 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -165,27 +167,28 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, 
%bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index 57e4d40bb9c814..b941a250004ebb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -1563,22 +1563,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1607,22 +1608,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; 
GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1658,22 +1660,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1703,22 +1706,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: 
[[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1756,22 +1760,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1802,22 +1807,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GREEDY-NEXT: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 
[[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1853,22 +1859,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: 
[[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1897,22 +1904,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -1947,22 +1955,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES 
[[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), 
implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -1991,22 +2000,23 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2043,22 +2053,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2073,15 +2084,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = 
COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GREEDY: bb.1 (%ir-block.0): @@ -2098,22 +2109,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2128,15 +2140,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = 
COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2162,22 +2174,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2192,15 +2205,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GREEDY: bb.1 (%ir-block.0): @@ -2218,22 +2231,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 
[[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2248,15 +2262,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2280,22 +2294,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], 
implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2310,15 +2325,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), 
[[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GREEDY: bb.1 (%ir-block.0): @@ -2336,22 +2351,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), 
[[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2366,15 +2382,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 
%soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2397,22 +2413,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2427,15 +2444,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: 
[[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: bb.1 (%ir-block.0): @@ -2452,22 +2469,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2482,15 +2500,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) 
- ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2513,22 +2531,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2543,15 +2562,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; 
CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: bb.1 (%ir-block.0): @@ -2568,22 +2587,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), 
implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2598,15 +2618,15 @@ 
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2629,22 +2649,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = 
G_ADD [[COPY4]], [[COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2659,15 +2680,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY 
[[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: bb.1 (%ir-block.0): @@ -2684,22 +2705,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; 
GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2714,15 +2736,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2744,22 +2766,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; 
CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 @@ -2774,15 +2797,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: 
s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GREEDY: bb.1 (%ir-block.0): @@ -2798,22 +2821,23 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.3 - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; 
GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 @@ -2828,15 +2852,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), 
[[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir index bcd55e225f4728..45a2ab5b774c2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir @@ -17,27 +17,29 @@ body: | ; FAST-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; FAST: 
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 - ; FAST: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 - ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] - ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] - ; GREEDY: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $sgpr0 %2:vgpr(s32) = G_CONSTANT i32 256 @@ -57,26 +59,28 @@ body: | ; FAST-LABEL: name: s_buffer_load_negative_offset ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - ; FAST: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; FAST-NEXT: {{ $}} + ; FAST-NEXT: 
[[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 + ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_negative_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = 
G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 -60 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll index 56cff7cde87868..49e352858f2254 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -62,24 +62,25 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: 
[[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -122,9 +123,10 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -161,27 +163,28 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = 
V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll index 42ee4da9fdaf31..94cad1624b5621 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -64,24 +64,25 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), 
[[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ 
-124,9 +125,10 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) @@ -163,27 +165,28 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 
[[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; 
CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir index bee5f3ea1be245..46184e8b4192a5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir @@ -11,11 +11,12 @@ body: | ; CHECK-LABEL: name: update_dpp_ss ; CHECK: liveins: 
$sgpr0, $sgpr1 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -32,10 +33,11 @@ body: | ; CHECK-LABEL: name: update_dpp_sv ; CHECK: liveins: $sgpr0, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -52,10 +54,11 @@ body: | ; CHECK-LABEL: name: update_dpp_vs ; CHECK: liveins: $vgpr0, $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY 
[[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -72,9 +75,10 @@ body: | ; CHECK-LABEL: name: update_dpp_vv ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir index 07c2d682fcacd4..c0f72eccf52498 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir @@ -31,9 +31,10 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -75,34 +76,33 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s32>) = COPY [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %6, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), 
implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec - ; CHECK-NEXT: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec - ; CHECK-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), 
[[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), 
[[UV12]](s64), [[UV8]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV14]](s64), [[UV10]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index 4f3029b8514647..f343fab1d63711 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -515,7 +515,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_mov_b32 s13, s43 ; GISEL-NEXT: s_mov_b32 s14, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 @@ -679,7 +679,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_mov_b32 s13, s43 ; GISEL-NEXT: s_mov_b32 s14, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 @@ -841,14 +841,14 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: 
s_mov_b32 s13, s43 ; GISEL-NEXT: s_mov_b32 s14, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: v_mov_b32_e32 v2, v0 -; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GISEL-NEXT: v_mov_b32_e32 v1, v0 +; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 ; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] ; GISEL-NEXT: s_cbranch_execnz .LBB4_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 ; GISEL-NEXT: v_readlane_b32 s49, v40, 16 ; GISEL-NEXT: v_readlane_b32 s48, v40, 15 ; GISEL-NEXT: v_readlane_b32 s47, v40, 14 @@ -1022,7 +1022,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: s_mov_b32 s13, s43 ; GISEL-NEXT: s_mov_b32 s14, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 ; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51] ; GISEL-NEXT: s_cbranch_execnz .LBB5_2 @@ -1211,7 +1211,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] ; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: @@ -1409,7 +1409,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v41 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 +; GISEL-NEXT: ; implicit-def: $vgpr1 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB7_1 ; GISEL-NEXT: ; %bb.2: @@ -1603,19 +1603,19 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, 
i32(i32)* %fptr) ; GISEL-NEXT: v_writelane_b32 v40, s63, 31 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v1 -; GISEL-NEXT: v_readfirstlane_b32 s7, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GISEL-NEXT: v_mov_b32_e32 v3, v0 -; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 +; GISEL-NEXT: v_readfirstlane_b32 s8, v1 +; GISEL-NEXT: v_readfirstlane_b32 s9, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_mov_b32_e32 v2, v0 +; GISEL-NEXT: ; implicit-def: $vgpr1 ; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7] ; GISEL-NEXT: s_cbranch_execnz .LBB8_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mov_b32_e32 v0, v2 ; GISEL-NEXT: v_readlane_b32 s63, v40, 31 ; GISEL-NEXT: v_readlane_b32 s62, v40, 30 ; GISEL-NEXT: v_readlane_b32 s61, v40, 29 @@ -1802,7 +1802,7 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: From 8a1984c25e2c982a9388c14bdaf99bdcd3e26bd4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 25 May 2022 13:32:25 -0400 Subject: [PATCH 492/908] [Clang][Docs] Document `-Xoffload-linker` flag Summary: I added the `-Xoffload-linker` flag and did not provide additional documentation. This patch adds it. 
--- clang/docs/ClangCommandLineReference.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 13e175a4e5203f..5926decc85fc12 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -4215,6 +4215,10 @@ Pass the comma separated arguments in <arg> to the linker Pass <arg> to the linker +.. option:: -Xoffload-linker <arg>, -Xoffload-linker-<triple> <arg> + +Pass <arg> to all the device linking jobs, or for only <triple> if specified. + .. program:: clang1 .. option:: -Z .. program:: clang From ba3f85390bde10eab1cbdb68f744b8f5ab31859b Mon Sep 17 00:00:00 2001 From: Mike Rice Date: Tue, 24 May 2022 13:57:41 -0700 Subject: [PATCH 493/908] [OpenMP] Add diagnostic for unterminated 'omp [begin] declare target' Warns when end-of-file is reached without seeing all matching 'omp end declare target' directives. The diagnostic shows the location of the related begin directive. Differential Revision: https://reviews.llvm.org/D126331 --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 +++ clang/include/clang/Sema/Sema.h | 4 ++++ clang/lib/Sema/Sema.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 8 ++++++++ .../Inputs/unterminated_declare_target_include.h | 3 +++ clang/test/OpenMP/declare_target_messages.cpp | 12 +++++++++++- 6 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 clang/test/OpenMP/Inputs/unterminated_declare_target_include.h diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2969e24e3baf86..6dfe5af6ecfa3c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11001,6 +11001,9 @@ def err_omp_append_args_with_varargs : Error< "'append_args' is not allowed with varargs functions">; def err_openmp_vla_in_task_untied : Error< "variable length arrays are not supported in OpenMP tasking regions with 'untied' clause">; +def 
warn_omp_unterminated_declare_target : Warning< + "expected '#pragma omp end declare target' at end of file to match '#pragma omp %0'">, + InGroup; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9cbbeadc3d74bf..bd7a809e404745 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10769,6 +10769,10 @@ class Sema final { /// encountered. void ActOnFinishedOpenMPDeclareTargetContext(DeclareTargetContextInfo &DTCI); + /// Report unterminated 'omp declare target' or 'omp begin declare target' at + /// the end of a compilation unit. + void DiagnoseUnterminatedOpenMPDeclareTarget(); + /// Searches for the provided declaration name for OpenMP declare target /// directive. NamedDecl *lookupOpenMPDeclareTargetName(Scope *CurScope, diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index ce104f377730c8..ad2cb62a18f0cf 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1157,6 +1157,7 @@ void Sema::ActOnEndOfTranslationUnit() { DiagnoseUnterminatedPragmaAlignPack(); DiagnoseUnterminatedPragmaAttribute(); + DiagnoseUnterminatedOpenMPDeclareTarget(); // All delayed member exception specs should be checked or we end up accepting // incompatible declarations. 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 1fdb7df1feea4c..fdb7658f77bda5 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22101,6 +22101,14 @@ void Sema::ActOnFinishedOpenMPDeclareTargetContext( ActOnOpenMPDeclareTargetName(It.first, It.second.Loc, It.second.MT, DTCI); } +void Sema::DiagnoseUnterminatedOpenMPDeclareTarget() { + if (DeclareTargetNesting.empty()) + return; + DeclareTargetContextInfo &DTCI = DeclareTargetNesting.back(); + Diag(DTCI.Loc, diag::warn_omp_unterminated_declare_target) + << getOpenMPDirectiveName(DTCI.Kind); +} + NamedDecl *Sema::lookupOpenMPDeclareTargetName(Scope *CurScope, CXXScopeSpec &ScopeSpec, const DeclarationNameInfo &Id) { diff --git a/clang/test/OpenMP/Inputs/unterminated_declare_target_include.h b/clang/test/OpenMP/Inputs/unterminated_declare_target_include.h new file mode 100644 index 00000000000000..ddb60d562de15e --- /dev/null +++ b/clang/test/OpenMP/Inputs/unterminated_declare_target_include.h @@ -0,0 +1,3 @@ +// expected-warning@+1 {{expected '#pragma omp end declare target' at end of file to match '#pragma omp declare target'}} +#pragma omp declare target +void zxy(); diff --git a/clang/test/OpenMP/declare_target_messages.cpp b/clang/test/OpenMP/declare_target_messages.cpp index 31185881e192b9..7e7cc60e75e35d 100644 --- a/clang/test/OpenMP/declare_target_messages.cpp +++ b/clang/test/OpenMP/declare_target_messages.cpp @@ -6,6 +6,9 @@ // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify=expected,omp5,host5 -fopenmp-simd -fopenmp-is-device -fopenmp-targets=x86_64-apple-macos10.7.0 -fnoopenmp-use-tls -ferror-limit 100 -o - %s // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify=expected,omp45 -fopenmp-version=45 -fopenmp-simd -fnoopenmp-use-tls -ferror-limit 100 -o - %s // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -fnoopenmp-use-tls -ferror-limit 100 -o - %s +// RUN: %clang_cc1 
-triple x86_64-apple-macos10.7.0 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -fnoopenmp-use-tls -ferror-limit 100 -DTESTEND=1 -o - %s +// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -fnoopenmp-use-tls -ferror-limit 100 -I%S/Inputs -DTESTENDINC=1 -o - %s +// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify=expected,omp51 -fopenmp-version=51 -fopenmp-simd -fnoopenmp-use-tls -ferror-limit 100 -o - %s // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify=expected,omp51 -fopenmp-version=51 -fopenmp-simd -fnoopenmp-use-tls -ferror-limit 100 -o - %s // RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify=expected,omp5 -fopenmp -fnoopenmp-use-tls -ferror-limit 100 -o - %s @@ -228,5 +231,12 @@ int MultiDevTy; #pragma omp declare target to(MultiDevTy) device_type(host) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} omp5-error {{'device_type(host)' does not match previously specified 'device_type(any)' for the same declaration}} omp51-error {{'device_type(host)' does not match previously specified 'device_type(any)' for the same declaration}} #pragma omp declare target to(MultiDevTy) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} omp5-error {{'device_type(nohost)' does not match previously specified 'device_type(any)' for the same declaration}} // omp51-error {{'device_type(nohost)' does not match previously specified 'device_type(any)' for the same declaration}} -// TODO: Issue an error message error {{expected '#pragma omp end declare target'}} note {{to match this '#pragma omp declare target'}} +#if TESTENDINC +#include "unterminated_declare_target_include.h" +#elif TESTEND +// expected-warning@+1 {{expected '#pragma omp end declare target' at end of file to match '#pragma omp declare target'}} #pragma omp declare target +#else +// expected-warning@+1 {{expected '#pragma omp end declare 
target' at end of file to match '#pragma omp begin declare target'}} +#pragma omp begin declare target +#endif From 69da3b6aead2e7a18a2578aad661d6d36b8d30cf Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 25 May 2022 13:33:30 -0400 Subject: [PATCH 494/908] Revert "[OpenMP] atomic compare fail : Parser & AST support" This reverts commit 232bf8189ef7d574a468bd5bfd1e84e962f7f16e. It broke the sanitize buildbot: https://lab.llvm.org/buildbot/#/builders/5/builds/24074 It also reproduces on Windows debug builds as a crash. --- clang/include/clang/AST/ASTNodeTraverser.h | 11 -- clang/include/clang/AST/OpenMPClause.h | 127 ------------------ clang/include/clang/AST/RecursiveASTVisitor.h | 5 - .../clang/Basic/DiagnosticSemaKinds.td | 4 - clang/include/clang/Parse/Parser.h | 2 - clang/include/clang/Sema/Sema.h | 3 - clang/lib/AST/OpenMPClause.cpp | 31 ----- clang/lib/AST/StmtProfile.cpp | 2 - clang/lib/Basic/OpenMPKinds.cpp | 14 -- clang/lib/Parse/ParseOpenMP.cpp | 46 +------ clang/lib/Sema/SemaOpenMP.cpp | 93 ------------- clang/lib/Sema/TreeTransform.h | 7 - clang/lib/Serialization/ASTReader.cpp | 28 ---- clang/lib/Serialization/ASTWriter.cpp | 8 -- clang/test/OpenMP/atomic_ast_print.cpp | 40 ------ clang/test/OpenMP/atomic_messages.cpp | 18 --- clang/tools/libclang/CIndex.cpp | 2 - flang/lib/Semantics/check-omp-structure.cpp | 1 - llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 +- 19 files changed, 2 insertions(+), 444 deletions(-) diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index cb5bbeb6448128..f2c5c01ac88deb 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -214,10 +214,6 @@ class ASTNodeTraverser } void Visit(const OMPClause *C) { - if (OMPFailClause::classof(C)) { - Visit(static_cast(C)); - return; - } getNodeDelegate().AddChild([=] { getNodeDelegate().Visit(C); for (const auto *S : C->children()) @@ -225,13 +221,6 @@ class ASTNodeTraverser }); } - 
void Visit(const OMPFailClause *C) { - getNodeDelegate().AddChild([=] { - getNodeDelegate().Visit(C); - const OMPClause *MOC = C->const_getMemoryOrderClause(); - Visit(MOC); - }); - } void Visit(const GenericSelectionExpr::ConstAssociation &A) { getNodeDelegate().AddChild([=] { getNodeDelegate().Visit(A); diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index aa9503c565779e..a745df11434685 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2266,133 +2266,6 @@ class OMPCompareClause final : public OMPClause { } }; -/// This represents 'fail' clause in the '#pragma omp atomic' -/// directive. -/// -/// \code -/// #pragma omp atomic compare fail -/// \endcode -/// In this example directive '#pragma omp atomic compare' has 'fail' clause. -class OMPFailClause final - : public OMPClause, - private llvm::TrailingObjects { - OMPClause *MemoryOrderClause; - - friend class OMPClauseReader; - friend TrailingObjects; - - /// Define the sizes of each trailing object array except the last one. This - /// is required for TrailingObjects to work properly. - size_t numTrailingObjects(OverloadToken) const { - // 2 locations: for '(' and argument location. - return 2; - } - - /// Sets the location of '(' in fail clause. - void setLParenLoc(SourceLocation Loc) { - *getTrailingObjects() = Loc; - } - - /// Sets the location of memoryOrder clause argument in fail clause. - void setArgumentLoc(SourceLocation Loc) { - *std::next(getTrailingObjects(), 1) = Loc; - } - - /// Sets the mem_order clause for 'atomic compare fail' directive. - void setMemOrderClauseKind(OpenMPClauseKind MemOrder) { - OpenMPClauseKind *MOCK = getTrailingObjects(); - *MOCK = MemOrder; - } - - /// Sets the mem_order clause for 'atomic compare fail' directive. - void setMemOrderClause(OMPClause *MemoryOrderClauseParam) { - MemoryOrderClause = MemoryOrderClauseParam; - } -public: - /// Build 'fail' clause. 
- /// - /// \param StartLoc Starting location of the clause. - /// \param EndLoc Ending location of the clause. - OMPFailClause(SourceLocation StartLoc, SourceLocation EndLoc) - : OMPClause(llvm::omp::OMPC_fail, StartLoc, EndLoc) {} - - /// Build an empty clause. - OMPFailClause() - : OMPClause(llvm::omp::OMPC_fail, SourceLocation(), SourceLocation()) {} - - static OMPFailClause *CreateEmpty(const ASTContext &C); - static OMPFailClause *Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation EndLoc); - - child_range children() { - return child_range(child_iterator(), child_iterator()); - } - - - const_child_range children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); - } - - child_range used_children() { - return child_range(child_iterator(), child_iterator()); - } - const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); - } - - static bool classof(const OMPClause *T) { - return T->getClauseKind() == llvm::omp::OMPC_fail; - } - - void initFailClause(SourceLocation LParenLoc, OMPClause *MemOClause, - SourceLocation MemOrderLoc) { - - setLParenLoc(LParenLoc); - MemoryOrderClause = MemOClause; - setArgumentLoc(MemOrderLoc); - - OpenMPClauseKind ClauseKind = MemoryOrderClause->getClauseKind(); - OpenMPClauseKind MemClauseKind = llvm::omp::OMPC_unknown; - switch(ClauseKind) { - case llvm::omp::OMPC_acq_rel: - case llvm::omp::OMPC_acquire: - MemClauseKind = llvm::omp::OMPC_acquire; - break; - case llvm::omp::OMPC_relaxed: - case llvm::omp::OMPC_release: - MemClauseKind = llvm::omp::OMPC_relaxed; - break; - case llvm::omp::OMPC_seq_cst: - MemClauseKind = llvm::omp::OMPC_seq_cst; - break; - default : break; - } - setMemOrderClauseKind(MemClauseKind); - } - - /// Gets the location of '(' in fail clause. 
- SourceLocation getLParenLoc() const { - return *getTrailingObjects(); - } - - OMPClause *getMemoryOrderClause() { return MemoryOrderClause; } - - const OMPClause *const_getMemoryOrderClause() const { - return static_cast(MemoryOrderClause); - } - - /// Gets the location of memoryOrder clause argument in fail clause. - SourceLocation getArgumentLoc() const { - return *std::next(getTrailingObjects(), 1); - } - - /// Gets the dependence kind in clause for 'depobj' directive. - OpenMPClauseKind getMemOrderClauseKind() const { - return *getTrailingObjects(); - } -}; - /// This represents 'seq_cst' clause in the '#pragma omp atomic' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index e29619a5cecc6f..ae6442d75dd4b4 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3346,11 +3346,6 @@ bool RecursiveASTVisitor::VisitOMPCompareClause(OMPCompareClause *) { return true; } -template -bool RecursiveASTVisitor::VisitOMPFailClause(OMPFailClause *) { - return true; -} - template bool RecursiveASTVisitor::VisitOMPSeqCstClause(OMPSeqCstClause *) { return true; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6dfe5af6ecfa3c..3168c20969e1b1 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10633,10 +10633,6 @@ def note_omp_atomic_compare: Note< "expect binary operator in conditional expression|expect '<', '>' or '==' as order operator|expect comparison in a form of 'x == e', 'e == x', 'x ordop expr', or 'expr ordop x'|" "expect lvalue for result value|expect scalar value|expect integer value|unexpected 'else' statement|expect '==' operator|expect an assignment statement 'v = x'|" "expect a 'if' statement|expect no more than two statements|expect a compound statement|expect 'else' statement|expect a form 
'r = x == e; if (r) ...'}0">; -def err_omp_atomic_fail_wrong_or_no_clauses : Error<"expected a memory order clause">; -def err_omp_atomic_fail_extra_mem_order_clauses : Error<"directive '#pragma omp atomic compare fail' cannot contain more than one memory order clause">; -def err_omp_atomic_fail_extra_clauses : Error<"directive '#pragma omp atomic compare' cannot contain more than one fail clause">; -def err_omp_atomic_fail_no_compare : Error<"expected 'compare' clause with the 'fail' modifier">; def err_omp_atomic_several_clauses : Error< "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">; def err_omp_several_mem_order_clauses : Error< diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a4d90ce467e48e..a20866e410aa49 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -433,8 +433,6 @@ class Parser : public CodeCompletionHandler { /// a statement expression and builds a suitable expression statement. StmtResult handleExprStmt(ExprResult E, ParsedStmtContext StmtCtx); - OMPClause *ParseOpenMPFailClause(OMPClause *Clause); - public: Parser(Preprocessor &PP, Sema &Actions, bool SkipFunctionBodies); ~Parser() override; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index bd7a809e404745..43dbf50d2829d5 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11361,9 +11361,6 @@ class Sema final { /// Called on well-formed 'compare' clause. OMPClause *ActOnOpenMPCompareClause(SourceLocation StartLoc, SourceLocation EndLoc); - /// Called on well-formed 'fail' clause. - OMPClause *ActOnOpenMPFailClause(SourceLocation StartLoc, - SourceLocation EndLoc); /// Called on well-formed 'seq_cst' clause. 
OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index acf80ed52c3d5d..dc2d90e366bc76 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -127,7 +127,6 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_update: case OMPC_capture: case OMPC_compare: - case OMPC_fail: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -221,7 +220,6 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_update: case OMPC_capture: case OMPC_compare: - case OMPC_fail: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -414,25 +412,6 @@ OMPUpdateClause *OMPUpdateClause::CreateEmpty(const ASTContext &C, return Clause; } -OMPFailClause *OMPFailClause::Create(const ASTContext &C, - SourceLocation StartLoc, - SourceLocation EndLoc) { - void *Mem = - C.Allocate(totalSizeToAlloc(2, 1), - alignof(OMPFailClause)); - auto *Clause = - new (Mem) OMPFailClause(StartLoc, EndLoc); - return Clause; -} - -OMPFailClause *OMPFailClause::CreateEmpty(const ASTContext &C) { - void *Mem = - C.Allocate(totalSizeToAlloc(2, 1), - alignof(OMPFailClause)); - auto *Clause = new (Mem) OMPFailClause(); - return Clause; -} - void OMPPrivateClause::setPrivateCopies(ArrayRef VL) { assert(VL.size() == varlist_size() && "Number of private copies is not the same as the preallocated buffer"); @@ -1868,16 +1847,6 @@ void OMPClausePrinter::VisitOMPCompareClause(OMPCompareClause *) { OS << "compare"; } -void OMPClausePrinter::VisitOMPFailClause(OMPFailClause *Node) { - OS << "fail"; - if(Node) { - OS << "("; - OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(), - static_cast(Node->getMemOrderClauseKind())); - OS << ")"; - } -} - void OMPClausePrinter::VisitOMPSeqCstClause(OMPSeqCstClause *) { OS << "seq_cst"; } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp 
index 3779e1f349b0e3..77d5d95a1ad108 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -557,8 +557,6 @@ void OMPClauseProfiler::VisitOMPCaptureClause(const OMPCaptureClause *) {} void OMPClauseProfiler::VisitOMPCompareClause(const OMPCompareClause *) {} -void OMPClauseProfiler::VisitOMPFailClause(const OMPFailClause *) {} - void OMPClauseProfiler::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseProfiler::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index dbfe2117b00a61..7b1086330f1e37 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -366,20 +366,6 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'depend' clause type"); - case OMPC_fail: { - OpenMPClauseKind CK = static_cast(Type); - switch (CK) { - case OMPC_acquire: - return "acquire"; - case OMPC_relaxed: - return "relaxed"; - case OMPC_seq_cst: - return "seq_cst"; - default: - return "unknown"; - } - llvm_unreachable("Invalid OpenMP 'fail' clause modifier"); - } case OMPC_device: switch (Type) { case OMPC_DEVICE_unknown: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 1e484f56441704..0385d5f1586e90 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3244,7 +3244,6 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_write: case OMPC_capture: case OMPC_compare: - case OMPC_fail: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -3631,45 +3630,6 @@ OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind, Val.getValue().Loc, Val.getValue().RLoc); } -OMPClause *Parser::ParseOpenMPFailClause(OMPClause *Clause) { - - OMPFailClause *FailClause = static_cast(Clause); - SourceLocation LParenLoc; - if (Tok.is(tok::l_paren)) { - LParenLoc = 
Tok.getLocation(); - ConsumeAnyToken(); - } else { - Diag(diag::err_expected_lparen_after) << getOpenMPClauseName(OMPC_fail); - return Clause; - } - - - OpenMPClauseKind CKind = Tok.isAnnotation() - ? OMPC_unknown - : getOpenMPClauseKind(PP.getSpelling(Tok)); - if (CKind == OMPC_unknown) { - Diag(diag::err_omp_expected_clause) << ("atomic compare fail"); - return Clause; - } - OMPClause *MemoryOrderClause = ParseOpenMPClause(CKind, false); - SourceLocation MemOrderLoc; - // Store Memory Order SubClause for Sema. - if (MemoryOrderClause) { - MemOrderLoc = Tok.getLocation(); - } - - if (Tok.is(tok::r_paren)) { - FailClause->initFailClause(LParenLoc,MemoryOrderClause,MemOrderLoc); - ConsumeAnyToken(); - } else { - const IdentifierInfo *Arg = Tok.getIdentifierInfo(); - Diag(Tok, diag::err_expected_rparen_after) - << (Arg ? Arg->getName() : "atomic compare fail"); - } - - return Clause; -} - /// Parsing of OpenMP clauses like 'ordered'. /// /// ordered-clause: @@ -3702,11 +3662,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPClauseKind Kind, bool ParseOnly) { if (ParseOnly) return nullptr; - OMPClause *Clause = Actions.ActOnOpenMPClause(Kind, Loc, Tok.getLocation()); - if (Kind == llvm::omp::Clause::OMPC_fail) { - Clause = ParseOpenMPFailClause(Clause); - } - return Clause; + return Actions.ActOnOpenMPClause(Kind, Loc, Tok.getLocation()); } /// Parsing of OpenMP clauses with single expressions and some additional diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index fdb7658f77bda5..09c187f24a802d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -12026,68 +12026,6 @@ bool OpenMPAtomicCompareCaptureChecker::checkStmt(Stmt *S, return checkType(ErrorInfo); } - -class OpenMPAtomicFailChecker { - -protected: - Sema &SemaRef; - ASTContext &Context; - -public: - // Error descriptor type which will be returned to Sema - unsigned int ErrorNo; - - OpenMPAtomicFailChecker(Sema &S) : SemaRef(S), Context(S.getASTContext()) 
{} - - /// Check if all results conform with spec in terms of lvalue/rvalue - /// and scalar type. - bool checkSubClause(ArrayRef Clauses, SourceLocation *ErrorLoc); - /// Return the error descriptor that will guide the error message emission. - unsigned getErrorDesc() const { return ErrorNo; } -}; - -bool OpenMPAtomicFailChecker::checkSubClause(ArrayRef Clauses, - SourceLocation *ErrorLoc) { - int NoOfFails = 0; - ErrorNo = 0; - SourceLocation ClauseLoc; - for (const OMPClause *C : Clauses) { - if (C->getClauseKind() == OMPC_fail) { - NoOfFails++; - const OMPFailClause *FC = static_cast(C); - const OMPClause *MemOrderC = FC->const_getMemoryOrderClause(); - /* Clauses contains OMPC_fail and the subclause */ - if (MemOrderC) { - OpenMPClauseKind ClauseKind = MemOrderC->getClauseKind(); - if ((ClauseKind == OMPC_acq_rel) || (ClauseKind == OMPC_acquire) || - (ClauseKind == OMPC_relaxed) || (ClauseKind == OMPC_release) || - (ClauseKind == OMPC_seq_cst)) { - switch(ClauseKind) { - case OMPC_acq_rel : - ClauseKind = OMPC_acquire; - break; - case OMPC_release : - ClauseKind = OMPC_relaxed; - break; - default : break; - } - continue; - } else { - ErrorNo = diag::err_omp_atomic_fail_wrong_or_no_clauses; - *ErrorLoc = MemOrderC->getBeginLoc(); - continue; - } - } - } - } - if (NoOfFails > 1) { - ErrorNo = diag::err_omp_atomic_fail_extra_clauses; - *ErrorLoc = ClauseLoc; - } - - return !ErrorNo; -} - } // namespace StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, @@ -12108,8 +12046,6 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, SourceLocation AtomicKindLoc; OpenMPClauseKind MemOrderKind = OMPC_unknown; SourceLocation MemOrderLoc; - llvm::omp::Clause SubClause = OMPC_unknown; - SourceLocation SubClauseLoc; bool MutexClauseEncountered = false; llvm::SmallSet EncounteredAtomicKinds; for (const OMPClause *C : Clauses) { @@ -12138,16 +12074,6 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, } break; } - case OMPC_fail: { - if 
(AtomicKind != OMPC_compare) { - Diag(C->getBeginLoc(), diag::err_omp_atomic_fail_no_compare) - << SourceRange(C->getBeginLoc(), C->getEndLoc()); - return StmtError(); - } - SubClause = OMPC_fail; - SubClauseLoc = C->getBeginLoc(); - break; - } case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -12626,17 +12552,6 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, CE = Checker.getCond(); // We reuse IsXLHSInRHSPart to tell if it is in the form 'x ordop expr'. IsXLHSInRHSPart = Checker.isXBinopExpr(); - if (SubClause == OMPC_fail) { - OpenMPAtomicFailChecker Checker(*this); - SourceLocation ErrorLoc, NoteLoc; - NoteLoc = ErrorLoc = Body->getBeginLoc(); - ErrorLoc = SubClauseLoc; - if (!Checker.checkSubClause(Clauses,&ErrorLoc)) { - unsigned ErrorNo = Checker.getErrorDesc(); - Diag(ErrorLoc, ErrorNo); - return StmtError(); - } - } } } @@ -16576,9 +16491,6 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_compare: Res = ActOnOpenMPCompareClause(StartLoc, EndLoc); break; - case OMPC_fail: - Res = ActOnOpenMPFailClause(StartLoc, EndLoc); - break; case OMPC_seq_cst: Res = ActOnOpenMPSeqCstClause(StartLoc, EndLoc); break; @@ -16732,11 +16644,6 @@ OMPClause *Sema::ActOnOpenMPCompareClause(SourceLocation StartLoc, return new (Context) OMPCompareClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPFailClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return OMPFailClause::Create(Context, StartLoc, EndLoc); -} - OMPClause *Sema::ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc) { return new (Context) OMPSeqCstClause(StartLoc, EndLoc); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index c951b6ca5d6a03..fae712e04db865 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9559,13 +9559,6 @@ TreeTransform::TransformOMPCompareClause(OMPCompareClause *C) { return C; } -template -OMPClause * -TreeTransform::TransformOMPFailClause(OMPFailClause 
*C) { - // No need to rebuild this clause, no template-dependent parameters. - return C; -} - template OMPClause * TreeTransform::TransformOMPSeqCstClause(OMPSeqCstClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a669f30d95c701..76629a516cddd8 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11744,9 +11744,6 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_compare: C = new (Context) OMPCompareClause(); break; - case llvm::omp::OMPC_fail: - C = OMPFailClause::CreateEmpty(Context); - break; case llvm::omp::OMPC_seq_cst: C = new (Context) OMPSeqCstClause(); break; @@ -12116,31 +12113,6 @@ void OMPClauseReader::VisitOMPCaptureClause(OMPCaptureClause *) {} void OMPClauseReader::VisitOMPCompareClause(OMPCompareClause *) {} -void OMPClauseReader::VisitOMPFailClause(OMPFailClause *C) { - C->setLParenLoc(Record.readSourceLocation()); - SourceLocation SourceLoc = Record.readSourceLocation(); - C->setArgumentLoc(SourceLoc); - OpenMPClauseKind CKind = Record.readEnum(); - C->setMemOrderClauseKind(CKind); - - SourceLocation EndLoc; - OMPClause *MemoryOrderClause = NULL; - switch(CKind) { - case llvm::omp::OMPC_acquire: - MemoryOrderClause = new (Context) OMPAcquireClause(SourceLoc, EndLoc); - break; - case llvm::omp::OMPC_relaxed: - MemoryOrderClause = new (Context) OMPRelaxedClause(SourceLoc, EndLoc); - break; - case llvm::omp::OMPC_seq_cst: - MemoryOrderClause = new (Context) OMPSeqCstClause(SourceLoc, EndLoc); - break; - default: - break; - } - C->setMemOrderClause(MemoryOrderClause); -} - void OMPClauseReader::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseReader::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 8a14ce23866055..61be9265db0cce 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6327,14 
+6327,6 @@ void OMPClauseWriter::VisitOMPCaptureClause(OMPCaptureClause *) {} void OMPClauseWriter::VisitOMPCompareClause(OMPCompareClause *) {} -void OMPClauseWriter::VisitOMPFailClause(OMPFailClause *C) { - // Record.AddSourceLocation(C->getLParenLoc()); - // Copied from VisitOMPUpdateClause - Record.AddSourceLocation(C->getLParenLoc()); - Record.AddSourceLocation(C->getArgumentLoc()); - Record.writeEnum(C->getMemOrderClauseKind()); -} - void OMPClauseWriter::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseWriter::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/test/OpenMP/atomic_ast_print.cpp b/clang/test/OpenMP/atomic_ast_print.cpp index c877cea915503b..201f62ab2117e3 100644 --- a/clang/test/OpenMP/atomic_ast_print.cpp +++ b/clang/test/OpenMP/atomic_ast_print.cpp @@ -226,16 +226,6 @@ T foo(T argc) { { v = a; if (a < b) { a = b; } } #pragma omp atomic compare capture hint(6) { v = a == b; if (v) a = c; } -#pragma omp atomic compare fail(acq_rel) - { if (a < c) { a = c; } } -#pragma omp atomic compare fail(acquire) - { if (a < c) { a = c; } } -#pragma omp atomic compare fail(release) - { if (a < c) { a = c; } } -#pragma omp atomic compare fail(relaxed) - { if (a < c) { a = c; } } -#pragma omp atomic compare fail(seq_cst) - { if (a < c) { a = c; } } #endif return T(); } @@ -1109,16 +1099,6 @@ int main(int argc, char **argv) { { v = a; if (a < b) { a = b; } } #pragma omp atomic compare capture hint(6) { v = a == b; if (v) a = c; } -#pragma omp atomic compare fail(acq_rel) - if(a < b) { a = b; } -#pragma omp atomic compare fail(acquire) - if(a < b) { a = b; } -#pragma omp atomic compare fail(release) - if(a < b) { a = b; } -#pragma omp atomic compare fail(relaxed) - if(a < b) { a = b; } -#pragma omp atomic compare fail(seq_cst) - if(a < b) { a = b; } #endif // CHECK-NEXT: #pragma omp atomic // CHECK-NEXT: a++; @@ -1449,26 +1429,6 @@ int main(int argc, char **argv) { // CHECK-51-NEXT: if (v) // CHECK-51-NEXT: a = c; // CHECK-51-NEXT: } - // 
CHECK-51-NEXT: #pragma omp atomic compare fail(acquire) - // CHECK-51-NEXT: if (a < b) { - // CHECK-51-NEXT: a = b; - // CHECK-51-NEXT: } - // CHECK-51-NEXT: #pragma omp atomic compare fail(acquire) - // CHECK-51-NEXT: if (a < b) { - // CHECK-51-NEXT: a = b; - // CHECK-51-NEXT: } - // CHECK-51-NEXT: #pragma omp atomic compare fail(relaxed) - // CHECK-51-NEXT: if (a < b) { - // CHECK-51-NEXT: a = b; - // CHECK-51-NEXT: } - // CHECK-51-NEXT: #pragma omp atomic compare fail(relaxed) - // CHECK-51-NEXT: if (a < b) { - // CHECK-51-NEXT: a = b; - // CHECK-51-NEXT: } - // CHECK-51-NEXT: #pragma omp atomic compare fail(seq_cst) - // CHECK-51-NEXT: if (a < b) { - // CHECK-51-NEXT: a = b; - // CHECK-51-NEXT: } // expect-note@+1 {{in instantiation of function template specialization 'foo' requested here}} return foo(a); } diff --git a/clang/test/OpenMP/atomic_messages.cpp b/clang/test/OpenMP/atomic_messages.cpp index 656fd498c74aae..23fd24bfcf1185 100644 --- a/clang/test/OpenMP/atomic_messages.cpp +++ b/clang/test/OpenMP/atomic_messages.cpp @@ -958,24 +958,6 @@ int mixed() { // expected-error@+1 {{directive '#pragma omp atomic' cannot contain more than one 'capture' clause}} #pragma omp atomic compare compare capture capture { v = a; if (a > b) a = b; } -// expected-error@+1 {{expected 'compare' clause with the 'fail' modifier}} -#pragma omp atomic fail(seq_cst) - if(v == a) { v = a; } -// expected-error@+1 {{expected '(' after 'fail'}} -#pragma omp atomic compare fail - if(v < a) { v = a; } -// expected-error@+1 {{expected a memory order clause}} -#pragma omp atomic compare fail(capture) - if(v < a) { v = a; } - // expected-error@+2 {{expected ')' after 'atomic compare fail'}} - // expected-warning@+1 {{extra tokens at the end of '#pragma omp atomic' are ignored}} -#pragma omp atomic compare fail(seq_cst | acquire) - if(v < a) { v = a; } -// expected-error@+1 {{directive '#pragma omp atomic' cannot contain more than one 'fail' clause}} -#pragma omp atomic compare 
fail(relaxed) fail(seq_cst) - if(v < a) { v = a; } - - #endif // expected-note@+1 {{in instantiation of function template specialization 'mixed' requested here}} return mixed(); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 7bc9453793298e..85dabac7e778a1 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2367,8 +2367,6 @@ void OMPClauseEnqueue::VisitOMPCaptureClause(const OMPCaptureClause *) {} void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} -void OMPClauseEnqueue::VisitOMPFailClause(const OMPFailClause *) {} - void OMPClauseEnqueue::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseEnqueue::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 73d660f33d995e..83acaba367ffd2 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1807,7 +1807,6 @@ CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind) CHECK_SIMPLE_CLAUSE(Align, OMPC_align) CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare) CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type) -CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index b4abc64bc21185..4f0cc29a8b2091 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -199,7 +199,6 @@ def OMPC_Write : Clause<"write"> { let clangClass = "OMPWriteClause"; } def OMPC_Update : Clause<"update"> { let clangClass = "OMPUpdateClause"; } def OMPC_Capture : Clause<"capture"> { let clangClass = "OMPCaptureClause"; } def OMPC_Compare : Clause<"compare"> { let clangClass = "OMPCompareClause"; } -def OMPC_Fail : Clause<"fail"> { let clangClass = 
"OMPFailClause"; } def OMPC_SeqCst : Clause<"seq_cst"> { let clangClass = "OMPSeqCstClause"; } def OMPC_AcqRel : Clause<"acq_rel"> { let clangClass = "OMPAcqRelClause"; } def OMPC_Acquire : Clause<"acquire"> { let clangClass = "OMPAcquireClause"; } @@ -570,8 +569,7 @@ def OMP_Atomic : Directive<"atomic"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_Target : Directive<"target"> { From 9368bf9023eee0dc6fcfa007e157fe30e1540fcc Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 25 May 2022 13:45:17 -0400 Subject: [PATCH 495/908] Removing this as part of the revert done in 69da3b6aead2e7a18a2578aad661d6d36b8d30cf This appears to have been added in a follow-up commit that I missed. --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ebe65e5f72b3e7..61e3661e59be0d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6317,7 +6317,6 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_bind: case OMPC_align: case OMPC_cancellation_construct_type: - case OMPC_fail: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } From be2cb824d0d49a6483f310552dac47a35b72face Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 25 May 2022 10:37:26 -0700 Subject: [PATCH 496/908] [riscv] Remove mutation of prior vsetvli from insertion dataflow This moves mutation entirely out of the main algorithm. The immediate trigger is that we hit another case of the same issue I thought we'd fixed in 72925d9. It turns out we hadn't considered the cross block case. As a brief summary, the issue being fixed is that if we mutate a previous vsetvli in phase 3, there's a possibility that some later use of that vsetvli changes "compatibility". 
In the cross_block_mutate test, this later vsetvli occurs in another block (and is thus visit-order dependent too!). This causes us to fail strict asserts. (To be explicit, the current on-by-default workaround should compensate. It's only when we turn that off that we have problems.) Now, I want to explicitly call out an alternate workaround. We could leave the mutation in phase 3, and simply restrict it to the case where the previous vsetvli's GPR result is unused. That covers the case we've actually seen. (I'll note that codegen regressions with a simple form of this were significant. We might have to check specifically for the use-outside-block case to keep them reasonable, which complicates the workaround slightly.) Personally, I'm at the point where I want the mutation pulled out just for robustness' sake. I'm worried there's yet one more form of this bug we haven't thought about. The other motivation for this change is that it does give us a couple of minor codegen wins. None appear to be hugely significant, but improvements never hurt, right? 
Differential Revision: https://reviews.llvm.org/D125270 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 91 ++++++++++++++----- .../RISCV/rvv/vsetvli-insert-crossbb.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 6 +- 3 files changed, 68 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 1bfa8371a259f0..2ff2a60cc961e1 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -462,6 +462,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); void doLocalPrepass(MachineBasicBlock &MBB); + void doLocalPostpass(MachineBasicBlock &MBB); void doPRE(MachineBasicBlock &MBB); }; @@ -478,6 +479,15 @@ static bool isVectorConfigInstr(const MachineInstr &MI) { MI.getOpcode() == RISCV::PseudoVSETIVLI; } +/// Return true if this is 'vsetvli x0, x0, vtype' which preserves +/// VL and only sets VTYPE. +static bool isVLPreservingConfig(const MachineInstr &MI) { + if (MI.getOpcode() != RISCV::PseudoVSETVLIX0) + return false; + assert(RISCV::X0 == MI.getOperand(1).getReg()); + return RISCV::X0 == MI.getOperand(0).getReg(); +} + static MachineInstr *elideCopies(MachineInstr *MI, const MachineRegisterInfo *MRI) { while (true) { @@ -1065,9 +1075,6 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { VSETVLIInfo CurInfo; - // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI. - MachineInstr *PrevVSETVLIMI = nullptr; - for (MachineInstr &MI : MBB) { // If this is an explicit VSETVLI or VSETIVLI, update our state. 
if (isVectorConfigInstr(MI)) { @@ -1078,7 +1085,6 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { MI.getOperand(3).setIsDead(false); MI.getOperand(4).setIsDead(false); CurInfo = getInfoForVSETVLI(MI); - PrevVSETVLIMI = &MI; continue; } @@ -1125,29 +1131,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { // vl/vtype for succesor blocks. if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) && needVSETVLI(NewInfo, CurInfo)) { - // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it - // with current VL/VTYPE. - bool NeedInsertVSETVLI = true; - if (PrevVSETVLIMI) { - // If these two VSETVLI have the same AVL and the same VLMAX, - // we could merge these two VSETVLI. - // TODO: If we remove this, we get a `vsetvli x0, x0, vtype' - // here. We could simply let this be emitted, then remove - // the unused vsetvlis in a post-pass. - if (CurInfo.hasSameAVL(NewInfo) && CurInfo.hasSameVLMAX(NewInfo)) { - // WARNING: For correctness, it is essential the contents of VL - // and VTYPE stay the same after MI. This greatly limits the - // mutation we can legally do here. 
- PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); - NeedInsertVSETVLI = false; - } - } - if (NeedInsertVSETVLI) - insertVSETVLI(MBB, MI, NewInfo, CurInfo); + insertVSETVLI(MBB, MI, NewInfo, CurInfo); CurInfo = NewInfo; } } - PrevVSETVLIMI = nullptr; } // If this is something that updates VL/VTYPE that we don't know about, set @@ -1155,7 +1142,6 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || MI.modifiesRegister(RISCV::VTYPE)) { CurInfo = VSETVLIInfo::getUnknown(); - PrevVSETVLIMI = nullptr; } } @@ -1378,6 +1364,52 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { AvailableInfo, OldInfo); } +void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { + MachineInstr *PrevMI = nullptr; + bool UsedVL = false, UsedVTYPE = false; + SmallVector ToDelete; + for (MachineInstr &MI : MBB) { + // Note: Must be *before* vsetvli handling to account for config cases + // which only change some subfields. + if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL)) + UsedVL = true; + if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE)) + UsedVTYPE = true; + + if (!isVectorConfigInstr(MI)) + continue; + + if (PrevMI) { + if (!UsedVL && !UsedVTYPE) { + ToDelete.push_back(PrevMI); + // fallthrough + } else if (!UsedVTYPE && isVLPreservingConfig(MI)) { + // Note: `vsetvli x0, x0, vtype' is the canonical instruction + // for this case. If you find yourself wanting to add other forms + // to this "unused VTYPE" case, we're probably missing a + // canonicalization earlier. + // Note: We don't need to explicitly check vtype compatibility + // here because this form is only legal (per ISA) when not + // changing VL. 
+ PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm()); + ToDelete.push_back(&MI); + // Leave PrevMI unchanged + continue; + } + } + PrevMI = &MI; + UsedVL = false; + UsedVTYPE = false; + Register VRegDef = MI.getOperand(0).getReg(); + if (VRegDef != RISCV::X0 && + !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) + UsedVL = true; + } + + for (auto *MI : ToDelete) + MI->eraseFromParent(); +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. const RISCVSubtarget &ST = MF.getSubtarget(); @@ -1443,6 +1475,15 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) emitVSETVLIs(MBB); + // Now that all vsetvlis are explicit, go through and do block local + // DSE and peephole based demanded fields based transforms. Note that + // this *must* be done outside the main dataflow so long as we allow + // any cross block analysis within the dataflow. We can't have both + // demanded fields based mutation and non-local analysis in the + // dataflow at the same time without introducing inconsistencies. + for (MachineBasicBlock &MBB : MF) + doLocalPostpass(MBB); + // Once we're fully done rewriting all the instructions, do a final pass // through to check for VSETVLIs which write to an unused destination. 
// For the non X0, X0 variant, we can replace the destination register diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index b9886aafe83cfd..9b92926b25f3c3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -747,7 +747,6 @@ define void @vector_init_vsetvli_fv2(i64 %N, double* %c) { ; CHECK-LABEL: vector_init_vsetvli_fv2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB15_1: # %for.body @@ -812,14 +811,13 @@ for.end: ; preds = %for.body ; Demonstrates a case where mutation in phase3 is problematic. We mutate the ; vsetvli without considering that it changes the compatibility result of the ; vadd in the second block. -; FIXME: This currently crashes with strict asserts enabled. define @cross_block_mutate( %a, %b, ; CHECK-LABEL: cross_block_mutate: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli a0, 6, e32, m2, tu, mu ; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %mask) { entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index e625e446eea040..3fff538b571717 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -328,8 +328,7 @@ entry: define double @test17(i64 %avl, %a, %b) nounwind { ; CHECK-LABEL: test17: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, mu -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfadd.vv v8, v8, v9 @@ -468,7 +467,6 @@ entry: define 
void @avl_forward4( %v, * %p, i64 %reg) nounwind { ; CHECK-LABEL: avl_forward4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret @@ -498,7 +496,6 @@ entry: define @vleNff(i64* %str, i64 %n, i64 %x) { ; CHECK-LABEL: vleNff: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl @@ -526,7 +523,6 @@ define @avl_forward5(* %addr) { ; CHECK-LABEL: avl_forward5: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret From d3187dd5f0f07c9c7536792af25ba723506f6339 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 25 May 2022 13:43:13 -0400 Subject: [PATCH 497/908] [SLP] add minimum test for miscompile (PR55688); NFC --- .../SLPVectorizer/X86/horizontal.ll | 45 ++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index f3fa5e0f57dd8b..8ff25ec5515855 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -220,8 +220,8 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 ; ALL-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]] ; ALL-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]]) -; ALL-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] -; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] +; ALL-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[MUL49]] +; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[OP_RDX]] ; 
ALL-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 ; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] ; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] @@ -341,19 +341,19 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { ; ALL-NEXT: br label [[FOR_BODY:%.*]] ; ALL: for.body: ; ALL-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] +; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2 ; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] ; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] ; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) -; ALL-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] +; ALL-NEXT: [[OP_RDX]] = fadd fast float [[TMP6]], [[SUM_042]] ; ALL-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 ; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] ; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] ; ALL: for.cond.for.end_crit_edge: -; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32 +; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_RDX]] to i32 ; ALL-NEXT: br label [[FOR_END]] ; ALL: for.end: ; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] @@ -1463,4 +1463,39 @@ define float @fadd_v4f32_fmf_intersect(float* %p) { ret float %add3 } +; FIXME: Can't preserve no-wrap guarantees with reassociated math. 
+; This must not propagate 'nsw' to a new add instruction. + +define void @nsw_propagation_v4i32(i32* %res, i32 %start) { +; CHECK-LABEL: @nsw_propagation_v4i32( +; CHECK-NEXT: [[T0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 +; CHECK-NEXT: [[T1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 +; CHECK-NEXT: [[T2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 +; CHECK-NEXT: [[T3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 +; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[START:%.*]], [[T0]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T1]], [[S]] +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[T2]], [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[T3]], [[ADD_1]] +; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16 +; CHECK-NEXT: ret void +; +; STORE-LABEL: @nsw_propagation_v4i32( +; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 +; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) +; STORE-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[TMP2]], [[START:%.*]] +; STORE-NEXT: store i32 [[OP_RDX]], i32* [[RES:%.*]], align 16 +; STORE-NEXT: ret void +; + %t0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 + %t1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 + %t2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 + %t3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 + %s = add nsw i32 %start, %t0 + %add = add nsw i32 %t1, %s + %add.1 = add nsw i32 %t2, %add + %add.2 = add nsw i32 %t3, %add.1 + store i32 %add.2, i32* %res, align 16 + ret void +} + declare i32 
@__gxx_personality_v0(...) From 0ee1c0388ca354a06914fb7aa5454ceacfa806d0 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 25 May 2022 19:39:57 +0200 Subject: [PATCH 498/908] [mlir][bufferize] Remove hoisting functionality from One-Shot Bufferize The same functionality is already provided by `-buffer-hoisting` and `-buffer-loop-hoisting`. Differential Revision: https://reviews.llvm.org/D126251 --- .../IR/BufferizableOpInterface.h | 12 ---- .../IR/BufferizableOpInterface.td | 16 ----- .../Bufferization/Transforms/Bufferize.h | 5 +- .../IR/BufferizableOpInterface.cpp | 57 ------------------ .../Bufferization/Transforms/Bufferize.cpp | 4 -- .../FuncBufferizableOpInterfaceImpl.cpp | 2 - .../BufferizableOpInterfaceImpl.cpp | 5 -- .../Transforms/buffer-hoisting.mlir | 0 .../Transforms/buffer-loop-hoisting.mlir | 0 .../Dialect/Linalg/one-shot-bufferize.mlir | 2 +- mlir/test/Dialect/SCF/one-shot-bufferize.mlir | 60 ++++++++++--------- 11 files changed, 35 insertions(+), 128 deletions(-) rename mlir/test/{ => Dialect/Bufferization}/Transforms/buffer-hoisting.mlir (100%) rename mlir/test/{ => Dialect/Bufferization}/Transforms/buffer-loop-hoisting.mlir (100%) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index 4ca73feb85da2e..8b492cfda643e6 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -280,14 +280,6 @@ struct BufferizationOptions { /// computation. Whether this pays off or not can be very input IR-specific. bool alwaysAliasingWithDest = true; - /// If set to `true`, try to hoist allocations out of blocks as much as - /// possible. An allocation is not hoisted across allocation hoisting barriers - /// as indicated by `BufferizableOpInterface::isAllocationHoistingBarrier`. 
- /// - /// Examples of allocation hoisting barriers are parallel loops or ops where - /// SSA values cannot be captured from the outside. - bool hoistAllocations = true; - /// Buffer alignment for new memory allocations. unsigned int bufferAlignment = 128; @@ -618,10 +610,6 @@ BaseMemRefType getMemRefTypeWithStaticIdentityLayout(TensorType tensorType, Attribute memorySpace = {}); -/// Try to hoist all new buffer allocations until the next hoisting barrier. -LogicalResult hoistBufferAllocations(Operation *op, - const BufferizationOptions &options); - /// Create alloc/dealloc ops as specified in the bufferization options. If /// `onlyLeakingAlloc`, only those buffer allocations are processed for which no /// buffer deallocation can be created. `changed` is set to `true` if the IR was diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td index 76b0659ae96ebc..6e8cec021845ab 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td @@ -259,22 +259,6 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> { return value.isa(); }] >, - InterfaceMethod< - /*desc=*/[{ - Return `true` if the op is an allocation hoisting barrier. Buffer - allocations will never be beyond such ops. E.g., ops with certain - parallel semantics may be allocation hoisting barriers. The majority - of ops, however, is not a barrier. Therefore, this method returns - `false` by default. 
- }], - /*retType=*/"bool", - /*methodName=*/"isAllocationHoistingBarrier", - /*args=*/(ins), - /*methodBody=*/"", - /*defaultImplementation=*/[{ - return false; - }] - >, InterfaceMethod< /*desc=*/[{ Return `true` if the `uRead` and `uWrite` do not constitute a RaW diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h index be87bab0013e2c..9dc2e96d2f22e5 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h @@ -91,9 +91,8 @@ BufferizationOptions getPartialBufferizationOptions(); LogicalResult bufferizeOp(Operation *op, BufferizationState &bufferizationState); -/// Finalize all buffer allocations. -/// * Hoist buffer allocations as much as possible. -/// * Create alloc/dealloc ops as specified by the bufferization options. +/// Finalize all buffer allocations: Create alloc/dealloc ops as specified by +/// the bufferization options. LogicalResult finalizeBuffers(Operation *op, const BufferizationOptions &options); } // namespace bufferization diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index a1c9fd66ff4a45..90bfb67a0b35bd 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -582,63 +582,6 @@ bufferization::createAllocDeallocOps(Operation *op, return success(!status.wasInterrupted()); } -/// Try to hoist all new buffer allocations until the next hoisting barrier. -// TODO: Consolidate this function with the existing buffer hoisting pass. -LogicalResult -bufferization::hoistBufferAllocations(Operation *op, - const BufferizationOptions &options) { - // Nothing to do if allocation hoisting is deactivated. 
- if (!options.hoistAllocations) - return success(); - - // Gather all buffer allocations that were created by the bufferization. - SmallVector allocaOps; - op->walk([&](memref::AllocaOp allocaOp) { - if (allocaOp->hasAttr(kBufferAllocationAttr)) - allocaOps.push_back(allocaOp); - }); - - for (Operation *allocaOp : allocaOps) { - // TODO: Hoisting of allocs with dynamic shape not implemented. - if (!allocaOp->getOpOperands().empty()) - continue; - - Operation *op = allocaOp->getParentOp(); - while (op) { - if (auto bufferizableOp = dyn_cast(op)) { - if (bufferizableOp.isAllocationHoistingBarrier()) { - break; - } - } else { - // Op is not bufferizable: It may not be safe to hoist across this op. - break; - } - op = op->getParentOp(); - } - - // FuncOp is an allocation hoisting barrier, so this should never happen. - assert(op && "allocation hoisting barrier not found"); - - // Nothing to do if the insertion point is in the same block. - if (op == allocaOp->getParentOp()) - continue; - - // `op` may have multiple blocks. Make sure that we insert in the right one. - SmallVector blocks; - for (Region &r : op->getRegions()) - for (Block &b : r.getBlocks()) - blocks.push_back(&b); - auto *insertionBlock = llvm::find_if( - blocks, [&](Block *b) { return b->findAncestorOpInBlock(*allocaOp); }); - assert(insertionBlock != blocks.end() && "owning block not found"); - - // Move to the beginning of the block. - allocaOp->moveBefore(&(*insertionBlock)->front()); - } - - return success(); -} - //===----------------------------------------------------------------------===// // Bufferization-specific BlockAndValueMapping support with debugging. 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 9fee93edee1a01..4aad432a6c528c 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -295,10 +295,6 @@ static bool hasTensorSemantics(Operation *op) { LogicalResult bufferization::finalizeBuffers(Operation *op, const BufferizationOptions &options) { - // Hoist buffers. - if (failed(hoistBufferAllocations(op, options))) - return failure(); - // Create allocation ops for "leaking buffers", i.e., buffer allocations that // escape block boundaries. If there are no leaking allocs, `hasLeakingAllocs` // is set to `false`. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 16989dff36722a..be91f1fd86e99b 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -549,8 +549,6 @@ struct FuncOpInterface // All function arguments are writable by default. return true; } - - bool isAllocationHoistingBarrier(Operation *op) const { return true; } }; } // namespace func_ext diff --git a/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp index ac868a3723a785..4b7e0a33b5b5cf 100644 --- a/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp @@ -119,11 +119,6 @@ struct AssumingOpInterface const AnalysisState &state) const { return BufferRelation::Equivalent; } - - bool isAllocationHoistingBarrier(Operation *op) const { - // Allocations should not be hoisted out of AssumingOps. 
- return true; - } }; /// Bufferization of shape.assuming_yield. Bufferized as part of their enclosing diff --git a/mlir/test/Transforms/buffer-hoisting.mlir b/mlir/test/Dialect/Bufferization/Transforms/buffer-hoisting.mlir similarity index 100% rename from mlir/test/Transforms/buffer-hoisting.mlir rename to mlir/test/Dialect/Bufferization/Transforms/buffer-hoisting.mlir diff --git a/mlir/test/Transforms/buffer-loop-hoisting.mlir b/mlir/test/Dialect/Bufferization/Transforms/buffer-loop-hoisting.mlir similarity index 100% rename from mlir/test/Transforms/buffer-loop-hoisting.mlir rename to mlir/test/Dialect/Bufferization/Transforms/buffer-loop-hoisting.mlir diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 7fad7d62dc9074..15a85f30fc0b17 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -buffer-loop-hoisting -split-input-file | FileCheck %s // Run fuzzer with different seeds. // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir index a3b57228beaf67..888eea82bbf794 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -367,18 +367,20 @@ func.func @scf_while_non_equiv_condition(%arg0: tensor<5xi1>, %idx: index) -> (tensor<5xi1>, tensor<5xi1>) { - // These allocation used to be inside the scf.while loop, but they were - // hoisted. 
- // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[arg0]], %[[w1:.*]] = %[[arg1]]) {{.*}} { + // CHECK: %[[clone1:.*]] = bufferization.clone %[[arg1]] + // CHECK: %[[clone0:.*]] = bufferization.clone %[[arg0]] + // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[clone0]], %[[w1:.*]] = %[[clone1]]) {{.*}} { %r0, %r1 = scf.while (%w0 = %arg0, %w1 = %arg1) : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) { // CHECK: %[[condition:.*]] = memref.load %[[w0]] + // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: memref.copy %[[w1]], %[[a1]] - // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] + // CHECK: memref.dealloc %[[w1]] + // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: memref.copy %[[w0]], %[[a0]] + // CHECK: memref.dealloc %[[w0]] // CHECK: %[[casted0:.*]] = memref.cast %[[a0]] + // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] // CHECK: scf.condition(%[[condition]]) %[[casted1]], %[[casted0]] %condition = tensor.extract %w0[%idx] : tensor<5xi1> scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1> @@ -410,21 +412,21 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>, %idx: index) -> (tensor<5xi1>, tensor<5xi1>) { - // These allocation used to be inside the scf.while loop, but they were - // hoisted. 
- // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a2:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[a3:.*]] = memref.alloc() {{.*}} : memref<5xi1> - // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[arg0]], %[[w1:.*]] = %[[arg1]]) {{.*}} { + // CHECK: %[[clone1:.*]] = bufferization.clone %[[arg1]] + // CHECK: %[[clone0:.*]] = bufferization.clone %[[arg0]] + // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[clone0]], %[[w1:.*]] = %[[clone1]]) {{.*}} { %r0, %r1 = scf.while (%w0 = %arg0, %w1 = %arg1) : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) { // CHECK: %[[condition:.*]] = memref.load %[[w0]] - // CHECK: memref.copy %[[w1]], %[[a3]] - // CHECK: %[[casted3:.*]] = memref.cast %[[a3]] - // CHECK: memref.copy %[[w0]], %[[a2]] - // CHECK: %[[casted2:.*]] = memref.cast %[[a2]] - // CHECK: scf.condition(%[[condition]]) %[[casted3]], %[[casted2]] + // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[w1]], %[[a1]] + // CHECK: memref.dealloc %[[w1]] + // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[w0]], %[[a0]] + // CHECK: memref.dealloc %[[w0]] + // CHECK: %[[casted0:.*]] = memref.cast %[[a0]] + // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] + // CHECK: scf.condition(%[[condition]]) %[[casted1]], %[[casted0]] %condition = tensor.extract %w0[%idx] : tensor<5xi1> scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1> } do { @@ -432,11 +434,14 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>, // CHECK: } do { // CHECK: ^bb0(%[[b0:.*]]: memref<5xi1, #{{.*}}>, %[[b1:.*]]: memref<5xi1, #{{.*}}): // CHECK: memref.store %{{.*}}, %[[b0]] - // CHECK: memref.copy %[[b1]], %[[a1]] - // CHECK: %[[casted1:.*]] = memref.cast %[[a1]] - // CHECK: memref.copy %[[b0]], %[[a0]] - // CHECK: %[[casted0:.*]] = memref.cast %[[a0]] - // CHECK: scf.yield %[[casted1]], 
%[[casted0]] + // CHECK: %[[a3:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[b1]], %[[a3]] + // CHECK: memref.dealloc %[[b1]] + // CHECK: %[[a2:.*]] = memref.alloc() {{.*}} : memref<5xi1> + // CHECK: memref.copy %[[b0]], %[[a2]] + // CHECK: %[[casted2:.*]] = memref.cast %[[a2]] + // CHECK: %[[casted3:.*]] = memref.cast %[[a3]] + // CHECK: scf.yield %[[casted3]], %[[casted2]] // CHECK: } %pos = "dummy.some_op"() : () -> (index) %val = "dummy.another_op"() : () -> (i1) @@ -444,8 +449,6 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>, scf.yield %b1, %1 : tensor<5xi1>, tensor<5xi1> } - // CHECK-DAG: memref.dealloc %[[a0]] - // CHECK-DAG: memref.dealloc %[[a1]] // CHECK: return %[[loop]]#0, %[[loop]]#1 return %r0, %r1 : tensor<5xi1>, tensor<5xi1> } @@ -454,19 +457,20 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>, // CHECK-LABEL: func @scf_while_iter_arg_result_mismatch( // CHECK-SAME: %[[arg0:.*]]: memref<5xi1, #{{.*}}>, %[[arg1:.*]]: memref<5xi1, #{{.*}}> -// CHECK: %[[alloc1:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<5xi1> -// CHECK: scf.while (%[[arg3:.*]] = %[[arg1]]) : (memref<5xi1, #{{.*}}) -> () { +// CHECK: %[[clone:.*]] = bufferization.clone %[[arg1]] +// CHECK: scf.while (%[[arg3:.*]] = %[[clone]]) : (memref<5xi1, #{{.*}}) -> () { +// CHECK: memref.dealloc %[[arg3]] // CHECK: %[[load:.*]] = memref.load %[[arg0]] // CHECK: scf.condition(%[[load]]) // CHECK: } do { // CHECK: memref.copy %[[arg0]], %[[alloc2]] // CHECK: memref.store %{{.*}}, %[[alloc2]] +// CHECK: %[[alloc1:.*]] = memref.alloc() {{.*}} : memref<5xi1> // CHECK: memref.copy %[[alloc2]], %[[alloc1]] // CHECK: %[[casted:.*]] = memref.cast %[[alloc1]] : memref<5xi1> to memref<5xi1, #{{.*}}> // CHECK: scf.yield %[[casted]] // CHECK: } -// CHECK-DAG: memref.dealloc %[[alloc1]] // CHECK-DAG: memref.dealloc %[[alloc2]] func.func @scf_while_iter_arg_result_mismatch(%arg0: 
tensor<5xi1>, %arg1: tensor<5xi1>, From 23bb550eeba48651964ffeb0c8a061ac99b91034 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 25 May 2022 06:46:25 -0700 Subject: [PATCH 499/908] DWARFVerifier: Change vector of IntervalMap to vector of unique_ptrs This is a workaround for compilation issue on FreeBSD. See comments in https://reviews.llvm.org/rG0d8cb8b399ad for more information. This fixes https://github.com/llvm/llvm-project/issues/55414. Differential Revision: https://reviews.llvm.org/D125611 --- llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 1544714053728f..c704f8f583af83 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -406,9 +406,9 @@ unsigned DWARFVerifier::verifyIndex(StringRef Name, DataExtractor D(IndexStr, DCtx.isLittleEndian(), 0); if (!Index.parse(D)) return 1; - IntervalMap::Allocator Alloc; - std::vector> Sections( - Index.getColumnKinds().size(), IntervalMap(Alloc)); + using MapType = IntervalMap; + MapType::Allocator Alloc; + std::vector> Sections(Index.getColumnKinds().size()); for (const DWARFUnitIndex::Entry &E : Index.getRows()) { uint64_t Sig = E.getSignature(); if (!E.getContributions()) @@ -421,7 +421,9 @@ unsigned DWARFVerifier::verifyIndex(StringRef Name, int Col = E.index(); if (SC.Length == 0) continue; - auto &M = Sections[Col]; + if (!Sections[Col]) + Sections[Col] = std::make_unique(Alloc); + auto &M = *Sections[Col]; auto I = M.find(SC.Offset); if (I != M.end() && I.start() < (SC.Offset + SC.Length)) { error() << llvm::formatv( From 504736cedff39d46fffc1293a35602ba140b19a8 Mon Sep 17 00:00:00 2001 From: Daniel Grumberg Date: Mon, 16 May 2022 11:46:36 +0100 Subject: [PATCH 500/908] [clang][extract-api] Don't emit symbols prefixed with an underscore These symbols are understood to not be used for 
client API consumption by convention so they should not appear in the generated symbol graph. Differential Revision: https://reviews.llvm.org/D125678 --- .../Serialization/SymbolGraphSerializer.cpp | 5 + clang/test/ExtractAPI/underscored.c | 396 ++++++++++++++++++ 2 files changed, 401 insertions(+) create mode 100644 clang/test/ExtractAPI/underscored.c diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 045d5d68e733bb..39d885c7c90aa3 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -467,6 +467,11 @@ bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const { if (Record.Availability.isUnconditionallyUnavailable()) return true; + // Filter out symbols prefixed with an underscored as they are understood to + // be symbols clients should not use. + if (Record.Name.startswith("_")) + return true; + return false; } diff --git a/clang/test/ExtractAPI/underscored.c b/clang/test/ExtractAPI/underscored.c new file mode 100644 index 00000000000000..47f1893cdb029b --- /dev/null +++ b/clang/test/ExtractAPI/underscored.c @@ -0,0 +1,396 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ +// RUN: %t/reference.output.json.in >> %t/reference.output.json +// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: -x c-header %t/input.h -o %t/output.json -verify + +// Generator version is not consistent across test runs, normalize it. 
+// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ +// RUN: %t/output.json >> %t/output-normalized.json +// RUN: diff %t/reference.output.json %t/output-normalized.json + +//--- input.h +// expected-no-diagnostics + +// Global record +int _HiddenGlobal; +int exposed_global; + +// Record type +struct _HiddenRecord { + int a; +}; + +struct ExposedRecord { + int a; +}; + +// Typedef +typedef struct {} _HiddenTypedef; +typedef int ExposedTypedef; +typedef _HiddenTypedef ExposedTypedefToHidden; + +// Macros +#define _HIDDEN_MACRO 5 +#define EXPOSED_MACRO 5 + +// Symbols that start with '_' should not appear in the reference output +//--- reference.output.json.in +{ + "metadata": { + "formatVersion": { + "major": 0, + "minor": 5, + "patch": 3 + }, + "generator": "?" + }, + "module": { + "name": "", + "platform": { + "architecture": "arm64", + "operatingSystem": { + "minimumVersion": { + "major": 11, + "minor": 0, + "patch": 0 + }, + "name": "macosx" + }, + "vendor": "apple" + } + }, + "relationships": [ + { + "kind": "memberOf", + "source": "c:@S@ExposedRecord@FI@a", + "target": "c:@S@ExposedRecord" + } + ], + "symbols": [ + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "exposed_global" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:@exposed_global" + }, + "kind": { + "displayName": "Global Variable", + "identifier": "c.var" + }, + "location": { + "position": { + "character": 5, + "line": 5 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "exposed_global" + } + ], + "subHeading": [ + { + "kind": "identifier", + "spelling": "exposed_global" + } + ], + "title": "exposed_global" + }, + "pathComponents": [ + "exposed_global" + ] + }, + { + "accessLevel": "public", + 
"declarationFragments": [ + { + "kind": "keyword", + "spelling": "struct" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "ExposedRecord" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:@S@ExposedRecord" + }, + "kind": { + "displayName": "Structure", + "identifier": "c.struct" + }, + "location": { + "position": { + "character": 8, + "line": 12 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "ExposedRecord" + } + ], + "subHeading": [ + { + "kind": "identifier", + "spelling": "ExposedRecord" + } + ], + "title": "ExposedRecord" + }, + "pathComponents": [ + "ExposedRecord" + ] + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "a" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:@S@ExposedRecord@FI@a" + }, + "kind": { + "displayName": "Instance Property", + "identifier": "c.property" + }, + "location": { + "position": { + "character": 7, + "line": 13 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "a" + } + ], + "subHeading": [ + { + "kind": "identifier", + "spelling": "a" + } + ], + "title": "a" + }, + "pathComponents": [ + "ExposedRecord", + "a" + ] + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "keyword", + "spelling": "#define" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "EXPOSED_MACRO" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:input.h@335@macro@EXPOSED_MACRO" + }, + "kind": { + "displayName": "Macro", + "identifier": "c.macro" + }, + "location": { + "position": { + "character": 9, + "line": 23 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": 
{ + "navigator": [ + { + "kind": "identifier", + "spelling": "EXPOSED_MACRO" + } + ], + "subHeading": [ + { + "kind": "identifier", + "spelling": "EXPOSED_MACRO" + } + ], + "title": "EXPOSED_MACRO" + }, + "pathComponents": [ + "EXPOSED_MACRO" + ] + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "keyword", + "spelling": "typedef" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "ExposedTypedef" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:input.h@T@ExposedTypedef" + }, + "kind": { + "displayName": "Type Alias", + "identifier": "c.typealias" + }, + "location": { + "position": { + "character": 13, + "line": 18 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "ExposedTypedef" + } + ], + "subHeading": [ + { + "kind": "identifier", + "spelling": "ExposedTypedef" + } + ], + "title": "ExposedTypedef" + }, + "pathComponents": [ + "ExposedTypedef" + ], + "type": "c:I" + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "keyword", + "spelling": "typedef" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:@SA@_HiddenTypedef", + "spelling": "_HiddenTypedef" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "ExposedTypedefToHidden" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:input.h@T@ExposedTypedefToHidden" + }, + "kind": { + "displayName": "Type Alias", + "identifier": "c.typealias" + }, + "location": { + "position": { + "character": 24, + "line": 19 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "ExposedTypedefToHidden" + } + ], + "subHeading": [ + { + "kind": 
"identifier", + "spelling": "ExposedTypedefToHidden" + } + ], + "title": "ExposedTypedefToHidden" + }, + "pathComponents": [ + "ExposedTypedefToHidden" + ], + "type": "c:@SA@_HiddenTypedef" + } + ] +} From 4baae166ce33ac763bfc1813cf56b8d26e07fb1f Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Wed, 25 May 2022 10:22:10 -0700 Subject: [PATCH 501/908] [pseudo] Fix pseudo-gen usage when cross-compiling Use the LLVM build system's cross-compilation support for the tool, so that the build works for both host and cross-compilation scenarios. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D126397 --- .../pseudo/include/CMakeLists.txt | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/pseudo/include/CMakeLists.txt b/clang-tools-extra/pseudo/include/CMakeLists.txt index a79dd0dab81840..e4265b18c2b2ef 100644 --- a/clang-tools-extra/pseudo/include/CMakeLists.txt +++ b/clang-tools-extra/pseudo/include/CMakeLists.txt @@ -1,25 +1,35 @@ # The cxx.bnf grammar file set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf) +# Using CMAKE_CROSSCOMPILING and not LLVM_USE_HOST_TOOLS because the latter is +# also set for LLVM_OPTIMIZED_TABLEGEN, which we don't care about here. +if(CMAKE_CROSSCOMPILING) + build_native_tool(pseudo-gen pseudo_gen) + set(pseudo_gen_target "${pseudo_gen}") +else() + set(pseudo_gen $) + set(pseudo_gen_target pseudo-gen) +endif() + # Generate inc files. set(cxx_symbols_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXSymbols.inc) add_custom_command(OUTPUT ${cxx_symbols_inc} - COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + COMMAND "${pseudo_gen}" --grammar ${cxx_bnf} --emit-symbol-list -o ${cxx_symbols_inc} COMMENT "Generating nonterminal symbol file for cxx grammar..." 
- DEPENDS pseudo-gen + DEPENDS ${pseudo_gen_target} VERBATIM) set(cxx_bnf_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXBNF.inc) add_custom_command(OUTPUT ${cxx_bnf_inc} - COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + COMMAND "${pseudo_gen}" --grammar ${cxx_bnf} --emit-grammar-content -o ${cxx_bnf_inc} COMMENT "Generating bnf string file for cxx grammar..." - DEPENDS pseudo-gen + DEPENDS ${pseudo_gen_target} VERBATIM) # add_custom_command does not create a new target, we need to deine a target From 1f06398e96d4508d22f42b760f70eb5d4e7b1dc9 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 25 May 2022 11:07:31 -0700 Subject: [PATCH 502/908] Reapply "[RISCV] Enable strict assertions in InsertVSETVLI data flow" be2cb8 fixes the case which triggered the revert. Reapply, and let's see if anything else falls out. Original commit message: These asserts are believed to hold after several recent miscompiles have been fixed. If you see an assertion failure on this change, please toggle the default back and make sure you file a bug with a reproducer. We may have as yet uncaught miscompiles lurking in this code. 
Differential Revision: https://reviews.llvm.org/D125271 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 2ff2a60cc961e1..2fc29d0d067b6a 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -38,7 +38,7 @@ static cl::opt DisableInsertVSETVLPHIOpt( cl::desc("Disable looking through phis when inserting vsetvlis.")); static cl::opt UseStrictAsserts( - "riscv-insert-vsetvl-strict-asserts", cl::init(false), cl::Hidden, + "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden, cl::desc("Enable strict assertion checking for the dataflow algorithm")); namespace { From 997b072e10d2be09c0e1a5bf4d6b92e2da3b8cc6 Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Wed, 25 May 2022 14:19:59 -0400 Subject: [PATCH 503/908] C++ DR2394: Const-default-constructible for members. Const class members may be initialized with a defaulted default constructor under the same conditions it would be allowed for a const object elsewhere. Differential Revision: https://reviews.llvm.org/D126170 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/lib/Sema/SemaDeclCXX.cpp | 9 ++++----- clang/test/CXX/drs/dr23xx.cpp | 11 +++++++++++ clang/test/CXX/special/class.ctor/p5-0x.cpp | 17 +++++++++++------ .../test/SemaCXX/cxx0x-deleted-default-ctor.cpp | 6 +++++- clang/www/cxx_dr_status.html | 2 +- 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a457a8fb2efe44..c34d29c8762d48 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -155,6 +155,9 @@ Bug Fixes a coroutine will no longer generate a call to a global allocation function with the signature (std::size_t, p0, ..., pn). This fixes Issue `Issue 54881 `_. 
+- Implement `CWG 2394 `_: Const class members + may be initialized with a defaulted default constructor under the same + conditions it would be allowed for a const object elsewhere. Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index a16fe0b1b72a47..c470a46f584773 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -9213,13 +9213,12 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) { << !!ICI << MD->getParent() << FD << FieldType << /*Reference*/0; return true; } - // C++11 [class.ctor]p5: any non-variant non-static data member of - // const-qualified type (or array thereof) with no - // brace-or-equal-initializer does not have a user-provided default - // constructor. + // C++11 [class.ctor]p5 (modified by DR2394): any non-variant non-static + // data member of const-qualified type (or array thereof) with no + // brace-or-equal-initializer is not const-default-constructible. if (!inUnion() && FieldType.isConstQualified() && !FD->hasInClassInitializer() && - (!FieldRecord || !FieldRecord->hasUserProvidedDefaultConstructor())) { + (!FieldRecord || !FieldRecord->allowConstDefaultInit())) { if (Diagnose) S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field) << !!ICI << MD->getParent() << FD << FD->getType() << /*Const*/1; diff --git a/clang/test/CXX/drs/dr23xx.cpp b/clang/test/CXX/drs/dr23xx.cpp index a7ff2a5813ed03..8d6b4a5dc16eaf 100644 --- a/clang/test/CXX/drs/dr23xx.cpp +++ b/clang/test/CXX/drs/dr23xx.cpp @@ -158,3 +158,14 @@ void g() { } } //namespace dr2303 #endif + +namespace dr2394 { // dr2394: 15 + +struct A {}; +const A a; + +// Now allowed to default-init B. 
+struct B { const A a; }; +B b; + +} diff --git a/clang/test/CXX/special/class.ctor/p5-0x.cpp b/clang/test/CXX/special/class.ctor/p5-0x.cpp index 67f0d2a9357441..c433df64a7cf4e 100644 --- a/clang/test/CXX/special/class.ctor/p5-0x.cpp +++ b/clang/test/CXX/special/class.ctor/p5-0x.cpp @@ -2,6 +2,8 @@ struct DefaultedDefCtor1 {}; struct DefaultedDefCtor2 { DefaultedDefCtor2() = default; }; +struct DefaultedDefCtorUninitialized1 { int x; }; +struct DefaultedDefCtorUninitialized2 { int x; DefaultedDefCtorUninitialized2() = default; }; struct DeletedDefCtor { DeletedDefCtor() = delete; DeletedDefCtor(int); }; // expected-note {{explicitly marked deleted here}} class PrivateDefCtor { PrivateDefCtor() = default; public: PrivateDefCtor(int); }; struct DeletedDtor { ~DeletedDtor() = delete; }; // expected-note 4{{explicitly marked deleted here}} @@ -51,21 +53,20 @@ class NotDeleted2d { int &&a = 0; }; // expected-error {{reference member 'a' bi NotDeleted2d nd2d; // expected-note {{first required here}} // - any non-variant non-static data member of const qualified type (or array -// thereof) with no brace-or-equal-initializer does not have a user-provided -// default constructor, +// thereof) with no brace-or-equal-initializer is not const-default-constructible class Deleted3a { const int a; }; // expected-note {{because field 'a' of const-qualified type 'const int' would not be initialized}} \ expected-warning {{does not declare any constructor}} \ expected-note {{will never be initialized}} Deleted3a d3a; // expected-error {{implicitly-deleted default constructor}} -class Deleted3b { const DefaultedDefCtor1 a[42]; }; // expected-note {{because field 'a' of const-qualified type 'const DefaultedDefCtor1[42]' would not be initialized}} +class Deleted3b { const DefaultedDefCtorUninitialized1 a[42]; }; // expected-note {{because field 'a' of const-qualified type 'const DefaultedDefCtorUninitialized1[42]' would not be initialized}} Deleted3b d3b; // expected-error 
{{implicitly-deleted default constructor}} -class Deleted3c { const DefaultedDefCtor2 a; }; // expected-note {{because field 'a' of const-qualified type 'const DefaultedDefCtor2' would not be initialized}} +class Deleted3c { const DefaultedDefCtorUninitialized2 a; }; // expected-note {{because field 'a' of const-qualified type 'const DefaultedDefCtorUninitialized2' would not be initialized}} Deleted3c d3c; // expected-error {{implicitly-deleted default constructor}} class NotDeleted3a { const int a = 0; }; NotDeleted3a nd3a; -class NotDeleted3b { const DefaultedDefCtor1 a[42] = {}; }; +class NotDeleted3b { const DefaultedDefCtorUninitialized1 a[42] = {}; }; NotDeleted3b nd3b; -class NotDeleted3c { const DefaultedDefCtor2 a = DefaultedDefCtor2(); }; +class NotDeleted3c { const DefaultedDefCtorUninitialized2 a = DefaultedDefCtorUninitialized2(); }; NotDeleted3c nd3c; union NotDeleted3d { const int a; int b; }; NotDeleted3d nd3d; @@ -75,6 +76,10 @@ union NotDeleted3f { const DefaultedDefCtor2 a; int b; }; NotDeleted3f nd3f; struct NotDeleted3g { union { const int a; int b; }; }; NotDeleted3g nd3g; +struct NotDeleted3h { const DefaultedDefCtor1 a[42]; }; +NotDeleted3h nd3h; +struct NotDeleted3i { const DefaultedDefCtor2 a; }; +NotDeleted3i nd3i; // - X is a union and all of its variant members are of const-qualified type (or // array thereof), diff --git a/clang/test/SemaCXX/cxx0x-deleted-default-ctor.cpp b/clang/test/SemaCXX/cxx0x-deleted-default-ctor.cpp index feb483546c3eb6..f8a40f97c9791c 100644 --- a/clang/test/SemaCXX/cxx0x-deleted-default-ctor.cpp +++ b/clang/test/SemaCXX/cxx0x-deleted-default-ctor.cpp @@ -48,8 +48,12 @@ struct good : non_trivial { }; good g; +struct bad_const_inner { + int x; +}; + struct bad_const { - const good g; // expected-note {{field 'g' of const-qualified type 'const good' would not be initialized}} + const bad_const_inner g; // expected-note {{field 'g' of const-qualified type 'const bad_const_inner' would not be initialized}} }; 
bad_const bc; // expected-error {{call to implicitly-deleted default constructor}} diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index e193d37c095981..b9a38150063bc9 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -14178,7 +14178,7 @@

C++ defect report implementation status

2394 CD5 Const-default-constructible for members - Unknown + Clang 15 2395 From bbad981dab49851ab0b94fbb27925fe1c80f888b Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 25 May 2022 09:19:19 -0700 Subject: [PATCH 504/908] [flang] Address regression (calls to assumed-length character function dummy procedures) A recent fix beefed up semantics checking to catch the case of a call to an external assumed-length character function; this check has false positives in the case of an assumed-length character function that is a dummy procedure. These do have a length that is passed in extra compiler-created arguments. This patch refines the check and undoes some changes to tests. Differential Revision: https://reviews.llvm.org/D126390 --- flang/lib/Semantics/expression.cpp | 5 +- flang/test/Evaluate/rewrite01.f90 | 14 +-- .../test/Lower/dummy-procedure-character.f90 | 43 ++++++++ flang/test/Lower/host-associated.f90 | 99 ++++++++++--------- flang/test/Semantics/call01.f90 | 4 - 5 files changed, 107 insertions(+), 58 deletions(-) diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 98816017dbfe33..4a100e420e3450 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2528,16 +2528,17 @@ std::optional ExpressionAnalyzer::CheckCall( bool procIsAssociated{ specificIntrinsic && specificIntrinsic->name == "associated"}; if (!procIsAssociated) { + const Symbol *procSymbol{proc.GetSymbol()}; + bool procIsDummy{procSymbol && IsDummy(*procSymbol)}; if (chars->functionResult && chars->functionResult->IsAssumedLengthCharacter() && - !specificIntrinsic) { + !specificIntrinsic && !procIsDummy) { Say(callSite, "Assumed-length character function must be defined with a length to be called"_err_en_US); } semantics::CheckArguments(*chars, arguments, GetFoldingContext(), context_.FindScope(callSite), treatExternalAsImplicit, specificIntrinsic); - const Symbol *procSymbol{proc.GetSymbol()}; if (procSymbol && 
!IsPureProcedure(*procSymbol)) { if (const semantics::Scope * pure{semantics::FindPureProcedureContaining( diff --git a/flang/test/Evaluate/rewrite01.f90 b/flang/test/Evaluate/rewrite01.f90 index b95c1b2de85489..94c43d95ab27a6 100644 --- a/flang/test/Evaluate/rewrite01.f90 +++ b/flang/test/Evaluate/rewrite01.f90 @@ -105,9 +105,11 @@ subroutine lbound_test(x, n, m) !CHECK: len_test subroutine len_test(a,b, c, d, e, n, m) character(*), intent(in) :: a - character(10) :: b + character(*) :: b external b character(10), intent(in) :: c + character(10) :: d + external d integer, intent(in) :: n, m character(n), intent(in) :: e @@ -115,9 +117,9 @@ subroutine len_test(a,b, c, d, e, n, m) print *, len(a, kind=8) !CHECK: PRINT *, 5_4 print *, len(a(1:5)) - !CHECK: PRINT *, 10_4 + !CHECK: PRINT *, len(b(a)) print *, len(b(a)) - !CHECK: PRINT *, int(10_8+int(a%len,kind=8),kind=4) + !CHECK: PRINT *, len(b(a)//a) print *, len(b(a) // a) !CHECK: PRINT *, 10_4 print *, len(c) @@ -126,14 +128,14 @@ subroutine len_test(a,b, c, d, e, n, m) !CHECK: PRINT *, 5_4 print *, len(c(1:5)) !CHECK: PRINT *, 10_4 - print *, len(b(c)) + print *, len(d(c)) !CHECK: PRINT *, 20_4 - print *, len(b(c) // c) + print *, len(d(c) // c) !CHECK: PRINT *, 0_4 print *, len(a(10:4)) !CHECK: PRINT *, int(max(0_8,int(m,kind=8)-int(n,kind=8)+1_8),kind=4) print *, len(a(n:m)) - !CHECK: PRINT *, 10_4 + !CHECK: PRINT *, len(b(a(int(n,kind=8):int(m,kind=8)))) print *, len(b(a(n:m))) !CHECK: PRINT *, int(max(0_8,max(0_8,int(n,kind=8))-4_8+1_8),kind=4) print *, len(e(4:)) diff --git a/flang/test/Lower/dummy-procedure-character.f90 b/flang/test/Lower/dummy-procedure-character.f90 index e2874cb018b81c..8eabf6df418bca 100644 --- a/flang/test/Lower/dummy-procedure-character.f90 +++ b/flang/test/Lower/dummy-procedure-character.f90 @@ -143,6 +143,21 @@ subroutine override_incoming_length(bar7) ! Test calling character dummy function ! ----------------------------------------------------------------------------- +! 
CHECK-LABEL: func @_QPcall_assumed_length +! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) { +subroutine call_assumed_length(bar8) + character(*) :: bar8 + external :: bar8 +! CHECK: %[[VAL_3:.*]] = fir.extract_value %[[VAL_0]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> +! CHECK: %[[WAL_2:.*]] = fir.box_addr %[[VAL_3]] : (!fir.boxproc<() -> ()>) -> (() -> ()) +! CHECK: %[[VAL_4:.*]] = fir.extract_value %[[VAL_0]], [1 : index] : (tuple ()>, i64>) -> i64 +! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_4]] : i64) {bindc_name = ".result"} +! CHECK: %[[VAL_7:.*]] = fir.convert %[[WAL_2]] : (() -> ()) -> ((!fir.ref>, index, !fir.ref) -> !fir.boxchar<1>) +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i64) -> index +! CHECK: fir.call %[[VAL_7]](%[[VAL_6]], %[[VAL_8]], %{{.*}}) : (!fir.ref>, index, !fir.ref) -> !fir.boxchar<1> + call test(bar8(42)) +end subroutine + ! CHECK-LABEL: func @_QPcall_explicit_length ! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) { subroutine call_explicit_length(bar9) @@ -181,6 +196,34 @@ function bar10(n) call test(bar10(42_8)) end subroutine + +! CHECK-LABEL: func @_QPhost( +! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> +subroutine host(f) + character*(*) :: f + external :: f + ! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1:.*]], %{{.*}} : (!fir.ref ()>, i64>>>, i32) -> !fir.ref ()>, i64>> + ! CHECK: fir.store %[[VAL_0]] to %[[VAL_3]] : !fir.ref ()>, i64>> + ! CHECK: fir.call @_QFhostPintern(%[[VAL_1]]) + call intern() +contains +! CHECK-LABEL: func @_QFhostPintern( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ()>, i64>>> {fir.host_assoc}) + subroutine intern() +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_2:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_1]] : (!fir.ref ()>, i64>>>, i32) -> !fir.ref ()>, i64>> +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]] : !fir.ref ()>, i64>> +! 
CHECK: %[[VAL_4:.*]] = fir.extract_value %[[VAL_3]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> +! CHECK: %[[WAL_1:.*]] = fir.box_addr %[[VAL_4]] : (!fir.boxproc<() -> ()>) -> (() -> ()) +! CHECK: %[[VAL_5:.*]] = fir.extract_value %[[VAL_3]], [1 : index] : (tuple ()>, i64>) -> i64 +! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_5]] : i64) {bindc_name = ".result"} +! CHECK: %[[VAL_8:.*]] = fir.convert %[[WAL_1]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_5]] : (i64) -> index +! CHECK: fir.call %[[VAL_8]](%[[VAL_7]], %[[VAL_9]]) : (!fir.ref>, index) -> !fir.boxchar<1> + call test(f()) + end subroutine +end subroutine + ! CHECK-LABEL: func @_QPhost2( ! CHECK-SAME: %[[VAL_0:.*]]: tuple ()>, i64> {fir.char_proc}) subroutine host2(f) diff --git a/flang/test/Lower/host-associated.f90 b/flang/test/Lower/host-associated.f90 index 9654ddbb49a8a9..a2c7ef10ed58a7 100644 --- a/flang/test/Lower/host-associated.f90 +++ b/flang/test/Lower/host-associated.f90 @@ -579,50 +579,57 @@ end subroutine test_proc_dummy_other ! CHECK: %[[VAL_10:.*]] = fir.address_of(@_QQcl.{{.*}}) : !fir.ref> ! CHECK: %[[VAL_11:.*]] = fir.extract_value %[[VAL_2]], [0 : index] : (tuple ()>, i64>) -> !fir.boxproc<() -> ()> ! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]] : (!fir.boxproc<() -> ()>) -> (() -> ()) -! CHECK: %[[VAL_13:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref -! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_12]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) -! CHECK: %[[VAL_15:.*]] = fir.call %[[VAL_14]](%0, %c10) : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.char<1,?>(%c22 : index) {bindc_name = ".chrtmp"} -! CHECK: %[[VAL_17:.*]] = fir.convert %c12 : (index) -> i64 -! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.ref>) -> !fir.ref -! CHECK: %[[VAL_19:.*]] = fir.convert %2 : (!fir.ref>) -> !fir.ref -! 
CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_18]], %[[VAL_19]], %[[VAL_17]], %false) : (!fir.ref, !fir.ref, i64, i1) -> () -! CHECK: cf.br ^bb1(%c12, %c10 : index, index) -! CHECK: ^bb1(%[[VAL_20:.*]]: index, %[[VAL_21:.*]]: index): // 2 preds: ^bb0, ^bb2 -! CHECK: %[[VAL_22:.*]] = arith.cmpi sgt, %[[VAL_21]], %c0 : index -! CHECK: cf.cond_br %[[VAL_22]], ^bb2, ^bb3 -! CHECK: ^bb2: // pred: ^bb1 -! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_20]], %c12 : index -! CHECK: %[[VAL_24:.*]] = fir.convert %0 : (!fir.ref>) -> !fir.ref>> -! CHECK: %[[VAL_25:.*]] = fir.coordinate_of %[[VAL_24]], %[[VAL_23]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_25]] : !fir.ref> -! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_16]] : (!fir.ref>) -> !fir.ref>> -! CHECK: %[[VAL_28:.*]] = fir.coordinate_of %[[VAL_27]], %[[VAL_20]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: fir.store %[[VAL_26]] to %[[VAL_28]] : !fir.ref> -! CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_20]], %c1 : index -! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_21]], %c1 : index -! CHECK: cf.br ^bb1(%[[VAL_29]], %[[VAL_30]] : index, index) -! CHECK: ^bb3: // pred: ^bb1 -! CHECK: %[[VAL_31:.*]] = fir.convert %c22 : (index) -> i64 -! CHECK: %[[VAL_32:.*]] = fir.convert %1 : (!fir.ref>) -> !fir.ref -! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_32]], %[[VAL_18]], %[[VAL_31]], %false) : (!fir.ref, !fir.ref, i64, i1) -> () -! CHECK: %[[VAL_33:.*]] = fir.undefined !fir.char<1> -! CHECK: %[[VAL_34:.*]] = fir.insert_value %[[VAL_33]], %c32_i8, [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> -! CHECK: cf.br ^bb4(%c22, %c18 : index, index) -! CHECK: ^bb4(%[[VAL_35:.*]]: index, %[[VAL_36:.*]]: index): // 2 preds: ^bb3, ^bb5 -! CHECK: %[[VAL_37:.*]] = arith.cmpi sgt, %[[VAL_36]], %c0 : index -! CHECK: cf.cond_br %[[VAL_37]], ^bb5, ^bb6 -! CHECK: ^bb5: // pred: ^bb4 -! CHECK: %[[VAL_38:.*]] = fir.convert %1 : (!fir.ref>) -> !fir.ref>> -! 
CHECK: %[[VAL_39:.*]] = fir.coordinate_of %[[VAL_38]], %[[VAL_35]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: fir.store %[[VAL_34]] to %[[VAL_39]] : !fir.ref> -! CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_35]], %c1 : index -! CHECK: %[[VAL_41:.*]] = arith.subi %[[VAL_36]], %c1 : index -! CHECK: cf.br ^bb4(%[[VAL_40]], %[[VAL_41]] : index, index) -! CHECK: ^bb6: // pred: ^bb4 -! CHECK: fir.call @llvm.stackrestore(%[[VAL_13]]) : (!fir.ref) -> () -! CHECK: %[[VAL_42:.*]] = fir.emboxchar %1, %c40 : (!fir.ref>, index) -> !fir.boxchar<1> -! CHECK: return %[[VAL_42]] : !fir.boxchar<1> +! CHECK: %[[VAL_13:.*]] = fir.extract_value %[[VAL_2]], [1 : index] : (tuple ()>, i64>) -> i64 +! CHECK: %[[VAL_14:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref +! CHECK: %[[VAL_15:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_13]] : i64) {bindc_name = ".result"} +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_12]] : (() -> ()) -> ((!fir.ref>, index) -> !fir.boxchar<1>) +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_13]] : (i64) -> index +! CHECK: %[[VAL_18:.*]] = fir.call %[[VAL_16]](%[[VAL_15]], %[[VAL_17]]) : (!fir.ref>, index) -> !fir.boxchar<1> +! CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_17]], %[[VAL_4]] : index +! CHECK: %[[VAL_20:.*]] = fir.alloca !fir.char<1,?>(%[[VAL_19]] : index) {bindc_name = ".chrtmp"} +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_4]] : (index) -> i64 +! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_20]] : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> !fir.ref +! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_22]], %[[VAL_23]], %[[VAL_21]], %[[VAL_5]]) : (!fir.ref, !fir.ref, i64, i1) -> () +! CHECK: br ^bb1(%[[VAL_4]], %[[VAL_17]] : index, index) +! CHECK: ^bb1(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index): +! CHECK: %[[VAL_26:.*]] = arith.cmpi sgt, %[[VAL_25]], %[[VAL_8]] : index +! CHECK: cond_br %[[VAL_26]], ^bb2, ^bb3 +! CHECK: ^bb2: +! CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_24]], %[[VAL_4]] : index +! 
CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_15]] : (!fir.ref>) -> !fir.ref>> +! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], %[[VAL_27]] : (!fir.ref>>, index) -> !fir.ref> +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref> +! CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_20]] : (!fir.ref>) -> !fir.ref>> +! CHECK: %[[VAL_32:.*]] = fir.coordinate_of %[[VAL_31]], %[[VAL_24]] : (!fir.ref>>, index) -> !fir.ref> +! CHECK: fir.store %[[VAL_30]] to %[[VAL_32]] : !fir.ref> +! CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_24]], %[[VAL_6]] : index +! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_25]], %[[VAL_6]] : index +! CHECK: br ^bb1(%[[VAL_33]], %[[VAL_34]] : index, index) +! CHECK: ^bb3: +! CHECK: %[[VAL_35:.*]] = arith.cmpi slt, %[[VAL_3]], %[[VAL_19]] : index +! CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_35]], %[[VAL_3]], %[[VAL_19]] : index +! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (index) -> i64 +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_9]] : (!fir.ref>) -> !fir.ref +! CHECK: fir.call @llvm.memmove.p0i8.p0i8.i64(%[[VAL_38]], %[[VAL_22]], %[[VAL_37]], %[[VAL_5]]) : (!fir.ref, !fir.ref, i64, i1) -> () +! CHECK: %[[VAL_39:.*]] = fir.undefined !fir.char<1> +! CHECK: %[[VAL_40:.*]] = fir.insert_value %[[VAL_39]], %[[VAL_7]], [0 : index] : (!fir.char<1>, i8) -> !fir.char<1> +! CHECK: %[[VAL_41:.*]] = arith.subi %[[VAL_3]], %[[VAL_36]] : index +! CHECK: br ^bb4(%[[VAL_36]], %[[VAL_41]] : index, index) +! CHECK: ^bb4(%[[VAL_42:.*]]: index, %[[VAL_43:.*]]: index): +! CHECK: %[[VAL_44:.*]] = arith.cmpi sgt, %[[VAL_43]], %[[VAL_8]] : index +! CHECK: cond_br %[[VAL_44]], ^bb5, ^bb6 +! CHECK: ^bb5: +! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_9]] : (!fir.ref>) -> !fir.ref>> +! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_45]], %[[VAL_42]] : (!fir.ref>>, index) -> !fir.ref> +! CHECK: fir.store %[[VAL_40]] to %[[VAL_46]] : !fir.ref> +! CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_42]], %[[VAL_6]] : index +! 
CHECK: %[[VAL_48:.*]] = arith.subi %[[VAL_43]], %[[VAL_6]] : index +! CHECK: br ^bb4(%[[VAL_47]], %[[VAL_48]] : index, index) +! CHECK: ^bb6: +! CHECK: fir.call @llvm.stackrestore(%[[VAL_14]]) : (!fir.ref) -> () +! CHECK: %[[VAL_49:.*]] = fir.emboxchar %[[VAL_9]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> +! CHECK: return %[[VAL_49]] : !fir.boxchar<1> ! CHECK: } subroutine test_proc_dummy_char @@ -640,8 +647,8 @@ end subroutine test_proc_dummy_char function get_message(a) character(40) :: get_message - character(10) :: a - get_message = "message is: " // a() + character(*) :: a + get_message = "message is: " // a() end function get_message ! CHECK-LABEL: func @_QPtest_11a() { diff --git a/flang/test/Semantics/call01.f90 b/flang/test/Semantics/call01.f90 index f25fe45736bcae..de38bf85fbe23b 100644 --- a/flang/test/Semantics/call01.f90 +++ b/flang/test/Semantics/call01.f90 @@ -129,16 +129,12 @@ character*(*) function f2() character*(*) function f4() end function end interface - !ERROR: Assumed-length character function must be defined with a length to be called print *, f1() - !ERROR: Assumed-length character function must be defined with a length to be called print *, f2() !ERROR: Assumed-length character function must be defined with a length to be called print *, f3() !ERROR: Assumed-length character function must be defined with a length to be called print *, f4() - !ERROR: Assumed-length character function must be defined with a length to be called print *, fp1() - !ERROR: Assumed-length character function must be defined with a length to be called print *, fp2() end subroutine From 7f5439945b1f07535ea667ec31a82a9d9a2a8666 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 23 May 2022 18:22:40 +0000 Subject: [PATCH 505/908] [compiler-rt][test] Fix flake in symbolize_stack test Addresses tests flakes described in https://github.com/llvm/llvm-project/issues/55460 The test being updated can fail in FileCheck to match when given long enough stack traces. 
This can be problematic when file system paths become long enough to cause the majority of the long function name to become truncated. We found in our CI that the truncated output would often fail to match, thereby causing the test to fail when it should not. Here we change the test to match on symbolizer output that should be more reliable than matching inside the long function name. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D126102 --- .../test/sanitizer_common/TestCases/symbolize_stack.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp b/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp index d6ec49b407f5d0..37b7e98339eb6c 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp @@ -5,6 +5,11 @@ // On Darwin LSan reports a false positive // XFAIL: darwin && lsan +// FIXME: https://github.com/llvm/llvm-project/issues/55460 +// On Linux its possible for symbolizer output to be truncated and to match the +// check below. Remove when the underlying problem has been addressed. +// UNSUPPORTED: linux + #include #include From c4bc416418a2d8a86b699060efa47515de5ffbb6 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 25 May 2022 13:41:53 +0200 Subject: [PATCH 506/908] [LLVM] Add rcp.approx.ftz.f32 intrinsic Split out from https://reviews.llvm.org/D126158.
Reviewed By: tra Differential Revision: https://reviews.llvm.org/D126369 --- clang/include/clang/Basic/BuiltinsNVPTX.def | 2 ++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 2 ++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 ++ 3 files changed, 6 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 04bed16c9958e3..a5ec77a6112c05 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -343,6 +343,8 @@ BUILTIN(__nvvm_rcp_rn_d, "dd", "") BUILTIN(__nvvm_rcp_rz_d, "dd", "") BUILTIN(__nvvm_rcp_rm_d, "dd", "") BUILTIN(__nvvm_rcp_rp_d, "dd", "") + +BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "") BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "") // Sqrt diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 678001f44527dd..6bdd0b0775be92 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -933,6 +933,8 @@ let TargetPrefix = "nvvm" in { def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + def int_nvvm_rcp_approx_ftz_f : GCCBuiltin<"__nvvm_rcp_approx_ftz_f">, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index d30c3d13f7ce0d..1192cc07840847 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1034,6 +1034,8 @@ def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>; +def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, 
int_nvvm_rcp_approx_ftz_f>; def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>; From 1a51ab766f471059abe6ae9123512d2137e2e0af Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Mon, 25 Apr 2022 13:58:14 -0400 Subject: [PATCH 507/908] [AMDGPU] gfx11 export instructions Contributors: Jay Foad Dmitry Preobrazhensky Patch 10/N for upstreaming of AMDGPU gfx11 architecture. Depends on D125822 Reviewed By: dp Differential Revision: https://reviews.llvm.org/D125824 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2 +- .../Disassembler/AMDGPUDisassembler.cpp | 13 ++++ .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 1 + llvm/lib/Target/AMDGPU/EXPInstructions.td | 62 ++++++++++++++----- llvm/lib/Target/AMDGPU/SIDefines.h | 15 +++-- llvm/lib/Target/AMDGPU/SIInstrFormats.td | 20 ++++-- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 28 ++++++--- llvm/lib/Target/AMDGPU/VIInstrFormats.td | 2 +- llvm/test/MC/AMDGPU/exp-err.s | 22 ++++++- llvm/test/MC/AMDGPU/exp-gfx10.s | 4 ++ llvm/test/MC/AMDGPU/exp-gfx11.s | 19 ++++++ llvm/test/MC/AMDGPU/exp-pregfx11.s | 59 ++++++++++++++++++ llvm/test/MC/AMDGPU/exp.s | 56 +---------------- .../test/MC/Disassembler/AMDGPU/exp_gfx11.txt | 13 ++++ 14 files changed, 222 insertions(+), 94 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/exp-gfx11.s create mode 100644 llvm/test/MC/AMDGPU/exp-pregfx11.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/exp_gfx11.txt diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e3cddd64a4a583..6f707cc4cfaff6 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -6282,7 +6282,7 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { continue; } - if (Op.isToken() && Op.getToken() == "done") + if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 
continue; // Handle optional arguments diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index a9540b14f1ad96..62aaa321f498af 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -604,6 +604,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = convertMIMGInst(MI); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)) + Res = convertEXPInst(MI); + if (Res && IsSDWA) Res = convertSDWAInst(MI); @@ -635,6 +638,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Res; } +DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { + if (STI.getFeatureBits()[AMDGPU::FeatureGFX11]) { + // The MCInst still has these fields even though they are no longer encoded + // in the GFX11 instruction. + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm); + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr); + } + return MCDisassembler::Success; +} + DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 5e44fecf43c119..3d1ee29faa3a1e 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -101,6 +101,7 @@ class AMDGPUDisassembler : public MCDisassembler { DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const; + DecodeStatus convertEXPInst(MCInst &MI) const; DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const; DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; diff 
--git a/llvm/lib/Target/AMDGPU/EXPInstructions.td b/llvm/lib/Target/AMDGPU/EXPInstructions.td index 3654b8b6232987..f8742a9ce2ff58 100644 --- a/llvm/lib/Target/AMDGPU/EXPInstructions.td +++ b/llvm/lib/Target/AMDGPU/EXPInstructions.td @@ -10,7 +10,7 @@ // EXP classes //===----------------------------------------------------------------------===// -class EXPCommon : InstSI< +class EXPCommon : InstSI< (outs), (ins exp_tgt:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3, @@ -21,21 +21,30 @@ class EXPCommon : InstSI< let mayLoad = done; let mayStore = 1; let UseNamedOperandTable = 1; - let Uses = [EXEC]; + let Uses = !if(row, [EXEC, M0], [EXEC]); let SchedRW = [WriteExport]; let DisableWQM = 1; } -class EXP_Pseudo : EXPCommon, - SIMCInstr { +class EXP_Pseudo + : EXPCommon, SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; } -class EXP_Real - : EXPCommon, - SIMCInstr { +// Real instruction with optional asm operands "compr" and "vm". +class EXP_Real_ComprVM + : EXPCommon<0, done, "exp$tgt $src0, $src1, $src2, $src3" + #!if(done, " done", "")#"$compr$vm">, + SIMCInstr { + let AsmMatchConverter = "cvtExp"; +} + +// Real instruction with optional asm operand "row_en". +class EXP_Real_Row + : EXPCommon, + SIMCInstr { let AsmMatchConverter = "cvtExp"; } @@ -43,11 +52,13 @@ class EXP_Real // EXP Instructions //===----------------------------------------------------------------------===// -// Split EXP instruction into EXP and EXP_DONE so we can set -// mayLoad for done=1. +// DONE variants have mayLoad = 1. +// ROW variants have an implicit use of M0. 
let SubtargetPredicate = isNotGFX90APlus in { -def EXP : EXP_Pseudo<0>; -def EXP_DONE : EXP_Pseudo<1>; +def EXP : EXP_Pseudo<0, 0>; +def EXP_DONE : EXP_Pseudo<0, 1>; +def EXP_ROW : EXP_Pseudo<1, 0>; +def EXP_ROW_DONE : EXP_Pseudo<1, 1>; } // let SubtargetPredicate = isNotGFX90APlus //===----------------------------------------------------------------------===// @@ -55,7 +66,7 @@ def EXP_DONE : EXP_Pseudo<1>; //===----------------------------------------------------------------------===// class EXP_Real_si - : EXP_Real<_done, pseudo, SIEncodingFamily.SI>, EXPe { + : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.SI>, EXPe_ComprVM { let AssemblerPredicate = isGFX6GFX7; let DecoderNamespace = "GFX6GFX7"; let done = _done; @@ -69,7 +80,7 @@ def EXP_DONE_si : EXP_Real_si<1, "EXP_DONE">; //===----------------------------------------------------------------------===// class EXP_Real_vi - : EXP_Real<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi { + : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi { let AssemblerPredicate = isGFX8GFX9; let SubtargetPredicate = isNotGFX90APlus; let DecoderNamespace = "GFX8"; @@ -80,12 +91,12 @@ def EXP_vi : EXP_Real_vi<0, "EXP">; def EXP_DONE_vi : EXP_Real_vi<1, "EXP_DONE">; //===----------------------------------------------------------------------===// -// GFX10+ +// GFX10 //===----------------------------------------------------------------------===// class EXP_Real_gfx10 - : EXP_Real<_done, pseudo, SIEncodingFamily.GFX10>, EXPe { - let AssemblerPredicate = isGFX10Plus; + : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.GFX10>, EXPe_ComprVM { + let AssemblerPredicate = isGFX10Only; let DecoderNamespace = "GFX10"; let done = _done; } @@ -93,6 +104,23 @@ class EXP_Real_gfx10 def EXP_gfx10 : EXP_Real_gfx10<0, "EXP">; def EXP_DONE_gfx10 : EXP_Real_gfx10<1, "EXP_DONE">; +//===----------------------------------------------------------------------===// +// GFX11+ 
+//===----------------------------------------------------------------------===// + +class EXP_Real_gfx11 + : EXP_Real_Row<_row, _done, pseudo, SIEncodingFamily.GFX11>, EXPe_Row { + let AssemblerPredicate = isGFX11Plus; + let DecoderNamespace = "GFX11"; + let row = _row; + let done = _done; +} + +def EXP_gfx11 : EXP_Real_gfx11<0, 0, "EXP">; +def EXP_DONE_gfx11 : EXP_Real_gfx11<0, 1, "EXP_DONE">; +def EXP_ROW_gfx11 : EXP_Real_gfx11<1, 0, "EXP_ROW">; +def EXP_ROW_DONE_gfx11 : EXP_Real_gfx11<1, 1, "EXP_ROW_DONE">; + //===----------------------------------------------------------------------===// // EXP Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 80ac558955c513..07819f8475c287 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -852,20 +852,23 @@ enum Target : unsigned { ET_MRT0 = 0, ET_MRT7 = 7, ET_MRTZ = 8, - ET_NULL = 9, + ET_NULL = 9, // Pre-GFX11 ET_POS0 = 12, ET_POS3 = 15, - ET_POS4 = 16, // GFX10+ - ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget - ET_PRIM = 20, // GFX10+ - ET_PARAM0 = 32, - ET_PARAM31 = 63, + ET_POS4 = 16, // GFX10+ + ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget + ET_PRIM = 20, // GFX10+ + ET_DUAL_SRC_BLEND0 = 21, // GFX11+ + ET_DUAL_SRC_BLEND1 = 22, // GFX11+ + ET_PARAM0 = 32, // Pre-GFX11 + ET_PARAM31 = 63, // Pre-GFX11 ET_NULL_MAX_IDX = 0, ET_MRTZ_MAX_IDX = 0, ET_PRIM_MAX_IDX = 0, ET_MRT_MAX_IDX = 7, ET_POS_MAX_IDX = 4, + ET_DUAL_SRC_BLEND_MAX_IDX = 1, ET_PARAM_MAX_IDX = 31, ET_INVALID = 255, diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 838cd1e25523f4..2fd1a0b60d1bf6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -357,9 +357,7 @@ class MIMGe_gfx10 op> : MIMGe { class EXPe : Enc64 { bits<4> en; bits<6> tgt; - bits<1> compr; 
bits<1> done; - bits<1> vm; bits<8> src0; bits<8> src1; bits<8> src2; @@ -367,9 +365,7 @@ class EXPe : Enc64 { let Inst{3-0} = en; let Inst{9-4} = tgt; - let Inst{10} = compr; let Inst{11} = done; - let Inst{12} = vm; let Inst{31-26} = 0x3e; let Inst{39-32} = src0; let Inst{47-40} = src1; @@ -377,6 +373,22 @@ class EXPe : Enc64 { let Inst{63-56} = src3; } +// Pre-GFX11 encoding has compr and vm bits. +class EXPe_ComprVM : EXPe { + bits<1> compr; + bits<1> vm; + + let Inst{10} = compr; + let Inst{12} = vm; +} + +// GFX11+ encoding has row bit. +class EXPe_Row : EXPe { + bits<1> row; + + let Inst{13} = row; +} + let Uses = [EXEC] in { class VINTRPCommon pattern> : diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 9dae84d8c47a3b..bc1abd7a588450 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1276,12 +1276,13 @@ struct ExpTgt { }; static constexpr ExpTgt ExpTgtInfo[] = { - {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, - {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, - {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, - {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, - {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, - {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, + {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, + {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, + {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, + {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, + {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, + {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, + {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, }; bool getTgtName(unsigned Id, StringRef &Name, int &Index) { @@ -1319,7 +1320,20 @@ unsigned getTgtId(const StringRef Name) { } bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { - return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI); + switch (Id) { + case ET_NULL: + return !isGFX11Plus(STI); + case ET_POS4: + case ET_PRIM: + return isGFX10Plus(STI); + case ET_DUAL_SRC_BLEND0: + case ET_DUAL_SRC_BLEND1: 
+ return isGFX11Plus(STI); + default: + if (Id >= ET_PARAM0 && Id <= ET_PARAM31) + return !isGFX11Plus(STI); + return true; + } } } // namespace Exp diff --git a/llvm/lib/Target/AMDGPU/VIInstrFormats.td b/llvm/lib/Target/AMDGPU/VIInstrFormats.td index bd65a495fa7278..7393ef6c2a2d7f 100644 --- a/llvm/lib/Target/AMDGPU/VIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/VIInstrFormats.td @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -class EXPe_vi : EXPe { +class EXPe_vi : EXPe_ComprVM { let Inst{31-26} = 0x31; //encoding } diff --git a/llvm/test/MC/AMDGPU/exp-err.s b/llvm/test/MC/AMDGPU/exp-err.s index bab0434c8dec02..630f8558d69935 100644 --- a/llvm/test/MC/AMDGPU/exp-err.s +++ b/llvm/test/MC/AMDGPU/exp-err.s @@ -1,11 +1,12 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefixes=GCN,GFX68 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN,GFX68 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck -check-prefixes=GCN,GFX11 --implicit-check-not=error: %s exp mrt8 v3, v2, v1, v0 // GCN: :5: error: invalid exp target exp pos4 v3, v2, v1, v0 -// GCN: :5: error: exp target is not supported on this GPU +// GFX68: :5: error: exp target is not supported on this GPU exp pos5 v3, v2, v1, v0 // GCN: :5: error: invalid exp target @@ -117,3 +118,18 @@ exp mrt0 v0, v0, 0x12345678, v0 exp mrt0 v0, v0, v0, 0x12345678 // GCN: 22: error: invalid operand for instruction + +exp null v4, v3, v2, v1 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: exp target is not supported on this GPU + +exp param0 v4, v3, v2, v1 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: exp target is not supported on this GPU + +exp 
param31 v4, v3, v2, v1 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: exp target is not supported on this GPU + +exp mrt0 v4, v3, v2, v1 vm +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +exp mrtz, v3, v3, off, off compr +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/exp-gfx10.s b/llvm/test/MC/AMDGPU/exp-gfx10.s index c64278ff1547ea..0d28b654d81907 100644 --- a/llvm/test/MC/AMDGPU/exp-gfx10.s +++ b/llvm/test/MC/AMDGPU/exp-gfx10.s @@ -1,15 +1,19 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=verde %s 2>&1 | FileCheck -check-prefix=SIVI --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=SIVI --implicit-check-not=error: %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX11 %s exp prim v1, off, off, off // SIVI: :5: error: exp target is not supported on this GPU // GFX10: exp prim v1, off, off, off ; encoding: [0x41,0x01,0x00,0xf8,0x01,0x00,0x00,0x00] +// GFX11: exp prim v1, off, off, off ; encoding: [0x41,0x01,0x00,0xf8,0x01,0x00,0x00,0x00] exp prim v2, v3, off, off // SIVI: :5: error: exp target is not supported on this GPU // GFX10: exp prim v2, v3, off, off ; encoding: [0x43,0x01,0x00,0xf8,0x02,0x03,0x00,0x00] +// GFX11: exp prim v2, v3, off, off ; encoding: [0x43,0x01,0x00,0xf8,0x02,0x03,0x00,0x00] exp pos4 v4, v3, v2, v1 // SIVI: error: exp target is not supported on this GPU // GFX10: exp pos4 v4, v3, v2, v1 ; encoding: [0x0f,0x01,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX11: exp pos4 v4, v3, v2, v1 ; encoding: [0x0f,0x01,0x00,0xf8,0x04,0x03,0x02,0x01] diff --git a/llvm/test/MC/AMDGPU/exp-gfx11.s b/llvm/test/MC/AMDGPU/exp-gfx11.s new file mode 100644 index 00000000000000..577e800811f6c8 --- /dev/null +++ b/llvm/test/MC/AMDGPU/exp-gfx11.s @@ -0,0 +1,19 @@ +// RUN: not llvm-mc -arch=amdgcn 
-mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=PREGFX11 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=PREGFX11 --implicit-check-not=error: %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX11 --implicit-check-not=error: %s + +exp dual_src_blend0 v4, v3, v2, v1 +// PREGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: exp target is not supported on this GPU +// GFX11: exp dual_src_blend0 v4, v3, v2, v1 ; encoding: [0x5f,0x01,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp dual_src_blend1 v2, v3, off, off +// PREGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: exp target is not supported on this GPU +// GFX11: exp dual_src_blend1 v2, v3, off, off ; encoding: [0x63,0x01,0x00,0xf8,0x02,0x03,0x00,0x00] + +exp mrtz v4, v3, v2, v1 row_en +// PREGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: exp mrtz v4, v3, v2, v1 row_en ; encoding: [0x8f,0x20,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp mrtz v4, v3, off, off done row_en +// PREGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: exp mrtz v4, v3, off, off done row_en ; encoding: [0x83,0x28,0x00,0xf8,0x04,0x03,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/exp-pregfx11.s b/llvm/test/MC/AMDGPU/exp-pregfx11.s new file mode 100644 index 00000000000000..8a8d0bfc42ef44 --- /dev/null +++ b/llvm/test/MC/AMDGPU/exp-pregfx11.s @@ -0,0 +1,59 @@ +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck -check-prefix=SI %s +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GFX89 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX89 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s + +exp null v4, v3, v2, v1 +// SI: exp null v4, v3, v2, v1 ; encoding: [0x9f,0x00,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp null v4, v3, v2, v1 ; encoding: 
[0x9f,0x00,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp null v4, v3, v2, v1 ; encoding: [0x9f,0x00,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp null v4, v3, v2, v1 done +// SI: exp null v4, v3, v2, v1 done ; encoding: [0x9f,0x08,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp null v4, v3, v2, v1 done ; encoding: [0x9f,0x08,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp null v4, v3, v2, v1 done ; encoding: [0x9f,0x08,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp param0 v4, v3, v2, v1 +// SI: exp param0 v4, v3, v2, v1 ; encoding: [0x0f,0x02,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp param0 v4, v3, v2, v1 ; encoding: [0x0f,0x02,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp param0 v4, v3, v2, v1 ; encoding: [0x0f,0x02,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp param0 v4, v3, v2, v1 done +// SI: exp param0 v4, v3, v2, v1 done ; encoding: [0x0f,0x0a,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp param0 v4, v3, v2, v1 done ; encoding: [0x0f,0x0a,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp param0 v4, v3, v2, v1 done ; encoding: [0x0f,0x0a,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp param31 v4, v3, v2, v1 +// SI: exp param31 v4, v3, v2, v1 ; encoding: [0xff,0x03,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp param31 v4, v3, v2, v1 ; encoding: [0xff,0x03,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp param31 v4, v3, v2, v1 ; encoding: [0xff,0x03,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp param31 v4, v3, v2, v1 done +// SI: exp param31 v4, v3, v2, v1 done ; encoding: [0xff,0x0b,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp param31 v4, v3, v2, v1 done ; encoding: [0xff,0x0b,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp param31 v4, v3, v2, v1 done ; encoding: [0xff,0x0b,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp mrt0 v4, v3, v2, v1 vm +// SI: exp mrt0 v4, v3, v2, v1 vm ; encoding: [0x0f,0x10,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp mrt0 v4, v3, v2, v1 vm ; encoding: [0x0f,0x10,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp mrt0 v4, v3, v2, v1 vm ; encoding: [0x0f,0x10,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp 
mrt0 v4, v3, v2, v1 done vm +// SI: exp mrt0 v4, v3, v2, v1 done vm ; encoding: [0x0f,0x18,0x00,0xf8,0x04,0x03,0x02,0x01] +// GFX89: exp mrt0 v4, v3, v2, v1 done vm ; encoding: [0x0f,0x18,0x00,0xc4,0x04,0x03,0x02,0x01] +// GFX10: exp mrt0 v4, v3, v2, v1 done vm ; encoding: [0x0f,0x18,0x00,0xf8,0x04,0x03,0x02,0x01] + +exp mrtz, v3, v3, v7, v7 compr +// SI: exp mrtz v3, v3, v7, v7 compr ; encoding: [0x8f,0x04,0x00,0xf8,0x03,0x07,0x00,0x00] +// GFX89: exp mrtz v3, v3, v7, v7 compr ; encoding: [0x8f,0x04,0x00,0xc4,0x03,0x07,0x00,0x00] +// GFX10: exp mrtz v3, v3, v7, v7 compr ; encoding: [0x8f,0x04,0x00,0xf8,0x03,0x07,0x00,0x00] + +exp mrtz, off, off, v7, v7 compr +// SI: exp mrtz off, off, v7, v7 compr ; encoding: [0x8c,0x04,0x00,0xf8,0x00,0x07,0x00,0x00] +// GFX89: exp mrtz off, off, v7, v7 compr ; encoding: [0x8c,0x04,0x00,0xc4,0x00,0x07,0x00,0x00] +// GFX10: exp mrtz off, off, v7, v7 compr ; encoding: [0x8c,0x04,0x00,0xf8,0x00,0x07,0x00,0x00] + +exp mrtz, v3, v3, off, off compr +// SI: exp mrtz v3, v3, off, off compr ; encoding: [0x83,0x04,0x00,0xf8,0x03,0x00,0x00,0x00] +// GFX89: exp mrtz v3, v3, off, off compr ; encoding: [0x83,0x04,0x00,0xc4,0x03,0x00,0x00,0x00] +// GFX10: exp mrtz v3, v3, off, off compr ; encoding: [0x83,0x04,0x00,0xf8,0x03,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/exp.s b/llvm/test/MC/AMDGPU/exp.s index a1d106b3c2aaa7..321075057a21ad 100644 --- a/llvm/test/MC/AMDGPU/exp.s +++ b/llvm/test/MC/AMDGPU/exp.s @@ -2,6 +2,7 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GFX89 %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX89 %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX10 %s exp mrt0 off, off, off, off // SI: exp mrt0 off, off, off, off ; encoding: [0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0x00] @@ -83,16 +84,6 @@ exp mrtz 
v4, v3, v2, v1 done // GFX89: exp mrtz v4, v3, v2, v1 done ; encoding: [0x8f,0x08,0x00,0xc4,0x04,0x03,0x02,0x01] // GFX10: exp mrtz v4, v3, v2, v1 done ; encoding: [0x8f,0x08,0x00,0xf8,0x04,0x03,0x02,0x01] -exp null v4, v3, v2, v1 -// SI: exp null v4, v3, v2, v1 ; encoding: [0x9f,0x00,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp null v4, v3, v2, v1 ; encoding: [0x9f,0x00,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp null v4, v3, v2, v1 ; encoding: [0x9f,0x00,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp null v4, v3, v2, v1 done -// SI: exp null v4, v3, v2, v1 done ; encoding: [0x9f,0x08,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp null v4, v3, v2, v1 done ; encoding: [0x9f,0x08,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp null v4, v3, v2, v1 done ; encoding: [0x9f,0x08,0x00,0xf8,0x04,0x03,0x02,0x01] - exp pos0 v4, v3, v2, v1 // SI: exp pos0 v4, v3, v2, v1 ; encoding: [0xcf,0x00,0x00,0xf8,0x04,0x03,0x02,0x01] // GFX89: exp pos0 v4, v3, v2, v1 ; encoding: [0xcf,0x00,0x00,0xc4,0x04,0x03,0x02,0x01] @@ -112,48 +103,3 @@ exp pos3 v4, v3, v2, v1 done // SI: exp pos3 v4, v3, v2, v1 done ; encoding: [0xff,0x08,0x00,0xf8,0x04,0x03,0x02,0x01] // GFX89: exp pos3 v4, v3, v2, v1 done ; encoding: [0xff,0x08,0x00,0xc4,0x04,0x03,0x02,0x01] // GFX10: exp pos3 v4, v3, v2, v1 done ; encoding: [0xff,0x08,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp param0 v4, v3, v2, v1 -// SI: exp param0 v4, v3, v2, v1 ; encoding: [0x0f,0x02,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp param0 v4, v3, v2, v1 ; encoding: [0x0f,0x02,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp param0 v4, v3, v2, v1 ; encoding: [0x0f,0x02,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp param0 v4, v3, v2, v1 done -// SI: exp param0 v4, v3, v2, v1 done ; encoding: [0x0f,0x0a,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp param0 v4, v3, v2, v1 done ; encoding: [0x0f,0x0a,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp param0 v4, v3, v2, v1 done ; encoding: [0x0f,0x0a,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp param31 v4, v3, v2, v1 -// SI: exp param31 
v4, v3, v2, v1 ; encoding: [0xff,0x03,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp param31 v4, v3, v2, v1 ; encoding: [0xff,0x03,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp param31 v4, v3, v2, v1 ; encoding: [0xff,0x03,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp param31 v4, v3, v2, v1 done -// SI: exp param31 v4, v3, v2, v1 done ; encoding: [0xff,0x0b,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp param31 v4, v3, v2, v1 done ; encoding: [0xff,0x0b,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp param31 v4, v3, v2, v1 done ; encoding: [0xff,0x0b,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp mrt0 v4, v3, v2, v1 vm -// SI: exp mrt0 v4, v3, v2, v1 vm ; encoding: [0x0f,0x10,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp mrt0 v4, v3, v2, v1 vm ; encoding: [0x0f,0x10,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp mrt0 v4, v3, v2, v1 vm ; encoding: [0x0f,0x10,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp mrt0 v4, v3, v2, v1 done vm -// SI: exp mrt0 v4, v3, v2, v1 done vm ; encoding: [0x0f,0x18,0x00,0xf8,0x04,0x03,0x02,0x01] -// GFX89: exp mrt0 v4, v3, v2, v1 done vm ; encoding: [0x0f,0x18,0x00,0xc4,0x04,0x03,0x02,0x01] -// GFX10: exp mrt0 v4, v3, v2, v1 done vm ; encoding: [0x0f,0x18,0x00,0xf8,0x04,0x03,0x02,0x01] - -exp mrtz, v3, v3, v7, v7 compr -// SI: exp mrtz v3, v3, v7, v7 compr ; encoding: [0x8f,0x04,0x00,0xf8,0x03,0x07,0x00,0x00] -// GFX89: exp mrtz v3, v3, v7, v7 compr ; encoding: [0x8f,0x04,0x00,0xc4,0x03,0x07,0x00,0x00] -// GFX10: exp mrtz v3, v3, v7, v7 compr ; encoding: [0x8f,0x04,0x00,0xf8,0x03,0x07,0x00,0x00] - -exp mrtz, off, off, v7, v7 compr -// SI: exp mrtz off, off, v7, v7 compr ; encoding: [0x8c,0x04,0x00,0xf8,0x00,0x07,0x00,0x00] -// GFX89: exp mrtz off, off, v7, v7 compr ; encoding: [0x8c,0x04,0x00,0xc4,0x00,0x07,0x00,0x00] -// GFX10: exp mrtz off, off, v7, v7 compr ; encoding: [0x8c,0x04,0x00,0xf8,0x00,0x07,0x00,0x00] - -exp mrtz, v3, v3, off, off compr -// SI: exp mrtz v3, v3, off, off compr ; encoding: [0x83,0x04,0x00,0xf8,0x03,0x00,0x00,0x00] -// GFX89: exp mrtz v3, v3, 
off, off compr ; encoding: [0x83,0x04,0x00,0xc4,0x03,0x00,0x00,0x00] -// GFX10: exp mrtz v3, v3, off, off compr ; encoding: [0x83,0x04,0x00,0xf8,0x03,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/exp_gfx11.txt b/llvm/test/MC/Disassembler/AMDGPU/exp_gfx11.txt new file mode 100644 index 00000000000000..ffdd30597ddd82 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/exp_gfx11.txt @@ -0,0 +1,13 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -strict-whitespace %s -check-prefix=GFX11 + +# GFX11: exp dual_src_blend0 v4, v3, v2, v1 ; encoding: [0x5f,0x01,0x00,0xf8,0x04,0x03,0x02,0x01] +0x5f,0x01,0x00,0xf8,0x04,0x03,0x02,0x01 + +# GFX11: exp dual_src_blend1 v2, v3, off, off ; encoding: [0x63,0x01,0x00,0xf8,0x02,0x03,0x00,0x00] +0x63,0x01,0x00,0xf8,0x02,0x03,0x00,0x00 + +# GFX11: exp mrtz v4, v3, v2, v1 row_en ; encoding: [0x8f,0x20,0x00,0xf8,0x04,0x03,0x02,0x01] +0x8f,0x20,0x00,0xf8,0x04,0x03,0x02,0x01 + +# GFX11: exp mrtz v4, v3, off, off done row_en ; encoding: [0x83,0x28,0x00,0xf8,0x04,0x03,0x00,0x00] +0x83,0x28,0x00,0xf8,0x04,0x03,0x00,0x00 From ef1ea5ac01332948c1fca260bba04110cf65f29b Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Wed, 27 Apr 2022 09:17:25 -0400 Subject: [PATCH 508/908] [AMDGPU] gfx11 vinterp instructions MC support A new instruction encoding. Some of these instructions were previously VOP3 encoded. Contributors: Carl Ritson Patch 11/N for upstreaming of AMDGPU gfx11 architecture. 
Depends on D125824 Reviewed By: critson Differential Revision: https://reviews.llvm.org/D125989 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 81 ++++- .../Disassembler/AMDGPUDisassembler.cpp | 15 + .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 1 + .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 11 + .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 2 + llvm/lib/Target/AMDGPU/SIDefines.h | 3 + llvm/lib/Target/AMDGPU/SIInstrFormats.td | 4 + llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 9 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 1 + llvm/lib/Target/AMDGPU/VINTERPInstructions.td | 135 +++++++++ llvm/lib/Target/AMDGPU/VOP3Instructions.td | 4 +- llvm/test/MC/AMDGPU/gfx11_err.s | 6 + llvm/test/MC/AMDGPU/vinterp.s | 277 ++++++++++++++++++ llvm/test/MC/Disassembler/AMDGPU/vinterp.txt | 247 ++++++++++++++++ 15 files changed, 798 insertions(+), 6 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/VINTERPInstructions.td create mode 100644 llvm/test/MC/AMDGPU/vinterp.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/vinterp.txt diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 6f707cc4cfaff6..90fecad555ce47 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -162,6 +162,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { ImmTyABID, ImmTyEndpgm, ImmTyWaitVDST, + ImmTyWaitEXP, }; enum ImmKindTy { @@ -837,6 +838,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isU16Imm() const; bool isEndpgm() const; bool isWaitVDST() const; + bool isWaitEXP() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -1045,6 +1047,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { case ImmTyABID: OS << "ABID"; break; case ImmTyEndpgm: OS << "Endpgm"; break; case ImmTyWaitVDST: OS << "WaitVDST"; break; + case ImmTyWaitEXP: OS << "WaitEXP"; break; } } @@ -1720,6 +1723,7 @@ 
class AMDGPUAsmParser : public MCTargetAsmParser { OptionalImmIndexMap &OptionalIdx); void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); + void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); void cvtMIMG(MCInst &Inst, const OperandVector &Operands, bool IsAtomic = false); @@ -1762,8 +1766,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; - OperandMatchResultTy parseWaitVDST(OperandVector &Operands); AMDGPUOperand::Ptr defaultWaitVDST() const; + AMDGPUOperand::Ptr defaultWaitEXP() const; }; struct OptionalOperand { @@ -7842,7 +7846,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, - {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr} + {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, + {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} }; void AMDGPUAsmParser::onBeginOfFile() { @@ -8012,6 +8017,66 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) } } +void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) +{ + OptionalImmIndexMap OptionalIdx; + unsigned Opc = Inst.getOpcode(); + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("unhandled operand type"); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyClampSI); + + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx != -1) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); + + if (OpSelIdx == -1) + return; + + const int Ops[] = { AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + AMDGPU::OpName::src2 }; + const int ModOps[] = { AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers }; + + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); + if (OpIdx == -1) + break; + + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); + + if ((OpSel & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_0; + if (ModOps[J] == AMDGPU::OpName::src0_modifiers && + (OpSel & (1 << 3)) != 0) + ModVal |= SISrcMods::DST_OP_SEL; + + Inst.getOperand(ModIdx).setImm(ModVal); + } +} + void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx) { unsigned Opc = Inst.getOpcode(); @@ -8859,3 +8924,15 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { bool AMDGPUOperand::isWaitVDST() const { return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); } + +//===----------------------------------------------------------------------===// +// VINTERP +//===----------------------------------------------------------------------===// + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); +} + +bool AMDGPUOperand::isWaitEXP() const { + return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); +} diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 62aaa321f498af..bd75272859695e 
100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -607,6 +607,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)) Res = convertEXPInst(MI); + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)) + Res = convertVINTERPInst(MI); + if (Res && IsSDWA) Res = convertSDWAInst(MI); @@ -648,6 +651,18 @@ DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { return MCDisassembler::Success; } +DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { + if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) { + // The MCInst has this field that is not directly encoded in the + // instruction. 
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel); + } + return MCDisassembler::Success; +} + DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 3d1ee29faa3a1e..f46ad9d4220958 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -102,6 +102,7 @@ class AMDGPUDisassembler : public MCDisassembler { raw_string_ostream &KdStream) const; DecodeStatus convertEXPInst(MCInst &MI) const; + DecodeStatus convertVINTERPInst(MCInst &MI) const; DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const; DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index e60e52fb67382d..5e6ddaee8daee0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -605,6 +605,17 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint8_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm != 0) { + O << " wait_exp:"; + printU4ImmDecOperand(MI, OpNo, O); + } +} + +// Print default vcc/vcc_lo operand of VOPC. 
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 2a0ebd2f3de468..e351dba9cfc085 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -177,6 +177,8 @@ class AMDGPUInstPrinter : public MCInstPrinter { raw_ostream &O); void printWaitVDST(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, unsigned N); diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 07819f8475c287..8760edecbd90d4 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -66,6 +66,9 @@ enum : uint64_t { // LDSDIR instruction format. LDSDIR = 1 << 26, + // VINTERP instruction format. + VINTERP = 1 << 27, + // High bits - other information. 
VM_CNT = UINT64_C(1) << 32, EXP_CNT = UINT64_C(1) << 33, diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 2fd1a0b60d1bf6..50239840c22517 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -51,6 +51,9 @@ class InstSI > { } def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>; +def wait_exp : NamedOperandU8<"WaitEXP", NamedMatchClass<"WaitEXP">>; } // End OperandType = "OPERAND_IMMEDIATE" @@ -1476,6 +1477,9 @@ def VOP3OpSelMods : ComplexPattern; def VOP3PMadMixMods : ComplexPattern; +def VINTERPMods : ComplexPattern; +def VINTERPModsHi : ComplexPattern; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// @@ -2518,10 +2522,11 @@ multiclass VINTRP_m op, dag outs, dag ins, string asm, def _vi : VINTRP_Real_vi ; - let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { + let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { def _gfx10 : VINTRP_Real_si; - } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" + } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" } + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index becb8a61df7ce4..d248992ad0c287 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -31,6 +31,7 @@ include "FLATInstructions.td" include "BUFInstructions.td" include "EXPInstructions.td" include "LDSDIRInstructions.td" +include "VINTERPInstructions.td" //===----------------------------------------------------------------------===// // VINTRP 
Instructions diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td new file mode 100644 index 00000000000000..11553c3d1c9a6e --- /dev/null +++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td @@ -0,0 +1,135 @@ +//===-- VINTERPInstructions.td - VINTERP Instruction Definitions ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// VINTERP encoding +//===----------------------------------------------------------------------===// + +class VINTERPe_gfx11 op, VOPProfile P> : Enc64 { + bits<8> vdst; + bits<4> src0_modifiers; + bits<9> src0; + bits<3> src1_modifiers; + bits<9> src1; + bits<3> src2_modifiers; + bits<9> src2; + bits<1> clamp; + bits<3> waitexp; + + let Inst{31-26} = 0x33; // VOP3P encoding + let Inst{25-24} = 0x1; // VINTERP sub-encoding + let Inst{23} = 0; // reserved + + let Inst{7-0} = vdst; + let Inst{10-8} = waitexp; + let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0) + let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1) + let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2) + let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3) + let Inst{15} = clamp; + let Inst{22-16} = op; + let Inst{40-32} = src0; + let Inst{49-41} = src1; + let Inst{58-50} = src2; + let Inst{61} = src0_modifiers{0}; // neg(0) + let Inst{62} = src1_modifiers{0}; // neg(1) + let Inst{63} = src2_modifiers{0}; // neg(2) +} + +//===----------------------------------------------------------------------===// +// VOP3 VINTERP +//===----------------------------------------------------------------------===// + +class VINTERP_Pseudo pattern = []> 
: + VOP3_Pseudo { + let AsmMatchConverter = "cvtVINTERP"; + let mayRaiseFPException = 0; + + let VOP3_OPSEL = 1; + let VINTERP = 1; +} + +class VINTERP_Real : + VOP3_Real { + let VINTERP = 1; +} + +def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> { + let HasOpSel = 0; + let HasModifiers = 1; + + let Outs64 = (outs VGPR_32:$vdst); + let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, + Src1Mod:$src1_modifiers, VRegSrc_32:$src1, + Src2Mod:$src2_modifiers, VRegSrc_32:$src2, + clampmod:$clamp, + wait_exp:$waitexp); + + let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$waitexp"; +} + +class VOP3_VINTERP_F16 ArgVT> : VOPProfile { + let HasOpSel = 1; + let HasModifiers = 1; + + let Outs64 = (outs VGPR_32:$vdst); + let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, + Src1Mod:$src1_modifiers, VRegSrc_32:$src1, + Src2Mod:$src2_modifiers, VRegSrc_32:$src2, + clampmod:$clamp, op_sel0:$op_sel, + wait_exp:$waitexp); + + let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp"; +} + +//===----------------------------------------------------------------------===// +// VINTERP Pseudo Instructions +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGFX11Plus in { + +let Uses = [M0, EXEC, MODE] in { +def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>; +def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>; +def V_INTERP_P10_F16_F32_inreg : + VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>; +def V_INTERP_P2_F16_F32_inreg : + VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>; +} // Uses = [M0, EXEC, MODE] + +let Uses = [M0, EXEC] in { +def V_INTERP_P10_RTZ_F16_F32_inreg : + VINTERP_Pseudo <"v_interp_p10_rtz_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>; +def V_INTERP_P2_RTZ_F16_F32_inreg : + VINTERP_Pseudo 
<"v_interp_p2_rtz_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>; +} // Uses = [M0, EXEC] + +} // SubtargetPredicate = isGFX11Plus + +def VINTERP_OPSEL { + int LOW = 0; + int HIGH = 0xa; +} +//===----------------------------------------------------------------------===// +// VINTERP Real Instructions +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" in { + multiclass VINTERP_Real_gfx11 op> { + def _gfx11 : + VINTERP_Real(NAME), SIEncodingFamily.GFX11>, + VINTERPe_gfx11(NAME).Pfl>; + } +} + +defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11<0x000>; +defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11<0x001>; +defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11<0x002>; +defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11<0x003>; +defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x004>; +defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x005>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index afd012787e9ee8..7eb0a1120b80a6 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -693,7 +693,7 @@ def : IntClampPat; // GFX10. 
//===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass VOP3_Real_gfx10 op> { def _gfx10 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX10>, @@ -738,7 +738,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let AsmString = asmName # ps.AsmOperands; } } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>; diff --git a/llvm/test/MC/AMDGPU/gfx11_err.s b/llvm/test/MC/AMDGPU/gfx11_err.s index 180ea60da5438e..55edaa37d282a3 100644 --- a/llvm/test/MC/AMDGPU/gfx11_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_err.s @@ -26,3 +26,9 @@ lds_direct_load v15 wait_vdst:16 lds_direct_load v15 wait_vdst // GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_interp_p10_f32 v0, v1, v2, v3 wait_exp:8 +// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_interp_p2_f32 v0, -v1, v2, v3 wait_exp +// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/vinterp.s b/llvm/test/MC/AMDGPU/vinterp.s new file mode 100644 index 00000000000000..fa64243c156d43 --- /dev/null +++ b/llvm/test/MC/AMDGPU/vinterp.s @@ -0,0 +1,277 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX11 %s + +v_interp_p10_f32 v0, v1, v2, v3 +// GFX11: v_interp_p10_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f32 v1, v10, v20, v30 +// GFX11: v_interp_p10_f32 v1, v10, v20, v30 ; encoding: [0x01,0x00,0x00,0xcd,0x0a,0x29,0x7a,0x04] + +v_interp_p10_f32 v2, v11, v21, v31 +// GFX11: v_interp_p10_f32 v2, v11, v21, v31 ; encoding: [0x02,0x00,0x00,0xcd,0x0b,0x2b,0x7e,0x04] + +v_interp_p10_f32 v3, v12, v22, v32 +// GFX11: 
v_interp_p10_f32 v3, v12, v22, v32 ; encoding: [0x03,0x00,0x00,0xcd,0x0c,0x2d,0x82,0x04] + +v_interp_p10_f32 v0, v1, v2, v3 clamp +// GFX11: v_interp_p10_f32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x00,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f32 v0, -v1, v2, v3 +// GFX11: v_interp_p10_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24] + +v_interp_p10_f32 v0, v1, -v2, v3 +// GFX11: v_interp_p10_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44] + +v_interp_p10_f32 v0, v1, v2, -v3 +// GFX11: v_interp_p10_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84] + +v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 +// GFX11: v_interp_p10_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1 +// GFX11: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1 ; encoding: [0x00,0x01,0x00,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7 +// GFX11: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x00,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7 +// GFX11: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7 ; encoding: [0x00,0x87,0x00,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f32 v0, v1, v2, v3 +// GFX11: v_interp_p2_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f32 v1, v10, v20, v30 +// GFX11: v_interp_p2_f32 v1, v10, v20, v30 ; encoding: [0x01,0x00,0x01,0xcd,0x0a,0x29,0x7a,0x04] + +v_interp_p2_f32 v2, v11, v21, v31 +// GFX11: v_interp_p2_f32 v2, v11, v21, v31 ; encoding: [0x02,0x00,0x01,0xcd,0x0b,0x2b,0x7e,0x04] + +v_interp_p2_f32 v3, v12, v22, v32 +// GFX11: v_interp_p2_f32 v3, v12, v22, v32 ; encoding: [0x03,0x00,0x01,0xcd,0x0c,0x2d,0x82,0x04] + +v_interp_p2_f32 v0, v1, v2, v3 clamp +// GFX11: v_interp_p2_f32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x01,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f32 v0, -v1, v2, v3 +// GFX11: v_interp_p2_f32 v0, 
-v1, v2, v3 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24] + +v_interp_p2_f32 v0, v1, -v2, v3 +// GFX11: v_interp_p2_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44] + +v_interp_p2_f32 v0, v1, v2, -v3 +// GFX11: v_interp_p2_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84] + +v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0 +// GFX11: v_interp_p2_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1 +// GFX11: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1 ; encoding: [0x00,0x01,0x01,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7 +// GFX11: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x01,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7 +// GFX11: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7 ; encoding: [0x00,0x87,0x01,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, -v1, v2, v3 +// GFX11: v_interp_p10_f16_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24] + +v_interp_p10_f16_f32 v0, v1, -v2, v3 +// GFX11: v_interp_p10_f16_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44] + +v_interp_p10_f16_f32 v0, v1, v2, -v3 +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84] + +v_interp_p10_f16_f32 v0, v1, v2, v3 clamp +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0 +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1 +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1 ; encoding: [0x00,0x01,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, 
v1, v2, v3 wait_exp:7 +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,0] +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x00,0x10,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x00,0x78,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0xe4] + +v_interp_p2_f16_f32 v0, v1, v2, v3 +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, -v1, v2, v3 +// GFX11: v_interp_p2_f16_f32 v0, -v1, v2, 
v3 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x24] + +v_interp_p2_f16_f32 v0, v1, -v2, v3 +// GFX11: v_interp_p2_f16_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x44] + +v_interp_p2_f16_f32 v0, v1, v2, -v3 +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x84] + +v_interp_p2_f16_f32 v0, v1, v2, v3 clamp +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:0 +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:1 +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:1 ; encoding: [0x00,0x01,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:7 +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,0] +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x00,0x10,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x00,0x78,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, 
v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p2_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 +// GFX11: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x24] + +v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x44] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x84] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 ; encoding: [0x00,0x01,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,0] +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 ; encoding: 
[0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x00,0x10,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x00,0x78,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p10_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p10_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0xe4] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 +// GFX11: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x24] + +v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 ; encoding: 
[0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x44] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x84] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:0 +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1 ; encoding: [0x00,0x01,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,0] +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x00,0x10,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x00,0x78,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 +// GFX11: 
v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04] + +v_interp_p2_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 +// GFX11: v_interp_p2_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4] diff --git a/llvm/test/MC/Disassembler/AMDGPU/vinterp.txt b/llvm/test/MC/Disassembler/AMDGPU/vinterp.txt new file mode 100644 index 00000000000000..a1532bfe90d6d0 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/vinterp.txt @@ -0,0 +1,247 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble %s | FileCheck -strict-whitespace -check-prefix=GFX11 %s + +# GFX11: v_interp_p10_f32 v0, v1, v2, v3{{$}} +0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f32 v1, v10, v20, v30{{$}} +0x01,0x00,0x00,0xcd,0x0a,0x29,0x7a,0x04 + +# GFX11: v_interp_p10_f32 v2, v11, v21, v31{{$}} +0x02,0x00,0x00,0xcd,0x0b,0x2b,0x7e,0x04 + +# GFX11: v_interp_p10_f32 v3, v12, v22, v32{{$}} +0x03,0x00,0x00,0xcd,0x0c,0x2d,0x82,0x04 + +# GFX11: v_interp_p10_f32 v0, v1, v2, v3 clamp{{$}} +0x00,0x80,0x00,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f32 v0, -v1, v2, v3{{$}} +0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24 + +# GFX11: v_interp_p10_f32 v0, v1, -v2, v3{{$}} +0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44 + +# GFX11: v_interp_p10_f32 v0, v1, v2, -v3{{$}} +0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84 + +# GFX11: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1{{$}} +0x00,0x01,0x00,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7{{$}} +0x00,0x07,0x00,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7{{$}} +0x00,0x87,0x00,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f32 v0, v1, 
v2, v3{{$}} +0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f32 v1, v10, v20, v30{{$}} +0x01,0x00,0x01,0xcd,0x0a,0x29,0x7a,0x04 + +# GFX11: v_interp_p2_f32 v2, v11, v21, v31{{$}} +0x02,0x00,0x01,0xcd,0x0b,0x2b,0x7e,0x04 + +# GFX11: v_interp_p2_f32 v3, v12, v22, v32{{$}} +0x03,0x00,0x01,0xcd,0x0c,0x2d,0x82,0x04 + +# GFX11: v_interp_p2_f32 v0, v1, v2, v3 clamp{{$}} +0x00,0x80,0x01,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f32 v0, -v1, v2, v3{{$}} +0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24 + +# GFX11: v_interp_p2_f32 v0, v1, -v2, v3{{$}} +0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44 + +# GFX11: v_interp_p2_f32 v0, v1, v2, -v3{{$}} +0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84 + +# GFX11: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:1{{$}} +0x00,0x01,0x01,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7{{$}} +0x00,0x07,0x01,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f32 v0, v1, v2, v3 clamp wait_exp:7{{$}} +0x00,0x87,0x01,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3{{$}} +0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, -v1, v2, v3{{$}} +0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24 + +# GFX11: v_interp_p10_f16_f32 v0, v1, -v2, v3{{$}} +0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, -v3{{$}} +0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp{{$}} +0x00,0x80,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} +0x00,0x01,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} +0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0]{{$}} +0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0]{{$}} +0x00,0x10,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0]{{$}} 
+0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1]{{$}} +0x00,0x40,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1]{{$}} +0x00,0x78,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0x4d,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x02,0xcd,0x01,0x05,0x0e,0xe4 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3{{$}} +0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, -v1, v2, v3{{$}} +0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x24 + +# GFX11: v_interp_p2_f16_f32 v0, v1, -v2, v3{{$}} +0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x44 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, -v3{{$}} +0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x84 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp{{$}} +0x00,0x80,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} +0x00,0x01,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} +0x00,0x07,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0]{{$}} +0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0]{{$}} +0x00,0x10,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0]{{$}} +0x00,0x20,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1]{{$}} +0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1]{{$}} +0x00,0x78,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04 + 
+# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3{{$}} +0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3{{$}} +0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x24 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3{{$}} +0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x44 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3{{$}} +0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x84 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp{{$}} +0x00,0x80,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} +0x00,0x01,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} +0x00,0x07,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0]{{$}} +0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0]{{$}} +0x00,0x10,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0]{{$}} +0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1]{{$}} +0x00,0x40,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1]{{$}} +0x00,0x78,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0x4d,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p10_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x04,0xcd,0x01,0x05,0x0e,0xe4 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3{{$}} 
+0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3{{$}} +0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x24 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3{{$}} +0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x44 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3{{$}} +0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x84 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp{{$}} +0x00,0x80,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:1{{$}} +0x00,0x01,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:7{{$}} +0x00,0x07,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,0]{{$}} +0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,1,0,0]{{$}} +0x00,0x10,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,1,0]{{$}} +0x00,0x20,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1]{{$}} +0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,1,1,1]{{$}} +0x00,0x78,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04 + +# GFX11: v_interp_p2_rtz_f16_f32 v0, -v1, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5{{$}} +0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4 From c088fbe7decce4409f81defcbb96841b0e78d861 Mon Sep 17 00:00:00 2001 From: Chia-hung Duan Date: Wed, 25 May 2022 19:21:48 +0000 Subject: [PATCH 509/908] [mlir][PDLL] Allow numeric result indexing for unregistered op If we don't specify the result index while matching operand with the result of certain operation, it's supposed to match all the results of the operation with the 
operand. For registered op, it's easy to do that by either indexing with number or name. For unregistered op, this commit enables the numeric result indexing for this use case. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D126330 --- mlir/docs/PDLL.md | 17 ++++++++++++++--- mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp | 10 +++++++++- mlir/lib/Tools/PDLL/Parser/Parser.cpp | 5 ++++- mlir/test/mlir-pdll/CodeGen/MLIR/expr.pdll | 13 +++++++++++++ mlir/test/mlir-pdll/Parser/expr.pdll | 20 ++++++++++++++++++++ 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/mlir/docs/PDLL.md b/mlir/docs/PDLL.md index 1727386b148878..11ab5831d2b6a7 100644 --- a/mlir/docs/PDLL.md +++ b/mlir/docs/PDLL.md @@ -700,9 +700,9 @@ let inputOp = op(resultOp.result, resultOp.0); ``` Along with result name access, variables of `Op` type may implicitly convert to -`Value` or `ValueRange`. These variables are converted to `Value` when they are -known (via ODS) to only have one result, in all other cases they convert to -`ValueRange`: +`Value` or `ValueRange`. If these variables are registered (has ODS entry), they +are converted to `Value` when they are known to only have one result, otherwise +they will be converted to `ValueRange`: ```pdll // `resultOp` may also convert implicitly to a Value for use in `inputOp`: @@ -713,6 +713,17 @@ let inputOp = op(resultOp); let inputOp = op(op); ``` +#### Unregistered Operations + +A variable of unregistered op is still available for numeric result indexing. +Given that we don't have knowledge of its result groups, numeric indexing +returns a Value corresponding to the individual result at the given index. + +```pdll +// Use the index `0` to refer to the first result value of the unregistered op. +let inputOp = op(op.0); +``` + ### Attribute Expression An attribute expression represents a literal MLIR attribute. 
It allows for diff --git a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp index 334093e576d58b..5678761e718055 100644 --- a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp +++ b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp @@ -444,7 +444,15 @@ Value CodeGen::genExprImpl(const ast::MemberAccessExpr *expr) { assert(opType.getName() && "expected valid operation name"); const ods::Operation *odsOp = odsContext.lookupOperation(*opType.getName()); - assert(odsOp && "expected valid ODS operation information"); + + if (!odsOp) { + assert(llvm::isDigit(name[0]) && "unregistered op only allows numeric indexing"); + unsigned resultIndex; + name.getAsInteger(/*Radix=*/10, resultIndex); + IntegerAttr index = builder.getI32IntegerAttr(resultIndex); + return builder.create(loc, genType(expr->getType()), + parentExprs[0], index); + } // Find the result with the member name or by index. ArrayRef results = odsOp->getResults(); diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp index 8efc3d0fce7b8a..1672c11ca72948 100644 --- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp +++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp @@ -2683,8 +2683,11 @@ FailureOr Parser::validateMemberAccess(ast::Expr *parentExpr, }); if (it != results.end()) return it->isVariadic() ? valueRangeTy : valueTy; + } else if (llvm::isDigit(name[0])) { + // Allow unchecked numeric indexing of the results of unregistered + // operations. It returns a single value. + return valueTy; } - } else if (auto tupleType = parentType.dyn_cast()) { // Handle indexed results. unsigned index = 0; diff --git a/mlir/test/mlir-pdll/CodeGen/MLIR/expr.pdll b/mlir/test/mlir-pdll/CodeGen/MLIR/expr.pdll index 2d696029453706..671a3b4818c63c 100644 --- a/mlir/test/mlir-pdll/CodeGen/MLIR/expr.pdll +++ b/mlir/test/mlir-pdll/CodeGen/MLIR/expr.pdll @@ -55,6 +55,19 @@ Pattern OpAllResultMemberAccess { // ----- +// Handle result indexing on unregistered op. 
+// CHECK: pdl.pattern @UnregisteredOpResultIndexing +// CHECK: %[[BAR_OP:.*]] = operation "my_dialect.unregistered_bar" +// CHECK: %[[BAR_RES:.*]] = result 0 of %[[BAR_OP]] +// CHECK: operation "my_dialect.unregistered_foo"(%[[BAR_RES]] : !pdl.value) +Pattern UnregisteredOpResultIndexing { + let bar : Op; + let op = op(bar.0); + erase op; +} + +// ----- + // Handle implicit "named" operation results access. #include "include/ops.td" diff --git a/mlir/test/mlir-pdll/Parser/expr.pdll b/mlir/test/mlir-pdll/Parser/expr.pdll index c7d96035e2f7c1..5320efd2e690a4 100644 --- a/mlir/test/mlir-pdll/Parser/expr.pdll +++ b/mlir/test/mlir-pdll/Parser/expr.pdll @@ -90,6 +90,26 @@ Pattern { // ----- +// CHECK: Module +// CHECK: `-VariableDecl {{.*}} Name Type> +// CHECK: `-OperationExpr {{.*}} Type> +// CHECK: `-OpNameDecl {{.*}} Name +// CHECK: `Operands` +// CHECK: `-MemberAccessExpr {{.*}} Member<0> Type +// CHECK: `-OperationExpr {{.*}} Type> +// CHECK: `-OpNameDecl {{.*}} Name +// CHECK: `Operands` +// CHECK: `-DeclRefExpr {{.*}} Type +// CHECK: `-VariableDecl {{.*}} Name<_> Type +// CHECK: `Constraints` +// CHECK: `-ValueRangeConstraintDecl +Pattern { + let op = op(op.0); + erase op; +} + +// ----- + //===----------------------------------------------------------------------===// // OperationExpr //===----------------------------------------------------------------------===// From 79e09af1d6e11b05c6484868f15a9a2db298699c Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Tue, 24 May 2022 13:41:39 -0700 Subject: [PATCH 510/908] [clang] Fix the begin location of concepts specialization expression The concept specialization expression should start at the location of the nested qualifiers when it has nested qualifiers. This ensures that libclang reports correct source ranges that include all subexpressions when visiting the expression. 
Differential Revision: https://reviews.llvm.org/D126332 --- clang/include/clang/AST/ExprConcepts.h | 2 ++ clang/test/Index/index-concepts.cpp | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h index 6849b65b71c0ce..fd9cd31f3b90ba 100644 --- a/clang/include/clang/AST/ExprConcepts.h +++ b/clang/include/clang/AST/ExprConcepts.h @@ -122,6 +122,8 @@ class ConceptSpecializationExpr final : public Expr, public ConceptReference, } SourceLocation getBeginLoc() const LLVM_READONLY { + if (auto QualifierLoc = getNestedNameSpecifierLoc()) + return QualifierLoc.getBeginLoc(); return ConceptName.getBeginLoc(); } diff --git a/clang/test/Index/index-concepts.cpp b/clang/test/Index/index-concepts.cpp index ba4b87111ff487..b8d41e841e6195 100644 --- a/clang/test/Index/index-concepts.cpp +++ b/clang/test/Index/index-concepts.cpp @@ -64,8 +64,8 @@ concept ConTwoTemplateParams = ns::ConInNamespace && ConWithLogicalAnd; // CHECK: index-concepts.cpp:[[@LINE-1]]:9: ConceptDecl=ConTwoTemplateParams:[[@LINE-1]]:9 (Definition) Extent=[[[@LINE-2]]:1 - [[@LINE-1]]:79] // CHECK: index-concepts.cpp:[[@LINE-3]]:17: TemplateTypeParameter=T1:[[@LINE-3]]:17 (Definition) Extent=[[[@LINE-3]]:11 - [[@LINE-3]]:19] [access=public] // CHECK: index-concepts.cpp:[[@LINE-4]]:27: TemplateTypeParameter=T2:[[@LINE-4]]:27 (Definition) Extent=[[[@LINE-4]]:21 - [[@LINE-4]]:29] [access=public] -// CHECK: index-concepts.cpp:[[@LINE-4]]:36: BinaryOperator= Extent=[[[@LINE-4]]:36 - [[@LINE-4]]:79] -// CHECK: index-concepts.cpp:[[@LINE-5]]:36: ConceptSpecializationExpr= Extent=[[[@LINE-5]]:36 - [[@LINE-5]]:54] +// CHECK: index-concepts.cpp:[[@LINE-4]]:32: BinaryOperator= Extent=[[[@LINE-4]]:32 - [[@LINE-4]]:79] +// CHECK: index-concepts.cpp:[[@LINE-5]]:32: ConceptSpecializationExpr= Extent=[[[@LINE-5]]:32 - [[@LINE-5]]:54] // CHECK: index-concepts.cpp:[[@LINE-6]]:32: NamespaceRef=ns:55:11 Extent=[[[@LINE-6]]:32 - 
[[@LINE-6]]:34] // CHECK: index-concepts.cpp:[[@LINE-7]]:36: TemplateRef=ConInNamespace:58:9 Extent=[[[@LINE-7]]:36 - [[@LINE-7]]:50] // CHECK: index-concepts.cpp:[[@LINE-8]]:58: ConceptSpecializationExpr= Extent=[[[@LINE-8]]:58 - [[@LINE-8]]:79] @@ -107,7 +107,7 @@ requires ns::ConInNamespace && ConTwoTemplateParams {} // CHECK: index-concepts.cpp:[[@LINE-4]]:16: TemplateTypeParameter=T:[[@LINE-4]]:16 (Definition) Extent=[[[@LINE-4]]:10 - [[@LINE-4]]:17] [access=public] // CHECK: index-concepts.cpp:[[@LINE-4]]:36: ParmDecl=x:[[@LINE-4]]:36 (Definition) Extent=[[[@LINE-4]]:27 - [[@LINE-4]]:37] // CHECK: index-concepts.cpp:[[@LINE-5]]:33: TypeRef=T:[[@LINE-6]]:16 Extent=[[[@LINE-5]]:33 - [[@LINE-5]]:34] -// CHECK: index-concepts.cpp:[[@LINE-5]]:14: ConceptSpecializationExpr= Extent=[[[@LINE-5]]:14 - [[@LINE-5]]:31] +// CHECK: index-concepts.cpp:[[@LINE-5]]:10: ConceptSpecializationExpr= Extent=[[[@LINE-5]]:10 - [[@LINE-5]]:31] // CHECK: index-concepts.cpp:[[@LINE-6]]:10: NamespaceRef=ns:55:11 Extent=[[[@LINE-6]]:10 - [[@LINE-6]]:12] // CHECK: index-concepts.cpp:[[@LINE-7]]:14: TemplateRef=ConInNamespace:58:9 Extent=[[[@LINE-7]]:14 - [[@LINE-7]]:28] // CHECK: index-concepts.cpp:[[@LINE-8]]:29: TypeRef=T:[[@LINE-10]]:16 Extent=[[[@LINE-8]]:29 - [[@LINE-8]]:30] From 8e757c6b500dc18f0cc5a7c027682ac2aacf8eb0 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Wed, 25 May 2022 19:37:15 +0100 Subject: [PATCH 511/908] Fix conversion error to Expected On Ubuntu 18.04 with GCC 7.5 Intel trace code fails to build due to failure to convert from lldb_private::process_linux::IntelPTPerThreadProcessTraceUP to Expected. This commit explicitly marks those unique_ptr values as being moved which fixes the conversion error. 
Reviewed By: wallace Differential Revision: https://reviews.llvm.org/D126402 --- lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp | 2 +- lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp b/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp index 9abfc190bf68d9..154e1d9b0ad149 100644 --- a/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp +++ b/lldb/source/Plugins/Process/Linux/IntelPTCollector.cpp @@ -78,7 +78,7 @@ IntelPTPerThreadProcessTrace::Start(const TraceIntelPTStartRequest &request, error = joinErrors(std::move(error), trace->TraceStart(tid)); if (error) return std::move(error); - return trace; + return std::move(trace); } Error IntelPTCollector::TraceStart(const TraceIntelPTStartRequest &request) { diff --git a/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp b/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp index 9b10919839b5f8..047ce471149fdd 100644 --- a/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp +++ b/lldb/source/Plugins/Process/Linux/IntelPTSingleBufferTrace.cpp @@ -318,7 +318,7 @@ Expected IntelPTSingleBufferTrace::Start( } IntelPTSingleBufferTraceUP trace_up( new IntelPTSingleBufferTrace(std::move(*perf_event), initial_state)); - return trace_up; + return std::move(trace_up); } else { return perf_event.takeError(); } From 835e09c4c3eea2d7fadaf8a4e554e7912b9242a2 Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Tue, 10 May 2022 14:46:57 -0400 Subject: [PATCH 512/908] [AMDGPU] gfx11 FLAT Instructions MachineCode Support for FLAT type instructions Contributors: Sebastian Neubauer Patch 12/N for upstreaming of AMDGPU gfx11 architecture. 
Depends on D125989 Reviewed By: rampitec, #amdgpu Differential Revision: https://reviews.llvm.org/D125992 --- llvm/lib/Target/AMDGPU/FLATInstructions.td | 245 ++- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 2 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- llvm/test/MC/AMDGPU/flat-gfx11-mnemonic.s | 196 +++ llvm/test/MC/AMDGPU/gfx11-flat-errs.s | 852 +++++++++ llvm/test/MC/AMDGPU/gfx11_err.s | 3 + llvm/test/MC/AMDGPU/gfx11_flat.s | 1551 +++++++++++++++++ .../MC/Disassembler/AMDGPU/flat-gfx11.txt | 536 ++++++ .../MC/Disassembler/AMDGPU/gfx11_dasm_all.txt | 1 - 9 files changed, 3372 insertions(+), 16 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/flat-gfx11-mnemonic.s create mode 100644 llvm/test/MC/AMDGPU/gfx11-flat-errs.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_flat.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/flat-gfx11.txt diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 78f1cbf0488a11..cb2822818549ba 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -78,8 +78,8 @@ class FLAT_Pseudo op, FLAT_Pseudo ps> : - InstSI , +class FLAT_Real op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : + InstSI , Enc64 { let isPseudo = 0; @@ -687,18 +687,9 @@ defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", // GFX7-, GFX10-only flat instructions. 
let SubtargetPredicate = isGFX7GFX10 in { -defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", - VGPR_32, f32, v2f32, VReg_64>; - defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; -defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", - VGPR_32, f32>; - -defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", - VGPR_32, f32>; - defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", VReg_64, f64>; @@ -717,12 +708,30 @@ let SubtargetPredicate = isGFX90APlus in { } // End SubtargetPredicate = isGFX90APlus let SubtargetPredicate = isGFX940Plus in { - defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>; defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>; } // End SubtargetPredicate = isGFX940Plus +// GFX7-, GFX10-, GFX11-only flat instructions. +let SubtargetPredicate = isGFX7GFX10GFX11 in { + +defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", + VGPR_32, f32, v2f32, VReg_64>; + +defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", + VGPR_32, f32>; + +defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", + VGPR_32, f32>; + +} // End SubtargetPredicate = isGFX7GFX10GFX11 + +// GFX940-, GFX11-only flat instructions. 
+let SubtargetPredicate = isGFX940GFX11Plus in { + defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; +} // End SubtargetPredicate = isGFX940GFX11Plus + defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; @@ -1885,7 +1894,7 @@ let SubtargetPredicate = isGFX940Plus in { class FLAT_Real_gfx10 op, FLAT_Pseudo ps> : FLAT_Real, SIMCInstr { - let AssemblerPredicate = isGFX10Plus; + let AssemblerPredicate = isGFX10Only; let DecoderNamespace = "GFX10"; let Inst{11-0} = offset{11-0}; @@ -2113,3 +2122,213 @@ defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>; defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>; defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>; defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>; + +//===----------------------------------------------------------------------===// +// GFX11 +//===----------------------------------------------------------------------===// + +class FLAT_Real_gfx11 op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : + FLAT_Real , + SIMCInstr { + let AssemblerPredicate = isGFX11Plus; + let DecoderNamespace = "GFX11"; + + let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); + let Inst{15} = cpol{CPolBit.SLC}; + let Inst{17-16} = seg; + let Inst{55} = ps.sve; +} + +multiclass FLAT_Real_Base_gfx11 op, string ps, string opName, int renamed = false> { + def _gfx11 : FLAT_Real_gfx11(ps), opName> { + let Inst{54-48} = !cast(SGPR_NULL_gfx11plus.HWEncoding); + } + if renamed then + def _renamed_gfx11 : MnemonicAlias(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>; +} + +multiclass FLAT_Real_RTN_gfx11 op, string ps, string opName> { + def _RTN_gfx11 : 
FLAT_Real_gfx11(ps#"_RTN"), opName> { + let Inst{54-48} = !cast(SGPR_NULL_gfx11plus.HWEncoding); + } +} + +multiclass FLAT_Real_SADDR_gfx11 op, string ps, string opName> { + def _SADDR_gfx11 : FLAT_Real_gfx11(ps#"_SADDR"), opName>; +} + +multiclass FLAT_Real_SADDR_RTN_gfx11 op, string ps, string opName> { + def _SADDR_RTN_gfx11 : FLAT_Real_gfx11(ps#"_SADDR_RTN"), opName>; +} + +multiclass FLAT_Real_ST_gfx11 op, string ps, string opName> { + def _ST_gfx11 : FLAT_Real_gfx11(ps#"_ST"), opName> { + let Inst{54-48} = !cast(SGPR_NULL_gfx11plus.HWEncoding); + let OtherPredicates = [HasFlatScratchSTMode]; + } +} + +multiclass FLAT_Real_SVS_gfx11 op, string ps, string opName> { + def _SVS_gfx11 : FLAT_Real_gfx11(ps#"_SVS"), opName> { + let OtherPredicates = [HasFlatScratchSVSMode]; + } +} + +multiclass FLAT_Real_AllAddr_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_Base_gfx11, + FLAT_Real_SADDR_gfx11; + +multiclass FLAT_Real_Atomics_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_Base_gfx11, + FLAT_Real_RTN_gfx11; + +multiclass FLAT_Real_GlblAtomics_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_AllAddr_gfx11, + FLAT_Real_RTN_gfx11, + FLAT_Real_SADDR_RTN_gfx11; + +multiclass FLAT_Real_GlblAtomics_RTN_gfx11 op, string ps, string opName> : + FLAT_Real_RTN_gfx11, + FLAT_Real_SADDR_RTN_gfx11; + +multiclass FLAT_Real_ScratchAllAddr_gfx11 op, string ps, string opName, int renamed = false> : + FLAT_Real_Base_gfx11, + FLAT_Real_SADDR_gfx11, + FLAT_Real_ST_gfx11, + FLAT_Real_SVS_gfx11; + +// ENC_FLAT. 
+defm FLAT_LOAD_U8 : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>; +defm FLAT_LOAD_I8 : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>; +defm FLAT_LOAD_U16 : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>; +defm FLAT_LOAD_I16 : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>; +defm FLAT_LOAD_B32 : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>; +defm FLAT_LOAD_B64 : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>; +defm FLAT_LOAD_B96 : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>; +defm FLAT_LOAD_B128 : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>; +defm FLAT_STORE_B8 : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>; +defm FLAT_STORE_B16 : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>; +defm FLAT_STORE_B32 : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>; +defm FLAT_STORE_B64 : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>; +defm FLAT_STORE_B96 : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>; +defm FLAT_STORE_B128 : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>; +defm FLAT_LOAD_D16_U8 : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">; +defm FLAT_LOAD_D16_I8 : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">; +defm FLAT_LOAD_D16_B16 : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">; +defm FLAT_LOAD_D16_HI_U8 : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">; +defm FLAT_LOAD_D16_HI_I8 : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">; +defm FLAT_LOAD_D16_HI_B16 : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">; +defm FLAT_STORE_D16_HI_B8 : 
FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">; +defm FLAT_STORE_D16_HI_B16 : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">; +defm FLAT_ATOMIC_SWAP_B32 : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>; +defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>; +defm FLAT_ATOMIC_ADD_U32 : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>; +defm FLAT_ATOMIC_SUB_U32 : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>; +defm FLAT_ATOMIC_MIN_I32 : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>; +defm FLAT_ATOMIC_MIN_U32 : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>; +defm FLAT_ATOMIC_MAX_I32 : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>; +defm FLAT_ATOMIC_MAX_U32 : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>; +defm FLAT_ATOMIC_AND_B32 : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>; +defm FLAT_ATOMIC_OR_B32 : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>; +defm FLAT_ATOMIC_XOR_B32 : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>; +defm FLAT_ATOMIC_INC_U32 : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>; +defm FLAT_ATOMIC_DEC_U32 : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>; +defm FLAT_ATOMIC_SWAP_B64 : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>; +defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>; +defm FLAT_ATOMIC_ADD_U64 : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>; +defm FLAT_ATOMIC_SUB_U64 : 
FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>; +defm FLAT_ATOMIC_MIN_I64 : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>; +defm FLAT_ATOMIC_MIN_U64 : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>; +defm FLAT_ATOMIC_MAX_I64 : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>; +defm FLAT_ATOMIC_MAX_U64 : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>; +defm FLAT_ATOMIC_AND_B64 : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>; +defm FLAT_ATOMIC_OR_B64 : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>; +defm FLAT_ATOMIC_XOR_B64 : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>; +defm FLAT_ATOMIC_INC_U64 : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>; +defm FLAT_ATOMIC_DEC_U64 : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>; +defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">; +defm FLAT_ATOMIC_MIN_F32 : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">; +defm FLAT_ATOMIC_MAX_F32 : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">; +defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">; + +// ENC_FLAT_GLBL. 
+defm GLOBAL_LOAD_U8 : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>; +defm GLOBAL_LOAD_I8 : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>; +defm GLOBAL_LOAD_U16 : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>; +defm GLOBAL_LOAD_I16 : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>; +defm GLOBAL_LOAD_B32 : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>; +defm GLOBAL_LOAD_B64 : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>; +defm GLOBAL_LOAD_B96 : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>; +defm GLOBAL_LOAD_B128 : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>; +defm GLOBAL_STORE_B8 : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>; +defm GLOBAL_STORE_B16 : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>; +defm GLOBAL_STORE_B32 : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>; +defm GLOBAL_STORE_B64 : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>; +defm GLOBAL_STORE_B96 : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>; +defm GLOBAL_STORE_B128 : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>; +defm GLOBAL_LOAD_D16_U8 : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">; +defm GLOBAL_LOAD_D16_I8 : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">; +defm GLOBAL_LOAD_D16_B16 : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">; +defm GLOBAL_LOAD_D16_HI_U8 : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">; +defm GLOBAL_LOAD_D16_HI_I8 : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", 
"global_load_d16_hi_i8">; +defm GLOBAL_LOAD_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">; +defm GLOBAL_STORE_D16_HI_B8 : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">; +defm GLOBAL_STORE_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">; +defm GLOBAL_LOAD_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">; +defm GLOBAL_STORE_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">; +defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>; +defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; +defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; +defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; +defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32">; +defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; +defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; +defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>; +defm GLOBAL_ATOMIC_MAX_U32 : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>; +defm GLOBAL_ATOMIC_AND_B32 : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>; +defm GLOBAL_ATOMIC_OR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>; +defm GLOBAL_ATOMIC_XOR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03e, 
"GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>; +defm GLOBAL_ATOMIC_INC_U32 : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>; +defm GLOBAL_ATOMIC_DEC_U32 : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>; +defm GLOBAL_ATOMIC_SWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>; +defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>; +defm GLOBAL_ATOMIC_ADD_U64 : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>; +defm GLOBAL_ATOMIC_SUB_U64 : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>; +defm GLOBAL_ATOMIC_MIN_I64 : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>; +defm GLOBAL_ATOMIC_MIN_U64 : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>; +defm GLOBAL_ATOMIC_MAX_I64 : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>; +defm GLOBAL_ATOMIC_MAX_U64 : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>; +defm GLOBAL_ATOMIC_AND_B64 : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>; +defm GLOBAL_ATOMIC_OR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>; +defm GLOBAL_ATOMIC_XOR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>; +defm GLOBAL_ATOMIC_INC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>; +defm GLOBAL_ATOMIC_DEC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>; +defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", 
"global_atomic_cmpswap_f32">; +defm GLOBAL_ATOMIC_MIN_F32 : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">; +defm GLOBAL_ATOMIC_MAX_F32 : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">; +defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; + +// ENC_FLAT_SCRATCH. +defm SCRATCH_LOAD_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; +defm SCRATCH_LOAD_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; +defm SCRATCH_LOAD_U16 : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>; +defm SCRATCH_LOAD_I16 : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>; +defm SCRATCH_LOAD_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>; +defm SCRATCH_LOAD_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>; +defm SCRATCH_LOAD_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>; +defm SCRATCH_LOAD_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>; +defm SCRATCH_STORE_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>; +defm SCRATCH_STORE_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>; +defm SCRATCH_STORE_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>; +defm SCRATCH_STORE_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>; +defm SCRATCH_STORE_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>; +defm SCRATCH_STORE_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>; +defm 
SCRATCH_LOAD_D16_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">; +defm SCRATCH_LOAD_D16_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">; +defm SCRATCH_LOAD_D16_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">; +defm SCRATCH_LOAD_D16_HI_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">; +defm SCRATCH_LOAD_D16_HI_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">; +defm SCRATCH_LOAD_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">; +defm SCRATCH_STORE_D16_HI_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">; +defm SCRATCH_STORE_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 5e6ddaee8daee0..03fa4f5026652e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -144,7 +144,7 @@ void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo, if (IsFlatSeg) { // Unsigned offset printU16ImmDecOperand(MI, OpNo, O); } else { // Signed offset - if (AMDGPU::isGFX10Plus(STI)) { + if (AMDGPU::isGFX10(STI)) { O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm())); } else { O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm())); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index bc1abd7a588450..c6ff426fca1de1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2198,7 +2198,7 @@ Optional getSMRDEncodedLiteralOffset32(const 
MCSubtargetInfo &ST, } unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) { - // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. + // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+. if (AMDGPU::isGFX10(ST)) return Signed ? 12 : 11; diff --git a/llvm/test/MC/AMDGPU/flat-gfx11-mnemonic.s b/llvm/test/MC/AMDGPU/flat-gfx11-mnemonic.s new file mode 100644 index 00000000000000..2c454e617e7b79 --- /dev/null +++ b/llvm/test/MC/AMDGPU/flat-gfx11-mnemonic.s @@ -0,0 +1,196 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +// FLAT + +flat_load_ubyte v1, v[4:5] +// GFX11: flat_load_u8 v1, v[4:5] ; encoding: [0x00,0x00,0x40,0xdc,0x04,0x00,0x7c,0x01] + +flat_load_sbyte v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x44,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_ushort v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x48,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_sshort v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x4c,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_dword v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_dwordx2 v[1:2], v[3:4] +// GFX11: encoding: [0x00,0x00,0x54,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_dwordx3 v[1:3], v[5:6] +// GFX11: encoding: [0x00,0x00,0x58,0xdc,0x05,0x00,0x7c,0x01] + +flat_load_dwordx4 v[1:4], v[5:6] +// GFX11: encoding: [0x00,0x00,0x5c,0xdc,0x05,0x00,0x7c,0x01] + +flat_store_byte v[3:4], v1 +// GFX11: encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x7c,0x00] + +flat_store_short v[3:4], v1 +// GFX11: encoding: [0x00,0x00,0x64,0xdc,0x03,0x01,0x7c,0x00] + +flat_store_dword v[3:4], v1 offset:16 +// GFX11: encoding: [0x10,0x00,0x68,0xdc,0x03,0x01,0x7c,0x00] + +flat_store_dwordx2 v[1:2], v[3:4] +// GFX11: encoding: [0x00,0x00,0x6c,0xdc,0x01,0x03,0x7c,0x00] + +flat_store_dwordx3 v[1:2], v[3:5] +// GFX11: encoding: [0x00,0x00,0x70,0xdc,0x01,0x03,0x7c,0x00] + +flat_store_dwordx4 v[1:2], v[3:6] +// GFX11: flat_store_b128 v[1:2], v[3:6] ; 
encoding: [0x00,0x00,0x74,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_swap v0, v[1:2], v3 offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xcc,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] offset:2047 glc +// GFX11: flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc ; encoding: [0xff,0x47,0x04,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_add v[3:4], v5 slc +// GFX11: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7c,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xd0,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x08,0xdd,0x03,0x05,0x7c,0x01] + +// GLOBAL No saddr + +global_load_ubyte v1, v[3:4], off +// GFX11: global_load_u8 v1, v[3:4], off ; encoding: [0x00,0x00,0x42,0xdc,0x03,0x00,0x7c,0x01] + +global_load_sbyte v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x46,0xdc,0x03,0x00,0x7c,0x01] + +global_load_ushort v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x4a,0xdc,0x03,0x00,0x7c,0x01] + +global_load_sshort v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x4e,0xdc,0x03,0x00,0x7c,0x01] + +global_load_dword v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x7c,0x01] + +global_load_dwordx2 v[1:2], v[3:4], off +// GFX11: encoding: [0x00,0x00,0x56,0xdc,0x03,0x00,0x7c,0x01] + +global_load_dwordx3 v[1:3], v[5:6], off +// GFX11: encoding: [0x00,0x00,0x5a,0xdc,0x05,0x00,0x7c,0x01] + +global_load_dwordx4 v[1:4], v[5:6], off +// GFX11: encoding: [0x00,0x00,0x5e,0xdc,0x05,0x00,0x7c,0x01] + +global_store_byte v[3:4], v1, off +// GFX11: encoding: [0x00,0x00,0x62,0xdc,0x03,0x01,0x7c,0x00] + +global_store_short v[3:4], v1, off +// GFX11: encoding: [0x00,0x00,0x66,0xdc,0x03,0x01,0x7c,0x00] + +global_store_dword v[3:4], v1, off offset:16 +// GFX11: encoding: [0x10,0x00,0x6a,0xdc,0x03,0x01,0x7c,0x00] + +global_store_dwordx2 v[1:2], v[3:4], off +// GFX11: encoding: [0x00,0x00,0x6e,0xdc,0x01,0x03,0x7c,0x00] + 
+global_store_dwordx3 v[1:2], v[3:5], off +// GFX11: encoding: [0x00,0x00,0x72,0xdc,0x01,0x03,0x7c,0x00] + +global_store_dwordx4 v[1:2], v[3:6], off +// GFX11: encoding: [0x00,0x00,0x76,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_swap v0, v[1:2], v3, off offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xce,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_swap_x2 v[1:2], v[3:4], v[5:6], off offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x06,0xdd,0x03,0x05,0x7c,0x01] + +// SCRATCH + +scratch_load_ubyte v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x41,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_sbyte v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x45,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_ushort v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x49,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_sshort v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x4d,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_dword v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_dwordx2 v[1:2], v2, s1 +// GFX11: encoding: [0x00,0x00,0x55,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_dwordx3 v[1:3], v2, s1 +// GFX11: encoding: [0x00,0x00,0x59,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_dwordx4 v[1:4], v2, s1 +// GFX11: encoding: [0x00,0x00,0x5d,0xdc,0x02,0x00,0x81,0x01] + +scratch_store_byte v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x61,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_short v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x65,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_dword v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x69,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_dwordx2 v1, v[2:3], s3 +// GFX11: encoding: [0x00,0x00,0x6d,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_dwordx3 v1, v[2:4], s3 +// GFX11: encoding: [0x00,0x00,0x71,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_dwordx4 v1, v[2:5], s3 +// GFX11: encoding: [0x00,0x00,0x75,0xdc,0x01,0x02,0x83,0x00] + +scratch_load_dword v1, v2, s1 offset:2047 +// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_dword v1, 
v2, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x02,0x00,0xfc,0x01] + +scratch_load_dword v1, off, s1 offset:2047 +// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x00,0x00,0x01,0x01] + +scratch_load_dword v1, off, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_dword v1, off, off +// GFX11: encoding: [0x00,0x00,0x51,0xdc,0x00,0x00,0x7c,0x01] + +scratch_store_dword v1, v2, s3 offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_dword v1, v2, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x01,0x02,0xfc,0x00] + +scratch_store_dword off, v2, s3 offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x00,0x02,0x03,0x00] + +scratch_store_dword off, v2, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x00,0x02,0x7c,0x00] + +scratch_load_dword v1, v2, s1 offset:4095 +// GFX11: encoding: [0xff,0x0f,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_dword v1, v2, s1 offset:-4096 +// GFX11: encoding: [0x00,0x10,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_store_dword v1, v2, s1 offset:4095 +// GFX11: encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x81,0x00] + +scratch_store_dword v1, v2, s1 offset:-4096 +// GFX11: encoding: [0x00,0x10,0x69,0xdc,0x01,0x02,0x81,0x00] + +scratch_store_dword off, v2, off +// GFX11: encoding: [0x00,0x00,0x69,0xdc,0x00,0x02,0x7c,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11-flat-errs.s b/llvm/test/MC/AMDGPU/gfx11-flat-errs.s new file mode 100644 index 00000000000000..789ecb2e9f7e0d --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11-flat-errs.s @@ -0,0 +1,852 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-GFX9_10-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=VI-GFX9_10-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 2>&1 %s | FileCheck --check-prefix=VI-GFX9_10-ERR --implicit-check-not=error: %s +// RUN: not 
llvm-mc -arch=amdgcn -mcpu=gfx1030 2>&1 %s | FileCheck --check-prefix=VI-GFX9_10-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 2>&1 %s | FileCheck --check-prefix=GFX11-ERR --implicit-check-not=error: %s + +// FLAT + +flat_load_u8 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_i8 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_u16 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_i16 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_b16 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4] offset:-1 +// GFX11-ERR: error: expected a 12-bit unsigned offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4] offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4] offset:4096 +// GFX11-ERR: error: expected a 12-bit unsigned offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4] offset:4 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4] offset:4 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4] offset:4 glc slc dlc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b64 v[1:2], v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b96 v[1:3], v[5:6] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b128 v[1:4], v[5:6] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_i8 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_hi_i8 v1, v[3:4] +// 
VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b8 v[3:4], v1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b16 v[3:4], v1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b32 v[3:4], v1 offset:16 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b32 v[3:4], v1, off +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b32 v[3:4], v1, s[0:1] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b32 v[3:4], v1, s0 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4], off +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4], s[0:1] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4], s0 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_b32 v1, v[3:4], exec_hi +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b32 v[3:4], v1, exec_hi +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b64 v[1:2], v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b96 v[1:2], v[3:5] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_b128 v[1:2], 
v[3:6] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v[1:2], v3 offset:2047 glc +// GFX11-ERR: error: instruction must not use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b64 v[1:2], v[3:4] offset:2047 glc +// GFX11-ERR: error: instruction must not use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b64 v[1:2], v[3:4] glc +// GFX11-ERR: error: instruction must not use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction 
not supported on this GPU + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_add_u32 v[3:4], v5 slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_add_u32 v2, v[3:4], v5 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_add_u32 v1, v[3:4], v5 offset:8 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v[1:2], v[3:4] offset:2047 glc +// GFX11-ERR: error: instruction must not use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b32 v0, v[1:2], 
v[3:4] offset:2047 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b64 v[1:2], v[3:4] offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b64 v[1:2], v[3:4] offset:2047 glc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b64 v[1:2], v[3:4] glc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_u8 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_hi_u8 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_i8 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_hi_i8 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_b16 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_load_d16_hi_b16 v1, v[3:4] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_d16_hi_b8 v[3:4], v1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +flat_store_d16_hi_b16 v[3:4], v1 +// VI-GFX9_10-ERR: 
error: instruction not supported on this GPU + +// GLOBAL No saddr + +global_load_u8 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_i8 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_u16 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_i16 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_b16 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off offset:-1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off offset:2048 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off offset:4096 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: expected a 13-bit signed offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off offset:4 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off offset:4 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off offset:4 glc slc dlc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b64 v[1:2], v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b96 v[1:3], v[5:6], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b128 v[1:4], v[5:6], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_i8 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_i8 v1, v[3:4], 
off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b8 v[3:4], v1, off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b16 v[3:4], v1, off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v[3:4], v1, off offset:16 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v[3:4], v1 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: too few operands for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v[3:4], v1, s[0:1] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v[3:4], v1, s0 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], off, s[0:1] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], s0 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v[3:4], exec_hi +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v[3:4], v1, exec_hi +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b64 v[1:2], v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b96 v[1:2], v[3:5], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b128 v[1:2], v[3:6], off +// 
VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v[1:2], v3 offset:2047 glc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: not a valid operand +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v[1:2], v3 glc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v[3:4], 
v[5:6], off glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_add_u32 v2, v[3:4], off, v5 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_add_u32 v1, v[3:4], off, v5 offset:8 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], off, v[3:4] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v[3:4], off offset:2047 +// GFX11-ERR: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_u8 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_u8 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_i8 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_i8 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_b16 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_b16 v1, v[3:4], off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_d16_hi_b8 v[3:4], v1, off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_d16_hi_b16 v[3:4], v1, off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_addtid_b32 v1, off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +// GLOBAL With saddr + +global_load_u8 v1, v3, s2 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_u8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_i8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU 
+ +global_load_u16 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_i16 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_b16 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:-1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:-4097 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: expected a 13-bit signed offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:2048 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:4096 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: expected a 13-bit signed offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:4 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:4 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] offset:4 glc slc dlc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b64 v[1:2], v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b96 v[1:3], v5, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b128 v[1:4], v5, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_i8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_i8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b8 v3, 
v1, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b16 v3, v1, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v3, v1, s[2:3] offset:16 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v3, v1 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: too few operands for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v3, v1, s[0:1] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v3, v1, s0 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s[2:3], s[0:1] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, s0 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_b32 v1, v3, exec_hi +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b32 v3, v1, exec_hi +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b64 v1, v[2:3], s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b96 v1, v[3:5], s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_b128 v1, v[3:6], s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, s[2:3] offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction 
+// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v[1:2], v3 offset:2047 glc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: not a valid operand +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v[1:2], v3 glc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v1, v3, s[2:3] offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v1, v3, s[2:3] offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v1, v3, s[2:3] glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v1, v3, s[2:3] glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v1, v3, s[2:3] +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v1, v3, s[2:3] slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b32 v0, v[1:2], v3, s[2:3] offset:2047 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] glc slc +// VI-GFX9_10-ERR: error: instruction not 
supported on this GPU + +global_atomic_add_u32 v2, v3, s[2:3], v5 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_add_u32 v1, v3, s[2:3], v5 offset:8 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, v3, s[2:3] offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] offset:2047 glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, s[2:3], v3 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, v3, s[2:3] slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b32 v0, v1, v3, s[2:3] offset:2047 slc +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v3, s[2:3] offset:2047 +// GFX11-ERR: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] offset:2047 glc 
+// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] offset:2047 glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] glc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] glc slc +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_u8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_u8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_i8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_i8 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_b16 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_d16_hi_b16 v1, v3, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_d16_hi_b8 v3, v1, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_store_d16_hi_b16 v3, v1, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +global_load_addtid_b32 v1, s[2:3] +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +// SCRATCH + +scratch_load_u8 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_i8 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_u16 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_i16 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b64 v[1:2], v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + 
+scratch_load_b96 v[1:3], v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b128 v[1:4], v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b8 v1, v2, s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b16 v1, v2, s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 v1, v2, s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b64 v1, v[2:3], s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b96 v1, v[2:4], s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b128 v1, v[2:5], s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_d16_u8 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_d16_hi_u8 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_d16_i8 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_d16_hi_i8 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_d16_b16 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_d16_hi_b16 v1, v2, s1 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_d16_hi_b8 v1, v2, s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_d16_hi_b16 v1, v2, s3 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, v2, s1 offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, v2, off offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, off, s1 offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, off, off offset:2047 +// VI-GFX9_10-ERR: 
error: instruction not supported on this GPU + +scratch_load_b32 v1, off, off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 v1, v2, s3 offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 v1, v2, off offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 off, v2, s3 offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 off, v2, off offset:2047 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, v2, s1 offset:4095 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, v2, s1 offset:-4096 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 v1, v2, s1 offset:4095 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 v1, v2, s1 offset:-4096 +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, v2, s1 offset:4096 +// GFX11-ERR: error: expected a 13-bit signed offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_load_b32 v1, v2, s1 offset:-4097 +// GFX11-ERR: error: expected a 13-bit signed offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 v1, v2, s1 offset:4096 +// GFX11-ERR: error: expected a 13-bit signed offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 v1, v2, s1 offset:-4097 +// GFX11-ERR: error: expected a 13-bit signed offset +// VI-GFX9_10-ERR: error: instruction not supported on this GPU + +scratch_store_b32 off, v2, off +// VI-GFX9_10-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx11_err.s b/llvm/test/MC/AMDGPU/gfx11_err.s index 55edaa37d282a3..aaeb0d0c5085b3 100644 --- a/llvm/test/MC/AMDGPU/gfx11_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_err.s @@ -32,3 +32,6 @@ 
v_interp_p10_f32 v0, v1, v2, v3 wait_exp:8 v_interp_p2_f32 v0, -v1, v2, v3 wait_exp // GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +global_atomic_cmpswap_x2 v[1:4], v3, v[5:8], off offset:2047 glc +// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_flat.s b/llvm/test/MC/AMDGPU/gfx11_flat.s new file mode 100644 index 00000000000000..03420912371367 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_flat.s @@ -0,0 +1,1551 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +flat_atomic_add_f32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0x58,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_add_f32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x58,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_add_u32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xd4,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_add_u32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xd4,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_add_u32 v[3:4], v5 slc +// GFX11: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7c,0x00] + +flat_atomic_add_u64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x0c,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_add_u64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x0c,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_and_b32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xf0,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_and_b32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xf0,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_and_b64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x24,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_and_b64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x24,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] glc slc +// GFX11: encoding: 
[0x00,0xc0,0xd0,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] glc +// GFX11: encoding: [0x00,0x40,0xd0,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0xd0,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xd0,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_cmpswap_b32 v1, v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0xd0,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0xd0,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_cmpswap_b64 v[0:1], v[2:5] offset:64 +// GFX11: [0x40,0x00,0x08,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_cmpswap_b64 v[1:2], v[0:1], v[2:5] offset:64 glc +// GFX11: [0x40,0x40,0x08,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] glc slc +// GFX11: encoding: [0x00,0xc0,0x08,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] glc +// GFX11: encoding: [0x00,0x40,0x08,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0x08,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x08,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_cmpswap_f32 v1, v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x40,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_cmpswap_f32 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x40,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_dec_u32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0x00,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_dec_u32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x00,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_dec_u64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x34,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_dec_u64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x34,0xdd,0x00,0x02,0x7c,0x01] + 
+flat_atomic_inc_u32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xfc,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_inc_u32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xfc,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_inc_u64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x30,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_inc_u64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x30,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_max_f32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0x48,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_max_f32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x48,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_max_i32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xe8,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_max_i32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xe8,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_max_i64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x1c,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_max_i64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x1c,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_max_u32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xec,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_max_u32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xec,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_max_u64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x20,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_max_u64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x20,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_min_f32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0x44,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_min_f32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x44,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_min_i32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xe0,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_min_i32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xe0,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_min_i64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x14,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_min_i64 v[1:2], v[0:1], v[2:3] 
offset:64 glc +// GFX11: [0x40,0x40,0x14,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_min_u32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xe4,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_min_u32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xe4,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_min_u64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x18,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_min_u64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x18,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_or_b32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xf4,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_or_b32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xf4,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_or_b64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x28,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_or_b64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x28,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_sub_u32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xd8,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_sub_u32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xd8,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_sub_u64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x10,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_sub_u64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x10,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_swap_b32 v0, v[1:2], v3 glc slc +// GFX11: encoding: [0x00,0xc0,0xcc,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_swap_b32 v0, v[1:2], v3 glc +// GFX11: encoding: [0x00,0x40,0xcc,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0xcc,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xcc,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_swap_b32 v0, v[1:2], v3 offset:2048 glc +// GFX11: encoding: [0x00,0x48,0xcc,0xdc,0x01,0x03,0x7c,0x00] + +flat_atomic_swap_b32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xcc,0xdc,0x00,0x02,0x7c,0x01] + 
+flat_atomic_swap_b32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xcc,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_swap_b64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x04,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_swap_b64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x04,0xdd,0x00,0x02,0x7c,0x01] + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] glc slc +// GFX11: encoding: [0x00,0xc0,0x04,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] glc +// GFX11: encoding: [0x00,0x40,0x04,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0x04,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x04,0xdd,0x03,0x05,0x7c,0x01] + +flat_atomic_xor_b32 v1, v[0:1], v2 offset:64 glc +// GFX11: [0x40,0x40,0xf8,0xdc,0x00,0x02,0x7c,0x01] + +flat_atomic_xor_b32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0xf8,0xdc,0x00,0x02,0x7c,0x00] + +flat_atomic_xor_b64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x2c,0xdd,0x00,0x02,0x7c,0x00] + +flat_atomic_xor_b64 v[1:2], v[0:1], v[2:3] offset:64 glc +// GFX11: [0x40,0x40,0x2c,0xdd,0x00,0x02,0x7c,0x01] + +flat_load_b128 v[1:4], v[0:1] offset:64 +// GFX11: [0x40,0x00,0x5c,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_b128 v[1:4], v[5:6] +// GFX11: encoding: [0x00,0x00,0x5c,0xdc,0x05,0x00,0x7c,0x01] + +flat_load_b32 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x50,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_b32 v1, v[3:4] offset:2047 +// GFX11: encoding: [0xff,0x07,0x50,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_b32 v1, v[3:4] offset:2048 +// GFX11: encoding: [0x00,0x08,0x50,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_b32 v1, v[3:4] offset:4 glc slc dlc +// GFX11: encoding: [0x04,0xe0,0x50,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_b32 v1, v[3:4] offset:4 glc slc +// GFX11: encoding: [0x04,0xc0,0x50,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_b32 v1, v[3:4] offset:4 glc +// GFX11: encoding: 
[0x04,0x40,0x50,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_b32 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_b64 v[1:2], v[0:1] offset:64 +// GFX11: [0x40,0x00,0x54,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_b64 v[1:2], v[3:4] +// GFX11: encoding: [0x00,0x00,0x54,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_b96 v[1:3], v[0:1] offset:64 +// GFX11: [0x40,0x00,0x58,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_b96 v[1:3], v[5:6] +// GFX11: encoding: [0x00,0x00,0x58,0xdc,0x05,0x00,0x7c,0x01] + +flat_load_d16_b16 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x80,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_d16_b16 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x80,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_b16 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x80,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_hi_b16 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x8c,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_d16_hi_b16 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x8c,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_hi_i8 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x88,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_d16_hi_i8 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x88,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_hi_i8 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x88,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_hi_u8 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x84,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_d16_hi_u8 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x84,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_i8 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x7c,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_d16_i8 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x7c,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_i8 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x7c,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_d16_u8 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x78,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_d16_u8 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x78,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_i16 v1, v[0:1] offset:64 +// GFX11: 
[0x40,0x00,0x4c,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_i16 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x4c,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_i8 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x44,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_i8 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x44,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_u16 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x48,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_u16 v1, v[3:4] +// GFX11: encoding: [0x00,0x00,0x48,0xdc,0x03,0x00,0x7c,0x01] + +flat_load_u8 v1, v[0:1] offset:64 +// GFX11: [0x40,0x00,0x40,0xdc,0x00,0x00,0x7c,0x01] + +flat_load_u8 v1, v[3:4] +// GFX11-LABEL: flat_load_u8 v1, v[3:4] ; encoding: [0x00,0x00,0x40,0xdc,0x03,0x00,0x7c,0x01] + +flat_store_b128 v[0:1], v[2:5] offset:64 +// GFX11: [0x40,0x00,0x74,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_b128 v[1:2], v[3:6] +// GFX11: encoding: [0x00,0x00,0x74,0xdc,0x01,0x03,0x7c,0x00] + +flat_store_b16 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x64,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_b16 v[3:4], v1 +// GFX11: encoding: [0x00,0x00,0x64,0xdc,0x03,0x01,0x7c,0x00] + +flat_store_b32 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x68,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_b32 v[3:4], v1 offset:16 +// GFX11: encoding: [0x10,0x00,0x68,0xdc,0x03,0x01,0x7c,0x00] + +flat_store_b64 v[0:1], v[2:3] offset:64 +// GFX11: [0x40,0x00,0x6c,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_b64 v[1:2], v[3:4] +// GFX11: encoding: [0x00,0x00,0x6c,0xdc,0x01,0x03,0x7c,0x00] + +flat_store_b8 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x60,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_b8 v[3:4], v1 +// GFX11: encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x7c,0x00] + +flat_store_b96 v[0:1], v[2:4] offset:64 +// GFX11: [0x40,0x00,0x70,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_b96 v[1:2], v[3:5] +// GFX11: encoding: [0x00,0x00,0x70,0xdc,0x01,0x03,0x7c,0x00] + +flat_store_d16_hi_b16 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x94,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_d16_hi_b16 v[3:4], v1 +// GFX11: 
encoding: [0x00,0x00,0x94,0xdc,0x03,0x01,0x7c,0x00] + +flat_store_d16_hi_b8 v[0:1], v2 offset:64 +// GFX11: [0x40,0x00,0x90,0xdc,0x00,0x02,0x7c,0x00] + +flat_store_d16_hi_b8 v[3:4], v1 +// GFX11: encoding: [0x00,0x00,0x90,0xdc,0x03,0x01,0x7c,0x00] + +global_atomic_add_f32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x5a,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_add_f32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x5a,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_add_f32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0x5a,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_add_f32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x5a,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_add_u32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xd6,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_add_u32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xd6,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_add_u32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xd6,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_add_u32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xd6,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_add_u64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x0e,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_add_u64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x0e,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_add_u64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x0e,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_add_u64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x0e,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_and_b32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xf2,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_and_b32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xf2,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_and_b32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xf2,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_and_b32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xf2,0xdc,0x00,0x02,0x7c,0x00] + 
+global_atomic_and_b64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x26,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_and_b64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x26,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_and_b64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x26,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_and_b64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x26,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] glc slc +// GFX11: encoding: [0x00,0xc0,0xd2,0xdc,0x01,0x02,0x02,0x00] + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] glc +// GFX11: encoding: [0x00,0x40,0xd2,0xdc,0x01,0x02,0x02,0x00] + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0xd2,0xdc,0x01,0x02,0x02,0x00] + +global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xd2,0xdc,0x01,0x02,0x02,0x00] + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off glc slc +// GFX11: encoding: [0x00,0xc0,0xd2,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off glc +// GFX11: encoding: [0x00,0x40,0xd2,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0xd2,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xd2,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0xd2,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xd2,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_cmpswap_b32 v1, v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0xd2,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_cmpswap_b32 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0xd2,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_cmpswap_b64 v0, v[2:5], s[0:1] offset:64 +// 
GFX11: [0x40,0x00,0x0a,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_cmpswap_b64 v[0:1], v[2:5], off offset:64 +// GFX11: [0x40,0x00,0x0a,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_cmpswap_b64 v[1:2], v0, v[2:5], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x0a,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] glc slc +// GFX11: encoding: [0x00,0xc0,0x0a,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] glc +// GFX11: encoding: [0x00,0x40,0x0a,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0x0a,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x0a,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_cmpswap_b64 v[1:2], v[0:1], v[2:5], off offset:64 glc +// GFX11: [0x40,0x40,0x0a,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off glc slc +// GFX11: encoding: [0x00,0xc0,0x0a,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off glc +// GFX11: encoding: [0x00,0x40,0x0a,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0x0a,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x0a,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_cmpswap_f32 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x42,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_cmpswap_f32 v1, v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x42,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_cmpswap_f32 v1, v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x42,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_cmpswap_f32 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x42,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_csub_u32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: 
[0x40,0x40,0xde,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_csub_u32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xde,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_dec_u32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x02,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_dec_u32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x02,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_dec_u32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0x02,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_dec_u32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x02,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_dec_u64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x36,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_dec_u64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x36,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_dec_u64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x36,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_dec_u64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x36,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_inc_u32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xfe,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_inc_u32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xfe,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_inc_u32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xfe,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_inc_u32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xfe,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_inc_u64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x32,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_inc_u64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x32,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_inc_u64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x32,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_inc_u64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x32,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_max_f32 v0, v2, s[0:1] offset:64 +// GFX11: 
[0x40,0x00,0x4a,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_max_f32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x4a,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_max_f32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0x4a,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_max_f32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x4a,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_max_i32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xea,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_max_i32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xea,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_max_i32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xea,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_max_i32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xea,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_max_i64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x1e,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_max_i64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x1e,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_max_i64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x1e,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_max_i64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x1e,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_max_u32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xee,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_max_u32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xee,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_max_u32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xee,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_max_u32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xee,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_max_u64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x22,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_max_u64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x22,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_max_u64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: 
[0x40,0x40,0x22,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_max_u64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x22,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_min_f32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x46,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_min_f32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x46,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_min_f32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0x46,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_min_f32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x46,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_min_i32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xe2,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_min_i32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xe2,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_min_i32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xe2,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_min_i32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xe2,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_min_i64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x16,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_min_i64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x16,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_min_i64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x16,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_min_i64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x16,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_min_u32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xe6,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_min_u32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xe6,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_min_u32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xe6,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_min_u32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xe6,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_min_u64 v0, v[2:3], s[0:1] offset:64 +// GFX11: 
[0x40,0x00,0x1a,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_min_u64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x1a,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_min_u64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x1a,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_min_u64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x1a,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_or_b32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xf6,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_or_b32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xf6,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_or_b32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xf6,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_or_b32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xf6,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_or_b64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x2a,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_or_b64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x2a,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_or_b64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x2a,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_or_b64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x2a,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_sub_u32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xda,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_sub_u32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xda,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_sub_u32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xda,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_sub_u32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xda,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_sub_u64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x12,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_sub_u64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x12,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_sub_u64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: 
[0x40,0x40,0x12,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_sub_u64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x12,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_swap_b32 v0, v1, v3, s[2:3] glc slc +// GFX11: encoding: [0x00,0xc0,0xce,0xdc,0x01,0x03,0x02,0x00] + +global_atomic_swap_b32 v0, v1, v3, s[2:3] glc +// GFX11: encoding: [0x00,0x40,0xce,0xdc,0x01,0x03,0x02,0x00] + +global_atomic_swap_b32 v0, v1, v3, s[2:3] offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0xce,0xdc,0x01,0x03,0x02,0x00] + +global_atomic_swap_b32 v0, v1, v3, s[2:3] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xce,0xdc,0x01,0x03,0x02,0x00] + +global_atomic_swap_b32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xce,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_swap_b32 v0, v[1:2], v3, off glc slc +// GFX11: encoding: [0x00,0xc0,0xce,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_swap_b32 v0, v[1:2], v3, off glc +// GFX11: encoding: [0x00,0x40,0xce,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0xce,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 glc +// GFX11: encoding: [0xff,0x47,0xce,0xdc,0x01,0x03,0x7c,0x00] + +global_atomic_swap_b32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xce,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_swap_b32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xce,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_swap_b32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xce,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_swap_b64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x06,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_swap_b64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x06,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_swap_b64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x06,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] glc slc +// GFX11: encoding: 
[0x00,0xc0,0x06,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] glc +// GFX11: encoding: [0x00,0x40,0x06,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0x06,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x06,0xdd,0x03,0x05,0x02,0x01] + +global_atomic_swap_b64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x06,0xdd,0x00,0x02,0x7c,0x01] + +global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off glc slc +// GFX11: encoding: [0x00,0xc0,0x06,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off glc +// GFX11: encoding: [0x00,0x40,0x06,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off offset:2047 glc slc +// GFX11: encoding: [0xff,0xc7,0x06,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off offset:2047 glc +// GFX11: encoding: [0xff,0x47,0x06,0xdd,0x03,0x05,0x7c,0x01] + +global_atomic_xor_b32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xfa,0xdc,0x00,0x02,0x00,0x00] + +global_atomic_xor_b32 v1, v0, v2, s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0xfa,0xdc,0x00,0x02,0x00,0x01] + +global_atomic_xor_b32 v1, v[0:1], v2, off offset:64 glc +// GFX11: [0x40,0x40,0xfa,0xdc,0x00,0x02,0x7c,0x01] + +global_atomic_xor_b32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0xfa,0xdc,0x00,0x02,0x7c,0x00] + +global_atomic_xor_b64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x2e,0xdd,0x00,0x02,0x00,0x00] + +global_atomic_xor_b64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x2e,0xdd,0x00,0x02,0x7c,0x00] + +global_atomic_xor_b64 v[1:2], v0, v[2:3], s[0:1] offset:64 glc +// GFX11: [0x40,0x40,0x2e,0xdd,0x00,0x02,0x00,0x01] + +global_atomic_xor_b64 v[1:2], v[0:1], v[2:3], off offset:64 glc +// GFX11: [0x40,0x40,0x2e,0xdd,0x00,0x02,0x7c,0x01] + +global_load_addtid_b32 v1, off offset:64 +// 
GFX11: [0x40,0x00,0xa2,0xdc,0x00,0x00,0x7c,0x01] + +global_load_addtid_b32 v1, off +// GFX11: encoding: [0x00,0x00,0xa2,0xdc,0x00,0x00,0x7c,0x01] + +global_load_addtid_b32 v1, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xa2,0xdc,0x00,0x00,0x00,0x01] + +global_load_addtid_b32 v1, s[2:3] +// GFX11: encoding: [0x00,0x00,0xa2,0xdc,0x00,0x00,0x02,0x01] + +global_load_b128 v[1:4], v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x5e,0xdc,0x00,0x00,0x00,0x01] + +global_load_b128 v[1:4], v5, s[2:3] +// GFX11: encoding: [0x00,0x00,0x5e,0xdc,0x05,0x00,0x02,0x01] + +global_load_b128 v[1:4], v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x5e,0xdc,0x00,0x00,0x7c,0x01] + +global_load_b128 v[1:4], v[5:6], off +// GFX11: encoding: [0x00,0x00,0x5e,0xdc,0x05,0x00,0x7c,0x01] + +global_load_b32 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x52,0xdc,0x00,0x00,0x00,0x01] + +global_load_b32 v1, v3, s[2:3] offset:2047 +// GFX11: encoding: [0xff,0x07,0x52,0xdc,0x03,0x00,0x02,0x01] + +global_load_b32 v1, v3, s[2:3] offset:4 glc slc dlc +// GFX11: encoding: [0x04,0xe0,0x52,0xdc,0x03,0x00,0x02,0x01] + +global_load_b32 v1, v3, s[2:3] offset:4 glc slc +// GFX11: encoding: [0x04,0xc0,0x52,0xdc,0x03,0x00,0x02,0x01] + +global_load_b32 v1, v3, s[2:3] offset:4 glc +// GFX11: encoding: [0x04,0x40,0x52,0xdc,0x03,0x00,0x02,0x01] + +global_load_b32 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x02,0x01] + +global_load_b32 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x52,0xdc,0x00,0x00,0x7c,0x01] + +global_load_b32 v1, v[3:4], off offset:2047 +// GFX11: encoding: [0xff,0x07,0x52,0xdc,0x03,0x00,0x7c,0x01] + +global_load_b32 v1, v[3:4], off offset:4 glc slc dlc +// GFX11: encoding: [0x04,0xe0,0x52,0xdc,0x03,0x00,0x7c,0x01] + +global_load_b32 v1, v[3:4], off offset:4 glc slc +// GFX11: encoding: [0x04,0xc0,0x52,0xdc,0x03,0x00,0x7c,0x01] + +global_load_b32 v1, v[3:4], off offset:4 glc +// GFX11: encoding: [0x04,0x40,0x52,0xdc,0x03,0x00,0x7c,0x01] + +global_load_b32 v1, v[3:4], off +// GFX11: 
encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x7c,0x01] + +global_load_b64 v[1:2], v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x56,0xdc,0x00,0x00,0x00,0x01] + +global_load_b64 v[1:2], v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x56,0xdc,0x03,0x00,0x02,0x01] + +global_load_b64 v[1:2], v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x56,0xdc,0x00,0x00,0x7c,0x01] + +global_load_b64 v[1:2], v[3:4], off +// GFX11: encoding: [0x00,0x00,0x56,0xdc,0x03,0x00,0x7c,0x01] + +global_load_b96 v[1:3], v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x5a,0xdc,0x00,0x00,0x00,0x01] + +global_load_b96 v[1:3], v5, s[2:3] +// GFX11: encoding: [0x00,0x00,0x5a,0xdc,0x05,0x00,0x02,0x01] + +global_load_b96 v[1:3], v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x5a,0xdc,0x00,0x00,0x7c,0x01] + +global_load_b96 v[1:3], v[5:6], off +// GFX11: encoding: [0x00,0x00,0x5a,0xdc,0x05,0x00,0x7c,0x01] + +global_load_d16_b16 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x82,0xdc,0x00,0x00,0x00,0x01] + +global_load_d16_b16 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x82,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_b16 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x82,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_b16 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x82,0xdc,0x00,0x00,0x7c,0x01] + +global_load_d16_b16 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x82,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_b16 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x82,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_hi_b16 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x8e,0xdc,0x00,0x00,0x00,0x01] + +global_load_d16_hi_b16 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x8e,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_hi_b16 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x8e,0xdc,0x00,0x00,0x7c,0x01] + +global_load_d16_hi_b16 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x8e,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_hi_i8 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x8a,0xdc,0x00,0x00,0x00,0x01] + 
+global_load_d16_hi_i8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x8a,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_hi_i8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x8a,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_hi_i8 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x8a,0xdc,0x00,0x00,0x7c,0x01] + +global_load_d16_hi_i8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x8a,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_hi_i8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x8a,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_hi_u8 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x86,0xdc,0x00,0x00,0x00,0x01] + +global_load_d16_hi_u8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x86,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_hi_u8 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x86,0xdc,0x00,0x00,0x7c,0x01] + +global_load_d16_hi_u8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x86,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_i8 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x7e,0xdc,0x00,0x00,0x00,0x01] + +global_load_d16_i8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x7e,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_i8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x7e,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_i8 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x7e,0xdc,0x00,0x00,0x7c,0x01] + +global_load_d16_i8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x7e,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_i8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x7e,0xdc,0x03,0x00,0x7c,0x01] + +global_load_d16_u8 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x7a,0xdc,0x00,0x00,0x00,0x01] + +global_load_d16_u8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x7a,0xdc,0x03,0x00,0x02,0x01] + +global_load_d16_u8 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x7a,0xdc,0x00,0x00,0x7c,0x01] + +global_load_d16_u8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x7a,0xdc,0x03,0x00,0x7c,0x01] + +global_load_i16 v1, v0, s[0:1] offset:64 +// GFX11: 
[0x40,0x00,0x4e,0xdc,0x00,0x00,0x00,0x01] + +global_load_i16 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x4e,0xdc,0x03,0x00,0x02,0x01] + +global_load_i16 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x4e,0xdc,0x00,0x00,0x7c,0x01] + +global_load_i16 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x4e,0xdc,0x03,0x00,0x7c,0x01] + +global_load_i8 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x46,0xdc,0x00,0x00,0x00,0x01] + +global_load_i8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x46,0xdc,0x03,0x00,0x02,0x01] + +global_load_i8 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x46,0xdc,0x00,0x00,0x7c,0x01] + +global_load_i8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x46,0xdc,0x03,0x00,0x7c,0x01] + +global_load_u16 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x4a,0xdc,0x00,0x00,0x00,0x01] + +global_load_u16 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x4a,0xdc,0x03,0x00,0x02,0x01] + +global_load_u16 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x4a,0xdc,0x00,0x00,0x7c,0x01] + +global_load_u16 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x4a,0xdc,0x03,0x00,0x7c,0x01] + +global_load_u8 v1, v0, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x42,0xdc,0x00,0x00,0x00,0x01] + +global_load_u8 v1, v3, s[2:3] +// GFX11: encoding: [0x00,0x00,0x42,0xdc,0x03,0x00,0x02,0x01] + +global_load_u8 v1, v[0:1], off offset:64 +// GFX11: [0x40,0x00,0x42,0xdc,0x00,0x00,0x7c,0x01] + +global_load_u8 v1, v[3:4], off +// GFX11: encoding: [0x00,0x00,0x42,0xdc,0x03,0x00,0x7c,0x01] + +global_store_addtid_b32 v2, off offset:64 +// GFX11: [0x40,0x00,0xa6,0xdc,0x00,0x02,0x7c,0x00] + +global_store_addtid_b32 v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0xa6,0xdc,0x00,0x02,0x00,0x00] + +global_store_b128 v0, v[2:5], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x76,0xdc,0x00,0x02,0x00,0x00] + +global_store_b128 v1, v[3:6], s[2:3] +// GFX11: encoding: [0x00,0x00,0x76,0xdc,0x01,0x03,0x02,0x00] + +global_store_b128 v[0:1], v[2:5], off offset:64 +// GFX11: 
[0x40,0x00,0x76,0xdc,0x00,0x02,0x7c,0x00] + +global_store_b128 v[1:2], v[3:6], off +// GFX11: encoding: [0x00,0x00,0x76,0xdc,0x01,0x03,0x7c,0x00] + +global_store_b16 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x66,0xdc,0x00,0x02,0x00,0x00] + +global_store_b16 v3, v1, s[2:3] +// GFX11: encoding: [0x00,0x00,0x66,0xdc,0x03,0x01,0x02,0x00] + +global_store_b16 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] + +global_store_b16 v[3:4], v1, off +// GFX11: encoding: [0x00,0x00,0x66,0xdc,0x03,0x01,0x7c,0x00] + +global_store_b32 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x6a,0xdc,0x00,0x02,0x00,0x00] + +global_store_b32 v3, v1, s[2:3] offset:16 +// GFX11: encoding: [0x10,0x00,0x6a,0xdc,0x03,0x01,0x02,0x00] + +global_store_b32 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x6a,0xdc,0x00,0x02,0x7c,0x00] + +global_store_b32 v[3:4], v1, off offset:16 +// GFX11: encoding: [0x10,0x00,0x6a,0xdc,0x03,0x01,0x7c,0x00] + +global_store_b64 v0, v[2:3], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x6e,0xdc,0x00,0x02,0x00,0x00] + +global_store_b64 v1, v[2:3], s[2:3] +// GFX11: encoding: [0x00,0x00,0x6e,0xdc,0x01,0x02,0x02,0x00] + +global_store_b64 v[0:1], v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x6e,0xdc,0x00,0x02,0x7c,0x00] + +global_store_b64 v[1:2], v[3:4], off +// GFX11: encoding: [0x00,0x00,0x6e,0xdc,0x01,0x03,0x7c,0x00] + +global_store_b8 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x62,0xdc,0x00,0x02,0x00,0x00] + +global_store_b8 v3, v1, s[2:3] +// GFX11: encoding: [0x00,0x00,0x62,0xdc,0x03,0x01,0x02,0x00] + +global_store_b8 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x62,0xdc,0x00,0x02,0x7c,0x00] + +global_store_b8 v[3:4], v1, off +// GFX11: encoding: [0x00,0x00,0x62,0xdc,0x03,0x01,0x7c,0x00] + +global_store_b96 v0, v[2:4], s[0:1] offset:64 +// GFX11: [0x40,0x00,0x72,0xdc,0x00,0x02,0x00,0x00] + +global_store_b96 v1, v[3:5], s[2:3] +// GFX11: encoding: [0x00,0x00,0x72,0xdc,0x01,0x03,0x02,0x00] + +global_store_b96 v[0:1], v[2:4], off 
offset:64 +// GFX11: [0x40,0x00,0x72,0xdc,0x00,0x02,0x7c,0x00] + +global_store_b96 v[1:2], v[3:5], off +// GFX11: encoding: [0x00,0x00,0x72,0xdc,0x01,0x03,0x7c,0x00] + +global_store_d16_hi_b16 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x96,0xdc,0x00,0x02,0x00,0x00] + +global_store_d16_hi_b16 v3, v1, s[2:3] +// GFX11: encoding: [0x00,0x00,0x96,0xdc,0x03,0x01,0x02,0x00] + +global_store_d16_hi_b16 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x96,0xdc,0x00,0x02,0x7c,0x00] + +global_store_d16_hi_b16 v[3:4], v1, off +// GFX11: encoding: [0x00,0x00,0x96,0xdc,0x03,0x01,0x7c,0x00] + +global_store_d16_hi_b8 v0, v2, s[0:1] offset:64 +// GFX11: [0x40,0x00,0x92,0xdc,0x00,0x02,0x00,0x00] + +global_store_d16_hi_b8 v3, v1, s[2:3] +// GFX11: encoding: [0x00,0x00,0x92,0xdc,0x03,0x01,0x02,0x00] + +global_store_d16_hi_b8 v[0:1], v2, off offset:64 +// GFX11: [0x40,0x00,0x92,0xdc,0x00,0x02,0x7c,0x00] + +global_store_d16_hi_b8 v[3:4], v1, off +// GFX11: encoding: [0x00,0x00,0x92,0xdc,0x03,0x01,0x7c,0x00] + +scratch_load_b128 v[1:4], off, off offset:64 +// GFX11: [0x40,0x00,0x5d,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_b128 v[1:4], off, s0 offset:64 +// GFX11: [0x40,0x00,0x5d,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_b128 v[1:4], v0, off offset:64 +// GFX11: [0x40,0x00,0x5d,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_b128 v[1:4], v0, s0 offset:64 +// GFX11: [0x40,0x00,0x5d,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_b128 v[1:4], v2, s1 +// GFX11: encoding: [0x00,0x00,0x5d,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_b32 v1, off, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_b32 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x51,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_b32 v1, off, off +// GFX11: encoding: [0x00,0x00,0x51,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_b32 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x51,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_b32 v1, off, s1 offset:2047 +// GFX11: encoding: 
[0xff,0x07,0x51,0xdc,0x00,0x00,0x01,0x01] + +scratch_load_b32 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x51,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_b32 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x51,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_b32 v1, v2, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x02,0x00,0xfc,0x01] + +scratch_load_b32 v1, v2, s1 offset:-4096 +// GFX11: encoding: [0x00,0x10,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_b32 v1, v2, s1 offset:2047 +// GFX11: encoding: [0xff,0x07,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_b32 v1, v2, s1 offset:4095 +// GFX11: encoding: [0xff,0x0f,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_b32 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x51,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_b64 v[1:2], off, off offset:64 +// GFX11: [0x40,0x00,0x55,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_b64 v[1:2], off, s0 offset:64 +// GFX11: [0x40,0x00,0x55,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_b64 v[1:2], v0, off offset:64 +// GFX11: [0x40,0x00,0x55,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_b64 v[1:2], v0, s0 offset:64 +// GFX11: [0x40,0x00,0x55,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_b64 v[1:2], v2, s1 +// GFX11: encoding: [0x00,0x00,0x55,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_b96 v[1:3], off, off offset:64 +// GFX11: [0x40,0x00,0x59,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_b96 v[1:3], off, s0 offset:64 +// GFX11: [0x40,0x00,0x59,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_b96 v[1:3], v0, off offset:64 +// GFX11: [0x40,0x00,0x59,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_b96 v[1:3], v0, s0 offset:64 +// GFX11: [0x40,0x00,0x59,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_b96 v[1:3], v2, s1 +// GFX11: encoding: [0x00,0x00,0x59,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_d16_b16 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x81,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_d16_b16 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x81,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_d16_b16 v1, v0, off offset:64 +// GFX11: 
[0x40,0x00,0x81,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_d16_b16 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x81,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_d16_b16 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x81,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_d16_hi_b16 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x8d,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_d16_hi_b16 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x8d,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_d16_hi_b16 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x8d,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_d16_hi_b16 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x8d,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_d16_hi_b16 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x8d,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_d16_hi_i8 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x89,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_d16_hi_i8 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x89,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_d16_hi_i8 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x89,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_d16_hi_i8 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x89,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_d16_hi_i8 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x89,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_d16_hi_u8 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x85,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_d16_hi_u8 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x85,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_d16_hi_u8 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x85,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_d16_hi_u8 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x85,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_d16_hi_u8 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x85,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_d16_i8 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x7d,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_d16_i8 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x7d,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_d16_i8 v1, v0, off offset:64 
+// GFX11: [0x40,0x00,0x7d,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_d16_i8 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x7d,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_d16_i8 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x7d,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_d16_u8 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x79,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_d16_u8 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x79,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_d16_u8 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x79,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_d16_u8 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x79,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_d16_u8 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x79,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_i16 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x4d,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_i16 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x4d,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_i16 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x4d,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_i16 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x4d,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_i16 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x4d,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_i8 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x45,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_i8 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x45,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_i8 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x45,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_i8 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x45,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_i8 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x45,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_u16 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x49,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_u16 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x49,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_u16 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x49,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_u16 v1, v0, s0 
offset:64 +// GFX11: [0x40,0x00,0x49,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_u16 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x49,0xdc,0x02,0x00,0x81,0x01] + +scratch_load_u8 v1, off, off offset:64 +// GFX11: [0x40,0x00,0x41,0xdc,0x00,0x00,0x7c,0x01] + +scratch_load_u8 v1, off, s0 offset:64 +// GFX11: [0x40,0x00,0x41,0xdc,0x00,0x00,0x00,0x01] + +scratch_load_u8 v1, v0, off offset:64 +// GFX11: [0x40,0x00,0x41,0xdc,0x00,0x00,0xfc,0x01] + +scratch_load_u8 v1, v0, s0 offset:64 +// GFX11: [0x40,0x00,0x41,0xdc,0x00,0x00,0x80,0x01] + +scratch_load_u8 v1, v2, s1 +// GFX11: encoding: [0x00,0x00,0x41,0xdc,0x02,0x00,0x81,0x01] + +scratch_store_b128 off, v[2:5], off offset:64 +// GFX11: [0x40,0x00,0x75,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b128 off, v[2:5], s0 offset:64 +// GFX11: [0x40,0x00,0x75,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_b128 v0, v[2:5], off offset:64 +// GFX11: [0x40,0x00,0x75,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_b128 v0, v[2:5], s0 offset:64 +// GFX11: [0x40,0x00,0x75,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_b128 v1, v[2:5], s3 +// GFX11: encoding: [0x00,0x00,0x75,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_b16 off, v2, off offset:64 +// GFX11: [0x40,0x00,0x65,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b16 off, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x65,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_b16 v0, v2, off offset:64 +// GFX11: [0x40,0x00,0x65,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_b16 v0, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x65,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_b16 v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x65,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_b32 off, v2, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b32 off, v2, off offset:64 +// GFX11: [0x40,0x00,0x69,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b32 off, v2, off +// GFX11: encoding: [0x00,0x00,0x69,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b32 off, v2, s0 offset:64 +// GFX11: 
[0x40,0x00,0x69,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_b32 off, v2, s3 offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x00,0x02,0x03,0x00] + +scratch_store_b32 v0, v2, off offset:64 +// GFX11: [0x40,0x00,0x69,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_b32 v0, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x69,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_b32 v1, v2, off offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x01,0x02,0xfc,0x00] + +scratch_store_b32 v1, v2, s1 offset:-4096 +// GFX11: encoding: [0x00,0x10,0x69,0xdc,0x01,0x02,0x81,0x00] + +scratch_store_b32 v1, v2, s1 offset:4095 +// GFX11: encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x81,0x00] + +scratch_store_b32 v1, v2, s3 offset:2047 +// GFX11: encoding: [0xff,0x07,0x69,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_b32 v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x69,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_b64 off, v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x6d,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b64 off, v[2:3], s0 offset:64 +// GFX11: [0x40,0x00,0x6d,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_b64 v0, v[2:3], off offset:64 +// GFX11: [0x40,0x00,0x6d,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_b64 v0, v[2:3], s0 offset:64 +// GFX11: [0x40,0x00,0x6d,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_b64 v1, v[2:3], s3 +// GFX11: encoding: [0x00,0x00,0x6d,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_b8 off, v2, off offset:64 +// GFX11: [0x40,0x00,0x61,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b8 off, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x61,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_b8 v0, v2, off offset:64 +// GFX11: [0x40,0x00,0x61,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_b8 v0, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x61,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_b8 v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x61,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_b96 off, v[2:4], off offset:64 +// GFX11: [0x40,0x00,0x71,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_b96 off, v[2:4], s0 offset:64 +// GFX11: 
[0x40,0x00,0x71,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_b96 v0, v[2:4], off offset:64 +// GFX11: [0x40,0x00,0x71,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_b96 v0, v[2:4], s0 offset:64 +// GFX11: [0x40,0x00,0x71,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_b96 v1, v[2:4], s3 +// GFX11: encoding: [0x00,0x00,0x71,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_d16_hi_b16 off, v2, off offset:64 +// GFX11: [0x40,0x00,0x95,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_d16_hi_b16 off, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x95,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_d16_hi_b16 v0, v2, off offset:64 +// GFX11: [0x40,0x00,0x95,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_d16_hi_b16 v0, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x95,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_d16_hi_b16 v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x95,0xdc,0x01,0x02,0x83,0x00] + +scratch_store_d16_hi_b8 off, v2, off offset:64 +// GFX11: [0x40,0x00,0x91,0xdc,0x00,0x02,0x7c,0x00] + +scratch_store_d16_hi_b8 off, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x91,0xdc,0x00,0x02,0x00,0x00] + +scratch_store_d16_hi_b8 v0, v2, off offset:64 +// GFX11: [0x40,0x00,0x91,0xdc,0x00,0x02,0xfc,0x00] + +scratch_store_d16_hi_b8 v0, v2, s0 offset:64 +// GFX11: [0x40,0x00,0x91,0xdc,0x00,0x02,0x80,0x00] + +scratch_store_d16_hi_b8 v1, v2, s3 +// GFX11: encoding: [0x00,0x00,0x91,0xdc,0x01,0x02,0x83,0x00] + diff --git a/llvm/test/MC/Disassembler/AMDGPU/flat-gfx11.txt b/llvm/test/MC/Disassembler/AMDGPU/flat-gfx11.txt new file mode 100644 index 00000000000000..3c550d91b4d0f3 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/flat-gfx11.txt @@ -0,0 +1,536 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX11 + + +# FLAT + + +# GFX11: flat_load_u8 v1, v[3:4] +0x00,0x00,0x40,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_i8 v1, v[3:4] +0x00,0x00,0x44,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_u16 v1, v[3:4] +0x00,0x00,0x48,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: 
flat_load_i16 v1, v[3:4] +0x00,0x00,0x4c,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_b16 v1, v[3:4] +0x00,0x00,0x80,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_b32 v1, v[3:4] +0x00,0x00,0x50,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_b32 v1, v[3:4] offset:2047 +0xff,0x07,0x50,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_b32 v1, v[3:4] offset:4 glc +0x04,0x40,0x50,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_b32 v1, v[3:4] offset:4 glc slc +0x04,0xc0,0x50,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_b32 v1, v[3:4] offset:4 glc slc dlc +0x04,0xe0,0x50,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_b64 v[1:2], v[3:4] +0x00,0x00,0x54,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_b96 v[1:3], v[5:6] +0x00,0x00,0x58,0xdc,0x05,0x00,0x7c,0x01 + +# GFX11: flat_load_b128 v[1:4], v[5:6] +0x00,0x00,0x5c,0xdc,0x05,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_i8 v1, v[3:4] +0x00,0x00,0x7c,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_hi_i8 v1, v[3:4] +0x00,0x00,0x88,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_store_b8 v[3:4], v1 +0x00,0x00,0x60,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: flat_store_b16 v[3:4], v1 +0x00,0x00,0x64,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: flat_store_b32 v[3:4], v1 offset:16 +0x10,0x00,0x68,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: flat_store_b64 v[1:2], v[3:4] +0x00,0x00,0x6c,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_store_b96 v[1:2], v[3:5] +0x00,0x00,0x70,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_store_b128 v[1:2], v[3:6] +0x00,0x00,0x74,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 glc +0xff,0x47,0xcc,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_swap_b32 v0, v[1:2], v3 offset:2047 glc slc +0xff,0xc7,0xcc,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_swap_b32 v0, v[1:2], v3 glc +0x00,0x40,0xcc,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_swap_b32 v0, v[1:2], v3 glc slc +0x00,0xc0,0xcc,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc 
+0xff,0x47,0x04,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] offset:2047 glc slc +0xff,0xc7,0x04,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] glc +0x00,0x40,0x04,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_atomic_swap_b64 v[1:2], v[3:4], v[5:6] glc slc +0x00,0xc0,0x04,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] offset:2047 glc +0xff,0x47,0xd0,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] offset:2047 glc slc +0xff,0xc7,0xd0,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] glc +0x00,0x40,0xd0,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_cmpswap_b32 v0, v[1:2], v[3:4] glc slc +0x00,0xc0,0xd0,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] offset:2047 glc +0xff,0x47,0x08,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] offset:2047 glc slc +0xff,0xc7,0x08,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] glc +0x00,0x40,0x08,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8] glc slc +0x00,0xc0,0x08,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: flat_load_d16_u8 v1, v[3:4] +0x00,0x00,0x78,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_hi_u8 v1, v[3:4] +0x00,0x00,0x84,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_i8 v1, v[3:4] +0x00,0x00,0x7c,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_hi_i8 v1, v[3:4] +0x00,0x00,0x88,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_b16 v1, v[3:4] +0x00,0x00,0x80,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_load_d16_hi_b16 v1, v[3:4] +0x00,0x00,0x8c,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: flat_store_d16_hi_b8 v[3:4], v1 +0x00,0x00,0x90,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: flat_store_d16_hi_b16 v[3:4], v1 +0x00,0x00,0x94,0xdc,0x03,0x01,0x7c,0x00 + + +# GLOBAL No saddr + + +# GFX11: global_load_u8 v1, v[3:4], off 
+0x00,0x00,0x42,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_i8 v1, v[3:4], off +0x00,0x00,0x46,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_u16 v1, v[3:4], off +0x00,0x00,0x4a,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_i16 v1, v[3:4], off +0x00,0x00,0x4e,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_d16_b16 v1, v[3:4], off +0x00,0x00,0x82,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_b32 v1, v[3:4], off +0x00,0x00,0x52,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_b32 v1, v[3:4], off offset:2047 +0xff,0x07,0x52,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_b32 v1, v[3:4], off offset:4 glc +0x04,0x40,0x52,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_b32 v1, v[3:4], off offset:4 glc slc +0x04,0xc0,0x52,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_b32 v1, v[3:4], off offset:4 glc slc dlc +0x04,0xe0,0x52,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_b64 v[1:2], v[3:4], off +0x00,0x00,0x56,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_b96 v[1:3], v[5:6], off +0x00,0x00,0x5a,0xdc,0x05,0x00,0x7c,0x01 + +# GFX11: global_load_b128 v[1:4], v[5:6], off +0x00,0x00,0x5e,0xdc,0x05,0x00,0x7c,0x01 + +# GFX11: global_load_d16_i8 v1, v[3:4], off +0x00,0x00,0x7e,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_d16_hi_i8 v1, v[3:4], off +0x00,0x00,0x8a,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_store_b8 v[3:4], v1, off +0x00,0x00,0x62,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: global_store_b16 v[3:4], v1, off +0x00,0x00,0x66,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: global_store_b32 v[3:4], v1, off offset:16 +0x10,0x00,0x6a,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: global_store_b64 v[1:2], v[3:4], off +0x00,0x00,0x6e,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_store_b96 v[1:2], v[3:5], off +0x00,0x00,0x72,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_store_b128 v[1:2], v[3:6], off +0x00,0x00,0x76,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 glc +0xff,0x47,0xce,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: 
global_atomic_swap_b32 v0, v[1:2], v3, off offset:2047 glc slc +0xff,0xc7,0xce,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_swap_b32 v0, v[1:2], v3, off glc +0x00,0x40,0xce,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_swap_b32 v0, v[1:2], v3, off glc slc +0x00,0xc0,0xce,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off offset:2047 glc +0xff,0x47,0x06,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off offset:2047 glc slc +0xff,0xc7,0x06,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off glc +0x00,0x40,0x06,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_atomic_swap_b64 v[1:2], v[3:4], v[5:6], off glc slc +0x00,0xc0,0x06,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 glc +0xff,0x47,0xd2,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off offset:2047 glc slc +0xff,0xc7,0xd2,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off glc +0x00,0x40,0xd2,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_cmpswap_b32 v0, v[1:2], v[3:4], off glc slc +0x00,0xc0,0xd2,0xdc,0x01,0x03,0x7c,0x00 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off offset:2047 glc +0xff,0x47,0x0a,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off offset:2047 glc slc +0xff,0xc7,0x0a,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off glc +0x00,0x40,0x0a,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v[3:4], v[5:8], off glc slc +0x00,0xc0,0x0a,0xdd,0x03,0x05,0x7c,0x01 + +# GFX11: global_load_d16_u8 v1, v[3:4], off +0x00,0x00,0x7a,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_d16_hi_u8 v1, v[3:4], off +0x00,0x00,0x86,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_d16_i8 v1, v[3:4], off +0x00,0x00,0x7e,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: 
global_load_d16_hi_i8 v1, v[3:4], off +0x00,0x00,0x8a,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_d16_b16 v1, v[3:4], off +0x00,0x00,0x82,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_load_d16_hi_b16 v1, v[3:4], off +0x00,0x00,0x8e,0xdc,0x03,0x00,0x7c,0x01 + +# GFX11: global_store_d16_hi_b8 v[3:4], v1, off +0x00,0x00,0x92,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: global_store_d16_hi_b16 v[3:4], v1, off +0x00,0x00,0x96,0xdc,0x03,0x01,0x7c,0x00 + +# GFX11: global_load_addtid_b32 v1, off +0x00,0x00,0xa2,0xdc,0x00,0x00,0x7c,0x01 + +# GFX11: global_atomic_add_f32 v[1:2], v2, off offset:-4096 +0x00,0x10,0x5a,0xdd,0x01,0x02,0x7c,0x00 + +# GFX11: global_atomic_add_f32 v[1:2], v2, off offset:4095 +0xff,0x0f,0x5a,0xdd,0x01,0x02,0x7c,0x00 + + +# GLOBAL With saddr + + +# GFX11: global_load_u8 v1, v3, s[2:3] +0x00,0x00,0x42,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_i8 v1, v3, s[2:3] +0x00,0x00,0x46,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_u16 v1, v3, s[2:3] +0x00,0x00,0x4a,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_i16 v1, v3, s[2:3] +0x00,0x00,0x4e,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_d16_b16 v1, v3, s[2:3] +0x00,0x00,0x82,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_b32 v1, v3, s[2:3] +0x00,0x00,0x52,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_b32 v1, v3, s[2:3] offset:2047 +0xff,0x07,0x52,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_b32 v1, v3, s[2:3] offset:4 glc +0x04,0x40,0x52,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_b32 v1, v3, s[2:3] offset:4 glc slc +0x04,0xc0,0x52,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_b32 v1, v3, s[2:3] offset:4 glc slc dlc +0x04,0xe0,0x52,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_b64 v[1:2], v3, s[2:3] +0x00,0x00,0x56,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_b96 v[1:3], v5, s[2:3] +0x00,0x00,0x5a,0xdc,0x05,0x00,0x02,0x01 + +# GFX11: global_load_b128 v[1:4], v5, s[2:3] +0x00,0x00,0x5e,0xdc,0x05,0x00,0x02,0x01 + +# GFX11: global_load_d16_i8 v1, v3, s[2:3] 
+0x00,0x00,0x7e,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_d16_hi_i8 v1, v3, s[2:3] +0x00,0x00,0x8a,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_store_b8 v3, v1, s[2:3] +0x00,0x00,0x62,0xdc,0x03,0x01,0x02,0x00 + +# GFX11: global_store_b16 v3, v1, s[2:3] +0x00,0x00,0x66,0xdc,0x03,0x01,0x02,0x00 + +# GFX11: global_store_b32 v3, v1, s[2:3] offset:16 +0x10,0x00,0x6a,0xdc,0x03,0x01,0x02,0x00 + +# GFX11: global_store_b64 v1, v[2:3], s[2:3] +0x00,0x00,0x6e,0xdc,0x01,0x02,0x02,0x00 + +# GFX11: global_store_b96 v1, v[3:5], s[2:3] +0x00,0x00,0x72,0xdc,0x01,0x03,0x02,0x00 + +# GFX11: global_store_b128 v1, v[3:6], s[2:3] +0x00,0x00,0x76,0xdc,0x01,0x03,0x02,0x00 + +# GFX11: global_atomic_swap_b32 v0, v1, v3, s[2:3] offset:2047 glc +0xff,0x47,0xce,0xdc,0x01,0x03,0x02,0x00 + +# GFX11: global_atomic_swap_b32 v0, v1, v3, s[2:3] offset:2047 glc slc +0xff,0xc7,0xce,0xdc,0x01,0x03,0x02,0x00 + +# GFX11: global_atomic_swap_b32 v0, v1, v3, s[2:3] glc +0x00,0x40,0xce,0xdc,0x01,0x03,0x02,0x00 + +# GFX11: global_atomic_swap_b32 v0, v1, v3, s[2:3] glc slc +0x00,0xc0,0xce,0xdc,0x01,0x03,0x02,0x00 + +# GFX11: global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] offset:2047 glc +0xff,0x47,0x06,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] offset:2047 glc slc +0xff,0xc7,0x06,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] glc +0x00,0x40,0x06,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_atomic_swap_b64 v[1:2], v3, v[5:6], s[2:3] glc slc +0x00,0xc0,0x06,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] offset:2047 glc +0xff,0x47,0xd2,0xdc,0x01,0x02,0x02,0x00 + +# GFX11: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] offset:2047 glc slc +0xff,0xc7,0xd2,0xdc,0x01,0x02,0x02,0x00 + +# GFX11: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] glc +0x00,0x40,0xd2,0xdc,0x01,0x02,0x02,0x00 + +# GFX11: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] glc slc 
+0x00,0xc0,0xd2,0xdc,0x01,0x02,0x02,0x00 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] offset:2047 glc +0xff,0x47,0x0a,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] offset:2047 glc slc +0xff,0xc7,0x0a,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] glc +0x00,0x40,0x0a,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_atomic_cmpswap_b64 v[1:2], v3, v[5:8], s[2:3] glc slc +0x00,0xc0,0x0a,0xdd,0x03,0x05,0x02,0x01 + +# GFX11: global_load_d16_u8 v1, v3, s[2:3] +0x00,0x00,0x7a,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_d16_hi_u8 v1, v3, s[2:3] +0x00,0x00,0x86,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_d16_i8 v1, v3, s[2:3] +0x00,0x00,0x7e,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_d16_hi_i8 v1, v3, s[2:3] +0x00,0x00,0x8a,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_d16_b16 v1, v3, s[2:3] +0x00,0x00,0x82,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_load_d16_hi_b16 v1, v3, s[2:3] +0x00,0x00,0x8e,0xdc,0x03,0x00,0x02,0x01 + +# GFX11: global_store_d16_hi_b8 v3, v1, s[2:3] +0x00,0x00,0x92,0xdc,0x03,0x01,0x02,0x00 + +# GFX11: global_store_d16_hi_b16 v3, v1, s[2:3] +0x00,0x00,0x96,0xdc,0x03,0x01,0x02,0x00 + +# GFX11: global_load_addtid_b32 v1, s[2:3] +0x00,0x00,0xa2,0xdc,0x00,0x00,0x02,0x01 + + +# SCRATCH + + +# GFX11: scratch_load_u8 v1, v2, s1 +0x00,0x00,0x41,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_i8 v1, v2, s1 +0x00,0x00,0x45,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_u16 v1, v2, s1 +0x00,0x00,0x49,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_i16 v1, v2, s1 +0x00,0x00,0x4d,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_b32 v1, v2, s1 +0x00,0x00,0x51,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_b64 v[1:2], v2, s1 +0x00,0x00,0x55,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_b96 v[1:3], v2, s1 +0x00,0x00,0x59,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_b128 v[1:4], v2, s1 +0x00,0x00,0x5d,0xdc,0x02,0x00,0x81,0x01 + +# 
GFX11: scratch_store_b8 v1, v2, s3 +0x00,0x00,0x61,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_store_b16 v1, v2, s3 +0x00,0x00,0x65,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_store_b32 v1, v2, s3 +0x00,0x00,0x69,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_store_b64 v1, v[2:3], s3 +0x00,0x00,0x6d,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_store_b96 v1, v[2:4], s3 +0x00,0x00,0x71,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_store_b128 v1, v[2:5], s3 +0x00,0x00,0x75,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_load_d16_u8 v1, v2, s1 +0x00,0x00,0x79,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_d16_hi_u8 v1, v2, s1 +0x00,0x00,0x85,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_d16_i8 v1, v2, s1 +0x00,0x00,0x7d,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_d16_hi_i8 v1, v2, s1 +0x00,0x00,0x89,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_d16_b16 v1, v2, s1 +0x00,0x00,0x81,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_d16_hi_b16 v1, v2, s1 +0x00,0x00,0x8d,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_store_d16_hi_b8 v1, v2, s3 +0x00,0x00,0x91,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_store_d16_hi_b16 v1, v2, s3 +0x00,0x00,0x95,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_load_b32 v1, v2, s1 offset:2047 +0xff,0x07,0x51,0xdc,0x02,0x00,0x81,0x01 + +# GFX11: scratch_load_b32 v1, v2, off offset:2047 +0xff,0x07,0x51,0xdc,0x02,0x00,0xfc,0x01 + +# GFX11: scratch_load_b32 v1, off, s1 offset:2047 +0xff,0x07,0x51,0xdc,0x00,0x00,0x01,0x01 + +# GFX11: scratch_load_b32 v1, off, off offset:2047 +0xff,0x07,0x51,0xdc,0x00,0x00,0x7c,0x01 + +# GFX11: scratch_load_b32 v1, off, off +0x00,0x00,0x51,0xdc,0x00,0x00,0x7c,0x01 + +# GFX11: scratch_store_b32 v1, v2, s3 offset:2047 +0xff,0x07,0x69,0xdc,0x01,0x02,0x83,0x00 + +# GFX11: scratch_store_b32 v1, v2, off offset:2047 +0xff,0x07,0x69,0xdc,0x01,0x02,0xfc,0x00 + +# GFX11: scratch_store_b32 off, v2, s3 offset:2047 +0xff,0x07,0x69,0xdc,0x00,0x02,0x03,0x00 + +# GFX11: scratch_store_b32 off, v2, off offset:2047 
+0xff,0x07,0x69,0xdc,0x00,0x02,0x7c,0x00 + +# GFX11: scratch_store_b32 off, v2, off +0x00,0x00,0x69,0xdc,0x00,0x02,0x7c,0x00 + +# GFX11: scratch_load_b32 v5, off, off offset:-4096 +0x00,0x10,0x51,0xdc,0x00,0x00,0x7c,0x05 + +# GFX11: scratch_load_b32 v5, off, off offset:4095 +0xff,0x0f,0x51,0xdc,0x00,0x00,0x7c,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt index 4a4d98529767b0..2d9057ec018dc5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt @@ -1,7 +1,6 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s - # GFX11: s_absdiff_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x83] 0x01,0x02,0x7f,0x83 From 5c9f3ec4ad5d930f38c5682345a589ce57a5d602 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 17 May 2022 18:23:35 -0700 Subject: [PATCH 513/908] [LLDB][NativePDB] Check for missing type info to avoid crash. NativePDB often assumes that all debug info are available. This is one step to make it more pervasive. 
Differential Revision: https://reviews.llvm.org/D125844 --- .../SymbolFile/NativePDB/PdbAstBuilder.cpp | 45 +++++++- .../NativePDB/SymbolFileNativePDB.cpp | 2 + .../NativePDB/UdtRecordCompleter.cpp | 11 +- .../Shell/SymbolFile/NativePDB/missing-type.s | 108 ++++++++++++++++++ 4 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 lldb/test/Shell/SymbolFile/NativePDB/missing-type.s diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index eaff882c5f1387..80d51ab656f4a6 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -323,6 +323,8 @@ PdbAstBuilder::CreateDeclInfoForType(const TagRecord &record, TypeIndex ti) { // LLDB TypeSP for the parent. This will cause the AST to automatically get // the right DeclContext created for any parent. clang::QualType parent_qt = GetOrCreateType(parent_iter->second); + if (parent_qt.isNull()) + return {nullptr, ""}; context = clang::TagDecl::castToDeclContext(parent_qt->getAsTagDecl()); return {context, uname}; @@ -532,6 +534,8 @@ llvm::Optional PdbAstBuilder::GetOrCreateDeclForUid(PdbSymUid uid) break; case PdbSymUidKind::Type: { clang::QualType qt = GetOrCreateType(uid.asTypeSym()); + if (qt.isNull()) + return llvm::None; if (auto *tag = qt->getAsTagDecl()) { result = tag; break; @@ -581,6 +585,8 @@ PdbAstBuilder::CreateDeclInfoForUndecoratedName(llvm::StringRef name) { std::vector types = m_index.tpi().findRecordsByName(scope_name); while (!types.empty()) { clang::QualType qt = GetOrCreateType(types.back()); + if (qt.isNull()) + continue; clang::TagDecl *tag = qt->getAsTagDecl(); if (tag) return {clang::TagDecl::castToDeclContext(tag), std::string(uname)}; @@ -622,6 +628,8 @@ PdbAstBuilder::GetParentDeclContextForSymbol(const CVSymbol &sym) { std::vector matches = m_index.tpi().findRecordsByName(qname); while (!matches.empty()) { clang::QualType qt 
= GetOrCreateType(matches.back()); + if (qt.isNull()) + continue; clang::TagDecl *tag = qt->getAsTagDecl(); if (tag) return clang::TagDecl::castToDeclContext(tag); @@ -700,6 +708,8 @@ clang::DeclContext *PdbAstBuilder::GetParentDeclContext(PdbSymUid uid) { } bool PdbAstBuilder::CompleteType(clang::QualType qt) { + if (qt.isNull()) + return false; clang::TagDecl *tag = qt->getAsTagDecl(); if (!tag) return false; @@ -767,6 +777,8 @@ clang::QualType PdbAstBuilder::CreateSimpleType(TypeIndex ti) { if (ti.getSimpleMode() != SimpleTypeMode::Direct) { clang::QualType direct_type = GetOrCreateType(ti.makeDirect()); + if (direct_type.isNull()) + return {}; return m_clang.getASTContext().getPointerType(direct_type); } @@ -791,7 +803,8 @@ clang::QualType PdbAstBuilder::CreatePointerType(const PointerRecord &pointer) { if (pointer.isPointerToMember()) { MemberPointerInfo mpi = pointer.getMemberInfo(); clang::QualType class_type = GetOrCreateType(mpi.ContainingType); - + if (class_type.isNull()) + return {}; return m_clang.getASTContext().getMemberPointerType( pointee_type, class_type.getTypePtr()); } @@ -835,6 +848,9 @@ clang::QualType PdbAstBuilder::CreateRecordType(PdbTypeSymId id, clang::DeclContext *context = nullptr; std::string uname; std::tie(context, uname) = CreateDeclInfoForType(record, id.index); + if (!context) + return {}; + clang::TagTypeKind ttk = TranslateUdtKind(record); lldb::AccessType access = (ttk == clang::TTK_Class) ? 
lldb::eAccessPrivate : lldb::eAccessPublic; @@ -899,6 +915,8 @@ clang::VarDecl *PdbAstBuilder::CreateVariableDecl(PdbSymUid uid, CVSymbol sym, clang::DeclContext &scope) { VariableInfo var_info = GetVariableNameInfo(sym); clang::QualType qt = GetOrCreateType(var_info.type); + if (qt.isNull()) + return nullptr; clang::VarDecl *var_decl = m_clang.CreateVariableDeclaration( &scope, OptionalClangModuleID(), var_info.name.str().c_str(), qt); @@ -947,6 +965,8 @@ PdbAstBuilder::GetOrCreateTypedefDecl(PdbGlobalSymId id) { PdbTypeSymId real_type_id{udt.Type, false}; clang::QualType qt = GetOrCreateType(real_type_id); + if (qt.isNull()) + return nullptr; std::string uname = std::string(DropNameScope(udt.Name)); @@ -1017,6 +1037,9 @@ clang::QualType PdbAstBuilder::CreateType(PdbTypeSymId type) { } clang::QualType PdbAstBuilder::GetOrCreateType(PdbTypeSymId type) { + if (type.index.isNoneType()) + return {}; + lldb::user_id_t uid = toOpaqueUid(type); auto iter = m_uid_to_type.find(uid); if (iter != m_uid_to_type.end()) @@ -1029,6 +1052,8 @@ clang::QualType PdbAstBuilder::GetOrCreateType(PdbTypeSymId type) { // This is a forward decl. Call GetOrCreate on the full decl, then map the // forward decl id to the full decl QualType. clang::QualType qt = GetOrCreateType(best_type); + if (qt.isNull()) + return {}; m_uid_to_type[toOpaqueUid(type)] = qt; return qt; } @@ -1036,6 +1061,9 @@ clang::QualType PdbAstBuilder::GetOrCreateType(PdbTypeSymId type) { // This is either a full decl, or a forward decl with no matching full decl // in the debug info. 
qt = CreateType(type); + if (qt.isNull()) + return {}; + m_uid_to_type[toOpaqueUid(type)] = qt; if (IsTagRecord(type, m_index.tpi())) { clang::TagDecl *tag = qt->getAsTagDecl(); @@ -1298,6 +1326,8 @@ void PdbAstBuilder::CreateFunctionParameters(PdbCompilandSymId func_id, PdbCompilandSymId param_uid(func_id.modi, record_offset); clang::QualType qt = GetOrCreateType(param_type); + if (qt.isNull()) + return; CompilerType param_type_ct = m_clang.GetType(qt); clang::ParmVarDecl *param = m_clang.CreateParameterDeclaration( @@ -1319,7 +1349,12 @@ clang::QualType PdbAstBuilder::CreateEnumType(PdbTypeSymId id, clang::DeclContext *decl_context = nullptr; std::string uname; std::tie(decl_context, uname) = CreateDeclInfoForType(er, id.index); + if (!decl_context) + return {}; + clang::QualType underlying_type = GetOrCreateType(er.UnderlyingType); + if (underlying_type.isNull()) + return {}; Declaration declaration; CompilerType enum_ct = m_clang.CreateEnumerationType( @@ -1336,7 +1371,7 @@ clang::QualType PdbAstBuilder::CreateArrayType(const ArrayRecord &ar) { clang::QualType element_type = GetOrCreateType(ar.ElementType); uint64_t element_size = GetSizeOfType({ar.ElementType}, m_index.tpi()); - if (element_size == 0) + if (element_type.isNull() || element_size == 0) return {}; uint64_t element_count = ar.Size / element_size; @@ -1364,10 +1399,14 @@ clang::QualType PdbAstBuilder::CreateFunctionType( for (TypeIndex arg_index : arg_indices) { clang::QualType arg_type = GetOrCreateType(arg_index); + if (arg_type.isNull()) + continue; arg_types.push_back(ToCompilerType(arg_type)); } clang::QualType return_type = GetOrCreateType(return_type_idx); + if (return_type.isNull()) + return {}; llvm::Optional cc = TranslateCallingConvention(calling_convention); @@ -1418,7 +1457,7 @@ void PdbAstBuilder::ParseAllNamespacesPlusChildrenOf( clang::DeclContext *context = nullptr; std::string uname; std::tie(context, uname) = CreateDeclInfoForType(tag.asTag(), tid.index); - if 
(!context->isNamespace()) + if (!context || !context->isNamespace()) continue; clang::NamespaceDecl *ns = llvm::cast(context); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 22d8977e117b0c..18fd7efb93e82f 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -726,6 +726,8 @@ TypeSP SymbolFileNativePDB::CreateAndCacheType(PdbTypeSymId type_id) { PdbTypeSymId best_decl_id = full_decl_uid ? *full_decl_uid : type_id; clang::QualType qt = m_ast->GetOrCreateType(best_decl_id); + if (qt.isNull()) + return nullptr; TypeSP result = CreateType(best_decl_id, m_ast->ToCompilerType(qt)); if (!result) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp index 856ba1cb296d8e..dba304420c552e 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp @@ -67,7 +67,8 @@ clang::QualType UdtRecordCompleter::AddBaseClassForTypeIndex( m_ast_builder.clang().CreateBaseClassSpecifier( qt.getAsOpaquePtr(), TranslateMemberAccess(access), vtable_idx.hasValue(), udt_cvt.kind() == LF_CLASS); - lldbassert(base_spec); + if (!base_spec) + return {}; m_bases.push_back( std::make_pair(vtable_idx.getValueOr(0), std::move(base_spec))); @@ -80,6 +81,8 @@ void UdtRecordCompleter::AddMethod(llvm::StringRef name, TypeIndex type_idx, MemberAttributes attrs) { clang::QualType method_qt = m_ast_builder.GetOrCreateType(PdbTypeSymId(type_idx)); + if (method_qt.isNull()) + return; m_ast_builder.CompleteType(method_qt); CompilerType method_ct = m_ast_builder.ToCompilerType(method_qt); lldb::opaque_compiler_type_t derived_opaque_ty = m_derived_ct.GetOpaqueQualType(); @@ -106,6 +109,8 @@ Error 
UdtRecordCompleter::visitKnownMember(CVMemberRecord &cvr, clang::QualType base_qt = AddBaseClassForTypeIndex(base.Type, base.getAccess()); + if (base_qt.isNull()) + return llvm::Error::success(); auto decl = m_ast_builder.clang().GetAsCXXRecordDecl(base_qt.getAsOpaquePtr()); lldbassert(decl); @@ -137,6 +142,8 @@ Error UdtRecordCompleter::visitKnownMember( CVMemberRecord &cvr, StaticDataMemberRecord &static_data_member) { clang::QualType member_type = m_ast_builder.GetOrCreateType(PdbTypeSymId(static_data_member.Type)); + if (member_type.isNull()) + return llvm::Error::success(); CompilerType member_ct = m_ast_builder.ToCompilerType(member_type); @@ -235,6 +242,8 @@ Error UdtRecordCompleter::visitKnownMember(CVMemberRecord &cvr, } clang::QualType member_qt = m_ast_builder.GetOrCreateType(PdbTypeSymId(ti)); + if (member_qt.isNull()) + return Error::success(); m_ast_builder.CompleteType(member_qt); lldb::AccessType access = TranslateMemberAccess(data_member.getAccess()); diff --git a/lldb/test/Shell/SymbolFile/NativePDB/missing-type.s b/lldb/test/Shell/SymbolFile/NativePDB/missing-type.s new file mode 100644 index 00000000000000..32be7a1b96eef7 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/NativePDB/missing-type.s @@ -0,0 +1,108 @@ +# clang-format off +# REQUIRES: lld, x86 + +# Test when type index is missing in FieldList. 
+# RUN: llvm-mc -triple=x86_64-windows-msvc --filetype=obj %s > %t.obj +# RUN: lld-link /debug:full /nodefaultlib /entry:main %t.obj /out:%t.exe /base:0x140000000 +# RUN: lldb-test symbols --find=type --name=S %t.exe | FileCheck %s + +# CHECK: name = "S", size = 4, compiler_type = {{.*}} struct S { +# CHECK-NEXT: } + + + + .text + .def @feat.00; + .scl 3; + .type 0; + .endef + .globl @feat.00 +.set @feat.00, 0 + .intel_syntax noprefix + .file "a.cpp" + .def main; + .scl 2; + .type 32; + .endef + .globl main # -- Begin function main + .p2align 4, 0x90 +main: # @main +.seh_proc main +# %bb.0: # %entry + sub rsp, 24 + .seh_stackalloc 24 + .seh_endprologue + mov dword ptr [rsp + 20], 0 + mov qword ptr [rsp + 8], rdx + mov dword ptr [rsp + 4], ecx +.Ltmp0: + mov eax, dword ptr [rsp] + add rsp, 24 + ret +.Ltmp1: +.Lfunc_end0: + .seh_endproc + # -- End function + .section .drectve,"yn" +.Ltmp25: + .section .debug$T,"dr" + .p2align 2 + .long 4 # Debug section magic + # Pointer (0x1000) + .short 0xa # Record length + .short 0x1002 # Record kind: LF_POINTER + .long 0x670 # PointeeType: char* + .long 0x1000c # Attrs: [ Type: Near64, Mode: Pointer, SizeOf: 8 ] + # ArgList (0x1001) + .short 0xe # Record length + .short 0x1201 # Record kind: LF_ARGLIST + .long 0x2 # NumArgs + .long 0x74 # Argument: int + .long 0x1000 # Argument: char** + # Procedure (0x1002) + .short 0xe # Record length + .short 0x1008 # Record kind: LF_PROCEDURE + .long 0x74 # ReturnType: int + .byte 0x0 # CallingConvention: NearC + .byte 0x0 # FunctionOptions + .short 0x2 # NumParameters + .long 0x1001 # ArgListType: (int, char**) + # FuncId (0x1003) + .short 0x12 # Record length + .short 0x1601 # Record kind: LF_FUNC_ID + .long 0x0 # ParentScope + .long 0x1002 # FunctionType: int (int, char**) + .asciz "main" # Name + .byte 243 + .byte 242 + .byte 241 + # Struct (0x1004) + .short 0x1e # Record length + .short 0x1505 # Record kind: LF_STRUCTURE + .short 0x0 # MemberCount + .short 0x280 # Properties ( 
ForwardReference (0x80) | HasUniqueName (0x200) ) + .long 0x0 # FieldList + .long 0x0 # DerivedFrom + .long 0x0 # VShape + .short 0x0 # SizeOf + .asciz "S" # Name + .asciz ".?AUS@@" # LinkageName + # FieldList (0x1005) + .short 0xe # Record length + .short 0x1203 # Record kind: LF_FIELDLIST + .short 0x150d # Member kind: DataMember ( LF_MEMBER ) + .short 0x3 # Attrs: Public + .long 0 # Type. It's intentionally written as 0 for testing. + .short 0x0 # FieldOffset + .asciz "x" # Name + # Struct (0x1006) + .short 0x1e # Record length + .short 0x1505 # Record kind: LF_STRUCTURE + .short 0x1 # MemberCount + .short 0x200 # Properties ( HasUniqueName (0x200) ) + .long 0x1005 # FieldList: + .long 0x0 # DerivedFrom + .long 0x0 # VShape + .short 0x4 # SizeOf + .asciz "S" # Name + .asciz ".?AUS@@" # LinkageName From ab249fd87d30b152c4a2308c1fd7dff8b662adb9 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 25 May 2022 22:25:27 +0200 Subject: [PATCH 514/908] [mlir][bufferization][NFC] Remove dead code There were two copies of AlwaysCopyAnalysisState. (Must have been a merge conflict mistake...) Differential Revision: https://reviews.llvm.org/D126414 --- .../IR/BufferizableOpInterface.h | 25 ------------- .../IR/BufferizableOpInterface.cpp | 37 ------------------- .../Bufferization/Transforms/Bufferize.cpp | 20 +++++++++- 3 files changed, 19 insertions(+), 63 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index 8b492cfda643e6..9d3acb3ca0e428 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -474,31 +474,6 @@ class AnalysisState { const BufferizationOptions &options; }; -/// This a "no analysis, always copy" AnalysisState. In the absence of an -/// analysis, a buffer must be copied each time it is written to. 
Therefore, all -/// OpOperands that bufferize to a memory write must bufferize out-of-place. -class AlwaysCopyAnalysisState : public AnalysisState { -public: - explicit AlwaysCopyAnalysisState(const BufferizationOptions &options); - - AlwaysCopyAnalysisState(const AlwaysCopyAnalysisState &) = delete; - - virtual ~AlwaysCopyAnalysisState() = default; - - /// Return `true` if the given OpResult has been decided to bufferize inplace. - bool isInPlace(OpOperand &opOperand) const override; - - /// Return true if `v1` and `v2` bufferize to equivalent buffers. - bool areEquivalentBufferizedValues(Value v1, Value v2) const override; - - /// Return `true` if the given tensor has undefined contents. - bool hasUndefinedContents(OpOperand *opOperand) const override; - - /// Return true if the given tensor (or an aliasing tensor) is yielded from - /// the containing block. Also include all aliasing tensors in the same block. - bool isTensorYielded(Value tensor) const override; -}; - /// BufferizationState provides helper functions for performing bufferization /// rewrites and handling memref buffers. struct BufferizationState { diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 90bfb67a0b35bd..79b499545ca9f2 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -374,43 +374,6 @@ void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter, rewriter.replaceOp(op, replacements); } -AlwaysCopyAnalysisState::AlwaysCopyAnalysisState( - const BufferizationOptions &options) - : AnalysisState(options) { - // Note: Allocations must be deallocated with a subsequent run of the buffer - // deallocation pass. - assert(!options.createDeallocs && - "cannot create deallocs with AlwaysCopyBufferizationState"); -} - -/// Return `true` if the given OpResult has been decided to bufferize inplace. 
-bool AlwaysCopyAnalysisState::isInPlace(OpOperand &opOperand) const { - // OpOperands that bufferize to a memory write are out-of-place, i.e., an - // alloc and copy is inserted. - return !bufferizesToMemoryWrite(opOperand); -} - -/// Return true if `v1` and `v2` bufferize to equivalent buffers. -bool AlwaysCopyAnalysisState::areEquivalentBufferizedValues(Value v1, - Value v2) const { - // There is no analysis, so we do not know if the values are equivalent. The - // conservative answer is "false". - return false; -} - -/// Return `true` if the given tensor has undefined contents. -bool AlwaysCopyAnalysisState::hasUndefinedContents(OpOperand *opOperand) const { - // There is no analysis, so the conservative answer is "false". - return false; -} - -/// Return true if the given tensor (or an aliasing tensor) is yielded from -/// the containing block. Also include all aliasing tensors in the same block. -bool AlwaysCopyAnalysisState::isTensorYielded(Value tensor) const { - // There is no analysis, so conservatively answer "true". - return true; -} - //===----------------------------------------------------------------------===// // Bufferization-specific scoped alloc/dealloc insertion support. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 4aad432a6c528c..64152273a17c6d 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -478,7 +478,12 @@ namespace { class AlwaysCopyAnalysisState : public AnalysisState { public: AlwaysCopyAnalysisState(const BufferizationOptions &options) - : AnalysisState(options) {} + : AnalysisState(options) { + // Note: Allocations must be deallocated with a subsequent run of the buffer + // deallocation pass. 
+ assert(!options.createDeallocs && + "cannot create deallocs with AlwaysCopyBufferizationState"); + } AlwaysCopyAnalysisState(const AlwaysCopyAnalysisState &) = delete; @@ -497,6 +502,19 @@ class AlwaysCopyAnalysisState : public AnalysisState { // conservative answer is "false". return false; } + + /// Return `true` if the given tensor has undefined contents. + bool hasUndefinedContents(OpOperand *opOperand) const override { + // There is no analysis, so the conservative answer is "false". + return false; + } + + /// Return true if the given tensor (or an aliasing tensor) is yielded from + /// the containing block. Also include all aliasing tensors in the same block. + bool isTensorYielded(Value tensor) const override { + // There is no analysis, so conservatively answer "true". + return true; + } }; } // namespace From b7c8c4d8cf07d2e9e8cd157bccc8bd9e7c76415a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 25 May 2022 13:22:26 -0400 Subject: [PATCH 515/908] [Clang] Introduce `--offload-link` option to perform offload device linking The new driver uses an augmented linker wrapper to perform the device linking phase, but to the user looks like a regular linker invocation. Contrary to the old driver, the new driver contains all the information necessary to produce a linked device image in the host object itself. Currently, we infer the usage of the device linker by the user specifying an offloading toolchain, e.g. (--offload-arch=...) or (-fopenmp-targets=...), but this shouldn't be strictly necessary. This patch introduces a new option `--offload-link` to tell the driver to use the offloading linker instead. So a compilation flow can now look like this, ``` clang foo.cu --offload-new-driver -fgpu-rdc --offload-arch=sm_70 -c clang foo.o --offload-link -lcudart ``` I was considering if this could be merged into the `-fuse-ld` option, but because the device linker wraps over the users linker it would conflict with that. 
In the future it's possible to merge this into `lld` completely or `gold` via a plugin and we would use this option to enable the device linking feature. Let me know what you think for this. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D126398 --- clang/docs/ClangCommandLineReference.rst | 4 ++++ clang/include/clang/Driver/Options.td | 2 ++ clang/lib/Driver/Driver.cpp | 3 ++- clang/test/Driver/cuda-openmp-driver.cu | 5 +++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 5926decc85fc12..73dccf6a9a04e0 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -4209,6 +4209,10 @@ Set starting address of TEXT to Pass the comma separated arguments in to the linker +.. option:: --offload-link + +Use the linker supporting offloading device linking. + .. option:: -X .. option:: -Xlinker , --for-linker , --for-linker= diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d34cec766a2e2b..e19baac5860aab 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -820,6 +820,8 @@ def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">, Group, Flags<[LinkerInput, RenderAsInput]>, HelpText<"Pass -z to the linker">, MetaVarName<"">, Group; +def offload_link : Flag<["--"], "offload-link">, Group, + HelpText<"Use the new offloading linker to perform the link job.">; def Xlinker : Separate<["-"], "Xlinker">, Flags<[LinkerInput, RenderAsInput]>, HelpText<"Pass to the linker">, MetaVarName<"">, Group; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 3edbaed9ae7f2f..94925568aec238 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4158,7 +4158,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Check if this Linker Job should emit a static library. 
if (ShouldEmitStaticLibrary(Args)) { LA = C.MakeAction(LinkerInputs, types::TY_Image); - } else if (UseNewOffloadingDriver) { + } else if (UseNewOffloadingDriver || + Args.hasArg(options::OPT_offload_link)) { LA = C.MakeAction(LinkerInputs, types::TY_Image); LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(), /*BoundArch=*/nullptr); diff --git a/clang/test/Driver/cuda-openmp-driver.cu b/clang/test/Driver/cuda-openmp-driver.cu index dec0288db2630c..b27195da2fb154 100644 --- a/clang/test/Driver/cuda-openmp-driver.cu +++ b/clang/test/Driver/cuda-openmp-driver.cu @@ -35,3 +35,8 @@ // RUN: %clang -### -target x86_64-linux-gnu -nocudalib --cuda-feature=+ptx61 --offload-arch=sm_70 %s 2>&1 | FileCheck -check-prefix MANUAL-FEATURE %s // MANUAL-FEATURE: -cc1{{.*}}-target-feature{{.*}}+ptx61 + +// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings --offload-link %s 2>&1 \ +// RUN: | FileCheck -check-prefix DEVICE-LINK %s + +// DEVICE-LINK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[INPUT:.+]]"], output: "a.out" From 256a52d9aac8a9e98fbfd6a3d91090bf127cef7d Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Wed, 25 May 2022 13:30:49 -0700 Subject: [PATCH 516/908] Round up zero-sized symbols to 1 byte in `.debug_aranges`. This commit modifies the AsmPrinter to avoid emitting any zero-sized symbols to the .debug_aranges table, by rounding their size up to 1. Entries with zero length violate the DWARF 5 spec, which states: > Each descriptor is a triple consisting of a segment selector, the beginning > address within that segment of a range of text or data covered by some entry > owned by the corresponding compilation unit, followed by the non-zero length > of that range. In practice, these zero-sized entries produce annoying warnings in lld and cause GNU binutils to truncate the table when parsing it. 
Other parts of LLVM, such as DWARFDebugARanges in the DebugInfo module (specifically the appendRange method), already avoid emitting zero-sized symbols to .debug_aranges, but not comprehensively in the AsmPrinter. In fact, the AsmPrinter does try to avoid emitting such zero-sized symbols when labels aren't involved, but doesn't when the symbol to emitted is a difference of two labels; this patch extends that logic to handle the case in which the symbol is defined via labels. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D126257 --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 +++++--- .../Generic/dwarf-aranges-zero-size.ll | 23 +++++++++++++++++++ llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll | 2 +- llvm/test/DebugInfo/X86/dwarf-aranges.ll | 2 +- 4 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/Generic/dwarf-aranges-zero-size.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 37ae84ad9bf17c..50032c887d839a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3042,13 +3042,17 @@ void DwarfDebug::emitDebugARanges() { for (const ArangeSpan &Span : List) { Asm->emitLabelReference(Span.Start, PtrSize); - // Calculate the size as being from the span start to it's end. - if (Span.End) { + // Calculate the size as being from the span start to its end. + // + // If the size is zero, then round it up to one byte. The DWARF + // specification requires that entries in this table have nonzero + // lengths. + uint64_t Size = SymSize[Span.Start]; + if (Size != 0 && Span.End) { Asm->emitLabelDifference(Span.End, Span.Start, PtrSize); } else { // For symbols without an end marker (e.g. common), we // write a single arange entry containing just that one symbol. 
- uint64_t Size = SymSize[Span.Start]; if (Size == 0) Size = 1; diff --git a/llvm/test/CodeGen/Generic/dwarf-aranges-zero-size.ll b/llvm/test/CodeGen/Generic/dwarf-aranges-zero-size.ll new file mode 100644 index 00000000000000..9a8be3c8f91d20 --- /dev/null +++ b/llvm/test/CodeGen/Generic/dwarf-aranges-zero-size.ll @@ -0,0 +1,23 @@ +; Ensures that the AsmPrinter doesn't emit zero-sized symbols into `.debug_aranges`. +; +; RUN: llc --generate-arange-section < %s | FileCheck %s +; CHECK: .section .debug_aranges +; CHECK: .quad EXAMPLE +; CHECK-NEXT: .quad 1 +; CHECK: .section + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@EXAMPLE = constant <{ [0 x i8] }> zeroinitializer, align 1, !dbg !0 + +!llvm.module.flags = !{!3} +!llvm.dbg.cu = !{!4} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "EXAMPLE", linkageName: "EXAMPLE", scope: null, file: null, line: 161, type: !2, isLocal: false, isDefinition: true, align: 1) +!2 = !DIBasicType(name: "()", encoding: DW_ATE_unsigned) +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !5, producer: "rustc", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: null, globals: !6) +!5 = !DIFile(filename: "foo", directory: "") +!6 = !{!0} diff --git a/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll b/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll index 98922f1b7d088f..51d2c4a20589b9 100644 --- a/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll +++ b/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll @@ -98,7 +98,7 @@ ; CHECK: .debug_aranges contents: ; CHECK-NEXT: Address Range Header: length = 0x{{.*}}, format = DWARF32, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x02, seg_size = 0x00 -; CHECK-NEXT: [0x0000, 0x0006) +; CHECK-NEXT: [0x0000, 0x0001) ; CHECK: .debug_addr contents: ; CHECK-NEXT: Address table header: 
length = 0x{{.*}}, format = DWARF32, version = 0x0005, addr_size = 0x02, seg_size = 0x00 diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges.ll b/llvm/test/DebugInfo/X86/dwarf-aranges.ll index 5358a30c6a06af..c43e9eea8b710f 100644 --- a/llvm/test/DebugInfo/X86/dwarf-aranges.ll +++ b/llvm/test/DebugInfo/X86/dwarf-aranges.ll @@ -22,7 +22,7 @@ ; - it should have made one span covering all functions in this CU. ; CHECK-NEXT: .quad .Lfunc_begin0 -; CHECK-NEXT: .quad .Lsec_end2-.Lfunc_begin0 +; CHECK-NEXT: .quad 1 ; -- finish -- ; CHECK-NEXT: # ARange terminator From e51a6b7374ca2d11e07375843bf24ef51307308a Mon Sep 17 00:00:00 2001 From: zr33 Date: Wed, 25 May 2022 13:40:11 -0700 Subject: [PATCH 517/908] [BOLT][DWARF] Convert dwarf5-df-* tests to assembly tests Reviewed By: ayermolo Differential Revision: https://reviews.llvm.org/D126086 --- .../X86/Inputs/dwarf4-df-dualcu-helper.ll | 81 --- .../test/X86/Inputs/dwarf4-df-dualcu-helper.s | 298 +++++++++ .../Inputs/dwarf4-df-dualcu-loclist-helper.ll | 61 -- .../Inputs/dwarf4-df-dualcu-loclist-helper.s | 322 ++++++++++ .../Inputs/dwarf4-df-dualcu-loclist-main.ll | 99 --- .../Inputs/dwarf4-df-dualcu-loclist-main.s | 546 +++++++++++++++++ bolt/test/X86/Inputs/dwarf4-df-dualcu-main.ll | 129 ---- bolt/test/X86/Inputs/dwarf4-df-dualcu-main.s | 420 +++++++++++++ .../X86/Inputs/dwarf5-df-dualcu-helper.ll | 81 --- .../test/X86/Inputs/dwarf5-df-dualcu-helper.s | 348 +++++++++++ .../Inputs/dwarf5-df-dualcu-loclist-helper.ll | 61 -- .../Inputs/dwarf5-df-dualcu-loclist-helper.s | 351 +++++++++++ .../Inputs/dwarf5-df-dualcu-loclist-main.ll | 99 --- .../Inputs/dwarf5-df-dualcu-loclist-main.s | 578 ++++++++++++++++++ bolt/test/X86/Inputs/dwarf5-df-dualcu-main.ll | 129 ---- bolt/test/X86/Inputs/dwarf5-df-dualcu-main.s | 476 +++++++++++++++ bolt/test/X86/Inputs/dwarf5-df-mono-helper.ll | 81 --- bolt/test/X86/Inputs/dwarf5-df-mono-helper.s | 281 +++++++++ bolt/test/X86/Inputs/dwarf5-df-mono-main.ll | 129 ---- 
bolt/test/X86/Inputs/dwarf5-df-mono-main.s | 476 +++++++++++++++ bolt/test/X86/Inputs/dwarf5-locaddrx.s | 473 ++++++++++++++ bolt/test/X86/dwarf4-df-dualcu-loclist.test | 8 +- bolt/test/X86/dwarf4-df-dualcu.test | 8 +- bolt/test/X86/dwarf5-df-dualcu-loclist.test | 8 +- bolt/test/X86/dwarf5-df-dualcu.test | 9 +- bolt/test/X86/dwarf5-df-mono-dualcu.test | 6 +- bolt/test/X86/dwarf5-locaddrx.test | 128 +--- 27 files changed, 4590 insertions(+), 1096 deletions(-) delete mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.ll create mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.s delete mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.ll create mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.s delete mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.ll create mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.s delete mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-main.ll create mode 100644 bolt/test/X86/Inputs/dwarf4-df-dualcu-main.s delete mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.ll create mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.s delete mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.ll create mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.s delete mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.ll create mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.s delete mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-main.ll create mode 100644 bolt/test/X86/Inputs/dwarf5-df-dualcu-main.s delete mode 100644 bolt/test/X86/Inputs/dwarf5-df-mono-helper.ll create mode 100644 bolt/test/X86/Inputs/dwarf5-df-mono-helper.s delete mode 100644 bolt/test/X86/Inputs/dwarf5-df-mono-main.ll create mode 100644 bolt/test/X86/Inputs/dwarf5-df-mono-main.s create mode 100644 bolt/test/X86/Inputs/dwarf5-locaddrx.s diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.ll b/bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.ll 
deleted file mode 100644 index 3b0ba9c1e0c33e..00000000000000 --- a/bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.ll +++ /dev/null @@ -1,81 +0,0 @@ -; clang++ -g -gdwarf-4 -emit-llvm -S helper.cpp -; int z = 0; -; int d = 0; -; -; int helper(int z_, int d_) { -; z += z_; -; d += d_; -; return z * d; -; } - -; ModuleID = 'helper.cpp' -source_filename = "helper.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@z = dso_local global i32 0, align 4, !dbg !0 -@d = dso_local global i32 0, align 4, !dbg !5 - -; Function Attrs: mustprogress noinline nounwind optnone uwtable -define dso_local noundef i32 @_Z6helperii(i32 noundef %z_, i32 noundef %d_) #0 !dbg !14 { -entry: - %z_.addr = alloca i32, align 4 - %d_.addr = alloca i32, align 4 - store i32 %z_, i32* %z_.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %z_.addr, metadata !18, metadata !DIExpression()), !dbg !19 - store i32 %d_, i32* %d_.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %d_.addr, metadata !20, metadata !DIExpression()), !dbg !21 - %0 = load i32, i32* %z_.addr, align 4, !dbg !22 - %1 = load i32, i32* @z, align 4, !dbg !23 - %add = add nsw i32 %1, %0, !dbg !23 - store i32 %add, i32* @z, align 4, !dbg !23 - %2 = load i32, i32* %d_.addr, align 4, !dbg !24 - %3 = load i32, i32* @d, align 4, !dbg !25 - %add1 = add nsw i32 %3, %2, !dbg !25 - store i32 %add1, i32* @d, align 4, !dbg !25 - %4 = load i32, i32* @z, align 4, !dbg !26 - %5 = load i32, i32* @d, align 4, !dbg !27 - %mul = mul nsw i32 %4, %5, !dbg !28 - ret i32 %mul, !dbg !29 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "z", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "helper.cpp", directory: ".") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "d", scope: !2, file: !3, line: 2, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 4} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{i32 7, !"frame-pointer", i32 2} -!13 = !{!"clang version 15.0.0"} -!14 = distinct !DISubprogram(name: "helper", linkageName: "_Z6helperii", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !17) -!15 = !DISubroutineType(types: !16) -!16 = !{!7, !7, !7} -!17 = !{} -!18 = !DILocalVariable(name: "z_", arg: 1, scope: !14, file: !3, line: 4, type: !7) -!19 = !DILocation(line: 4, column: 16, scope: !14) -!20 = !DILocalVariable(name: "d_", arg: 2, scope: !14, file: !3, line: 4, type: !7) -!21 = !DILocation(line: 4, column: 24, scope: !14) -!22 = !DILocation(line: 5, column: 7, scope: !14) -!23 = !DILocation(line: 5, column: 4, scope: !14) -!24 = !DILocation(line: 6, column: 7, scope: !14) -!25 = !DILocation(line: 6, column: 4, scope: !14) -!26 = 
!DILocation(line: 7, column: 9, scope: !14) -!27 = !DILocation(line: 7, column: 13, scope: !14) -!28 = !DILocation(line: 7, column: 11, scope: !14) -!29 = !DILocation(line: 7, column: 2, scope: !14) diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.s b/bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.s new file mode 100644 index 00000000000000..55b8054f03996d --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-df-dualcu-helper.s @@ -0,0 +1,298 @@ +# clang++ -g -gdwarf-4 -emit-llvm -S helper.cpp +# llc -O0 -mtriple=x86_64-unknown-linux-gnu helper.ll +# int z = 0; +# int d = 0; +# +# int helper(int z_, int d_) { +# z += z_; +# d += d_; +# return z * d; +# } + + .text + .file "helper.cpp" + .file 1 "." "helper.cpp" + .globl _Z6helperii # -- Begin function _Z6helperii + .p2align 4, 0x90 + .type _Z6helperii,@function +_Z6helperii: # @_Z6helperii +.Lfunc_begin0: + .loc 1 4 0 # helper.cpp:4:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) + movl %esi, -8(%rbp) +.Ltmp0: + .loc 1 5 7 prologue_end # helper.cpp:5:7 + movl -4(%rbp), %eax + .loc 1 5 4 is_stmt 0 # helper.cpp:5:4 + addl z, %eax + movl %eax, z + .loc 1 6 7 is_stmt 1 # helper.cpp:6:7 + movl -8(%rbp), %eax + .loc 1 6 4 is_stmt 0 # helper.cpp:6:4 + addl d, %eax + movl %eax, d + .loc 1 7 9 is_stmt 1 # helper.cpp:7:9 + movl z, %eax + .loc 1 7 11 is_stmt 0 # helper.cpp:7:11 + imull d, %eax + .loc 1 7 2 # helper.cpp:7:2 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Z6helperii, .Lfunc_end0-_Z6helperii + .cfi_endproc + # -- End function + .type z,@object # @z + .bss + .globl z + .p2align 2 +z: + .long 0 # 0x0 + .size z, 4 + + .type d,@object # @d + .globl d + .p2align 2 +d: + .long 0 # 0x0 + .size d, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # 
DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .ascii "\260B" # DW_AT_GNU_dwo_name + .byte 14 # DW_FORM_strp + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .ascii "\263B" # DW_AT_GNU_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lskel_string0 # DW_AT_comp_dir + .long .Lskel_string1 # DW_AT_GNU_dwo_name + .quad 9133088002243470176 # DW_AT_GNU_dwo_id + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_GNU_addr_base +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "helper.dwo" # string offset=2 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "z" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "d" # string offset=6 +.Linfo_string3: + .asciz "_Z6helperii" # string offset=8 +.Linfo_string4: + .asciz "helper" # string offset=20 +.Linfo_string5: + .asciz "z_" # string offset=27 +.Linfo_string6: + .asciz "d_" # string offset=30 +.Linfo_string7: + .asciz "clang version 15.0.0" # string offset=33 +.Linfo_string8: + .asciz "helper.cpp" # string offset=54 +.Linfo_string9: + .asciz "helper.dwo" # string offset=65 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 20 + .long 27 + .long 30 + .long 33 + .long 54 + .long 65 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x50 DW_TAG_compile_unit + .byte 7 # DW_AT_producer + .short 33 # DW_AT_language + .byte 8 # DW_AT_name + .byte 9 # DW_AT_GNU_dwo_name + .quad 9133088002243470176 # DW_AT_GNU_dwo_id + .byte 2 # Abbrev [2] 0x19:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 0 + .byte 3 # Abbrev [3] 0x24:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x28:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 1 + .byte 4 # Abbrev [4] 0x33:0x27 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 36 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x43:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 5 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 36 # DW_AT_type + .byte 5 # Abbrev [5] 0x4e:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 36 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\260B" # 
DW_AT_GNU_dwo_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 
0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits +.Laddr_table_base0: + .quad z + .quad d + .quad .Lfunc_begin0 + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.ll b/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.ll deleted file mode 100644 index 36a3398caf7f08..00000000000000 --- a/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.ll +++ /dev/null @@ -1,61 +0,0 @@ -; ModuleID = 'helper.cpp' -source_filename = "helper.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@z = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0 -@d = dso_local local_unnamed_addr global i32 0, align 4, !dbg !5 - -; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable -define dso_local noundef i32 @_Z6helperii(i32 noundef %z_, i32 noundef %d_) local_unnamed_addr #0 !dbg !13 { -entry: - call void @llvm.dbg.value(metadata i32 %z_, metadata !17, metadata !DIExpression()), !dbg !19 - call void @llvm.dbg.value(metadata i32 %d_, metadata !18, metadata !DIExpression()), !dbg !19 - %0 = load i32, i32* @z, align 4, !dbg !20, !tbaa !21 - %add = add nsw i32 %0, %z_, !dbg !20 - store i32 %add, i32* @z, align 4, !dbg !20, !tbaa !21 - %1 = load i32, i32* @d, align 4, !dbg !25, !tbaa !21 - %add1 = add nsw i32 %1, %d_, !dbg !25 - store i32 %add1, i32* @d, align 4, !dbg !25, !tbaa !21 - %mul = mul nsw i32 %add1, %add, !dbg !26 - ret i32 %mul, !dbg !27 -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.value(metadata, metadata, metadata) #1 - -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11} -!llvm.ident = !{!12} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "z", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "helper.cpp", directory: ".") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "d", scope: !2, file: !3, line: 2, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 4} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{!"clang version 15.0.0"} -!13 = distinct !DISubprogram(name: "helper", linkageName: "_Z6helperii", scope: !3, file: !3, line: 4, type: !14, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !16) -!14 = !DISubroutineType(types: !15) -!15 = !{!7, !7, !7} -!16 = !{!17, !18} -!17 = !DILocalVariable(name: "z_", arg: 1, scope: !13, file: !3, line: 4, type: !7) -!18 = !DILocalVariable(name: "d_", arg: 2, scope: !13, file: !3, line: 4, type: !7) -!19 = !DILocation(line: 0, scope: !13) -!20 = !DILocation(line: 5, column: 4, scope: !13) -!21 = !{!22, !22, i64 0} -!22 = !{!"int", !23, i64 0} -!23 = !{!"omnipotent char", !24, i64 0} -!24 = !{!"Simple C++ TBAA"} -!25 = !DILocation(line: 6, column: 4, scope: !13) -!26 = 
!DILocation(line: 7, column: 11, scope: !13) -!27 = !DILocation(line: 7, column: 2, scope: !13) diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.s b/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.s new file mode 100644 index 00000000000000..9870d76c7af1ce --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-helper.s @@ -0,0 +1,322 @@ +# clang++ -g -gdwarf-4 -emit-llvm -S helper.cpp +# llc -O2 -mtriple=x86_64-unknown-linux-gnu helper.ll +# int z = 0; +# int d = 0; +# +# int helper(int z_, int d_) { +# z += z_; +# d += d_; +# return z * d; +# } + + .text + .file "helper.cpp" + .file 1 "." "helper.cpp" + .globl _Z6helperii # -- Begin function _Z6helperii + .p2align 4, 0x90 + .type _Z6helperii,@function +_Z6helperii: # @_Z6helperii +.Lfunc_begin0: + .loc 1 4 0 # helper.cpp:4:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: helper:z_ <- $edi + #DEBUG_VALUE: helper:d_ <- $esi + movl %esi, %eax +.Ltmp0: + .loc 1 5 4 prologue_end # helper.cpp:5:4 + addl z(%rip), %edi +.Ltmp1: + #DEBUG_VALUE: helper:z_ <- [DW_OP_LLVM_entry_value 1] $edi + .loc 1 6 4 # helper.cpp:6:4 + addl d(%rip), %eax + .loc 1 5 4 # helper.cpp:5:4 + movl %edi, z(%rip) + .loc 1 6 4 # helper.cpp:6:4 + movl %eax, d(%rip) + .loc 1 7 11 # helper.cpp:7:11 + imull %edi, %eax + .loc 1 7 2 is_stmt 0 # helper.cpp:7:2 + retq +.Ltmp2: +.Lfunc_end0: + .size _Z6helperii, .Lfunc_end0-_Z6helperii + .cfi_endproc + # -- End function + .type z,@object # @z + .bss + .globl z + .p2align 2 +z: + .long 0 # 0x0 + .size z, 4 + + .type d,@object # @d + .globl d + .p2align 2 +d: + .long 0 # 0x0 + .size d, 4 + + .section .debug_loc.dwo,"e",@progbits +.Ldebug_loc0: + .byte 3 + .byte 2 + .long .Ltmp1-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .byte 3 + .byte 3 + .long .Lfunc_end0-.Ltmp1 + .short 4 # Loc expr size + .byte 243 # DW_OP_GNU_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .byte 0 + .section 
.debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .ascii "\260B" # DW_AT_GNU_dwo_name + .byte 14 # DW_FORM_strp + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .ascii "\263B" # DW_AT_GNU_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lskel_string0 # DW_AT_comp_dir + .long .Lskel_string1 # DW_AT_GNU_dwo_name + .quad -5732284259998169791 # DW_AT_GNU_dwo_id + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_GNU_addr_base +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "helper.dwo" # string offset=2 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "z" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "d" # string offset=6 +.Linfo_string3: + .asciz "_Z6helperii" # string offset=8 +.Linfo_string4: + .asciz "helper" # string offset=20 +.Linfo_string5: + .asciz "z_" # string offset=27 +.Linfo_string6: + .asciz "d_" # string offset=30 +.Linfo_string7: + .asciz "clang version 15.0.0" # string offset=33 +.Linfo_string8: + .asciz "helper.cpp" # string offset=54 +.Linfo_string9: + .asciz "helper.dwo" # string offset=65 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 20 + .long 27 + .long 30 + .long 33 + .long 54 + .long 65 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x50 DW_TAG_compile_unit + .byte 7 # DW_AT_producer + .short 33 # DW_AT_language + .byte 8 # DW_AT_name + .byte 9 # DW_AT_GNU_dwo_name + .quad -5732284259998169791 # DW_AT_GNU_dwo_id + .byte 2 # Abbrev [2] 0x19:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 0 + .byte 3 # Abbrev [3] 0x24:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x28:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 1 + .byte 4 # Abbrev [4] 0x33:0x27 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_GNU_all_call_sites + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 36 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x43:0xc DW_TAG_formal_parameter + .long .Ldebug_loc0-.debug_loc.dwo # DW_AT_location + .byte 5 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 36 # DW_AT_type + .byte 6 # Abbrev [6] 0x4f:0xa DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 84 + .byte 6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 36 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii 
"\260B" # DW_AT_GNU_dwo_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .ascii "\227B" # DW_AT_GNU_all_call_sites + .byte 25 # DW_FORM_flag_present + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # 
DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits +.Laddr_table_base0: + .quad z + .quad d + .quad .Lfunc_begin0 + .quad .Ltmp1 + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.ll b/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.ll deleted file mode 100644 index 4550b7fa92ff37..00000000000000 --- a/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.ll +++ /dev/null @@ -1,99 +0,0 @@ -; ModuleID = 'main.cpp' -source_filename = "main.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@x = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0 -@y = dso_local local_unnamed_addr global i32 1, align 4, !dbg !5 - -; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn uwtable -define dso_local void @_Z3usePiS_(i32* nocapture noundef %x, i32* nocapture noundef %y) local_unnamed_addr #0 !dbg !13 { -entry: - call void @llvm.dbg.value(metadata i32* %x, metadata !18, metadata !DIExpression()), !dbg !20 - call void @llvm.dbg.value(metadata i32* %y, metadata !19, metadata !DIExpression()), !dbg !20 - %0 = load i32, i32* %x, align 4, !dbg !21, !tbaa !22 - %add = add nsw i32 %0, 4, !dbg !21 - store i32 %add, i32* %x, align 4, !dbg !21, !tbaa !22 - %1 = load i32, i32* %y, align 4, !dbg !26, 
!tbaa !22 - %sub = add nsw i32 %1, -2, !dbg !26 - store i32 %sub, i32* %y, align 4, !dbg !26, !tbaa !22 - ret void, !dbg !27 -} - -; Function Attrs: mustprogress norecurse uwtable -define dso_local noundef i32 @main(i32 noundef %argc, i8** nocapture noundef readnone %argv) local_unnamed_addr #1 !dbg !28 { -entry: - call void @llvm.dbg.value(metadata i32 %argc, metadata !35, metadata !DIExpression()), !dbg !37 - call void @llvm.dbg.value(metadata i8** %argv, metadata !36, metadata !DIExpression()), !dbg !37 - call void @llvm.dbg.value(metadata i32* @x, metadata !18, metadata !DIExpression()), !dbg !38 - call void @llvm.dbg.value(metadata i32* @y, metadata !19, metadata !DIExpression()), !dbg !38 - %add.i = add nsw i32 %argc, 4, !dbg !40 - store i32 %add.i, i32* @x, align 4, !dbg !40, !tbaa !22 - %sub.i = add nsw i32 %argc, 1, !dbg !41 - store i32 %sub.i, i32* @y, align 4, !dbg !41, !tbaa !22 - %call = tail call noundef i32 @_Z6helperii(i32 noundef %add.i, i32 noundef %sub.i), !dbg !42 - ret i32 %call, !dbg !43 -} - -declare !dbg !44 dso_local noundef i32 @_Z6helperii(i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.value(metadata, metadata, metadata) #3 - -attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { mustprogress norecurse uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #3 = { nocallback nofree nosync nounwind readnone speculatable willreturn } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11} -!llvm.ident = !{!12} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 7, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "main.cpp", directory: ".") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "y", scope: !2, file: !3, line: 8, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 4} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{!"clang version 15.0.0"} -!13 = distinct !DISubprogram(name: "use", linkageName: "_Z3usePiS_", scope: !3, file: !3, line: 1, type: !14, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !17) -!14 = !DISubroutineType(types: !15) -!15 = !{null, !16, !16} -!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) -!17 = !{!18, !19} -!18 = !DILocalVariable(name: "x", arg: 1, scope: !13, file: !3, line: 1, type: !16) -!19 = !DILocalVariable(name: "y", arg: 2, scope: !13, file: !3, line: 1, type: !16) -!20 = !DILocation(line: 0, scope: !13) -!21 = !DILocation(line: 2, column: 4, scope: !13) -!22 = !{!23, !23, i64 0} -!23 = !{!"int", !24, i64 0} -!24 = !{!"omnipotent char", !25, i64 0} -!25 = !{!"Simple C++ TBAA"} -!26 = 
!DILocation(line: 3, column: 4, scope: !13) -!27 = !DILocation(line: 4, column: 1, scope: !13) -!28 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 9, type: !29, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !34) -!29 = !DISubroutineType(types: !30) -!30 = !{!7, !7, !31} -!31 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !32, size: 64) -!32 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64) -!33 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!34 = !{!35, !36} -!35 = !DILocalVariable(name: "argc", arg: 1, scope: !28, file: !3, line: 9, type: !7) -!36 = !DILocalVariable(name: "argv", arg: 2, scope: !28, file: !3, line: 9, type: !31) -!37 = !DILocation(line: 0, scope: !28) -!38 = !DILocation(line: 0, scope: !13, inlinedAt: !39) -!39 = distinct !DILocation(line: 12, column: 4, scope: !28) -!40 = !DILocation(line: 2, column: 4, scope: !13, inlinedAt: !39) -!41 = !DILocation(line: 3, column: 4, scope: !13, inlinedAt: !39) -!42 = !DILocation(line: 13, column: 11, scope: !28) -!43 = !DILocation(line: 13, column: 4, scope: !28) -!44 = !DISubprogram(name: "helper", linkageName: "_Z6helperii", scope: !3, file: !3, line: 6, type: !45, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !47) -!45 = !DISubroutineType(types: !46) -!46 = !{!7, !7, !7} -!47 = !{} diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.s b/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.s new file mode 100644 index 00000000000000..4eadba80088c1b --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-df-dualcu-loclist-main.s @@ -0,0 +1,546 @@ +# clang++ -g -gdwarf-4 -emit-llvm -S main.cpp +# llc -O2 -mtriple=x86_64-unknown-linux-gnu main.ll +# void use(int * x, int * y) { +# *x += 4; +# *y -= 2; +# } +# +# int helper(int z_, int d_); +# int x = 0; +# int y = 1; +# int main(int argc, char *argv[]) { +# x = argc; +# y = argc + 
3; +# use(&x, &y); +# return helper(x, y); +# } + + .text + .file "main.cpp" + .file 1 "." "main.cpp" + .globl _Z3usePiS_ # -- Begin function _Z3usePiS_ + .p2align 4, 0x90 + .type _Z3usePiS_,@function +_Z3usePiS_: # @_Z3usePiS_ +.Lfunc_begin0: + .loc 1 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: use:x <- $rdi + #DEBUG_VALUE: use:y <- $rsi + .loc 1 2 4 prologue_end # main.cpp:2:4 + addl $4, (%rdi) + .loc 1 3 4 # main.cpp:3:4 + addl $-2, (%rsi) + .loc 1 4 1 # main.cpp:4:1 + retq +.Ltmp0: +.Lfunc_end0: + .size _Z3usePiS_, .Lfunc_end0-_Z3usePiS_ + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 1 9 0 # main.cpp:9:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: main:argc <- $edi + #DEBUG_VALUE: main:argv <- $rsi + # kill: def $edi killed $edi def $rdi + .loc 1 2 4 prologue_end # main.cpp:2:4 + leal 4(%rdi), %eax + movl %eax, x(%rip) +.Ltmp1: + #DEBUG_VALUE: use:x <- undef + .loc 1 3 4 # main.cpp:3:4 + leal 1(%rdi), %esi +.Ltmp2: + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi + movl %esi, y(%rip) +.Ltmp3: + #DEBUG_VALUE: use:y <- undef + .loc 1 13 11 # main.cpp:13:11 + movl %eax, %edi +.Ltmp4: + #DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $edi + jmp _Z6helperii # TAILCALL +.Ltmp5: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .type x,@object # @x + .bss + .globl x + .p2align 2 +x: + .long 0 # 0x0 + .size x, 4 + + .type y,@object # @y + .data + .globl y + .p2align 2 +y: + .long 1 # 0x1 + .size y, 4 + + .section .debug_loc.dwo,"e",@progbits +.Ldebug_loc0: + .byte 3 + .byte 3 + .long .Ltmp4-.Lfunc_begin1 + .short 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .byte 3 + .byte 5 + .long .Lfunc_end1-.Ltmp4 + .short 4 # Loc expr size + .byte 243 # DW_OP_GNU_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .byte 0 
+.Ldebug_loc1: + .byte 3 + .byte 3 + .long .Ltmp2-.Lfunc_begin1 + .short 1 # Loc expr size + .byte 84 # DW_OP_reg4 + .byte 3 + .byte 6 + .long .Lfunc_end1-.Ltmp2 + .short 4 # Loc expr size + .byte 243 # DW_OP_GNU_entry_value + .byte 1 # 1 + .byte 84 # DW_OP_reg4 + .byte 159 # DW_OP_stack_value + .byte 0 + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .ascii "\260B" # DW_AT_GNU_dwo_name + .byte 14 # DW_FORM_strp + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .ascii "\263B" # DW_AT_GNU_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lskel_string0 # DW_AT_comp_dir + .long .Lskel_string1 # DW_AT_GNU_dwo_name + .quad -4922924650784735988 # DW_AT_GNU_dwo_id + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_GNU_addr_base +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "main.dwo" # string offset=2 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "x" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "y" # string offset=6 +.Linfo_string3: + .asciz "_Z3usePiS_" # string offset=8 +.Linfo_string4: + .asciz "use" # string offset=19 +.Linfo_string5: + .asciz "_Z6helperii" # string offset=23 +.Linfo_string6: + .asciz "helper" # string offset=35 +.Linfo_string7: + .asciz "main" # string offset=42 +.Linfo_string8: + .asciz "argc" # string offset=47 +.Linfo_string9: + .asciz "argv" # string offset=52 +.Linfo_string10: + .asciz "char" # string offset=57 +.Linfo_string11: + .asciz "clang version 15.0.0" # string offset=62 +.Linfo_string12: + .asciz "main.cpp" # string offset=83 +.Linfo_string13: + .asciz "main.dwo" # string offset=92 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 19 + .long 23 + .long 35 + .long 42 + .long 47 + .long 52 + .long 57 + .long 62 + .long 83 + .long 92 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0xbd DW_TAG_compile_unit + .byte 11 # DW_AT_producer + .short 33 # DW_AT_language + .byte 12 # DW_AT_name + .byte 13 # DW_AT_GNU_dwo_name + .quad -4922924650784735988 # DW_AT_GNU_dwo_id + .byte 2 # Abbrev [2] 0x19:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 0 + .byte 3 # Abbrev [3] 0x24:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x28:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 1 + .byte 4 # Abbrev [4] 0x33:0x1b DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_GNU_all_call_sites + .long 78 # DW_AT_abstract_origin + .byte 5 # Abbrev [5] 0x3f:0x7 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 85 + .long 84 # DW_AT_abstract_origin + .byte 5 # Abbrev [5] 0x46:0x7 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 84 + .long 92 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x4e:0x17 DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 1 # DW_AT_inline + .byte 7 # Abbrev [7] 0x54:0x8 DW_TAG_formal_parameter + .byte 0 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 101 # DW_AT_type + .byte 7 # Abbrev [7] 0x5c:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 101 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 8 # Abbrev [8] 0x65:0x5 DW_TAG_pointer_type + .long 36 # DW_AT_type + .byte 9 # Abbrev [9] 
0x6a:0x3b DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_GNU_all_call_sites + .byte 7 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 36 # DW_AT_type + # DW_AT_external + .byte 10 # Abbrev [10] 0x79:0xc DW_TAG_formal_parameter + .long .Ldebug_loc0-.debug_loc.dwo # DW_AT_location + .byte 8 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 36 # DW_AT_type + .byte 10 # Abbrev [10] 0x85:0xc DW_TAG_formal_parameter + .long .Ldebug_loc1-.debug_loc.dwo # DW_AT_location + .byte 9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 185 # DW_AT_type + .byte 11 # Abbrev [11] 0x91:0xd DW_TAG_inlined_subroutine + .long 78 # DW_AT_abstract_origin + .byte 3 # DW_AT_low_pc + .long .Ltmp3-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_call_file + .byte 12 # DW_AT_call_line + .byte 4 # DW_AT_call_column + .byte 12 # Abbrev [12] 0x9e:0x6 DW_TAG_GNU_call_site + .long 165 # DW_AT_abstract_origin + # DW_AT_GNU_tail_call + .byte 4 # DW_AT_low_pc + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0xa5:0x14 DW_TAG_subprogram + .byte 5 # DW_AT_linkage_name + .byte 6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 6 # DW_AT_decl_line + .long 36 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0xae:0x5 DW_TAG_formal_parameter + .long 36 # DW_AT_type + .byte 14 # Abbrev [14] 0xb3:0x5 DW_TAG_formal_parameter + .long 36 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 8 # Abbrev [8] 0xb9:0x5 DW_TAG_pointer_type + .long 190 # DW_AT_type + .byte 8 # Abbrev [8] 0xbe:0x5 DW_TAG_pointer_type + .long 195 # DW_AT_type + .byte 3 # Abbrev [3] 0xc3:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # 
DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\260B" # DW_AT_GNU_dwo_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .ascii "\227B" # DW_AT_GNU_all_call_sites + .byte 25 # DW_FORM_flag_present + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + 
.ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 32 # DW_AT_inline + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .ascii "\227B" # DW_AT_GNU_all_call_sites + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + 
.byte 11 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 0 # DW_CHILDREN_no + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .ascii "\211\202\001" # DW_TAG_GNU_call_site + .byte 0 # DW_CHILDREN_no + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .ascii "\225B" # DW_AT_GNU_tail_call + .byte 25 # DW_FORM_flag_present + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 13 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 14 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits +.Laddr_table_base0: + .quad x + .quad y + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp5 + .quad .Ltmp4 + .quad .Ltmp2 + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-main.ll b/bolt/test/X86/Inputs/dwarf4-df-dualcu-main.ll deleted 
file mode 100644 index 704f5659b29d8d..00000000000000 --- a/bolt/test/X86/Inputs/dwarf4-df-dualcu-main.ll +++ /dev/null @@ -1,129 +0,0 @@ -; clang++ -g -gdwarf-4 -emit-llvm -S main.cpp -; void use(int * x, int * y) { -; *x += 4; -; *y -= 2; -; } -; -; int helper(int z_, int d_); -; int x = 0; -; int y = 1; -; int main(int argc, char *argv[]) { -; x = argc; -; y = argc + 3; -; use(&x, &y); -; return helper(x, y); -; } - -; ModuleID = 'main.cpp' -source_filename = "main.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@x = dso_local global i32 0, align 4, !dbg !0 -@y = dso_local global i32 1, align 4, !dbg !5 - -; Function Attrs: mustprogress noinline nounwind optnone uwtable -define dso_local void @_Z3usePiS_(i32* noundef %x, i32* noundef %y) #0 !dbg !14 { -entry: - %x.addr = alloca i32*, align 8 - %y.addr = alloca i32*, align 8 - store i32* %x, i32** %x.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %x.addr, metadata !19, metadata !DIExpression()), !dbg !20 - store i32* %y, i32** %y.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %y.addr, metadata !21, metadata !DIExpression()), !dbg !22 - %0 = load i32*, i32** %x.addr, align 8, !dbg !23 - %1 = load i32, i32* %0, align 4, !dbg !24 - %add = add nsw i32 %1, 4, !dbg !24 - store i32 %add, i32* %0, align 4, !dbg !24 - %2 = load i32*, i32** %y.addr, align 8, !dbg !25 - %3 = load i32, i32* %2, align 4, !dbg !26 - %sub = sub nsw i32 %3, 2, !dbg !26 - store i32 %sub, i32* %2, align 4, !dbg !26 - ret void, !dbg !27 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -; Function Attrs: mustprogress noinline norecurse optnone uwtable -define dso_local noundef i32 @main(i32 noundef %argc, i8** noundef %argv) #2 !dbg !28 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca 
i8**, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !34, metadata !DIExpression()), !dbg !35 - store i8** %argv, i8*** %argv.addr, align 8 - call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !36, metadata !DIExpression()), !dbg !37 - %0 = load i32, i32* %argc.addr, align 4, !dbg !38 - store i32 %0, i32* @x, align 4, !dbg !39 - %1 = load i32, i32* %argc.addr, align 4, !dbg !40 - %add = add nsw i32 %1, 3, !dbg !41 - store i32 %add, i32* @y, align 4, !dbg !42 - call void @_Z3usePiS_(i32* noundef @x, i32* noundef @y), !dbg !43 - %2 = load i32, i32* @x, align 4, !dbg !44 - %3 = load i32, i32* @y, align 4, !dbg !45 - %call = call noundef i32 @_Z6helperii(i32 noundef %2, i32 noundef %3), !dbg !46 - ret i32 %call, !dbg !47 -} - -declare dso_local noundef i32 @_Z6helperii(i32 noundef, i32 noundef) #3 - -attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } -attributes #2 = { mustprogress noinline norecurse optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 7, type: !7, 
isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "main.cpp", directory: ".") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "y", scope: !2, file: !3, line: 8, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 4} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{i32 7, !"frame-pointer", i32 2} -!13 = !{!"clang version 15.0.0"} -!14 = distinct !DISubprogram(name: "use", linkageName: "_Z3usePiS_", scope: !3, file: !3, line: 1, type: !15, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!15 = !DISubroutineType(types: !16) -!16 = !{null, !17, !17} -!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) -!18 = !{} -!19 = !DILocalVariable(name: "x", arg: 1, scope: !14, file: !3, line: 1, type: !17) -!20 = !DILocation(line: 1, column: 16, scope: !14) -!21 = !DILocalVariable(name: "y", arg: 2, scope: !14, file: !3, line: 1, type: !17) -!22 = !DILocation(line: 1, column: 25, scope: !14) -!23 = !DILocation(line: 2, column: 2, scope: !14) -!24 = !DILocation(line: 2, column: 4, scope: !14) -!25 = !DILocation(line: 3, column: 2, scope: !14) -!26 = !DILocation(line: 3, column: 4, scope: !14) -!27 = !DILocation(line: 4, column: 1, scope: !14) -!28 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 9, type: !29, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!29 = !DISubroutineType(types: !30) -!30 = !{!7, !7, !31} -!31 = !DIDerivedType(tag: DW_TAG_pointer_type, 
baseType: !32, size: 64) -!32 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64) -!33 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!34 = !DILocalVariable(name: "argc", arg: 1, scope: !28, file: !3, line: 9, type: !7) -!35 = !DILocation(line: 9, column: 15, scope: !28) -!36 = !DILocalVariable(name: "argv", arg: 2, scope: !28, file: !3, line: 9, type: !31) -!37 = !DILocation(line: 9, column: 27, scope: !28) -!38 = !DILocation(line: 10, column: 8, scope: !28) -!39 = !DILocation(line: 10, column: 6, scope: !28) -!40 = !DILocation(line: 11, column: 8, scope: !28) -!41 = !DILocation(line: 11, column: 13, scope: !28) -!42 = !DILocation(line: 11, column: 6, scope: !28) -!43 = !DILocation(line: 12, column: 4, scope: !28) -!44 = !DILocation(line: 13, column: 18, scope: !28) -!45 = !DILocation(line: 13, column: 21, scope: !28) -!46 = !DILocation(line: 13, column: 11, scope: !28) -!47 = !DILocation(line: 13, column: 4, scope: !28) diff --git a/bolt/test/X86/Inputs/dwarf4-df-dualcu-main.s b/bolt/test/X86/Inputs/dwarf4-df-dualcu-main.s new file mode 100644 index 00000000000000..0f79e7ec18f701 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-df-dualcu-main.s @@ -0,0 +1,420 @@ +# clang++ -g -gdwarf-4 -emit-llvm -S main.cpp +# llc -O0 -mtriple=x86_64-unknown-linux-gnu main.ll +# void use(int * x, int * y) { +# *x += 4; +# *y -= 2; +# } +# +# int helper(int z_, int d_); +# int x = 0; +# int y = 1; +# int main(int argc, char *argv[]) { +# x = argc; +# y = argc + 3; +# use(&x, &y); +# return helper(x, y); +# } + + .text + .file "main.cpp" + .file 1 "." 
"main.cpp" + .globl _Z3usePiS_ # -- Begin function _Z3usePiS_ + .p2align 4, 0x90 + .type _Z3usePiS_,@function +_Z3usePiS_: # @_Z3usePiS_ +.Lfunc_begin0: + .loc 1 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp0: + .loc 1 2 2 prologue_end # main.cpp:2:2 + movq -8(%rbp), %rax + .loc 1 2 4 is_stmt 0 # main.cpp:2:4 + movl (%rax), %ecx + addl $4, %ecx + movl %ecx, (%rax) + .loc 1 3 2 is_stmt 1 # main.cpp:3:2 + movq -16(%rbp), %rax + .loc 1 3 4 is_stmt 0 # main.cpp:3:4 + movl (%rax), %ecx + subl $2, %ecx + movl %ecx, (%rax) + .loc 1 4 1 is_stmt 1 # main.cpp:4:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Z3usePiS_, .Lfunc_end0-_Z3usePiS_ + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 1 9 0 # main.cpp:9:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp2: + .loc 1 10 8 prologue_end # main.cpp:10:8 + movl -8(%rbp), %eax + .loc 1 10 6 is_stmt 0 # main.cpp:10:6 + movl %eax, x + .loc 1 11 8 is_stmt 1 # main.cpp:11:8 + movl -8(%rbp), %eax + .loc 1 11 13 is_stmt 0 # main.cpp:11:13 + addl $3, %eax + .loc 1 11 6 # main.cpp:11:6 + movl %eax, y + .loc 1 12 4 is_stmt 1 # main.cpp:12:4 + movabsq $x, %rdi + movabsq $y, %rsi + callq _Z3usePiS_ + .loc 1 13 18 # main.cpp:13:18 + movl x, %edi + .loc 1 13 21 is_stmt 0 # main.cpp:13:21 + movl y, %esi + .loc 1 13 11 # main.cpp:13:11 + callq _Z6helperii + .loc 1 13 4 # main.cpp:13:4 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp3: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .type x,@object # @x + .bss + 
.globl x + .p2align 2 +x: + .long 0 # 0x0 + .size x, 4 + + .type y,@object # @y + .data + .globl y + .p2align 2 +y: + .long 1 # 0x1 + .size y, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .ascii "\260B" # DW_AT_GNU_dwo_name + .byte 14 # DW_FORM_strp + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .ascii "\263B" # DW_AT_GNU_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lskel_string0 # DW_AT_comp_dir + .long .Lskel_string1 # DW_AT_GNU_dwo_name + .quad 4587139501222431648 # DW_AT_GNU_dwo_id + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_GNU_addr_base +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "main.dwo" # string offset=2 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "x" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "y" # string offset=6 +.Linfo_string3: + .asciz "_Z3usePiS_" # string offset=8 +.Linfo_string4: + .asciz "use" # string offset=19 +.Linfo_string5: + .asciz "main" # string offset=23 +.Linfo_string6: + .asciz "argc" # string offset=28 +.Linfo_string7: + .asciz "argv" # string offset=33 +.Linfo_string8: + .asciz "char" # string offset=38 +.Linfo_string9: + .asciz "clang version 15.0.0" # string offset=43 +.Linfo_string10: + .asciz "main.cpp" # string offset=64 +.Linfo_string11: + .asciz "main.dwo" # string offset=73 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 19 + .long 23 + .long 28 + .long 33 + .long 38 + .long 43 + .long 64 + .long 73 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x85 DW_TAG_compile_unit + .byte 9 # DW_AT_producer + .short 33 # DW_AT_language + .byte 10 # DW_AT_name + .byte 11 # DW_AT_GNU_dwo_name + .quad 4587139501222431648 # DW_AT_GNU_dwo_id + .byte 2 # Abbrev [2] 0x19:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 0 + .byte 3 # Abbrev [3] 0x24:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x28:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 1 + .byte 4 # Abbrev [4] 0x33:0x23 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 5 # Abbrev [5] 0x3f:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 0 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 124 # DW_AT_type + .byte 5 # Abbrev [5] 0x4a:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 2 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 124 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x56:0x26 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 5 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 36 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x65:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 6 # DW_AT_name + .byte 1 # 
DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 36 # DW_AT_type + .byte 5 # Abbrev [5] 0x70:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 7 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 129 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x7c:0x5 DW_TAG_pointer_type + .long 36 # DW_AT_type + .byte 7 # Abbrev [7] 0x81:0x5 DW_TAG_pointer_type + .long 134 # DW_AT_type + .byte 7 # Abbrev [7] 0x86:0x5 DW_TAG_pointer_type + .long 139 # DW_AT_type + .byte 3 # Abbrev [3] 0x8b:0x4 DW_TAG_base_type + .byte 8 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\260B" # DW_AT_GNU_dwo_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + 
.byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits +.Laddr_table_base0: + .quad x + .quad y + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .ident "clang version 15.0.0" + .section 
".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.ll b/bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.ll deleted file mode 100644 index 4d53bc4c386edc..00000000000000 --- a/bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.ll +++ /dev/null @@ -1,81 +0,0 @@ -; clang++ -g -gdwarf-5 -emit-llvm -S helper.cpp -; int z = 0; -; int d = 0; -; -; int helper(int z_, int d_) { -; z += z_; -; d += d_; -; return z * d; -; } - -; ModuleID = 'helper.cpp' -source_filename = "helper.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@z = dso_local global i32 0, align 4, !dbg !0 -@d = dso_local global i32 0, align 4, !dbg !5 - -; Function Attrs: mustprogress noinline nounwind optnone uwtable -define dso_local noundef i32 @_Z6helperii(i32 noundef %z_, i32 noundef %d_) #0 !dbg !14 { -entry: - %z_.addr = alloca i32, align 4 - %d_.addr = alloca i32, align 4 - store i32 %z_, i32* %z_.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %z_.addr, metadata !18, metadata !DIExpression()), !dbg !19 - store i32 %d_, i32* %d_.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %d_.addr, metadata !20, metadata !DIExpression()), !dbg !21 - %0 = load i32, i32* %z_.addr, align 4, !dbg !22 - %1 = load i32, i32* @z, align 4, !dbg !23 - %add = add nsw i32 %1, %0, !dbg !23 - store i32 %add, i32* @z, align 4, !dbg !23 - %2 = load i32, i32* %d_.addr, align 4, !dbg !24 - %3 = load i32, i32* @d, align 4, !dbg !25 - %add1 = add nsw i32 %3, %2, !dbg !25 - store i32 %add1, i32* @d, align 4, !dbg !25 - %4 = load i32, i32* @z, align 4, !dbg !26 - %5 = load i32, i32* @d, align 4, !dbg !27 - %mul = mul nsw i32 %4, %5, !dbg !28 - ret i32 %mul, !dbg !29 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { 
mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "z", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "helper.dwo", emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: GNU) -!3 = !DIFile(filename: "helper.cpp", directory: ".", checksumkind: CSK_MD5, checksum: "e635924a35b65444173d0c76a54b866f") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "d", scope: !2, file: !3, line: 2, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 5} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{i32 7, !"frame-pointer", i32 2} -!13 = !{!"clang version 15.0.0"} -!14 = distinct !DISubprogram(name: "helper", linkageName: "_Z6helperii", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !17) -!15 = !DISubroutineType(types: !16) -!16 = !{!7, !7, !7} -!17 = !{} -!18 = !DILocalVariable(name: "z_", arg: 1, scope: !14, file: !3, line: 4, type: !7) -!19 = !DILocation(line: 4, column: 16, scope: !14) -!20 = !DILocalVariable(name: "d_", arg: 2, scope: !14, file: !3, line: 4, 
type: !7) -!21 = !DILocation(line: 4, column: 24, scope: !14) -!22 = !DILocation(line: 5, column: 7, scope: !14) -!23 = !DILocation(line: 5, column: 4, scope: !14) -!24 = !DILocation(line: 6, column: 7, scope: !14) -!25 = !DILocation(line: 6, column: 4, scope: !14) -!26 = !DILocation(line: 7, column: 9, scope: !14) -!27 = !DILocation(line: 7, column: 13, scope: !14) -!28 = !DILocation(line: 7, column: 11, scope: !14) -!29 = !DILocation(line: 7, column: 2, scope: !14) diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.s b/bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.s new file mode 100644 index 00000000000000..b0f973cc4605a9 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-df-dualcu-helper.s @@ -0,0 +1,348 @@ +# clang++ -g -gdwarf-5 -emit-llvm -S helper.cpp +# llc -O0 -mtriple=x86_64-unknown-linux-gnu helper.ll +# int z = 0; +# int d = 0; +# +# int helper(int z_, int d_) { +# z += z_; +# d += d_; +# return z * d; +# } + + .text + .file "helper.cpp" + .file 0 "." "helper.cpp" md5 0xe635924a35b65444173d0c76a54b866f + .globl _Z6helperii # -- Begin function _Z6helperii + .p2align 4, 0x90 + .type _Z6helperii,@function +_Z6helperii: # @_Z6helperii +.Lfunc_begin0: + .loc 0 4 0 # helper.cpp:4:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) + movl %esi, -8(%rbp) +.Ltmp0: + .loc 0 5 7 prologue_end # helper.cpp:5:7 + movl -4(%rbp), %eax + .loc 0 5 4 is_stmt 0 # helper.cpp:5:4 + addl z, %eax + movl %eax, z + .loc 0 6 7 is_stmt 1 # helper.cpp:6:7 + movl -8(%rbp), %eax + .loc 0 6 4 is_stmt 0 # helper.cpp:6:4 + addl d, %eax + movl %eax, d + .loc 0 7 9 is_stmt 1 # helper.cpp:7:9 + movl z, %eax + .loc 0 7 11 is_stmt 0 # helper.cpp:7:11 + imull d, %eax + .loc 0 7 2 # helper.cpp:7:2 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Z6helperii, .Lfunc_end0-_Z6helperii + .cfi_endproc + # -- End function + .type z,@object # @z + .bss + 
.globl z + .p2align 2 +z: + .long 0 # 0x0 + .size z, 4 + + .type d,@object # @d + .globl d + .p2align 2 +d: + .long 0 # 0x0 + .size d, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_skeleton_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad -3815466543033299045 + .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .byte 0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .byte 1 # DW_AT_dwo_name + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 12 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "helper.dwo" # string offset=2 + .section .debug_str_offsets,"",@progbits + .long .Lskel_string0 + .long .Lskel_string1 + .section .debug_str_offsets.dwo,"e",@progbits + .long 44 # Length of String Offsets Set + .short 5 + .short 0 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "z" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "d" # string offset=6 +.Linfo_string3: + .asciz "_Z6helperii" # string offset=8 +.Linfo_string4: + .asciz "helper" # string offset=20 +.Linfo_string5: + .asciz "z_" # string offset=27 +.Linfo_string6: + .asciz "d_" # string offset=30 +.Linfo_string7: + .asciz "clang version 15.0.0" # string offset=33 +.Linfo_string8: + .asciz "helper.cpp" # string offset=54 +.Linfo_string9: + .asciz "helper.dwo" # string offset=65 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 20 + .long 27 + .long 30 + .long 33 + .long 54 + .long 65 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. 
Section + .quad -3815466543033299045 + .byte 1 # Abbrev [1] 0x14:0x48 DW_TAG_compile_unit + .byte 7 # DW_AT_producer + .short 33 # DW_AT_language + .byte 8 # DW_AT_name + .byte 9 # DW_AT_dwo_name + .byte 2 # Abbrev [2] 0x1a:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 0 + .byte 3 # Abbrev [3] 0x25:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x29:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 1 + .byte 4 # Abbrev [4] 0x34:0x27 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x44:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 37 # DW_AT_type + .byte 5 # Abbrev [5] 0x4f:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 37 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # 
Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size 
+.Laddr_table_base0: + .quad z + .quad d + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 52 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "helper" # External Name + .long 41 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "d" # External Name + .long 26 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "z" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 37 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "int" # External Name + .long 0 # End Mark +.LpubTypes_end0: + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.ll b/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.ll deleted file mode 100644 index 6848c71a5a7d31..00000000000000 --- a/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.ll +++ /dev/null @@ -1,61 +0,0 @@ -; ModuleID = 'helper.cpp' -source_filename = "helper.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@z = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0 -@d = dso_local local_unnamed_addr global i32 0, align 4, !dbg !5 - -; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable -define dso_local noundef i32 @_Z6helperii(i32 noundef %z_, i32 noundef %d_) local_unnamed_addr #0 !dbg !13 { 
-entry: - call void @llvm.dbg.value(metadata i32 %z_, metadata !17, metadata !DIExpression()), !dbg !19 - call void @llvm.dbg.value(metadata i32 %d_, metadata !18, metadata !DIExpression()), !dbg !19 - %0 = load i32, i32* @z, align 4, !dbg !20, !tbaa !21 - %add = add nsw i32 %0, %z_, !dbg !20 - store i32 %add, i32* @z, align 4, !dbg !20, !tbaa !21 - %1 = load i32, i32* @d, align 4, !dbg !25, !tbaa !21 - %add1 = add nsw i32 %1, %d_, !dbg !25 - store i32 %add1, i32* @d, align 4, !dbg !25, !tbaa !21 - %mul = mul nsw i32 %add1, %add, !dbg !26 - ret i32 %mul, !dbg !27 -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.value(metadata, metadata, metadata) #1 - -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11} -!llvm.ident = !{!12} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "z", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "helper.cpp", directory: ".", checksumkind: CSK_MD5, checksum: "e635924a35b65444173d0c76a54b866f") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "d", scope: !2, file: !3, line: 2, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: 
DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 5} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{!"clang version 15.0.0"} -!13 = distinct !DISubprogram(name: "helper", linkageName: "_Z6helperii", scope: !3, file: !3, line: 4, type: !14, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !16) -!14 = !DISubroutineType(types: !15) -!15 = !{!7, !7, !7} -!16 = !{!17, !18} -!17 = !DILocalVariable(name: "z_", arg: 1, scope: !13, file: !3, line: 4, type: !7) -!18 = !DILocalVariable(name: "d_", arg: 2, scope: !13, file: !3, line: 4, type: !7) -!19 = !DILocation(line: 0, scope: !13) -!20 = !DILocation(line: 5, column: 4, scope: !13) -!21 = !{!22, !22, i64 0} -!22 = !{!"int", !23, i64 0} -!23 = !{!"omnipotent char", !24, i64 0} -!24 = !{!"Simple C++ TBAA"} -!25 = !DILocation(line: 6, column: 4, scope: !13) -!26 = !DILocation(line: 7, column: 11, scope: !13) -!27 = !DILocation(line: 7, column: 2, scope: !13) diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.s b/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.s new file mode 100644 index 00000000000000..389d83c692ba86 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-helper.s @@ -0,0 +1,351 @@ +# clang++ -g -gdwarf-5 -emit-llvm -S helper.cpp +# llc -O2 -mtriple=x86_64-unknown-linux-gnu helper.ll +# int z = 0; +# int d = 0; +# +# int helper(int z_, int d_) { +# z += z_; +# d += d_; +# return z * d; +# } + + .text + .file "helper.cpp" + .file 0 "." 
"helper.cpp" md5 0xe635924a35b65444173d0c76a54b866f + .globl _Z6helperii # -- Begin function _Z6helperii + .p2align 4, 0x90 + .type _Z6helperii,@function +_Z6helperii: # @_Z6helperii +.Lfunc_begin0: + .loc 0 4 0 # helper.cpp:4:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: helper:z_ <- $edi + #DEBUG_VALUE: helper:d_ <- $esi + movl %esi, %eax +.Ltmp0: + .loc 0 5 4 prologue_end # helper.cpp:5:4 + addl z(%rip), %edi +.Ltmp1: + #DEBUG_VALUE: helper:z_ <- [DW_OP_LLVM_entry_value 1] $edi + .loc 0 6 4 # helper.cpp:6:4 + addl d(%rip), %eax + .loc 0 5 4 # helper.cpp:5:4 + movl %edi, z(%rip) + .loc 0 6 4 # helper.cpp:6:4 + movl %eax, d(%rip) + .loc 0 7 11 # helper.cpp:7:11 + imull %edi, %eax + .loc 0 7 2 is_stmt 0 # helper.cpp:7:2 + retq +.Ltmp2: +.Lfunc_end0: + .size _Z6helperii, .Lfunc_end0-_Z6helperii + .cfi_endproc + # -- End function + .type z,@object # @z + .bss + .globl z + .p2align 2 +z: + .long 0 # 0x0 + .size z, 4 + + .type d,@object # @d + .globl d + .p2align 2 +d: + .long 0 # 0x0 + .size d, 4 + + .section .debug_loclists.dwo,"e",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lloclists_table_base0: + .long .Ldebug_loc0-.Lloclists_table_base0 +.Ldebug_loc0: + .byte 1 # DW_LLE_base_addressx + .byte 2 # base address index + .byte 4 # DW_LLE_offset_pair + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp1-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp1-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end0-.Lfunc_begin0 # ending offset + .byte 4 # Loc expr size + .byte 163 # DW_OP_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_abbrev,"",@progbits + 
.byte 1 # Abbreviation Code + .byte 74 # DW_TAG_skeleton_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad 9211989493039430794 + .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .byte 0 # DW_AT_comp_dir + .byte 1 # DW_AT_dwo_name + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 12 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "helper.dwo" # string offset=2 + .section .debug_str_offsets,"",@progbits + .long .Lskel_string0 + .long .Lskel_string1 + .section .debug_str_offsets.dwo,"e",@progbits + .long 44 # Length of String Offsets Set + .short 5 + .short 0 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "z" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "d" # string offset=6 +.Linfo_string3: + .asciz "_Z6helperii" # string offset=8 +.Linfo_string4: + .asciz "helper" # string offset=20 +.Linfo_string5: + .asciz "z_" # string offset=27 +.Linfo_string6: + .asciz "d_" # string offset=30 +.Linfo_string7: + .asciz "clang version 15.0.0" # string offset=33 +.Linfo_string8: + .asciz "helper.cpp" # string offset=54 +.Linfo_string9: + .asciz "helper.dwo" # string offset=65 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 20 + .long 27 + .long 30 + .long 33 + .long 54 + .long 65 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. 
Section + .quad 9211989493039430794 + .byte 1 # Abbrev [1] 0x14:0x45 DW_TAG_compile_unit + .byte 7 # DW_AT_producer + .short 33 # DW_AT_language + .byte 8 # DW_AT_name + .byte 9 # DW_AT_dwo_name + .byte 2 # Abbrev [2] 0x1a:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 0 + .byte 3 # Abbrev [3] 0x25:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x29:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 1 + .byte 4 # Abbrev [4] 0x34:0x24 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x44:0x9 DW_TAG_formal_parameter + .byte 0 # DW_AT_location + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 37 # DW_AT_type + .byte 6 # Abbrev [6] 0x4d:0xa DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 84 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 37 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + 
.byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 34 # DW_FORM_loclistx + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # 
DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad z + .quad d + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.ll b/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.ll deleted file mode 100644 index bdcb094572781e..00000000000000 --- a/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.ll +++ /dev/null @@ -1,99 +0,0 @@ -; ModuleID = 'main.cpp' -source_filename = "main.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@x = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0 -@y = dso_local local_unnamed_addr global i32 1, align 4, !dbg !5 - -; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn uwtable -define dso_local void @_Z3usePiS_(i32* nocapture noundef %x, i32* nocapture noundef %y) local_unnamed_addr #0 !dbg !13 { -entry: - call void @llvm.dbg.value(metadata i32* %x, metadata !18, metadata !DIExpression()), !dbg !20 - call void @llvm.dbg.value(metadata i32* %y, metadata !19, metadata !DIExpression()), !dbg !20 - %0 = load i32, i32* %x, align 4, !dbg !21, !tbaa !22 - %add = add nsw i32 %0, 4, !dbg !21 - store i32 %add, i32* %x, align 4, !dbg !21, !tbaa !22 - %1 = load i32, i32* %y, align 4, !dbg !26, !tbaa !22 - %sub = add nsw i32 %1, -2, !dbg !26 - store i32 %sub, i32* %y, align 4, !dbg !26, !tbaa !22 - ret 
void, !dbg !27 -} - -; Function Attrs: mustprogress norecurse uwtable -define dso_local noundef i32 @main(i32 noundef %argc, i8** nocapture noundef readnone %argv) local_unnamed_addr #1 !dbg !28 { -entry: - call void @llvm.dbg.value(metadata i32 %argc, metadata !35, metadata !DIExpression()), !dbg !37 - call void @llvm.dbg.value(metadata i8** %argv, metadata !36, metadata !DIExpression()), !dbg !37 - call void @llvm.dbg.value(metadata i32* @x, metadata !18, metadata !DIExpression()), !dbg !38 - call void @llvm.dbg.value(metadata i32* @y, metadata !19, metadata !DIExpression()), !dbg !38 - %add.i = add nsw i32 %argc, 4, !dbg !40 - store i32 %add.i, i32* @x, align 4, !dbg !40, !tbaa !22 - %sub.i = add nsw i32 %argc, 1, !dbg !41 - store i32 %sub.i, i32* @y, align 4, !dbg !41, !tbaa !22 - %call = tail call noundef i32 @_Z6helperii(i32 noundef %add.i, i32 noundef %sub.i), !dbg !42 - ret i32 %call, !dbg !43 -} - -declare !dbg !44 dso_local noundef i32 @_Z6helperii(i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.value(metadata, metadata, metadata) #3 - -attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { mustprogress norecurse uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #3 = { nocallback nofree nosync nounwind readnone 
speculatable willreturn } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11} -!llvm.ident = !{!12} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 7, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "main.cpp", directory: ".", checksumkind: CSK_MD5, checksum: "1f627913a0daee879e00a3a51726f0ef") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "y", scope: !2, file: !3, line: 8, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 5} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{!"clang version 15.0.0"} -!13 = distinct !DISubprogram(name: "use", linkageName: "_Z3usePiS_", scope: !3, file: !3, line: 1, type: !14, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !17) -!14 = !DISubroutineType(types: !15) -!15 = !{null, !16, !16} -!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) -!17 = !{!18, !19} -!18 = !DILocalVariable(name: "x", arg: 1, scope: !13, file: !3, line: 1, type: !16) -!19 = !DILocalVariable(name: "y", arg: 2, scope: !13, file: !3, line: 1, type: !16) -!20 = !DILocation(line: 0, scope: !13) -!21 = !DILocation(line: 2, column: 4, scope: !13) -!22 = !{!23, !23, i64 0} -!23 = !{!"int", !24, i64 0} -!24 = !{!"omnipotent char", !25, i64 0} -!25 = !{!"Simple C++ TBAA"} -!26 = !DILocation(line: 3, column: 4, scope: !13) -!27 = !DILocation(line: 4, column: 1, 
scope: !13) -!28 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 9, type: !29, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !34) -!29 = !DISubroutineType(types: !30) -!30 = !{!7, !7, !31} -!31 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !32, size: 64) -!32 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64) -!33 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!34 = !{!35, !36} -!35 = !DILocalVariable(name: "argc", arg: 1, scope: !28, file: !3, line: 9, type: !7) -!36 = !DILocalVariable(name: "argv", arg: 2, scope: !28, file: !3, line: 9, type: !31) -!37 = !DILocation(line: 0, scope: !28) -!38 = !DILocation(line: 0, scope: !13, inlinedAt: !39) -!39 = distinct !DILocation(line: 12, column: 4, scope: !28) -!40 = !DILocation(line: 2, column: 4, scope: !13, inlinedAt: !39) -!41 = !DILocation(line: 3, column: 4, scope: !13, inlinedAt: !39) -!42 = !DILocation(line: 13, column: 11, scope: !28) -!43 = !DILocation(line: 13, column: 4, scope: !28) -!44 = !DISubprogram(name: "helper", linkageName: "_Z6helperii", scope: !3, file: !3, line: 6, type: !45, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !47) -!45 = !DISubroutineType(types: !46) -!46 = !{!7, !7, !7} -!47 = !{} diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.s b/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.s new file mode 100644 index 00000000000000..f2ac24396a9d08 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-df-dualcu-loclist-main.s @@ -0,0 +1,578 @@ +# clang++ -g -gdwarf-5 -emit-llvm -S main.cpp +# llc -O2 -mtriple=x86_64-unknown-linux-gnu main.ll +# void use(int * x, int * y) { +# *x += 4; +# *y -= 2; +# } +# +# int helper(int z_, int d_); +# int x = 0; +# int y = 1; +# int main(int argc, char *argv[]) { +# x = argc; +# y = argc + 3; +# use(&x, &y); +# return helper(x, y); +# } + + .text + .file "main.cpp" + .file 
0 "." "main.cpp" md5 0x1f627913a0daee879e00a3a51726f0ef + .globl _Z3usePiS_ # -- Begin function _Z3usePiS_ + .p2align 4, 0x90 + .type _Z3usePiS_,@function +_Z3usePiS_: # @_Z3usePiS_ +.Lfunc_begin0: + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: use:x <- $rdi + #DEBUG_VALUE: use:y <- $rsi + .loc 0 2 4 prologue_end # main.cpp:2:4 + addl $4, (%rdi) + .loc 0 3 4 # main.cpp:3:4 + addl $-2, (%rsi) + .loc 0 4 1 # main.cpp:4:1 + retq +.Ltmp0: +.Lfunc_end0: + .size _Z3usePiS_, .Lfunc_end0-_Z3usePiS_ + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 9 0 # main.cpp:9:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: main:argc <- $edi + #DEBUG_VALUE: main:argv <- $rsi + # kill: def $edi killed $edi def $rdi + .loc 0 2 4 prologue_end # main.cpp:2:4 + leal 4(%rdi), %eax + movl %eax, x(%rip) +.Ltmp1: + #DEBUG_VALUE: use:x <- undef + .loc 0 3 4 # main.cpp:3:4 + leal 1(%rdi), %esi +.Ltmp2: + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi + movl %esi, y(%rip) +.Ltmp3: + #DEBUG_VALUE: use:y <- undef + .loc 0 13 11 # main.cpp:13:11 + movl %eax, %edi +.Ltmp4: + #DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $edi + jmp _Z6helperii # TAILCALL +.Ltmp5: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .type x,@object # @x + .bss + .globl x + .p2align 2 +x: + .long 0 # 0x0 + .size x, 4 + + .type y,@object # @y + .data + .globl y + .p2align 2 +y: + .long 1 # 0x1 + .size y, 4 + + .section .debug_loclists.dwo,"e",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 2 # Offset entry count +.Lloclists_table_base0: + .long .Ldebug_loc0-.Lloclists_table_base0 + .long .Ldebug_loc1-.Lloclists_table_base0 +.Ldebug_loc0: + .byte 1 # DW_LLE_base_addressx + .byte 2 # base 
address index + .byte 4 # DW_LLE_offset_pair + .uleb128 .Lfunc_begin1-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp4-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp4-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end1-.Lfunc_begin0 # ending offset + .byte 4 # Loc expr size + .byte 163 # DW_OP_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_loc1: + .byte 1 # DW_LLE_base_addressx + .byte 2 # base address index + .byte 4 # DW_LLE_offset_pair + .uleb128 .Lfunc_begin1-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp2-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 84 # DW_OP_reg4 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp2-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end1-.Lfunc_begin0 # ending offset + .byte 4 # Loc expr size + .byte 163 # DW_OP_entry_value + .byte 1 # 1 + .byte 84 # DW_OP_reg4 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_skeleton_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .quad 4593992475441569756 + .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .byte 0 # DW_AT_comp_dir + .byte 1 # DW_AT_dwo_name + .byte 2 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 12 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." # string offset=0 +.Lskel_string1: + .asciz "main.dwo" # string offset=2 + .section .debug_str_offsets,"",@progbits + .long .Lskel_string0 + .long .Lskel_string1 + .section .debug_str_offsets.dwo,"e",@progbits + .long 60 # Length of String Offsets Set + .short 5 + .short 0 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "x" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "y" # string offset=6 +.Linfo_string3: + .asciz "_Z3usePiS_" # string offset=8 +.Linfo_string4: + .asciz "use" # string offset=19 +.Linfo_string5: + .asciz "_Z6helperii" # string offset=23 +.Linfo_string6: + .asciz "helper" # string offset=35 +.Linfo_string7: + .asciz "main" # string offset=42 +.Linfo_string8: + .asciz "argc" # string offset=47 +.Linfo_string9: + .asciz "argv" # string offset=52 +.Linfo_string10: + .asciz "char" # string offset=57 +.Linfo_string11: + .asciz "clang version 15.0.0" # string offset=62 +.Linfo_string12: + .asciz "main.cpp" # string offset=83 +.Linfo_string13: + .asciz "main.dwo" # string offset=92 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 19 + .long 23 + .long 35 + .long 42 + .long 47 + .long 52 + .long 57 + .long 62 + .long 83 + .long 92 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + 
.short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Section + .quad 4593992475441569756 + .byte 1 # Abbrev [1] 0x14:0xae DW_TAG_compile_unit + .byte 11 # DW_AT_producer + .short 33 # DW_AT_language + .byte 12 # DW_AT_name + .byte 13 # DW_AT_dwo_name + .byte 2 # Abbrev [2] 0x1a:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 0 + .byte 3 # Abbrev [3] 0x25:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x29:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 1 + .byte 4 # Abbrev [4] 0x34:0x1b DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .long 79 # DW_AT_abstract_origin + .byte 5 # Abbrev [5] 0x40:0x7 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 85 + .long 84 # DW_AT_abstract_origin + .byte 5 # Abbrev [5] 0x47:0x7 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 84 + .long 92 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x4f:0x16 DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + # DW_AT_inline + .byte 7 # Abbrev [7] 0x54:0x8 DW_TAG_formal_parameter + .byte 0 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 101 # DW_AT_type + .byte 7 # Abbrev [7] 0x5c:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 101 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 8 # Abbrev [8] 0x65:0x5 
DW_TAG_pointer_type + .long 37 # DW_AT_type + .byte 9 # Abbrev [9] 0x6a:0x35 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 10 # Abbrev [10] 0x79:0x9 DW_TAG_formal_parameter + .byte 0 # DW_AT_location + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 37 # DW_AT_type + .byte 10 # Abbrev [10] 0x82:0x9 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 179 # DW_AT_type + .byte 11 # Abbrev [11] 0x8b:0xd DW_TAG_inlined_subroutine + .long 79 # DW_AT_abstract_origin + .byte 3 # DW_AT_low_pc + .long .Ltmp3-.Lfunc_begin1 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 12 # DW_AT_call_line + .byte 4 # DW_AT_call_column + .byte 12 # Abbrev [12] 0x98:0x6 DW_TAG_call_site + .long 159 # DW_AT_call_origin + # DW_AT_call_tail_call + .byte 4 # DW_AT_call_pc + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x9f:0x14 DW_TAG_subprogram + .byte 5 # DW_AT_linkage_name + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 6 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0xa8:0x5 DW_TAG_formal_parameter + .long 37 # DW_AT_type + .byte 14 # Abbrev [14] 0xad:0x5 DW_TAG_formal_parameter + .long 37 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 8 # Abbrev [8] 0xb3:0x5 DW_TAG_pointer_type + .long 184 # DW_AT_type + .byte 8 # Abbrev [8] 0xb8:0x5 DW_TAG_pointer_type + .long 189 # DW_AT_type + .byte 3 # Abbrev [3] 0xbd:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 
17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line 
+ .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 34 # DW_FORM_loclistx + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 0 # DW_CHILDREN_no + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + 
.byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 72 # DW_TAG_call_site + .byte 0 # DW_CHILDREN_no + .byte 127 # DW_AT_call_origin + .byte 19 # DW_FORM_ref4 + .ascii "\202\001" # DW_AT_call_tail_call + .byte 25 # DW_FORM_flag_present + .ascii "\201\001" # DW_AT_call_pc + .byte 27 # DW_FORM_addrx + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 13 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 14 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad x + .quad y + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp4 +.Ldebug_addr_end0: + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-main.ll b/bolt/test/X86/Inputs/dwarf5-df-dualcu-main.ll deleted file mode 100644 index 99651eb65525b9..00000000000000 --- a/bolt/test/X86/Inputs/dwarf5-df-dualcu-main.ll +++ /dev/null @@ 
-1,129 +0,0 @@ -; clang++ -g -gdwarf-5 -emit-llvm -S main.cpp -; void use(int * x, int * y) { -; *x += 4; -; *y -= 2; -; } -; -; int helper(int z_, int d_); -; int x = 0; -; int y = 1; -; int main(int argc, char *argv[]) { -; x = argc; -; y = argc + 3; -; use(&x, &y); -; return helper(x, y); -; } - -; ModuleID = 'main.cpp' -source_filename = "main.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@x = dso_local global i32 0, align 4, !dbg !0 -@y = dso_local global i32 1, align 4, !dbg !5 - -; Function Attrs: mustprogress noinline nounwind optnone uwtable -define dso_local void @_Z3usePiS_(i32* noundef %x, i32* noundef %y) #0 !dbg !14 { -entry: - %x.addr = alloca i32*, align 8 - %y.addr = alloca i32*, align 8 - store i32* %x, i32** %x.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %x.addr, metadata !19, metadata !DIExpression()), !dbg !20 - store i32* %y, i32** %y.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %y.addr, metadata !21, metadata !DIExpression()), !dbg !22 - %0 = load i32*, i32** %x.addr, align 8, !dbg !23 - %1 = load i32, i32* %0, align 4, !dbg !24 - %add = add nsw i32 %1, 4, !dbg !24 - store i32 %add, i32* %0, align 4, !dbg !24 - %2 = load i32*, i32** %y.addr, align 8, !dbg !25 - %3 = load i32, i32* %2, align 4, !dbg !26 - %sub = sub nsw i32 %3, 2, !dbg !26 - store i32 %sub, i32* %2, align 4, !dbg !26 - ret void, !dbg !27 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -; Function Attrs: mustprogress noinline norecurse optnone uwtable -define dso_local noundef i32 @main(i32 noundef %argc, i8** noundef %argv) #2 !dbg !28 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - call void 
@llvm.dbg.declare(metadata i32* %argc.addr, metadata !34, metadata !DIExpression()), !dbg !35 - store i8** %argv, i8*** %argv.addr, align 8 - call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !36, metadata !DIExpression()), !dbg !37 - %0 = load i32, i32* %argc.addr, align 4, !dbg !38 - store i32 %0, i32* @x, align 4, !dbg !39 - %1 = load i32, i32* %argc.addr, align 4, !dbg !40 - %add = add nsw i32 %1, 3, !dbg !41 - store i32 %add, i32* @y, align 4, !dbg !42 - call void @_Z3usePiS_(i32* noundef @x, i32* noundef @y), !dbg !43 - %2 = load i32, i32* @x, align 4, !dbg !44 - %3 = load i32, i32* @y, align 4, !dbg !45 - %call = call noundef i32 @_Z6helperii(i32 noundef %2, i32 noundef %3), !dbg !46 - ret i32 %call, !dbg !47 -} - -declare dso_local noundef i32 @_Z6helperii(i32 noundef, i32 noundef) #3 - -attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } -attributes #2 = { mustprogress noinline norecurse optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 7, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, 
producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "main.dwo", emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: GNU) -!3 = !DIFile(filename: "main.cpp", directory: ".", checksumkind: CSK_MD5, checksum: "1f627913a0daee879e00a3a51726f0ef") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "y", scope: !2, file: !3, line: 8, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 5} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{i32 7, !"frame-pointer", i32 2} -!13 = !{!"clang version 15.0.0"} -!14 = distinct !DISubprogram(name: "use", linkageName: "_Z3usePiS_", scope: !3, file: !3, line: 1, type: !15, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!15 = !DISubroutineType(types: !16) -!16 = !{null, !17, !17} -!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) -!18 = !{} -!19 = !DILocalVariable(name: "x", arg: 1, scope: !14, file: !3, line: 1, type: !17) -!20 = !DILocation(line: 1, column: 16, scope: !14) -!21 = !DILocalVariable(name: "y", arg: 2, scope: !14, file: !3, line: 1, type: !17) -!22 = !DILocation(line: 1, column: 25, scope: !14) -!23 = !DILocation(line: 2, column: 2, scope: !14) -!24 = !DILocation(line: 2, column: 4, scope: !14) -!25 = !DILocation(line: 3, column: 2, scope: !14) -!26 = !DILocation(line: 3, column: 4, scope: !14) -!27 = !DILocation(line: 4, column: 1, scope: !14) -!28 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 9, type: !29, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!29 = !DISubroutineType(types: !30) -!30 = !{!7, !7, !31} -!31 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: 
!32, size: 64) -!32 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64) -!33 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!34 = !DILocalVariable(name: "argc", arg: 1, scope: !28, file: !3, line: 9, type: !7) -!35 = !DILocation(line: 9, column: 15, scope: !28) -!36 = !DILocalVariable(name: "argv", arg: 2, scope: !28, file: !3, line: 9, type: !31) -!37 = !DILocation(line: 9, column: 27, scope: !28) -!38 = !DILocation(line: 10, column: 8, scope: !28) -!39 = !DILocation(line: 10, column: 6, scope: !28) -!40 = !DILocation(line: 11, column: 8, scope: !28) -!41 = !DILocation(line: 11, column: 13, scope: !28) -!42 = !DILocation(line: 11, column: 6, scope: !28) -!43 = !DILocation(line: 12, column: 4, scope: !28) -!44 = !DILocation(line: 13, column: 18, scope: !28) -!45 = !DILocation(line: 13, column: 21, scope: !28) -!46 = !DILocation(line: 13, column: 11, scope: !28) -!47 = !DILocation(line: 13, column: 4, scope: !28) diff --git a/bolt/test/X86/Inputs/dwarf5-df-dualcu-main.s b/bolt/test/X86/Inputs/dwarf5-df-dualcu-main.s new file mode 100644 index 00000000000000..0492fdd236b579 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-df-dualcu-main.s @@ -0,0 +1,476 @@ +# clang++ -g -gdwarf-5 -emit-llvm -S main.cpp +# llc -O0 -mtriple=x86_64-unknown-linux-gnu main.ll +# void use(int * x, int * y) { +# *x += 4; +# *y -= 2; +# } +# +# int helper(int z_, int d_); +# int x = 0; +# int y = 1; +# int main(int argc, char *argv[]) { +# x = argc; +# y = argc + 3; +# use(&x, &y); +# return helper(x, y); +# } + + .text + .file "main.cpp" + .file 0 "." 
"main.cpp" md5 0x1f627913a0daee879e00a3a51726f0ef + .globl _Z3usePiS_ # -- Begin function _Z3usePiS_ + .p2align 4, 0x90 + .type _Z3usePiS_,@function +_Z3usePiS_: # @_Z3usePiS_ +.Lfunc_begin0: + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp0: + .loc 0 2 2 prologue_end # main.cpp:2:2 + movq -8(%rbp), %rax + .loc 0 2 4 is_stmt 0 # main.cpp:2:4 + movl (%rax), %ecx + addl $4, %ecx + movl %ecx, (%rax) + .loc 0 3 2 is_stmt 1 # main.cpp:3:2 + movq -16(%rbp), %rax + .loc 0 3 4 is_stmt 0 # main.cpp:3:4 + movl (%rax), %ecx + subl $2, %ecx + movl %ecx, (%rax) + .loc 0 4 1 is_stmt 1 # main.cpp:4:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Z3usePiS_, .Lfunc_end0-_Z3usePiS_ + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 9 0 # main.cpp:9:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp2: + .loc 0 10 8 prologue_end # main.cpp:10:8 + movl -8(%rbp), %eax + .loc 0 10 6 is_stmt 0 # main.cpp:10:6 + movl %eax, x + .loc 0 11 8 is_stmt 1 # main.cpp:11:8 + movl -8(%rbp), %eax + .loc 0 11 13 is_stmt 0 # main.cpp:11:13 + addl $3, %eax + .loc 0 11 6 # main.cpp:11:6 + movl %eax, y + .loc 0 12 4 is_stmt 1 # main.cpp:12:4 + movabsq $x, %rdi + movabsq $y, %rsi + callq _Z3usePiS_ + .loc 0 13 18 # main.cpp:13:18 + movl x, %edi + .loc 0 13 21 is_stmt 0 # main.cpp:13:21 + movl y, %esi + .loc 0 13 11 # main.cpp:13:11 + callq _Z6helperii + .loc 0 13 4 # main.cpp:13:4 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp3: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End 
function + .type x,@object # @x + .bss + .globl x + .p2align 2 +x: + .long 0 # 0x0 + .size x, 4 + + .type y,@object # @y + .data + .globl y + .p2align 2 +y: + .long 1 # 0x1 + .size y, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_skeleton_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad 2452897014735893912 + .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .byte 0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .byte 1 # DW_AT_dwo_name + .byte 2 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 12 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "main.dwo" # string offset=2 + .section .debug_str_offsets,"",@progbits + .long .Lskel_string0 + .long .Lskel_string1 + .section .debug_str_offsets.dwo,"e",@progbits + .long 52 # Length of String Offsets Set + .short 5 + .short 0 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "x" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "y" # string offset=6 +.Linfo_string3: + .asciz "_Z3usePiS_" # string offset=8 +.Linfo_string4: + .asciz "use" # string offset=19 +.Linfo_string5: + .asciz "main" # string offset=23 +.Linfo_string6: + .asciz "argc" # string offset=28 +.Linfo_string7: + .asciz "argv" # string offset=33 +.Linfo_string8: + .asciz "char" # string offset=38 +.Linfo_string9: + .asciz "clang version 15.0.0" # string offset=43 +.Linfo_string10: + .asciz "main.cpp" # string offset=64 +.Linfo_string11: + .asciz "main.dwo" # string offset=73 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 19 + .long 23 + .long 28 + .long 33 + .long 38 + .long 43 + .long 64 + .long 73 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. 
Section + .quad 2452897014735893912 + .byte 1 # Abbrev [1] 0x14:0x7d DW_TAG_compile_unit + .byte 9 # DW_AT_producer + .short 33 # DW_AT_language + .byte 10 # DW_AT_name + .byte 11 # DW_AT_dwo_name + .byte 2 # Abbrev [2] 0x1a:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 0 + .byte 3 # Abbrev [3] 0x25:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x29:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 1 + .byte 4 # Abbrev [4] 0x34:0x23 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 5 # Abbrev [5] 0x40:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 0 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 125 # DW_AT_type + .byte 5 # Abbrev [5] 0x4b:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 2 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 125 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x57:0x26 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x66:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 37 # 
DW_AT_type + .byte 5 # Abbrev [5] 0x71:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 130 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x7d:0x5 DW_TAG_pointer_type + .long 37 # DW_AT_type + .byte 7 # Abbrev [7] 0x82:0x5 DW_TAG_pointer_type + .long 135 # DW_AT_type + .byte 7 # Abbrev [7] 0x87:0x5 DW_TAG_pointer_type + .long 140 # DW_AT_type + .byte 3 # Abbrev [3] 0x8c:0x4 DW_TAG_base_type + .byte 8 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 
64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad x + .quad y + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + 
.short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 87 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "main" # External Name + .long 26 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "x" # External Name + .long 41 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "y" # External Name + .long 52 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "use" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 37 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "int" # External Name + .long 140 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "char" # External Name + .long 0 # End Mark +.LpubTypes_end0: + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-df-mono-helper.ll b/bolt/test/X86/Inputs/dwarf5-df-mono-helper.ll deleted file mode 100644 index c69f64f37ebd16..00000000000000 --- a/bolt/test/X86/Inputs/dwarf5-df-mono-helper.ll +++ /dev/null @@ -1,81 +0,0 @@ -; clang++ -g -gdwarf-5 -emit-llvm -S helper.cpp -; int z = 0; -; int d = 0; -; -; int helper(int z_, int d_) { -; z += z_; -; d += d_; -; return z * d; -; } - -; ModuleID = 'helper.cpp' -source_filename = "helper.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@z = dso_local global i32 0, align 4, !dbg !0 -@d = dso_local global i32 0, align 4, !dbg !5 - -; Function Attrs: mustprogress noinline nounwind optnone uwtable -define dso_local noundef i32 @_Z6helperii(i32 noundef %z_, i32 
noundef %d_) #0 !dbg !14 { -entry: - %z_.addr = alloca i32, align 4 - %d_.addr = alloca i32, align 4 - store i32 %z_, i32* %z_.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %z_.addr, metadata !18, metadata !DIExpression()), !dbg !19 - store i32 %d_, i32* %d_.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %d_.addr, metadata !20, metadata !DIExpression()), !dbg !21 - %0 = load i32, i32* %z_.addr, align 4, !dbg !22 - %1 = load i32, i32* @z, align 4, !dbg !23 - %add = add nsw i32 %1, %0, !dbg !23 - store i32 %add, i32* @z, align 4, !dbg !23 - %2 = load i32, i32* %d_.addr, align 4, !dbg !24 - %3 = load i32, i32* @d, align 4, !dbg !25 - %add1 = add nsw i32 %3, %2, !dbg !25 - store i32 %add1, i32* @d, align 4, !dbg !25 - %4 = load i32, i32* @z, align 4, !dbg !26 - %5 = load i32, i32* @d, align 4, !dbg !27 - %mul = mul nsw i32 %4, %5, !dbg !28 - ret i32 %mul, !dbg !29 -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "z", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "helper.cpp", directory: ".", checksumkind: 
CSK_MD5, checksum: "e635924a35b65444173d0c76a54b866f") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "d", scope: !2, file: !3, line: 2, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 5} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{i32 7, !"frame-pointer", i32 2} -!13 = !{!"clang version 15.0.0"} -!14 = distinct !DISubprogram(name: "helper", linkageName: "_Z6helperii", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !17) -!15 = !DISubroutineType(types: !16) -!16 = !{!7, !7, !7} -!17 = !{} -!18 = !DILocalVariable(name: "z_", arg: 1, scope: !14, file: !3, line: 4, type: !7) -!19 = !DILocation(line: 4, column: 16, scope: !14) -!20 = !DILocalVariable(name: "d_", arg: 2, scope: !14, file: !3, line: 4, type: !7) -!21 = !DILocation(line: 4, column: 24, scope: !14) -!22 = !DILocation(line: 5, column: 7, scope: !14) -!23 = !DILocation(line: 5, column: 4, scope: !14) -!24 = !DILocation(line: 6, column: 7, scope: !14) -!25 = !DILocation(line: 6, column: 4, scope: !14) -!26 = !DILocation(line: 7, column: 9, scope: !14) -!27 = !DILocation(line: 7, column: 13, scope: !14) -!28 = !DILocation(line: 7, column: 11, scope: !14) -!29 = !DILocation(line: 7, column: 2, scope: !14) diff --git a/bolt/test/X86/Inputs/dwarf5-df-mono-helper.s b/bolt/test/X86/Inputs/dwarf5-df-mono-helper.s new file mode 100644 index 00000000000000..fbb25ff149ae91 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-df-mono-helper.s @@ -0,0 +1,281 @@ +# clang++ -g -gdwarf-5 -emit-llvm -S helper.cpp +# llc -O0 -mtriple=x86_64-unknown-linux-gnu helper.ll +# int z = 0; +# int d = 0; +# +# int helper(int z_, int d_) { +# z += z_; +# d += d_; +# return z * d; +# } + + .text + .file 
"helper.cpp" + .file 0 "." "helper.cpp" md5 0xe635924a35b65444173d0c76a54b866f + .globl _Z6helperii # -- Begin function _Z6helperii + .p2align 4, 0x90 + .type _Z6helperii,@function +_Z6helperii: # @_Z6helperii +.Lfunc_begin0: + .loc 0 4 0 # helper.cpp:4:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) + movl %esi, -8(%rbp) +.Ltmp0: + .loc 0 5 7 prologue_end # helper.cpp:5:7 + movl -4(%rbp), %eax + .loc 0 5 4 is_stmt 0 # helper.cpp:5:4 + addl z, %eax + movl %eax, z + .loc 0 6 7 is_stmt 1 # helper.cpp:6:7 + movl -8(%rbp), %eax + .loc 0 6 4 is_stmt 0 # helper.cpp:6:4 + addl d, %eax + movl %eax, d + .loc 0 7 9 is_stmt 1 # helper.cpp:7:9 + movl z, %eax + .loc 0 7 11 is_stmt 0 # helper.cpp:7:11 + imull d, %eax + .loc 0 7 2 # helper.cpp:7:2 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Z6helperii, .Lfunc_end0-_Z6helperii + .cfi_endproc + # -- End function + .type z,@object # @z + .bss + .globl z + .p2align 2 +z: + .long 0 # 0x0 + .size z, 4 + + .type d,@object # @d + .globl d + .p2align 2 +d: + .long 0 # 0x0 + .size d, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + 
.byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x59 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0xb DW_TAG_variable + .byte 3 # DW_AT_name + .long 46 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 0 + .byte 3 # Abbrev [3] 0x2e:0x4 DW_TAG_base_type + .byte 4 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x32:0xb DW_TAG_variable + .byte 5 # DW_AT_name + .long 46 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 1 + .byte 4 # Abbrev [4] 0x3d:0x27 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 6 # DW_AT_linkage_name + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 46 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x4d:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 46 # DW_AT_type + .byte 5 # Abbrev [5] 0x58:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 46 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 44 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 15.0.0" # 
string offset=0 +.Linfo_string1: + .asciz "helper.cpp" # string offset=21 +.Linfo_string2: + .asciz "." # string offset=32 +.Linfo_string3: + .asciz "z" # string offset=34 +.Linfo_string4: + .asciz "int" # string offset=36 +.Linfo_string5: + .asciz "d" # string offset=40 +.Linfo_string6: + .asciz "_Z6helperii" # string offset=42 +.Linfo_string7: + .asciz "helper" # string offset=54 +.Linfo_string8: + .asciz "z_" # string offset=61 +.Linfo_string9: + .asciz "d_" # string offset=64 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad z + .quad d + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-df-mono-main.ll b/bolt/test/X86/Inputs/dwarf5-df-mono-main.ll deleted file mode 100644 index 3fa28c0848a983..00000000000000 --- a/bolt/test/X86/Inputs/dwarf5-df-mono-main.ll +++ /dev/null @@ -1,129 +0,0 @@ -; clang++ -g -gdwarf-5 -gsplit-dwarf=split -emit-llvm -S main.cpp -; void use(int * x, int * y) { -; *x += 4; -; *y -= 2; -; } -; -; int helper(int z_, int d_); -; int x = 0; -; int y = 1; -; int main(int argc, char *argv[]) { -; x = argc; -; y = argc + 3; -; use(&x, &y); -; return helper(x, y); -; } - -; ModuleID = 'main.cpp' -source_filename = "main.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@x = dso_local global i32 0, align 4, !dbg !0 -@y = 
dso_local global i32 1, align 4, !dbg !5 - -; Function Attrs: mustprogress noinline nounwind optnone uwtable -define dso_local void @_Z3usePiS_(i32* noundef %x, i32* noundef %y) #0 !dbg !14 { -entry: - %x.addr = alloca i32*, align 8 - %y.addr = alloca i32*, align 8 - store i32* %x, i32** %x.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %x.addr, metadata !19, metadata !DIExpression()), !dbg !20 - store i32* %y, i32** %y.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %y.addr, metadata !21, metadata !DIExpression()), !dbg !22 - %0 = load i32*, i32** %x.addr, align 8, !dbg !23 - %1 = load i32, i32* %0, align 4, !dbg !24 - %add = add nsw i32 %1, 4, !dbg !24 - store i32 %add, i32* %0, align 4, !dbg !24 - %2 = load i32*, i32** %y.addr, align 8, !dbg !25 - %3 = load i32, i32* %2, align 4, !dbg !26 - %sub = sub nsw i32 %3, 2, !dbg !26 - store i32 %sub, i32* %2, align 4, !dbg !26 - ret void, !dbg !27 -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -; Function Attrs: mustprogress noinline norecurse optnone uwtable -define dso_local noundef i32 @main(i32 noundef %argc, i8** noundef %argv) #2 !dbg !28 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !34, metadata !DIExpression()), !dbg !35 - store i8** %argv, i8*** %argv.addr, align 8 - call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !36, metadata !DIExpression()), !dbg !37 - %0 = load i32, i32* %argc.addr, align 4, !dbg !38 - store i32 %0, i32* @x, align 4, !dbg !39 - %1 = load i32, i32* %argc.addr, align 4, !dbg !40 - %add = add nsw i32 %1, 3, !dbg !41 - store i32 %add, i32* @y, align 4, !dbg !42 - call void @_Z3usePiS_(i32* noundef @x, i32* noundef @y), !dbg !43 - %2 = 
load i32, i32* @x, align 4, !dbg !44 - %3 = load i32, i32* @y, align 4, !dbg !45 - %call = call noundef i32 @_Z6helperii(i32 noundef %2, i32 noundef %3), !dbg !46 - ret i32 %call, !dbg !47 -} - -declare dso_local noundef i32 @_Z6helperii(i32 noundef, i32 noundef) #3 - -attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } -attributes #2 = { mustprogress noinline norecurse optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 7, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "main.dwo", emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: GNU) -!3 = !DIFile(filename: "main.cpp", directory: ".", checksumkind: CSK_MD5, checksum: "1f627913a0daee879e00a3a51726f0ef") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "y", scope: !2, file: !3, line: 8, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: 
"int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 5} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{i32 7, !"frame-pointer", i32 2} -!13 = !{!"clang version 15.0.0"} -!14 = distinct !DISubprogram(name: "use", linkageName: "_Z3usePiS_", scope: !3, file: !3, line: 1, type: !15, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!15 = !DISubroutineType(types: !16) -!16 = !{null, !17, !17} -!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) -!18 = !{} -!19 = !DILocalVariable(name: "x", arg: 1, scope: !14, file: !3, line: 1, type: !17) -!20 = !DILocation(line: 1, column: 16, scope: !14) -!21 = !DILocalVariable(name: "y", arg: 2, scope: !14, file: !3, line: 1, type: !17) -!22 = !DILocation(line: 1, column: 25, scope: !14) -!23 = !DILocation(line: 2, column: 2, scope: !14) -!24 = !DILocation(line: 2, column: 4, scope: !14) -!25 = !DILocation(line: 3, column: 2, scope: !14) -!26 = !DILocation(line: 3, column: 4, scope: !14) -!27 = !DILocation(line: 4, column: 1, scope: !14) -!28 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 9, type: !29, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!29 = !DISubroutineType(types: !30) -!30 = !{!7, !7, !31} -!31 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !32, size: 64) -!32 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64) -!33 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!34 = !DILocalVariable(name: "argc", arg: 1, scope: !28, file: !3, line: 9, type: !7) -!35 = !DILocation(line: 9, column: 15, scope: !28) -!36 = !DILocalVariable(name: "argv", arg: 2, scope: !28, file: !3, line: 9, type: !31) -!37 = !DILocation(line: 9, column: 27, scope: !28) -!38 = !DILocation(line: 10, column: 8, scope: !28) -!39 = !DILocation(line: 10, column: 6, scope: 
!28) -!40 = !DILocation(line: 11, column: 8, scope: !28) -!41 = !DILocation(line: 11, column: 13, scope: !28) -!42 = !DILocation(line: 11, column: 6, scope: !28) -!43 = !DILocation(line: 12, column: 4, scope: !28) -!44 = !DILocation(line: 13, column: 18, scope: !28) -!45 = !DILocation(line: 13, column: 21, scope: !28) -!46 = !DILocation(line: 13, column: 11, scope: !28) -!47 = !DILocation(line: 13, column: 4, scope: !28) diff --git a/bolt/test/X86/Inputs/dwarf5-df-mono-main.s b/bolt/test/X86/Inputs/dwarf5-df-mono-main.s new file mode 100644 index 00000000000000..bbcba0fb8abb41 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-df-mono-main.s @@ -0,0 +1,476 @@ +# clang++ -g -gdwarf-5 -gsplit-dwarf=split -emit-llvm -S main.cpp +# llc -O0 -mtriple=x86_64-unknown-linux-gnu main.ll +# void use(int * x, int * y) { +# *x += 4; +# *y -= 2; +# } +# +# int helper(int z_, int d_); +# int x = 0; +# int y = 1; +# int main(int argc, char *argv[]) { +# x = argc; +# y = argc + 3; +# use(&x, &y); +# return helper(x, y); +# } + + .text + .file "main.cpp" + .file 0 "." 
"main.cpp" md5 0x1f627913a0daee879e00a3a51726f0ef + .globl _Z3usePiS_ # -- Begin function _Z3usePiS_ + .p2align 4, 0x90 + .type _Z3usePiS_,@function +_Z3usePiS_: # @_Z3usePiS_ +.Lfunc_begin0: + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp0: + .loc 0 2 2 prologue_end # main.cpp:2:2 + movq -8(%rbp), %rax + .loc 0 2 4 is_stmt 0 # main.cpp:2:4 + movl (%rax), %ecx + addl $4, %ecx + movl %ecx, (%rax) + .loc 0 3 2 is_stmt 1 # main.cpp:3:2 + movq -16(%rbp), %rax + .loc 0 3 4 is_stmt 0 # main.cpp:3:4 + movl (%rax), %ecx + subl $2, %ecx + movl %ecx, (%rax) + .loc 0 4 1 is_stmt 1 # main.cpp:4:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Z3usePiS_, .Lfunc_end0-_Z3usePiS_ + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 9 0 # main.cpp:9:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp2: + .loc 0 10 8 prologue_end # main.cpp:10:8 + movl -8(%rbp), %eax + .loc 0 10 6 is_stmt 0 # main.cpp:10:6 + movl %eax, x + .loc 0 11 8 is_stmt 1 # main.cpp:11:8 + movl -8(%rbp), %eax + .loc 0 11 13 is_stmt 0 # main.cpp:11:13 + addl $3, %eax + .loc 0 11 6 # main.cpp:11:6 + movl %eax, y + .loc 0 12 4 is_stmt 1 # main.cpp:12:4 + movabsq $x, %rdi + movabsq $y, %rsi + callq _Z3usePiS_ + .loc 0 13 18 # main.cpp:13:18 + movl x, %edi + .loc 0 13 21 is_stmt 0 # main.cpp:13:21 + movl y, %esi + .loc 0 13 11 # main.cpp:13:11 + callq _Z6helperii + .loc 0 13 4 # main.cpp:13:4 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp3: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End 
function + .type x,@object # @x + .bss + .globl x + .p2align 2 +x: + .long 0 # 0x0 + .size x, 4 + + .type y,@object # @y + .data + .globl y + .p2align 2 +y: + .long 1 # 0x1 + .size y, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_skeleton_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad 2452897014735893912 + .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .byte 0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .byte 1 # DW_AT_dwo_name + .byte 2 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 12 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "main.dwo" # string offset=2 + .section .debug_str_offsets,"",@progbits + .long .Lskel_string0 + .long .Lskel_string1 + .section .debug_str_offsets.dwo,"e",@progbits + .long 52 # Length of String Offsets Set + .short 5 + .short 0 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "x" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "y" # string offset=6 +.Linfo_string3: + .asciz "_Z3usePiS_" # string offset=8 +.Linfo_string4: + .asciz "use" # string offset=19 +.Linfo_string5: + .asciz "main" # string offset=23 +.Linfo_string6: + .asciz "argc" # string offset=28 +.Linfo_string7: + .asciz "argv" # string offset=33 +.Linfo_string8: + .asciz "char" # string offset=38 +.Linfo_string9: + .asciz "clang version 15.0.0" # string offset=43 +.Linfo_string10: + .asciz "main.cpp" # string offset=64 +.Linfo_string11: + .asciz "main.dwo" # string offset=73 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 19 + .long 23 + .long 28 + .long 33 + .long 38 + .long 43 + .long 64 + .long 73 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. 
Section + .quad 2452897014735893912 + .byte 1 # Abbrev [1] 0x14:0x7d DW_TAG_compile_unit + .byte 9 # DW_AT_producer + .short 33 # DW_AT_language + .byte 10 # DW_AT_name + .byte 11 # DW_AT_dwo_name + .byte 2 # Abbrev [2] 0x1a:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 0 + .byte 3 # Abbrev [3] 0x25:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x29:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 1 + .byte 4 # Abbrev [4] 0x34:0x23 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 5 # Abbrev [5] 0x40:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 0 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 125 # DW_AT_type + .byte 5 # Abbrev [5] 0x4b:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 2 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 125 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x57:0x26 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x66:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 37 # 
DW_AT_type + .byte 5 # Abbrev [5] 0x71:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 130 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x7d:0x5 DW_TAG_pointer_type + .long 37 # DW_AT_type + .byte 7 # Abbrev [7] 0x82:0x5 DW_TAG_pointer_type + .long 135 # DW_AT_type + .byte 7 # Abbrev [7] 0x87:0x5 DW_TAG_pointer_type + .long 140 # DW_AT_type + .byte 3 # Abbrev [3] 0x8c:0x4 DW_TAG_base_type + .byte 8 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 
64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad x + .quad y + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + 
.short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 87 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "main" # External Name + .long 26 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "x" # External Name + .long 41 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "y" # External Name + .long 52 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "use" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 37 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "int" # External Name + .long 140 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "char" # External Name + .long 0 # End Mark +.LpubTypes_end0: + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-locaddrx.s b/bolt/test/X86/Inputs/dwarf5-locaddrx.s new file mode 100644 index 00000000000000..46c99f28ba8faa --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-locaddrx.s @@ -0,0 +1,473 @@ +# clang++ -g -gdwarf-5 -emit-llvm -S mainlocadddrx.cpp +# llc -O0 -mtriple=x86_64-unknown-linux-gnu mainlocadddrx.ll +# void use(int * x, int * y) { +# *x += 4; +# *y -= 2; +# } +# +# int x = 0; +# int y = 1; +# int main(int argc, char *argv[]) { +# x = argc; +# y = argc + 3; +# use(&x, &y); +# return x + y; +# } + + .text + .file "mainlocadddrx.cpp" + .file 0 "." 
"mainlocadddrx.cpp" md5 0xd4fd79ce0087c4cefd089752bf2182c6 + .globl _Z3usePiS_ # -- Begin function _Z3usePiS_ + .p2align 4, 0x90 + .type _Z3usePiS_,@function +_Z3usePiS_: # @_Z3usePiS_ +.Lfunc_begin0: + .loc 0 1 0 # mainlocadddrx.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp0: + .loc 0 2 2 prologue_end # mainlocadddrx.cpp:2:2 + movq -8(%rbp), %rax + .loc 0 2 4 is_stmt 0 # mainlocadddrx.cpp:2:4 + movl (%rax), %ecx + addl $4, %ecx + movl %ecx, (%rax) + .loc 0 3 2 is_stmt 1 # mainlocadddrx.cpp:3:2 + movq -16(%rbp), %rax + .loc 0 3 4 is_stmt 0 # mainlocadddrx.cpp:3:4 + movl (%rax), %ecx + subl $2, %ecx + movl %ecx, (%rax) + .loc 0 4 1 is_stmt 1 # mainlocadddrx.cpp:4:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Z3usePiS_, .Lfunc_end0-_Z3usePiS_ + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 8 0 # mainlocadddrx.cpp:8:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp2: + .loc 0 9 8 prologue_end # mainlocadddrx.cpp:9:8 + movl -8(%rbp), %eax + .loc 0 9 6 is_stmt 0 # mainlocadddrx.cpp:9:6 + movl %eax, x + .loc 0 10 8 is_stmt 1 # mainlocadddrx.cpp:10:8 + movl -8(%rbp), %eax + .loc 0 10 13 is_stmt 0 # mainlocadddrx.cpp:10:13 + addl $3, %eax + .loc 0 10 6 # mainlocadddrx.cpp:10:6 + movl %eax, y + .loc 0 11 4 is_stmt 1 # mainlocadddrx.cpp:11:4 + movabsq $x, %rdi + movabsq $y, %rsi + callq _Z3usePiS_ + .loc 0 12 11 # mainlocadddrx.cpp:12:11 + movl x, %eax + .loc 0 12 13 is_stmt 0 # mainlocadddrx.cpp:12:13 + addl y, %eax + .loc 0 12 4 # mainlocadddrx.cpp:12:4 + addq $16, %rsp + popq %rbp + .cfi_def_cfa 
%rsp, 8 + retq +.Ltmp3: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .type x,@object # @x + .bss + .globl x + .p2align 2 +x: + .long 0 # 0x0 + .size x, 4 + + .type y,@object # @y + .data + .globl y + .p2align 2 +y: + .long 1 # 0x1 + .size y, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_skeleton_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad 6364283817788887055 + .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .byte 0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .byte 1 # DW_AT_dwo_name + .byte 2 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 12 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "mainlocadddrx.dwo" # string offset=2 + .section .debug_str_offsets,"",@progbits + .long .Lskel_string0 + .long .Lskel_string1 + .section .debug_str_offsets.dwo,"e",@progbits + .long 52 # Length of String Offsets Set + .short 5 + .short 0 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "x" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=2 +.Linfo_string2: + .asciz "y" # string offset=6 +.Linfo_string3: + .asciz "_Z3usePiS_" # string offset=8 +.Linfo_string4: + .asciz "use" # string offset=19 +.Linfo_string5: + .asciz "main" # string offset=23 +.Linfo_string6: + .asciz "argc" # string offset=28 +.Linfo_string7: + .asciz "argv" # string offset=33 +.Linfo_string8: + .asciz "char" # string offset=38 +.Linfo_string9: + .asciz "clang version 15.0.0" # string offset=43 +.Linfo_string10: + .asciz "mainlocadddrx.cpp" # string offset=64 +.Linfo_string11: + .asciz "mainlocadddrx.dwo" # string offset=82 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 2 + .long 6 + .long 8 + .long 19 + .long 23 + .long 28 + .long 33 + .long 38 + .long 43 + .long 64 + .long 82 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. 
Section + .quad 6364283817788887055 + .byte 1 # Abbrev [1] 0x14:0x7d DW_TAG_compile_unit + .byte 9 # DW_AT_producer + .short 33 # DW_AT_language + .byte 10 # DW_AT_name + .byte 11 # DW_AT_dwo_name + .byte 2 # Abbrev [2] 0x1a:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 6 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 0 + .byte 3 # Abbrev [3] 0x25:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 2 # Abbrev [2] 0x29:0xb DW_TAG_variable + .byte 2 # DW_AT_name + .long 37 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 161 + .byte 1 + .byte 4 # Abbrev [4] 0x34:0x23 DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 5 # Abbrev [5] 0x40:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 0 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 125 # DW_AT_type + .byte 5 # Abbrev [5] 0x4b:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 2 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 125 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x57:0x26 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x66:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .long 37 # 
DW_AT_type + .byte 5 # Abbrev [5] 0x71:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .long 130 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x7d:0x5 DW_TAG_pointer_type + .long 37 # DW_AT_type + .byte 7 # Abbrev [7] 0x82:0x5 DW_TAG_pointer_type + .long 135 # DW_AT_type + .byte 7 # Abbrev [7] 0x87:0x5 DW_TAG_pointer_type + .long 140 # DW_AT_type + .byte 3 # Abbrev [3] 0x8c:0x4 DW_TAG_base_type + .byte 8 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 
64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad x + .quad y + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + 
.short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 87 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "main" # External Name + .long 26 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "x" # External Name + .long 41 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "y" # External Name + .long 52 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "use" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 37 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "int" # External Name + .long 140 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "char" # External Name + .long 0 # End Mark +.LpubTypes_end0: + .ident "clang version 15.0.0" + .section ".note.GNU-stack","",@progbits + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf4-df-dualcu-loclist.test b/bolt/test/X86/dwarf4-df-dualcu-loclist.test index a011488b4eaa6b..010959b5e1e18c 100644 --- a/bolt/test/X86/dwarf4-df-dualcu-loclist.test +++ b/bolt/test/X86/dwarf4-df-dualcu-loclist.test @@ -1,10 +1,10 @@ ; RUN: rm -rf %t ; RUN: mkdir %t ; RUN: cd %t -; RUN: llc -split-dwarf-file=main.dwo -split-dwarf-output=main.dwo -O2 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf4-df-dualcu-loclist-main.ll -o=main.o -; RUN: llc -split-dwarf-file=helper.dwo -split-dwarf-output=helper.dwo -O2 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf4-df-dualcu-loclist-helper.ll -o=helper.o +; RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-df-dualcu-loclist-main.s \ +; RUN: 
-split-dwarf-file=main.dwo -o main.o +; RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-df-dualcu-loclist-helper.s \ +; RUN: -split-dwarf-file=helper.dwo -o helper.o ; RUN: %clang %cflags -gdwarf-5 -O2 -gsplit-dwarf=split main.o helper.o -o main.exe ; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.dwo | FileCheck -check-prefix=PRE-BOLT-DWO-MAIN %s diff --git a/bolt/test/X86/dwarf4-df-dualcu.test b/bolt/test/X86/dwarf4-df-dualcu.test index 7befeea78bbbc4..ec9d574d3fd60d 100644 --- a/bolt/test/X86/dwarf4-df-dualcu.test +++ b/bolt/test/X86/dwarf4-df-dualcu.test @@ -1,10 +1,10 @@ ; RUN: rm -rf %t ; RUN: mkdir %t ; RUN: cd %t -; RUN: llc -split-dwarf-file=main.dwo -split-dwarf-output=main.dwo -O0 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf4-df-dualcu-main.ll -o=main.o -; RUN: llc -split-dwarf-file=helper.dwo -split-dwarf-output=helper.dwo -O0 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf4-df-dualcu-helper.ll -o=helper.o +;; RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-df-dualcu-main.s \ +; RUN: -split-dwarf-file=main.dwo -o main.o +; RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-df-dualcu-helper.s \ +; RUN: -split-dwarf-file=helper.dwo -o helper.o ; RUN: %clang %cflags -gdwarf-5 -gsplit-dwarf=split main.o helper.o -o main.exe ; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe | FileCheck -check-prefix=PRE-BOLT %s diff --git a/bolt/test/X86/dwarf5-df-dualcu-loclist.test b/bolt/test/X86/dwarf5-df-dualcu-loclist.test index 9a0f0481c67b55..89d8fe7b4a8a68 100644 --- a/bolt/test/X86/dwarf5-df-dualcu-loclist.test +++ b/bolt/test/X86/dwarf5-df-dualcu-loclist.test @@ -1,10 +1,10 @@ ; RUN: rm -rf %t ; RUN: mkdir %t ; RUN: cd %t -; RUN: 
llc -split-dwarf-file=main.dwo -split-dwarf-output=main.dwo -O2 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf5-df-dualcu-loclist-main.ll -o=main.o -; RUN: llc -split-dwarf-file=helper.dwo -split-dwarf-output=helper.dwo -O2 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf5-df-dualcu-loclist-helper.ll -o=helper.o +; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-df-dualcu-loclist-main.s \ +; RUN: -split-dwarf-file=main.dwo -o main.o +; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-df-dualcu-loclist-helper.s \ +; RUN: -split-dwarf-file=helper.dwo -o helper.o ; RUN: %clang %cflags -gdwarf-5 -O2 -gsplit-dwarf=split main.o helper.o -o main.exe ; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.dwo | FileCheck -check-prefix=PRE-BOLT-DWO-MAIN %s diff --git a/bolt/test/X86/dwarf5-df-dualcu.test b/bolt/test/X86/dwarf5-df-dualcu.test index 2902900c6f26c1..966a9886e9413f 100644 --- a/bolt/test/X86/dwarf5-df-dualcu.test +++ b/bolt/test/X86/dwarf5-df-dualcu.test @@ -1,10 +1,10 @@ ; RUN: rm -rf %t ; RUN: mkdir %t ; RUN: cd %t -; RUN: llc -split-dwarf-file=main.dwo -split-dwarf-output=main.dwo -O0 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf5-df-dualcu-main.ll -o=main.o -; RUN: llc -split-dwarf-file=helper.dwo -split-dwarf-output=helper.dwo -O0 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf5-df-dualcu-helper.ll -o=helper.o +; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-df-dualcu-main.s \ +; RUN: -split-dwarf-file=main.dwo -o main.o +; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-df-dualcu-helper.s \ +; RUN: -split-dwarf-file=helper.dwo -o helper.o ; RUN: %clang %cflags -gdwarf-5 -gsplit-dwarf=split main.o helper.o -o 
main.exe ; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe | FileCheck -check-prefix=PRE-BOLT %s @@ -111,7 +111,6 @@ ; BOLT-DWO-MAIN-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_reg6 RBP) ; BOLT-DWO-MAIN-NEXT: DW_AT_name [DW_FORM_strx1] (indexed (00000005) string = "main") - ; PRE-BOLT-DWO-HELPER: version = 0x0005 ; PRE-BOLT-DWO-HELPER: DW_TAG_variable [2] ; PRE-BOLT-DWO-HELPER-NEXT: DW_AT_name [DW_FORM_strx1] (indexed (00000000) string = "z") diff --git a/bolt/test/X86/dwarf5-df-mono-dualcu.test b/bolt/test/X86/dwarf5-df-mono-dualcu.test index 66bab5e5ae3ded..786b7311a05196 100644 --- a/bolt/test/X86/dwarf5-df-mono-dualcu.test +++ b/bolt/test/X86/dwarf5-df-mono-dualcu.test @@ -1,9 +1,9 @@ ; RUN: rm -rf %t ; RUN: mkdir %t ; RUN: cd %t -; RUN: llc -split-dwarf-file=main.dwo -split-dwarf-output=main.dwo -O0 -mtriple=x86_64-unknown-linux-gnu \ -; RUN: -filetype=obj %p/Inputs/dwarf5-df-mono-main.ll -o=main.o -; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -filetype=obj %p/Inputs/dwarf5-df-mono-helper.ll -o=helper.o +; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-df-mono-main.s \ +; RUN: -split-dwarf-file=main.dwo -o main.o +; RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux-gnu %p/Inputs/dwarf5-df-mono-helper.s -o=helper.o ; RUN: %clang %cflags -gdwarf-5 main.o helper.o -o main.exe ; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe | FileCheck -check-prefix=PRE-BOLT %s diff --git a/bolt/test/X86/dwarf5-locaddrx.test b/bolt/test/X86/dwarf5-locaddrx.test index 1004becdd7ba21..ee96dd96869364 100644 --- a/bolt/test/X86/dwarf5-locaddrx.test +++ b/bolt/test/X86/dwarf5-locaddrx.test @@ -1,7 +1,8 @@ ; RUN: rm -rf %t ; RUN: mkdir %t ; RUN: cd %t -; RUN: llc -split-dwarf-file=mainlocadddrx.dwo -split-dwarf-output=mainlocadddrx.dwo -O0 
-mtriple=x86_64-unknown-linux-gnu -filetype=obj %s -o=mainlocadddrx.o +; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-locaddrx.s \ +; RUN: -split-dwarf-file=mainlocadddrx.dwo -o mainlocadddrx.o ; RUN: %clang %cflags -gdwarf-5 -gsplit-dwarf=split mainlocadddrx.o -o mainlocadddrx.exe ; RUN: llvm-bolt mainlocadddrx.exe -o mainlocadddrx.exe.bolt --update-debug-sections ; RUN: llvm-dwarfdump --show-form --verbose --debug-info mainlocadddrx.exe | FileCheck -check-prefix=PRE-BOLT %s @@ -82,128 +83,3 @@ ; BOLT-DWO-NEXT: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000003) ; BOLT-DWO-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000018 ; BOLT-DWO-NEXT: [0x0000000000000000, 0x0000000000000054)) - -; void use(int * x, int * y) { -; *x += 4; -; *y -= 2; -; } -; -; int x = 0; -; int y = 1; -; int main(int argc, char *argv[]) { -; x = argc; -; y = argc + 3; -; use(&x, &y); -; return x + y; -; } - -; ModuleID = 'mainlocadddrx.cpp' -source_filename = "mainlocadddrx.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@x = dso_local global i32 0, align 4, !dbg !0 -@y = dso_local global i32 1, align 4, !dbg !5 - -; Function Attrs: mustprogress noinline nounwind optnone uwtable -define dso_local void @_Z3usePiS_(i32* noundef %x, i32* noundef %y) #0 !dbg !14 { -entry: - %x.addr = alloca i32*, align 8 - %y.addr = alloca i32*, align 8 - store i32* %x, i32** %x.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %x.addr, metadata !19, metadata !DIExpression()), !dbg !20 - store i32* %y, i32** %y.addr, align 8 - call void @llvm.dbg.declare(metadata i32** %y.addr, metadata !21, metadata !DIExpression()), !dbg !22 - %0 = load i32*, i32** %x.addr, align 8, !dbg !23 - %1 = load i32, i32* %0, align 4, !dbg !24 - %add = add nsw i32 %1, 4, !dbg !24 - store i32 %add, i32* %0, align 4, !dbg !24 - %2 = load i32*, i32** %y.addr, align 8, 
!dbg !25 - %3 = load i32, i32* %2, align 4, !dbg !26 - %sub = sub nsw i32 %3, 2, !dbg !26 - store i32 %sub, i32* %2, align 4, !dbg !26 - ret void, !dbg !27 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -; Function Attrs: mustprogress noinline norecurse nounwind optnone uwtable -define dso_local noundef i32 @main(i32 noundef %argc, i8** noundef %argv) #2 !dbg !28 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !34, metadata !DIExpression()), !dbg !35 - store i8** %argv, i8*** %argv.addr, align 8 - call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !36, metadata !DIExpression()), !dbg !37 - %0 = load i32, i32* %argc.addr, align 4, !dbg !38 - store i32 %0, i32* @x, align 4, !dbg !39 - %1 = load i32, i32* %argc.addr, align 4, !dbg !40 - %add = add nsw i32 %1, 3, !dbg !41 - store i32 %add, i32* @y, align 4, !dbg !42 - call void @_Z3usePiS_(i32* noundef @x, i32* noundef @y), !dbg !43 - %2 = load i32, i32* @x, align 4, !dbg !44 - %3 = load i32, i32* @y, align 4, !dbg !45 - %add1 = add nsw i32 %2, %3, !dbg !46 - ret i32 %add1, !dbg !47 -} - -attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } -attributes #2 = { mustprogress noinline norecurse nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"tune-cpu"="generic" } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!8, !9, !10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 6, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "mainlocadddrx.dwo", emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: GNU) -!3 = !DIFile(filename: "mainlocadddrx.cpp", directory: ".", checksumkind: CSK_MD5, checksum: "d4fd79ce0087c4cefd089752bf2182c6") -!4 = !{!0, !5} -!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) -!6 = distinct !DIGlobalVariable(name: "y", scope: !2, file: !3, line: 7, type: !7, isLocal: false, isDefinition: true) -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = !{i32 7, !"Dwarf Version", i32 5} -!9 = !{i32 2, !"Debug Info Version", i32 3} -!10 = !{i32 1, !"wchar_size", i32 4} -!11 = !{i32 7, !"uwtable", i32 2} -!12 = !{i32 7, !"frame-pointer", i32 2} -!13 = !{!"clang version 15.0.0"} -!14 = distinct !DISubprogram(name: "use", linkageName: "_Z3usePiS_", scope: !3, file: !3, line: 1, type: !15, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!15 = !DISubroutineType(types: !16) -!16 = !{null, !17, !17} -!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) -!18 = !{} -!19 = !DILocalVariable(name: "x", arg: 1, scope: !14, file: !3, line: 1, type: !17) -!20 = !DILocation(line: 1, column: 16, scope: !14) -!21 = !DILocalVariable(name: "y", arg: 2, scope: !14, file: !3, line: 1, type: !17) -!22 = !DILocation(line: 1, column: 25, scope: !14) -!23 = !DILocation(line: 2, column: 2, scope: !14) -!24 = !DILocation(line: 2, column: 4, scope: !14) -!25 = !DILocation(line: 3, column: 2, scope: !14) -!26 = 
!DILocation(line: 3, column: 4, scope: !14) -!27 = !DILocation(line: 4, column: 1, scope: !14) -!28 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 8, type: !29, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !18) -!29 = !DISubroutineType(types: !30) -!30 = !{!7, !7, !31} -!31 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !32, size: 64) -!32 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64) -!33 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!34 = !DILocalVariable(name: "argc", arg: 1, scope: !28, file: !3, line: 8, type: !7) -!35 = !DILocation(line: 8, column: 15, scope: !28) -!36 = !DILocalVariable(name: "argv", arg: 2, scope: !28, file: !3, line: 8, type: !31) -!37 = !DILocation(line: 8, column: 27, scope: !28) -!38 = !DILocation(line: 9, column: 8, scope: !28) -!39 = !DILocation(line: 9, column: 6, scope: !28) -!40 = !DILocation(line: 10, column: 8, scope: !28) -!41 = !DILocation(line: 10, column: 13, scope: !28) -!42 = !DILocation(line: 10, column: 6, scope: !28) -!43 = !DILocation(line: 11, column: 4, scope: !28) -!44 = !DILocation(line: 12, column: 11, scope: !28) -!45 = !DILocation(line: 12, column: 15, scope: !28) -!46 = !DILocation(line: 12, column: 13, scope: !28) -!47 = !DILocation(line: 12, column: 4, scope: !28) From bed9efed71b954047aa11d5ed02af433dd9971cf Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Thu, 19 May 2022 13:23:40 -0700 Subject: [PATCH 518/908] [MCDisassembler] Disambiguate Size parameter in tryAddingSymbolicOperand() MCSymbolizer::tryAddingSymbolicOperand() overloaded the Size parameter to specify either the instruction size or the operand size depending on the architecture. However, for proper symbolic disassembly on X86, we need to know both sizes, as an instruction can have two operands, and the instruction size cannot be reliably calculated based on the operand offset and its size. 
Hence, split Size into OpSize and InstSize. For X86, the new interface makes it possible to fix a couple of issues: * Correctly adjust the value of PC-relative operands. * Set operand size to zero when the operand is specified implicitly. Differential Revision: https://reviews.llvm.org/D126101 --- llvm/include/llvm-c/DisassemblerTypes.h | 16 +- .../llvm/MC/MCDisassembler/MCDisassembler.h | 7 +- .../MC/MCDisassembler/MCExternalSymbolizer.h | 3 +- .../llvm/MC/MCDisassembler/MCSymbolizer.h | 3 +- llvm/lib/MC/MCDisassembler/MCDisassembler.cpp | 7 +- .../MCDisassembler/MCExternalSymbolizer.cpp | 16 +- .../Disassembler/AArch64Disassembler.cpp | 12 +- .../AArch64ExternalSymbolizer.cpp | 6 +- .../Disassembler/AArch64ExternalSymbolizer.h | 3 +- .../Disassembler/AMDGPUDisassembler.cpp | 10 +- .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 4 +- .../ARC/Disassembler/ARCDisassembler.cpp | 2 +- .../ARM/Disassembler/ARMDisassembler.cpp | 3 +- .../Disassembler/HexagonDisassembler.cpp | 3 +- .../Lanai/Disassembler/LanaiDisassembler.cpp | 2 +- .../Sparc/Disassembler/SparcDisassembler.cpp | 2 +- .../Disassembler/SystemZDisassembler.cpp | 2 +- .../X86/Disassembler/X86Disassembler.cpp | 18 ++- llvm/tools/llvm-objdump/MachODump.cpp | 15 +- llvm/unittests/MC/X86/CMakeLists.txt | 12 ++ .../MC/X86/X86MCDisassemblerTest.cpp | 145 ++++++++++++++++++ 21 files changed, 226 insertions(+), 65 deletions(-) create mode 100644 llvm/unittests/MC/X86/CMakeLists.txt create mode 100644 llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h index 53baaef1103358..6999a350ec918e 100644 --- a/llvm/include/llvm-c/DisassemblerTypes.h +++ b/llvm/include/llvm-c/DisassemblerTypes.h @@ -38,15 +38,15 @@ typedef void *LLVMDisasmContextRef; * one operand with symbolic information. 
To determine the symbolic operand * information for each operand, the bytes for the specific operand in the * instruction are specified by the Offset parameter and its byte widith is the - * size parameter. For instructions sets with fixed widths and one symbolic - * operand per instruction, the Offset parameter will be zero and Size parameter - * will be the instruction width. The information is returned in TagBuf and is - * Triple specific with its specific information defined by the value of - * TagType for that Triple. If symbolic information is returned the function - * returns 1, otherwise it returns 0. + * OpSize parameter. For instructions sets with fixed widths and one symbolic + * operand per instruction, the Offset parameter will be zero and InstSize + * parameter will be the instruction width. The information is returned in + * TagBuf and is Triple specific with its specific information defined by the + * value of TagType for that Triple. If symbolic information is returned the + * function * returns 1, otherwise it returns 0. 
*/ -typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, - uint64_t Offset, uint64_t Size, +typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, + uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf); /** diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h index 7060620b6bd4b7..de069ff95c2f1d 100644 --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -181,10 +181,9 @@ class MCDisassembler { public: // Helpers around MCSymbolizer - bool tryAddingSymbolicOperand(MCInst &Inst, - int64_t Value, - uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) const; + bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) const; void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; diff --git a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h index 18aa781de0a3b3..8af3bb2296ec13 100644 --- a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h @@ -46,7 +46,8 @@ class MCExternalSymbolizer : public MCSymbolizer { bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) override; + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override; void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream, int64_t Value, uint64_t Address) override; diff --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h index 3e20e13c1b96fc..1efb63f1a14256 100644 --- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h @@ -63,12 
+63,13 @@ class MCSymbolizer { /// \param Address - Load address of the instruction. /// \param IsBranch - Is the instruction a branch? /// \param Offset - Byte offset of the operand inside the inst. + /// \param OpSize - Size of the operand in bytes. /// \param InstSize - Size of the instruction in bytes. /// \return Whether a symbolic operand was added. virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, - uint64_t InstSize) = 0; + uint64_t OpSize, uint64_t InstSize) = 0; /// Try to add a comment on the PC-relative load. /// For instance, in Mach-O, this is used to add annotations to instructions diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp index a95385fd7e68a6..c6035dca4ce198 100644 --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -22,11 +22,12 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, + uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const { if (Symbolizer) - return Symbolizer->tryAddingSymbolicOperand( - Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize); + return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value, + Address, IsBranch, Offset, + OpSize, InstSize); return false; } diff --git a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp index 7befef86303cbf..e3f4cdd21557e2 100644 --- a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp +++ b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -31,19 +31,15 @@ class Triple; // is found an MCExpr is created with that, else an MCExpr with Value is // created. This function returns true if it adds an operand to the MCInst and // false otherwise. 
-bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, - raw_ostream &cStream, - int64_t Value, - uint64_t Address, - bool IsBranch, - uint64_t Offset, - uint64_t InstSize) { +bool MCExternalSymbolizer::tryAddingSymbolicOperand( + MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { struct LLVMOpInfo1 SymbolicOp; std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); SymbolicOp.Value = Value; if (!GetOpInfo || - !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { + !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) { // Clear SymbolicOp.Value from above and also all other fields. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); @@ -53,10 +49,10 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, // that always makes sense to guess. But in the case of an immediate it is // a bit more questionable if it is an address of a symbol or some other // reference. So if the immediate Value comes from a width of 1 byte, - // InstSize, we will not guess it is an address of a symbol. Because in + // OpSize, we will not guess it is an address of a symbol. Because in // object files assembled starting at address 0 this usually leads to // incorrect symbolication. 
- if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) + if (!SymbolLookUp || (OpSize == 1 && !IsBranch)) return false; uint64_t ReferenceType; diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index ffbed4c05ef1ea..1b65589416c352 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -749,7 +749,7 @@ static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm, ImmVal |= ~((1LL << 19) - 1); if (!Decoder->tryAddingSymbolicOperand( - Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 4)) + Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(ImmVal)); return Success; } @@ -1030,7 +1030,7 @@ DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, } DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(offset)); return Success; } @@ -1640,7 +1640,7 @@ static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn, imm |= ~((1LL << 21) - 1); DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(imm)); return Success; @@ -1675,7 +1675,7 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn, DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); } - if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(ImmVal)); Inst.addOperand(MCOperand::createImm(12 * ShifterVal)); return Success; @@ -1690,7 +1690,7 @@ 
static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn, if (imm & (1 << (26 - 1))) imm |= ~((1LL << 26) - 1); - if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(imm)); return Success; @@ -1742,7 +1742,7 @@ static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn, else DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); Inst.addOperand(MCOperand::createImm(bit)); - if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(dst)); return Success; diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 5b6f06f8dbb401..11964b2075e5e7 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -60,7 +60,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) { /// an operand to the MCInst and Fail otherwise. 
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, - bool IsBranch, uint64_t Offset, uint64_t InstSize) { + bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { if (!SymbolLookUp) return false; // FIXME: This method shares a lot of code with @@ -73,8 +73,8 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( SymbolicOp.Value = Value; uint64_t ReferenceType; const char *ReferenceName; - if (!GetOpInfo || - !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize, + 1, &SymbolicOp)) { if (IsBranch) { ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h index dc72331660ccd2..ca677db49739de 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h @@ -29,7 +29,8 @@ class AArch64ExternalSymbolizer : public MCExternalSymbolizer { bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) override; + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override; }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index bd75272859695e..4d5c6d1875860b 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -82,7 +82,7 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, APInt SignedOffset(18, Imm * 4, true); int64_t Offset = (SignedOffset.sext(64) + 4 + 
Addr).getSExtValue(); - if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2)) + if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0)) return MCDisassembler::Success; return addOperand(Inst, MCOperand::createImm(Imm)); } @@ -1918,10 +1918,10 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, //===----------------------------------------------------------------------===// // Try to find symbol name for specified label -bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst, - raw_ostream &/*cStream*/, int64_t Value, - uint64_t /*Address*/, bool IsBranch, - uint64_t /*Offset*/, uint64_t /*InstSize*/) { +bool AMDGPUSymbolizer::tryAddingSymbolicOperand( + MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value, + uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/, + uint64_t /*OpSize*/, uint64_t /*InstSize*/) { if (!IsBranch) { return false; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f46ad9d4220958..5db64843176ca4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -217,8 +217,8 @@ class AMDGPUSymbolizer : public MCSymbolizer { : MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {} bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, - int64_t Value, uint64_t Address, - bool IsBranch, uint64_t Offset, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override; void tryAddingPcLoadReferenceComment(raw_ostream &cStream, diff --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp index 9e59e9d792854d..61810175590459 100644 --- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp +++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp @@ -181,7 +181,7 @@ static bool DecodeSymbolicOperand(MCInst 
&Inst, uint64_t Address, const MCDisassembler *Decoder) { static const uint64_t AtLeast = 2; return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand( - Inst, Value, Address, true, 0, AtLeast)); + Inst, Value, Address, true, 0, AtLeast, 0)); } static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address, diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8bfa70c1608934..9acd49292268de 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -835,7 +835,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, const MCDisassembler *Decoder) { // FIXME: Does it make sense for value to be negative? return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, - isBranch, /* Offset */ 0, InstSize); + isBranch, /*Offset=*/0, /*OpSize=*/0, + InstSize); } /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 58c51e76b8965e..58d5df4c1f7161 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -768,7 +768,8 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, Bits = 15; uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits)); uint32_t Extended = FullValue + Address; - if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4)) + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0, + 4)) HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); return MCDisassembler::Success; } diff --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp index 
d91670bff96210..e9fecef4ac5b91 100644 --- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp +++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp @@ -215,7 +215,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch, uint64_t Width, MCInst &MI, const MCDisassembler *Decoder) { return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset, - Width); + Width, /*InstSize=*/0); } static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address, diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index da4d92713ef738..1825b95dd6ac51 100644 --- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -509,7 +509,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Width, MCInst &MI, const MCDisassembler *Decoder) { return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, - Width); + Width, /*InstSize=*/4); } static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address, diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 94f0f10868744a..979141a1962a07 100644 --- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -75,7 +75,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Width, MCInst &MI, const MCDisassembler *Decoder) { return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, - Width); + Width, /*InstSize=*/0); } static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8ca5c4830cba1a..e8b9ee60233d38 100644 --- 
a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -1874,8 +1874,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, uint64_t pcrel = 0; if (type == TYPE_REL) { isBranch = true; - pcrel = insn.startLocation + - insn.immediateOffset + insn.immediateSize; + pcrel = insn.startLocation + insn.length; switch (operand.encoding) { default: break; @@ -1950,9 +1949,9 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, break; } - if (!Dis->tryAddingSymbolicOperand(mcInst, immediate + pcrel, - insn.startLocation, isBranch, - insn.immediateOffset, insn.immediateSize)) + if (!Dis->tryAddingSymbolicOperand( + mcInst, immediate + pcrel, insn.startLocation, isBranch, + insn.immediateOffset, insn.immediateSize, insn.length)) mcInst.addOperand(MCOperand::createImm(immediate)); if (type == TYPE_MOFFS) { @@ -2089,8 +2088,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, return true; } if (insn.mode == MODE_64BIT){ - pcrel = insn.startLocation + - insn.displacementOffset + insn.displacementSize; + pcrel = insn.startLocation + insn.length; Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, insn.startLocation + insn.displacementOffset); @@ -2153,9 +2151,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, mcInst.addOperand(baseReg); mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); + + const uint8_t dispSize = + (insn.eaDisplacement == EA_DISP_NONE) ? 
0 : insn.displacementSize; + if (!Dis->tryAddingSymbolicOperand( mcInst, insn.displacement + pcrel, insn.startLocation, false, - insn.displacementOffset, insn.displacementSize)) + insn.displacementOffset, dispSize, insn.length)) mcInst.addOperand(displacement); mcInst.addOperand(segmentReg); return false; diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index c8c31d2e86638b..60c34158941bad 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -2608,7 +2608,8 @@ struct DisassembleInfo { // value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic // information is returned then this function returns 1 else it returns 0. static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, - uint64_t Size, int TagType, void *TagBuf) { + uint64_t OpSize, uint64_t InstSize, int TagType, + void *TagBuf) { struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf; uint64_t value = op_info->Value; @@ -2625,7 +2626,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, unsigned int Arch = info->O->getArch(); if (Arch == Triple::x86) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: @@ -2705,7 +2706,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, return 0; } if (Arch == Triple::x86_64) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0) return 0; // For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external // relocation entries of a linked image (if any) for an entry that matches @@ -2737,7 +2738,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, // adds the Pc. 
But for x86_64 external relocation entries the Value // is the offset from the external symbol. if (info->O->getAnyRelocationPCRel(RE)) - op_info->Value -= Pc + Offset + Size; + op_info->Value -= Pc + InstSize; const char *name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data(); op_info->AddSymbol.Present = 1; @@ -2775,7 +2776,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, // adds the Pc. But for x86_64 external relocation entries the Value // is the offset from the external symbol. if (info->O->getAnyRelocationPCRel(RE)) - op_info->Value -= Pc + Offset + Size; + op_info->Value -= Pc + InstSize; const char *name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data(); unsigned Type = info->O->getAnyRelocationType(RE); @@ -2803,7 +2804,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, return 0; } if (Arch == Triple::arm) { - if (Offset != 0 || (Size != 4 && Size != 2)) + if (Offset != 0 || (InstSize != 4 && InstSize != 2)) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: @@ -2940,7 +2941,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, return 1; } if (Arch == Triple::aarch64) { - if (Offset != 0 || Size != 4) + if (Offset != 0 || InstSize != 4) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: diff --git a/llvm/unittests/MC/X86/CMakeLists.txt b/llvm/unittests/MC/X86/CMakeLists.txt new file mode 100644 index 00000000000000..212e6e469f302c --- /dev/null +++ b/llvm/unittests/MC/X86/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + MC + MCDisassembler + Target + X86Desc + X86Disassembler + X86Info + ) + +add_llvm_unittest(X86MCTests + X86MCDisassemblerTest.cpp + ) diff --git a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp new file mode 100644 index 00000000000000..97e717cc9b9795 --- /dev/null +++ 
b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp @@ -0,0 +1,145 @@ +//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +struct Context { + const char *TripleName = "x86_64-unknown-elf"; + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr Ctx; + std::unique_ptr STI; + std::unique_ptr DisAsm; + + Context() { + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86Disassembler(); + + // If we didn't build x86, do not run the test. 
+ std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + if (!TheTarget) + return; + + MRI.reset(TheTarget->createMCRegInfo(TripleName)); + MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); + STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", "")); + Ctx = std::make_unique(Triple(TripleName), MAI.get(), MRI.get(), + STI.get()); + + DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx)); + } + + operator MCContext &() { return *Ctx; }; +}; + +Context &getContext() { + static Context Ctxt; + return Ctxt; +} + +class X86MCSymbolizerTest : public MCSymbolizer { +public: + X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {} + ~X86MCSymbolizerTest() {} + + struct OpInfo { + int64_t Value = 0; + uint64_t Offset = 0; + uint64_t Size; + }; + std::vector Operands; + uint64_t InstructionSize = 0; + + void reset() { + Operands.clear(); + InstructionSize = 0; + } + + bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override { + Operands.push_back({Value, Offset, OpSize}); + InstructionSize = InstSize; + return false; + } + + void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, + uint64_t Address) override {} +}; + +} // namespace + +TEST(X86Disassembler, X86MCSymbolizerTest) { + X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext()); + getContext().DisAsm->setSymbolizer( + std::unique_ptr(TestSymbolizer)); + + MCDisassembler::DecodeStatus Status; + MCInst Inst; + uint64_t InstSize; + + auto checkBytes = [&](ArrayRef Bytes) { + TestSymbolizer->reset(); + Status = + getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls()); + ASSERT_TRUE(Status == MCDisassembler::Success); + EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize); + }; + + auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset, + 
uint64_t Size) { + ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo); + EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value); + EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset); + EXPECT_EQ(TestSymbolizer->Operands[OpNo].Size, Size); + }; + + // movq $0x80000, 0x80000 + checkBytes( + {0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00}); + checkOperand(0, 0x80000, 4, 4); + checkOperand(1, 0x80000, 8, 4); + + // movq $0x2a, 0x123(%rax,%r14,8) + checkBytes( + {0x4a, 0xc7, 0x84, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00}); + checkOperand(0, 291, 4, 4); + checkOperand(1, 42, 8, 4); + + // movq $0xffffffffffffefe8, -0x1(%rip) + // Test that the value of the rip-relative operand is set correctly. + // The instruction address is 0 and the size is 12 bytes. + checkBytes( + {0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff}); + checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3, 4); + checkOperand(1, 0xffffffffffffefe8, 7, 4); + + // movq $0xfffffffffffffef5, (%r12) + // Test that the displacement operand has a size of 0, since it is not + // explicitly specified in the instruction. + checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff}); + checkOperand(0, 0, 4, 0); + checkOperand(1, 0xfffffffffffffef5, 4, 4); +} From 67e2e6e66d0aedb3a2b88fcd0f97957e3cf5ea36 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 25 May 2022 13:30:37 -0700 Subject: [PATCH 519/908] [gn build] Set llvm_have_mallinfo2 to true by default If you are using an old enough glibc which doesn't have mallinfo2, set `llvm_have_mallinfo2 = false`. At this point it's likely that most people using the gn build are compiling against a recent enough glibc (glibc 2.33 which was released in Feb 2021). 
Reviewed By: peterwaller-arm Differential Revision: https://reviews.llvm.org/D126415 --- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index f3f7c8879950aa..82eba6b7e55231 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -50,7 +50,7 @@ declare_args() { # glibc is at least 2.33 which has mallinfo2. # TODO: remove this once nobody using the gn build is building against an old glibc. - llvm_have_mallinfo2 = false + llvm_have_mallinfo2 = true } write_cmake_config("abi-breaking") { From 33b598a808b9ef1a019e798f19855f203e09ad48 Mon Sep 17 00:00:00 2001 From: Eric Li Date: Wed, 25 May 2022 19:21:08 +0000 Subject: [PATCH 520/908] [clang][dataflow] Relax assert on existence of `this` pointee storage Support for unions is incomplete (per 99f7d55e) and the `this` pointee storage location is not set for unions. The assert in `VisitCXXThisExpr` is then guaranteed to trigger when analyzing member functions of a union. This commit changes the assert to an early-return. Any expression may be undefined, and so having a value for the `CXXThisExpr` is not a postcondition of the transfer function. 
Differential Revision: https://reviews.llvm.org/D126405 --- clang/lib/Analysis/FlowSensitive/Transfer.cpp | 5 +++- .../Analysis/FlowSensitive/TransferTest.cpp | 28 +++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index f88406595d728a..fc04e0b55ffc7d 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -279,7 +279,10 @@ class TransferVisitor : public ConstStmtVisitor { void VisitCXXThisExpr(const CXXThisExpr *S) { auto *ThisPointeeLoc = Env.getThisPointeeStorageLocation(); - assert(ThisPointeeLoc != nullptr); + if (ThisPointeeLoc == nullptr) + // Unions are not supported yet, and will not have a location for the + // `this` expression's pointee. + return; auto &Loc = Env.createStorageLocation(*S); Env.setStorageLocation(*S, Loc); diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index b6d2940a98daad..bb6e269969da7e 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -42,10 +42,11 @@ class TransferTest : public ::testing::Test { template void runDataflow(llvm::StringRef Code, Matcher Match, LangStandard::Kind Std = LangStandard::lang_cxx17, - bool ApplyBuiltinTransfer = true) { + bool ApplyBuiltinTransfer = true, + llvm::StringRef TargetFun = "target") { ASSERT_THAT_ERROR( test::checkDataflow( - Code, "target", + Code, TargetFun, [ApplyBuiltinTransfer](ASTContext &C, Environment &) { return NoopAnalysis(C, ApplyBuiltinTransfer); }, @@ -3175,4 +3176,27 @@ TEST_F(TransferTest, LoopWithStructReferenceAssignmentConverges) { }); } +TEST_F(TransferTest, DoesNotCrashOnUnionThisExpr) { + std::string Code = R"( + union Union { + int A; + float B; + }; + + void foo() { + Union A; + Union B; + A = B; + } + )"; + // This is a crash 
regression test when calling the transfer function on a + // `CXXThisExpr` that refers to a union. + runDataflow( + Code, + [](llvm::ArrayRef< + std::pair>>, + ASTContext &) {}, + LangStandard::lang_cxx17, /*ApplyBuiltinTransfer=*/true, "operator="); +} + } // namespace From 5520c5839016d1d98d8e01789eb17c910381bd6f Mon Sep 17 00:00:00 2001 From: Eric Li Date: Wed, 25 May 2022 20:15:46 +0000 Subject: [PATCH 521/908] [clang][dataflow] Fix incorrect CXXThisExpr pointee for lambdas When constructing the `Environment`, the `this` pointee is established for a `CXXMethodDecl` by looking at its parent. However, inside of lambdas, a `CXXThisExpr` refers to the captured `this` coming from the enclosing member function. When establishing the `this` pointee for a function, we check whether the function is a lambda, and check for an enclosing member function to establish the `this` pointee storage location. Differential Revision: https://reviews.llvm.org/D126413 --- .../FlowSensitive/DataflowEnvironment.cpp | 7 +- .../Analysis/FlowSensitive/TransferTest.cpp | 106 ++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index e555bee0e8bf28..fe573bbf9f379a 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -216,7 +216,12 @@ Environment::Environment(DataflowAnalysisContext &DACtx, } if (const auto *MethodDecl = dyn_cast(&DeclCtx)) { - if (!MethodDecl->isStatic()) { + auto *Parent = MethodDecl->getParent(); + assert(Parent != nullptr); + if (Parent->isLambda()) + MethodDecl = dyn_cast(Parent->getDeclContext()); + + if (MethodDecl && !MethodDecl->isStatic()) { QualType ThisPointeeType = MethodDecl->getThisObjectType(); // FIXME: Add support for union types. 
if (!ThisPointeeType->isUnionType()) { diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index bb6e269969da7e..bed6b975974ccc 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1476,6 +1476,112 @@ TEST_F(TransferTest, ClassThisMember) { }); } +TEST_F(TransferTest, StructThisInLambda) { + std::string ThisCaptureCode = R"( + struct A { + void frob() { + [this]() { + int Foo = Bar; + // [[p1]] + }(); + } + + int Bar; + }; + )"; + runDataflow( + ThisCaptureCode, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p1", _))); + const Environment &Env = Results[0].second.Env; + + const auto *ThisLoc = dyn_cast( + Env.getThisPointeeStorageLocation()); + ASSERT_THAT(ThisLoc, NotNull()); + + const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); + ASSERT_THAT(BarDecl, NotNull()); + + const auto *BarLoc = + cast(&ThisLoc->getChild(*BarDecl)); + ASSERT_TRUE(isa_and_nonnull(BarLoc)); + + const Value *BarVal = Env.getValue(*BarLoc); + ASSERT_TRUE(isa_and_nonnull(BarVal)); + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + EXPECT_EQ(Env.getValue(*FooDecl, SkipPast::None), BarVal); + }, + LangStandard::lang_cxx17, /*ApplyBuiltinTransfer=*/true, "operator()"); + + std::string RefCaptureDefaultCode = R"( + struct A { + void frob() { + [&]() { + int Foo = Bar; + // [[p2]] + }(); + } + + int Bar; + }; + )"; + runDataflow( + RefCaptureDefaultCode, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p2", _))); + const Environment &Env = Results[0].second.Env; + + const auto *ThisLoc = dyn_cast( + Env.getThisPointeeStorageLocation()); + ASSERT_THAT(ThisLoc, NotNull()); + + const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); + ASSERT_THAT(BarDecl, NotNull()); + 
+ const auto *BarLoc = + cast(&ThisLoc->getChild(*BarDecl)); + ASSERT_TRUE(isa_and_nonnull(BarLoc)); + + const Value *BarVal = Env.getValue(*BarLoc); + ASSERT_TRUE(isa_and_nonnull(BarVal)); + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + EXPECT_EQ(Env.getValue(*FooDecl, SkipPast::None), BarVal); + }, + LangStandard::lang_cxx17, /*ApplyBuiltinTransfer=*/true, "operator()"); + + std::string FreeFunctionLambdaCode = R"( + void foo() { + int Bar; + [&]() { + int Foo = Bar; + // [[p3]] + }(); + } + )"; + runDataflow( + FreeFunctionLambdaCode, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p3", _))); + const Environment &Env = Results[0].second.Env; + + EXPECT_THAT(Env.getThisPointeeStorageLocation(), IsNull()); + }, + LangStandard::lang_cxx17, /*ApplyBuiltinTransfer=*/true, "operator()"); +} + TEST_F(TransferTest, ConstructorInitializer) { std::string Code = R"( struct target { From 10f41a214767219ef369b217f685eb545b80e386 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 May 2022 04:37:51 -0700 Subject: [PATCH 522/908] [SLP]Fix PR55688: Miscompile due to incorrect nuw/nsw handling. Need to use all ReductionOps when propagating flags for the reduction ops, otherwise transformation is not correct. Plus, need to drop nuw/nsw flags. 
Differential Revision: https://reviews.llvm.org/D126371 --- .../include/llvm/Transforms/Utils/LoopUtils.h | 6 ++-- llvm/lib/Transforms/Utils/LoopUtils.cpp | 5 +-- .../Transforms/Vectorize/SLPVectorizer.cpp | 36 +++++-------------- .../SLPVectorizer/AArch64/horizontal.ll | 22 ++++++------ .../SLPVectorizer/X86/horizontal-list.ll | 8 ++--- .../SLPVectorizer/X86/horizontal.ll | 3 +- .../SLPVectorizer/X86/remark_horcost.ll | 6 ++-- .../SLPVectorizer/X86/scheduling.ll | 6 ++-- 8 files changed, 37 insertions(+), 55 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 04e834c0bc8aad..676c0c1487db86 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -413,8 +413,10 @@ Value *createOrderedReduction(IRBuilderBase &B, /// of each scalar operation (VL) that will be converted into a vector (I). /// If OpValue is non-null, we only consider operations similar to OpValue /// when intersecting. -/// Flag set: NSW, NUW, exact, and all of fast-math. -void propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue = nullptr); +/// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of +/// fast-math. +void propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue = nullptr, + bool IncludeWrapFlags = true); /// Returns true if we can prove that \p S is defined and always negative in /// loop \p L. 
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index cd9e5a846afb67..c5582fb90bbfeb 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1097,7 +1097,8 @@ Value *llvm::createOrderedReduction(IRBuilderBase &B, return B.CreateFAddReduce(Start, Src); } -void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue) { +void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue, + bool IncludeWrapFlags) { auto *VecOp = dyn_cast(I); if (!VecOp) return; @@ -1106,7 +1107,7 @@ void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue) { if (!Intersection) return; const unsigned Opcode = Intersection->getOpcode(); - VecOp->copyIRFlags(Intersection); + VecOp->copyIRFlags(Intersection, IncludeWrapFlags); for (auto *V : VL) { auto *Instr = dyn_cast(V); if (!Instr) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d74153c4c6368c..259e0e565dd176 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10327,7 +10327,7 @@ class HorizontalReduction { } /// Creates reduction operation with the current opcode with the IR flags - /// from \p ReductionOps. + /// from \p ReductionOps, dropping nuw/nsw flags. 
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS, Value *RHS, const Twine &Name, const ReductionOpsListType &ReductionOps) { @@ -10341,26 +10341,14 @@ class HorizontalReduction { Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect); if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) { if (auto *Sel = dyn_cast(Op)) { - propagateIRFlags(Sel->getCondition(), ReductionOps[0]); - propagateIRFlags(Op, ReductionOps[1]); + propagateIRFlags(Sel->getCondition(), ReductionOps[0], nullptr, + /*IncludeWrapFlags=*/false); + propagateIRFlags(Op, ReductionOps[1], nullptr, + /*IncludeWrapFlags=*/false); return Op; } } - propagateIRFlags(Op, ReductionOps[0]); - return Op; - } - - /// Creates reduction operation with the current opcode with the IR flags - /// from \p I. - static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS, - Value *RHS, const Twine &Name, Value *I) { - auto *SelI = dyn_cast(I); - Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr); - if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) { - if (auto *Sel = dyn_cast(Op)) - propagateIRFlags(Sel->getCondition(), SelI->getCondition()); - } - propagateIRFlags(Op, I); + propagateIRFlags(Op, ReductionOps[0], nullptr, /*IncludeWrapFlags=*/false); return Op; } @@ -11031,10 +11019,6 @@ class HorizontalReduction { for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) { Instruction *RedOp = InstVals[I + 1].first; Builder.SetCurrentDebugLocation(RedOp->getDebugLoc()); - ReductionOpsListType Ops; - if (auto *Sel = dyn_cast(RedOp)) - Ops.emplace_back().push_back(Sel->getCondition()); - Ops.emplace_back().push_back(RedOp); Value *RdxVal1 = InstVals[I].second; Value *StableRdxVal1 = RdxVal1; auto It1 = TrackedVals.find(RdxVal1); @@ -11046,7 +11030,7 @@ class HorizontalReduction { if (It2 != TrackedVals.end()) StableRdxVal2 = It2->second; Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1, - StableRdxVal2, "op.rdx", 
Ops); + StableRdxVal2, "op.rdx", ReductionOps); ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed); } if (Sz % 2 == 1) @@ -11081,17 +11065,13 @@ class HorizontalReduction { if (ExtraReductions.size() == 1) { Instruction *RedOp = ExtraReductions.back().first; Builder.SetCurrentDebugLocation(RedOp->getDebugLoc()); - ReductionOpsListType Ops; - if (auto *Sel = dyn_cast(RedOp)) - Ops.emplace_back().push_back(Sel->getCondition()); - Ops.emplace_back().push_back(RedOp); Value *RdxVal = ExtraReductions.back().second; Value *StableRdxVal = RdxVal; auto It = TrackedVals.find(RdxVal); if (It != TrackedVals.end()) StableRdxVal = It->second; VectorizedTree = createOp(Builder, RdxKind, VectorizedTree, - StableRdxVal, "op.rdx", Ops); + StableRdxVal, "op.rdx", ReductionOps); } ReductionRoot->replaceAllUsesWith(VectorizedTree); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll index 5c310888353bdf..df627f3456e059 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll @@ -28,7 +28,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[S_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[S_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[J_025:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[P2_024:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR29:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[P1_023:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] @@ -41,7 +41,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca ; CHECK-NEXT: [[TMP6:%.*]] 
= sub nsw <4 x i32> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) -; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8]], [[S_026]] ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_025]], 1 @@ -50,7 +50,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[S_0_LCSSA]] ; entry: @@ -148,7 +148,7 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[S_020:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[S_020:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[IF_END:%.*]] ] ; CHECK-NEXT: [[J_019:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ] ; CHECK-NEXT: [[P2_018:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR16:%.*]], [[IF_END]] ] ; CHECK-NEXT: [[P1_017:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END]] ] @@ -158,8 +158,8 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]] ; 
CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]] -; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP5]], [[S_020]] +; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]] ; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_017]], i64 [[IDX_EXT]] @@ -170,7 +170,7 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[S_1]] ; entry: @@ -245,7 +245,7 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[S_047:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[IF_END_86:%.*]] ] +; CHECK-NEXT: [[S_047:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[IF_END_86:%.*]] ] ; CHECK-NEXT: [[J_046:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END_86]] ] ; CHECK-NEXT: [[P2_045:%.*]] = phi i8* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ] ; CHECK-NEXT: [[P1_044:%.*]] = phi i8* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ] @@ -260,8 +260,8 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali ; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 
@llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) -; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]] -; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP10]], [[S_047]] +; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]] ; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: if.end.86: ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[P1_044]], i64 [[IDX_EXT]] @@ -272,7 +272,7 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[S_1]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index bafc67c9ea31f1..a444df9ec7ade4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -1001,8 +1001,8 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add nuw i32 [[TMP3]], [[ARG]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = add nsw i32 [[TMP6]], [[OP_RDX]] +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP3]], [[ARG]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP6]], [[OP_RDX]] ; CHECK-NEXT: ret i32 [[OP_RDX2]] ; ; THRESHOLD-LABEL: @wobble( @@ -1016,8 +1016,8 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer ; THRESHOLD-NEXT: [[TMP5:%.*]] = sext <4 x 
i1> [[TMP4]] to <4 x i32> ; THRESHOLD-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add nuw i32 [[TMP3]], [[ARG]] -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = add nsw i32 [[TMP6]], [[OP_RDX]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP3]], [[ARG]] +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP6]], [[OP_RDX]] ; THRESHOLD-NEXT: ret i32 [[OP_RDX2]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index 8ff25ec5515855..f5a87aeed95643 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -1463,7 +1463,6 @@ define float @fadd_v4f32_fmf_intersect(float* %p) { ret float %add3 } -; FIXME: Can't preserve no-wrap guarantees with reassociated math. ; This must not propagate 'nsw' to a new add instruction. define void @nsw_propagation_v4i32(i32* %res, i32 %start) { @@ -1482,7 +1481,7 @@ define void @nsw_propagation_v4i32(i32* %res, i32 %start) { ; STORE-LABEL: @nsw_propagation_v4i32( ; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 ; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) -; STORE-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[TMP2]], [[START:%.*]] +; STORE-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP2]], [[START:%.*]] ; STORE-NEXT: store i32 [[OP_RDX]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll index e5fa8ad8233d41..f98f99907241c9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll @@ -10,7 +10,7 @@ define i32 @foo(i32* %diff) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] 
], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], 4 @@ -24,12 +24,12 @@ define i32 @foo(i32* %diff) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) -; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP9]], [[A_088]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP9]], [[A_088]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[OP_EXTRA]] +; CHECK-NEXT: ret i32 [[OP_RDX]] ; entry: %m2 = alloca [8 x [8 x i32]], align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll index 7412b8781e53a6..813b82b9aefcd6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll @@ -9,7 +9,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* 
[[DIFF:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], 4 @@ -23,14 +23,14 @@ define i32 @foo(i32* nocapture readonly %diff) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) -; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP9]], [[A_088]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP9]], [[A_088]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 0 ; CHECK-NEXT: call void @ff([8 x i32]* [[ARRAYDECAY]]) -; CHECK-NEXT: ret i32 [[OP_EXTRA]] +; CHECK-NEXT: ret i32 [[OP_RDX]] ; entry: %m2 = alloca [8 x [8 x i32]], align 16 From ec51971eae02a031bbfd992cf59e1f6acdadbeeb Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Tue, 24 May 2022 23:58:36 +0000 Subject: [PATCH 523/908] [memprof] Keep and display symbol names in the RawMemProfReader. Extend the Frame struct to hold the symbol name if requested when a RawMemProfReader object is constructed. This change updates the tests and removes the need to pass --debug to obtain the mapping from GUID to symbol names. 
Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D126344 --- llvm/include/llvm/ProfileData/MemProf.h | 9 ++++ .../llvm/ProfileData/RawMemProfReader.h | 18 +++++--- llvm/lib/ProfileData/RawMemProfReader.cpp | 44 ++++++++++++------- .../tools/llvm-profdata/memprof-basic.test | 2 + .../tools/llvm-profdata/memprof-inline.test | 27 +++++------- llvm/tools/llvm-profdata/llvm-profdata.cpp | 4 +- llvm/unittests/ProfileData/MemProfTest.cpp | 8 +++- 7 files changed, 71 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 3a80ab26cd46e1..0406d4fab711e9 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -142,6 +142,9 @@ struct Frame { // A uuid (uint64_t) identifying the function. It is obtained by // llvm::md5(FunctionName) which returns the lower 64 bits. GlobalValue::GUID Function; + // The symbol name for the function. Only populated in the Frame by the reader + // if requested during initialization. This field should not be serialized. + llvm::Optional SymbolName; // The source line offset of the call from the beginning of parent function. uint32_t LineOffset; // The source column number of the call to help distinguish multiple calls @@ -152,6 +155,7 @@ struct Frame { Frame(const Frame &Other) { Function = Other.Function; + SymbolName = Other.SymbolName; LineOffset = Other.LineOffset; Column = Other.Column; IsInlineFrame = Other.IsInlineFrame; @@ -161,12 +165,15 @@ struct Frame { : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} bool operator==(const Frame &Other) const { + // Ignore the SymbolName field to avoid a string compare. Comparing the + // function hash serves the same purpose. 
return Other.Function == Function && Other.LineOffset == LineOffset && Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; } Frame &operator=(const Frame &Other) { Function = Other.Function; + SymbolName = Other.SymbolName; LineOffset = Other.LineOffset; Column = Other.Column; IsInlineFrame = Other.IsInlineFrame; @@ -214,6 +221,8 @@ struct Frame { void printYAML(raw_ostream &OS) const { OS << " -\n" << " Function: " << Function << "\n" + << " SymbolName: " + << (SymbolName.hasValue() ? SymbolName.getValue() : "") << "\n" << " LineOffset: " << LineOffset << "\n" << " Column: " << Column << "\n" << " Inline: " << IsInlineFrame << "\n"; diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h index 4421b77d78a16a..a81db8ee3d995a 100644 --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -57,7 +57,8 @@ class RawMemProfReader { // \p Path. The binary from which the profile has been collected is specified // via a path in \p ProfiledBinary. static Expected> - create(const Twine &Path, const StringRef ProfiledBinary); + create(const Twine &Path, const StringRef ProfiledBinary, + bool KeepName = false); using GuidMemProfRecordPair = std::pair; using Iterator = InstrProfIterator; @@ -76,9 +77,9 @@ class RawMemProfReader { RawMemProfReader(std::unique_ptr Sym, llvm::SmallVectorImpl &Seg, llvm::MapVector &Prof, - CallStackMap &SM) + CallStackMap &SM, bool KeepName = false) : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()), - CallstackProfileData(Prof), StackMap(SM) { + CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) { // We don't call initialize here since there is no raw profile to read. The // test should pass in the raw profile as structured data. 
@@ -103,8 +104,9 @@ class RawMemProfReader { private: RawMemProfReader(std::unique_ptr DataBuffer, - object::OwningBinary &&Bin) - : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {} + object::OwningBinary &&Bin, bool KeepName) + : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)), + KeepSymbolName(KeepName) {} Error initialize(); Error readRawProfile(); // Symbolize and cache all the virtual addresses we encounter in the @@ -146,8 +148,12 @@ class RawMemProfReader { llvm::MapVector FunctionProfileData; llvm::MapVector::iterator Iter; -}; + // Whether to keep the symbol name for each frame after hashing. + bool KeepSymbolName = false; + // A mapping of the hash to symbol name, only used if KeepSymbolName is true. + llvm::DenseMap GuidToSymbolName; +}; } // namespace memprof } // namespace llvm diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp index 2a619c42fa6ae0..96ed9bbe5a7680 100644 --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include "llvm/ADT/ArrayRef.h" @@ -174,7 +175,8 @@ bool isRuntimePath(const StringRef Path) { } // namespace Expected> -RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) { +RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, + bool KeepName) { auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); if (std::error_code EC = BufferOr.getError()) return report(errorCodeToError(EC), Path.getSingleStringRef()); @@ -193,8 +195,9 @@ RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) { return report(BinaryOr.takeError(), ProfiledBinary); } - std::unique_ptr Reader( - new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get()))); + // Use new here since constructor is private. 
+ std::unique_ptr Reader(new RawMemProfReader( + std::move(Buffer), std::move(BinaryOr.get()), KeepName)); if (Error E = Reader->initialize()) { return std::move(E); } @@ -407,21 +410,21 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() { for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames; I++) { const auto &DIFrame = DI.getFrame(I); - LLVM_DEBUG( - // Print out the name to guid mapping for debugging. - llvm::dbgs() << "FunctionName: " << DIFrame.FunctionName - << " GUID: " - << IndexedMemProfRecord::getGUID(DIFrame.FunctionName) - << "\n";); - - const Frame F(IndexedMemProfRecord::getGUID(DIFrame.FunctionName), - DIFrame.Line - DIFrame.StartLine, DIFrame.Column, + const uint64_t Guid = + IndexedMemProfRecord::getGUID(DIFrame.FunctionName); + const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, // Only the last entry is not an inlined location. I != NumFrames - 1); - - const FrameId Id = F.hash(); - IdToFrame.insert({Id, F}); - SymbolizedFrame[VAddr].push_back(Id); + // Here we retain a mapping from the GUID to symbol name instead of + // adding it to the frame object directly to reduce memory overhead. + // This is because there can be many unique frames, particularly for + // callsite frames. 
+ if (KeepSymbolName) + GuidToSymbolName.insert({Guid, DIFrame.FunctionName}); + + const FrameId Hash = F.hash(); + IdToFrame.insert({Hash, F}); + SymbolizedFrame[VAddr].push_back(Hash); } } @@ -525,8 +528,15 @@ Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) { return make_error(instrprof_error::eof); auto IdToFrameCallback = [this](const FrameId Id) { - return this->idToFrame(Id); + Frame F = this->idToFrame(Id); + if (!this->KeepSymbolName) + return F; + auto Iter = this->GuidToSymbolName.find(F.Function); + assert(Iter != this->GuidToSymbolName.end()); + F.SymbolName = Iter->getSecond(); + return F; }; + const IndexedMemProfRecord &IndexedRecord = Iter->second; GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)}; Iter++; diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test index e72728af101dd8..5ec22e29ada3c1 100644 --- a/llvm/test/tools/llvm-profdata/memprof-basic.test +++ b/llvm/test/tools/llvm-profdata/memprof-basic.test @@ -52,6 +52,7 @@ CHECK-NEXT: - CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 21 CHECK-NEXT: Inline: 0 @@ -79,6 +80,7 @@ CHECK-NEXT: - CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 5 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 0 diff --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test index a31903e120c727..5f34d5ac0d817b 100644 --- a/llvm/test/tools/llvm-profdata/memprof-inline.test +++ b/llvm/test/tools/llvm-profdata/memprof-inline.test @@ -35,21 +35,6 @@ bin/clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \ env MEMPROF_OPTIONS=log_path=stdout ./memprof-inline.exe > inline.memprofraw ``` -From a debug run we collect the name to guid mappings to ensure that the -ordering we observe 
is as expected. - -``` -build-dbg/bin/llvm-profdata show --memory --debug-only=memprof \ - inline.memprofraw --profiled-binary memprof-inline.exe - -FunctionName: qux GUID: 15505678318020221912 -FunctionName: foo GUID: 6699318081062747564 -FunctionName: bar GUID: 16434608426314478903 -FunctionName: main GUID: 15822663052811949562 - -[..omit output here which is checked below..] -``` - RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s CHECK: MemprofProfile: @@ -68,21 +53,25 @@ CHECK-NEXT: - CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: SymbolName: qux CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 1 CHECK-NEXT: - CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: SymbolName: foo CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 18 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: SymbolName: bar CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 19 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 3 CHECK-NEXT: Inline: 0 @@ -113,21 +102,25 @@ CHECK-NEXT: - CHECK-NEXT: Callstack: CHECK-NEXT: - CHECK-NEXT: Function: 15505678318020221912 +CHECK-NEXT: SymbolName: qux CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 1 CHECK-NEXT: - CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: SymbolName: foo CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 18 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: SymbolName: bar CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 19 CHECK-NEXT: Inline: 0 CHECK-NEXT: - CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 3 CHECK-NEXT: Inline: 0 @@ -155,12 +148,14 @@ CHECK-NEXT: CallSites: CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 
15505678318020221912 +CHECK-NEXT: SymbolName: qux CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 15 CHECK-NEXT: Inline: 1 CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 6699318081062747564 +CHECK-NEXT: SymbolName: foo CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 18 CHECK-NEXT: Inline: 0 @@ -170,6 +165,7 @@ CHECK-NEXT: CallSites: CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 15822663052811949562 +CHECK-NEXT: SymbolName: main CHECK-NEXT: LineOffset: 1 CHECK-NEXT: Column: 3 CHECK-NEXT: Inline: 0 @@ -179,6 +175,7 @@ CHECK-NEXT: CallSites: CHECK-NEXT: - CHECK-NEXT: - CHECK-NEXT: Function: 16434608426314478903 +CHECK-NEXT: SymbolName: bar CHECK-NEXT: LineOffset: 0 CHECK-NEXT: Column: 19 CHECK-NEXT: Inline: 0 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index ffb64d71771ccb..51e3f5ee797d9b 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2532,8 +2532,8 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, static int showMemProfProfile(const std::string &Filename, const std::string &ProfiledBinary, raw_fd_ostream &OS) { - auto ReaderOr = - llvm::memprof::RawMemProfReader::create(Filename, ProfiledBinary); + auto ReaderOr = llvm::memprof::RawMemProfReader::create( + Filename, ProfiledBinary, /*KeepNames=*/true); if (Error E = ReaderOr.takeError()) // Since the error can be related to the profile or the binary we do not // pass whence. 
Instead additional context is provided where necessary in diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 48d6ee77072ed5..480848100446b3 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -104,6 +104,11 @@ MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") { *result_listener << "Hash mismatch"; return false; } + if (F.SymbolName.hasValue() && F.SymbolName.getValue() != FunctionName) { + *result_listener << "SymbolName mismatch\nWant: " << FunctionName + << "\nGot: " << F.SymbolName.getValue(); + return false; + } if (F.LineOffset == LineOffset && F.Column == Column && F.IsInlineFrame == Inline) { return true; @@ -154,7 +159,8 @@ TEST(MemProf, FillsValue) { auto Seg = makeSegments(); - RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); + RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM, + /*KeepName=*/true); llvm::DenseMap Records; for (const auto &Pair : Reader) { From 801ac2ebf19c4c7551370455743347266a5cc915 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 26 May 2022 06:10:22 +0900 Subject: [PATCH 524/908] [bazel] Bump to 15.0.0git --- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 6 +++--- .../clang/include/clang/Config/config.h | 2 +- utils/bazel/llvm-project-overlay/lld/BUILD.bazel | 2 +- .../llvm/include/llvm/Config/llvm-config.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index eb900106c21e6d..1d3ddd7286d73e 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -344,11 +344,11 @@ genrule( name = "basic_version_gen", outs = ["include/clang/Basic/Version.inc"], cmd = ( - "echo '#define CLANG_VERSION 14.0.0' >> $@\n" + - "echo '#define CLANG_VERSION_MAJOR 14' >> $@\n" + + "echo '#define 
CLANG_VERSION 15.0.0' >> $@\n" + + "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" + "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" + "echo '#define CLANG_VERSION_PATCHLEVEL 0' >> $@\n" + - "echo '#define CLANG_VERSION_STRING \"14.0.0\"' >> $@\n" + "echo '#define CLANG_VERSION_STRING \"15.0.0\"' >> $@\n" ), ) diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h index d3f084553ea2d1..9f6e92a9bc81a0 100644 --- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h +++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h @@ -93,7 +93,7 @@ /* CLANG_HAVE_RLIMITS defined conditionally below */ /* The LLVM product name and version */ -#define BACKEND_PACKAGE_STRING "LLVM 12.0.0git" +#define BACKEND_PACKAGE_STRING "LLVM 15.0.0git" /* Linker version detected at compile time. */ /* #undef HOST_LINK_VERSION */ diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel index 2009a55808220f..433cf172f1a3d1 100644 --- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel @@ -13,7 +13,7 @@ package( genrule( name = "config_version_gen", outs = ["include/lld/Common/Version.inc"], - cmd = "echo '#define LLD_VERSION_STRING \"14.0.0\"' > $@", + cmd = "echo '#define LLD_VERSION_STRING \"15.0.0\"' > $@", ) genrule( diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h index d775f57814555b..2810f49dff4386 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h @@ -74,7 +74,7 @@ #define LLVM_USE_PERF 0 /* Major version of the LLVM API */ -#define LLVM_VERSION_MAJOR 14 +#define LLVM_VERSION_MAJOR 15 /* Minor version of the LLVM API */ #define 
LLVM_VERSION_MINOR 0 @@ -83,7 +83,7 @@ #define LLVM_VERSION_PATCH 0 /* LLVM version string */ -#define LLVM_VERSION_STRING "14.0.0git" +#define LLVM_VERSION_STRING "15.0.0git" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() From 65ab6b495a6ac59ccdb2a45675f4a2de9fe6e244 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 26 May 2022 06:14:16 +0900 Subject: [PATCH 525/908] [bazel] Unset REVISION as if LLVM_APPEND_VC_REV=OFF, for now. We could implement retrieving the revision here, but we may avoid "Just the same but only different revision hash string". --- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 2 +- utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 1d3ddd7286d73e..702547e87df37d 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -373,7 +373,7 @@ genrule( # are passed through bazel, it's easier to drop generated files next to # the other includes. 
outs = ["include/VCSVersion.inc"], - cmd = "echo '#define CLANG_REVISION \"git\"' > $@", + cmd = "echo '#undef CLANG_REVISION' > $@", ) # A hacky library to expose some internal headers of the `basic` library to its diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index e0ee148b87d033..da8def70baa09b 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -159,7 +159,7 @@ cc_library( genrule( name = "generate_vcs_revision", outs = ["include/llvm/Support/VCSRevision.h"], - cmd = "echo '#define LLVM_REVISION \"git\"' >> $@\n" + + cmd = "echo '#undef LLVM_REVISION' >> $@\n" + "echo '#undef LLVM_REPOSITORY' >> $@\n", ) From 3bf5c2c8ec3028a55755a885b1b18266c18a79ba Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 May 2022 07:21:55 -0700 Subject: [PATCH 526/908] [SLP]Do not try to generate ScatterVectorize if it will be scalarized. SLP should build ScatterVectorize nodes only if they actually end up with masked gather rather than with scalarization. In the second scenario, it is better to build a gather node. 
Differential Revision: https://reviews.llvm.org/D126379 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 5 +- .../Transforms/SLPVectorizer/X86/pr47623.ll | 7 +- .../SLPVectorizer/X86/pr47629-inseltpoison.ll | 138 ++++++++---------- .../Transforms/SLPVectorizer/X86/pr47629.ll | 138 ++++++++---------- 4 files changed, 126 insertions(+), 162 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 259e0e565dd176..8cedb63eead04b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4358,8 +4358,9 @@ static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, for (Value *V : VL) CommonAlignment = commonAlignment(CommonAlignment, cast(V)->getAlign()); - if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()), - CommonAlignment)) + auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); + if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) && + !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) return LoadsState::ScatterVectorize; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll index c474551d7f14fc..758e89b8da1373 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll @@ -33,8 +33,11 @@ define void @foo() { ; AVX-NEXT: ret void ; ; AVX512-LABEL: @foo( -; AVX512-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> , i32 8, <2 x i1> , <2 x i32> undef) -; AVX512-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), align 16 +; AVX512-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2), align 8 +; AVX512-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 
[[TMP1]], i64 0 +; AVX512-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i64 1 +; AVX512-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16 ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index 25e107978c9e9b..f1cb7835c88db5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -104,24 +104,20 @@ define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture rea ; ; AVX512F-LABEL: @gather_load_2( ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1 -; AVX512F-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1 -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1 -; AVX512F-NEXT: store i32 [[TMP5]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10 -; AVX512F-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2 -; AVX512F-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2 -; AVX512F-NEXT: store i32 [[TMP9]], i32* [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3 -; AVX512F-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3 -; AVX512F-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3 -; AVX512F-NEXT: store i32 [[TMP13]], i32* [[TMP10]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, 
i32* [[TMP1]], i64 5 -; AVX512F-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4 -; AVX512F-NEXT: store i32 [[TMP17]], i32* [[TMP14]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10 +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3 +; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 +; AVX512F-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 +; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1 +; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2 +; AVX512F-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 +; AVX512F-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], +; AVX512F-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_2( @@ -258,44 +254,36 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_3( -; AVX512F-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1 -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1 -; AVX512F-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11 -; 
AVX512F-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2 -; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2 -; AVX512F-NEXT: store i32 [[TMP8]], i32* [[TMP5]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4 -; AVX512F-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3 -; AVX512F-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3 -; AVX512F-NEXT: store i32 [[TMP12]], i32* [[TMP9]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15 -; AVX512F-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4 -; AVX512F-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4 -; AVX512F-NEXT: store i32 [[TMP16]], i32* [[TMP13]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18 -; AVX512F-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1 -; AVX512F-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5 -; AVX512F-NEXT: store i32 [[TMP20]], i32* [[TMP17]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9 -; AVX512F-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2 -; AVX512F-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6 -; AVX512F-NEXT: store i32 [[TMP24]], i32* [[TMP21]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6 -; AVX512F-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: 
[[TMP28:%.*]] = add i32 [[TMP27]], 3 -; AVX512F-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7 -; AVX512F-NEXT: store i32 [[TMP28]], i32* [[TMP25]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21 -; AVX512F-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4 -; AVX512F-NEXT: store i32 [[TMP32]], i32* [[TMP29]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 11 +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4 +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15 +; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 4 +; AVX512F-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 +; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1 +; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2 +; AVX512F-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 +; AVX512F-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], +; AVX512F-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18 +; AVX512F-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9 +; AVX512F-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6 +; AVX512F-NEXT: 
[[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21 +; AVX512F-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP17]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP19]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP20]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> poison, i32 [[TMP21]], i64 0 +; AVX512F-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i64 1 +; AVX512F-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i64 2 +; AVX512F-NEXT: [[TMP28:%.*]] = insertelement <4 x i32> [[TMP27]], i32 [[TMP24]], i64 3 +; AVX512F-NEXT: [[TMP29:%.*]] = add <4 x i32> [[TMP28]], +; AVX512F-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* [[TMP30]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_3( @@ -469,44 +457,36 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_4( -; AVX512F-NEXT: [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1 ; AVX512F-NEXT: [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11 -; AVX512F-NEXT: [[T9:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 2 ; AVX512F-NEXT: [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4 -; AVX512F-NEXT: [[T13:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 3 ; AVX512F-NEXT: [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15 -; AVX512F-NEXT: [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4 +; AVX512F-NEXT: [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 4 ; AVX512F-NEXT: [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18 -; AVX512F-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5 ; AVX512F-NEXT: 
[[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9 -; AVX512F-NEXT: [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6 ; AVX512F-NEXT: [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6 -; AVX512F-NEXT: [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7 ; AVX512F-NEXT: [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21 ; AVX512F-NEXT: [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T3]], i64 0 +; AVX512F-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T7]], i64 1 +; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T11]], i64 2 +; AVX512F-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T15]], i64 3 +; AVX512F-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], +; AVX512F-NEXT: [[TMP6:%.*]] = bitcast i32* [[T0]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[T4:%.*]] = add i32 [[T3]], 1 -; AVX512F-NEXT: [[T8:%.*]] = add i32 [[T7]], 2 -; AVX512F-NEXT: [[T12:%.*]] = add i32 [[T11]], 3 -; AVX512F-NEXT: [[T16:%.*]] = add i32 [[T15]], 4 -; AVX512F-NEXT: [[T20:%.*]] = add i32 [[T19]], 1 -; AVX512F-NEXT: [[T24:%.*]] = add i32 [[T23]], 2 -; AVX512F-NEXT: [[T28:%.*]] = add i32 [[T27]], 3 -; AVX512F-NEXT: [[T32:%.*]] = add i32 [[T31]], 4 -; AVX512F-NEXT: store i32 [[T4]], i32* [[T0]], align 4, 
!tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T8]], i32* [[T5]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T12]], i32* [[T9]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T16]], i32* [[T13]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T20]], i32* [[T17]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T24]], i32* [[T21]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T28]], i32* [[T25]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T32]], i32* [[T29]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T19]], i64 0 +; AVX512F-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T23]], i64 1 +; AVX512F-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T27]], i64 2 +; AVX512F-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[T31]], i64 3 +; AVX512F-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP10]], +; AVX512F-NEXT: [[TMP12:%.*]] = bitcast i32* [[T17]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP12]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_4( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index 329b72ad53985d..3925563bf1f700 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -104,24 +104,20 @@ define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture rea ; ; AVX512F-LABEL: @gather_load_2( ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1 -; AVX512F-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1 -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1 -; AVX512F-NEXT: store i32 [[TMP5]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds i32, i32* [[TMP1]], i64 10 -; AVX512F-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2 -; AVX512F-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2 -; AVX512F-NEXT: store i32 [[TMP9]], i32* [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3 -; AVX512F-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3 -; AVX512F-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3 -; AVX512F-NEXT: store i32 [[TMP13]], i32* [[TMP10]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 -; AVX512F-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4 -; AVX512F-NEXT: store i32 [[TMP17]], i32* [[TMP14]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10 +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3 +; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5 +; AVX512F-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 +; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1 +; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2 +; AVX512F-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 +; AVX512F-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> 
[[TMP14]], +; AVX512F-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_2( @@ -258,44 +254,36 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_3( -; AVX512F-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1 -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1 -; AVX512F-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11 -; AVX512F-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2 -; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2 -; AVX512F-NEXT: store i32 [[TMP8]], i32* [[TMP5]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4 -; AVX512F-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3 -; AVX512F-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3 -; AVX512F-NEXT: store i32 [[TMP12]], i32* [[TMP9]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15 -; AVX512F-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4 -; AVX512F-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4 -; AVX512F-NEXT: store i32 [[TMP16]], i32* [[TMP13]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18 -; AVX512F-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa [[TBAA0]] 
-; AVX512F-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1 -; AVX512F-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5 -; AVX512F-NEXT: store i32 [[TMP20]], i32* [[TMP17]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9 -; AVX512F-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2 -; AVX512F-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6 -; AVX512F-NEXT: store i32 [[TMP24]], i32* [[TMP21]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6 -; AVX512F-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3 -; AVX512F-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7 -; AVX512F-NEXT: store i32 [[TMP28]], i32* [[TMP25]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21 -; AVX512F-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4 -; AVX512F-NEXT: store i32 [[TMP32]], i32* [[TMP29]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 11 +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4 +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15 +; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 4 +; AVX512F-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> 
poison, i32 [[TMP7]], i64 0 +; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1 +; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2 +; AVX512F-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 +; AVX512F-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], +; AVX512F-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18 +; AVX512F-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9 +; AVX512F-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6 +; AVX512F-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21 +; AVX512F-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP17]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP19]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP20]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> poison, i32 [[TMP21]], i64 0 +; AVX512F-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i64 1 +; AVX512F-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i64 2 +; AVX512F-NEXT: [[TMP28:%.*]] = insertelement <4 x i32> [[TMP27]], i32 [[TMP24]], i64 3 +; AVX512F-NEXT: [[TMP29:%.*]] = add <4 x i32> [[TMP28]], +; AVX512F-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* [[TMP30]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_3( @@ -469,44 +457,36 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_4( -; AVX512F-NEXT: [[T5:%.*]] = 
getelementptr inbounds i32, i32* [[T0:%.*]], i64 1 ; AVX512F-NEXT: [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11 -; AVX512F-NEXT: [[T9:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 2 ; AVX512F-NEXT: [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4 -; AVX512F-NEXT: [[T13:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 3 ; AVX512F-NEXT: [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15 -; AVX512F-NEXT: [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4 +; AVX512F-NEXT: [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 4 ; AVX512F-NEXT: [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18 -; AVX512F-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5 ; AVX512F-NEXT: [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9 -; AVX512F-NEXT: [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6 ; AVX512F-NEXT: [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6 -; AVX512F-NEXT: [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7 ; AVX512F-NEXT: [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21 ; AVX512F-NEXT: [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T3]], i64 0 +; AVX512F-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T7]], i64 1 +; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T11]], i64 2 +; AVX512F-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T15]], i64 3 +; AVX512F-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], +; AVX512F-NEXT: [[TMP6:%.*]] = bitcast i32* [[T0]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4, !tbaa 
[[TBAA0]] ; AVX512F-NEXT: [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: [[T4:%.*]] = add i32 [[T3]], 1 -; AVX512F-NEXT: [[T8:%.*]] = add i32 [[T7]], 2 -; AVX512F-NEXT: [[T12:%.*]] = add i32 [[T11]], 3 -; AVX512F-NEXT: [[T16:%.*]] = add i32 [[T15]], 4 -; AVX512F-NEXT: [[T20:%.*]] = add i32 [[T19]], 1 -; AVX512F-NEXT: [[T24:%.*]] = add i32 [[T23]], 2 -; AVX512F-NEXT: [[T28:%.*]] = add i32 [[T27]], 3 -; AVX512F-NEXT: [[T32:%.*]] = add i32 [[T31]], 4 -; AVX512F-NEXT: store i32 [[T4]], i32* [[T0]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T8]], i32* [[T5]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T12]], i32* [[T9]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T16]], i32* [[T13]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T20]], i32* [[T17]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T24]], i32* [[T21]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T28]], i32* [[T25]], align 4, !tbaa [[TBAA0]] -; AVX512F-NEXT: store i32 [[T32]], i32* [[T29]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T19]], i64 0 +; AVX512F-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T23]], i64 1 +; AVX512F-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T27]], i64 2 +; AVX512F-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[T31]], i64 3 +; AVX512F-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP10]], +; AVX512F-NEXT: [[TMP12:%.*]] = bitcast i32* [[T17]] to <4 x i32>* +; AVX512F-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP12]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_4( From 534b19fbaa5a5e9b9c42091a0b51673c98a55622 Mon Sep 17 00:00:00 
2001 From: Arthur Eubanks Date: Wed, 25 May 2022 14:36:42 -0700 Subject: [PATCH 527/908] [gn build] Manually port bed9efed71 --- llvm/utils/gn/secondary/llvm/unittests/BUILD.gn | 1 + .../utils/gn/secondary/llvm/unittests/MC/X86/BUILD.gn | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 llvm/utils/gn/secondary/llvm/unittests/MC/X86/BUILD.gn diff --git a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn index 2d6a2a8accb479..0433280d978a1a 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn @@ -93,6 +93,7 @@ group("unittests") { } if (llvm_build_X86) { deps += [ + "MC/X86:X86MCTests", "Target/X86:X86Tests", "tools/llvm-exegesis/X86:LLVMExegesisX86Tests", ] diff --git a/llvm/utils/gn/secondary/llvm/unittests/MC/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/MC/X86/BUILD.gn new file mode 100644 index 00000000000000..a66c3228847c43 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/unittests/MC/X86/BUILD.gn @@ -0,0 +1,11 @@ +import("//llvm/utils/unittest/unittest.gni") + +unittest("X86MCTests") { + deps = [ + "//llvm/lib/MC", + "//llvm/lib/MC/MCParser", + "//llvm/lib/Support", + "//llvm/lib/Target/X86", + ] + sources = [ "X86MCDisassemblerTest.cpp" ] +} From 6b99dc29c6efa5d36306dd567c5e972cafb85c17 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 25 May 2022 13:32:52 -0700 Subject: [PATCH 528/908] [flang] Fix crash from a blank BIND(C,NAME="") on subprogram A recent change fixed the processing of BIND(C,NAME=expr) character expressions so that they are evaluated as constants in the scope of the subprogram. However, when the character name expression results in an empty value after trimming, the compiler emits a warning message, and this message is now causing a crash due to a lack of statement context. To fix, extend the deferred processing of the BIND(C,NAME="") so that a basic statement context exists. 
Differential Revision: https://reviews.llvm.org/D126416 --- flang/lib/Semantics/resolve-names.cpp | 44 ++++++++++++++++----------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 1fe21ce9bc9c48..9898fe3da57d3d 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -826,7 +826,7 @@ class SubprogramVisitor : public virtual ScopeHandler, public InterfaceVisitor { const ProgramTree::EntryStmtList * = nullptr); bool BeginMpSubprogram(const parser::Name &); void PushBlockDataScope(const parser::Name &); - void EndSubprogram( + void EndSubprogram(std::optional stmtSource = std::nullopt, const std::optional * = nullptr); protected: @@ -3237,8 +3237,9 @@ bool SubprogramVisitor::Pre(const parser::InterfaceBody::Subroutine &x) { return BeginSubprogram(name, Symbol::Flag::Subroutine); } void SubprogramVisitor::Post(const parser::InterfaceBody::Subroutine &x) { - EndSubprogram(&std::get>( - std::get>(x.t).statement.t)); + const auto &stmt{std::get>(x.t)}; + EndSubprogram(stmt.source, + &std::get>(stmt.statement.t)); } bool SubprogramVisitor::Pre(const parser::InterfaceBody::Function &x) { const auto &name{std::get( @@ -3246,9 +3247,10 @@ bool SubprogramVisitor::Pre(const parser::InterfaceBody::Function &x) { return BeginSubprogram(name, Symbol::Flag::Function); } void SubprogramVisitor::Post(const parser::InterfaceBody::Function &x) { - const auto &maybeSuffix{std::get>( - std::get>(x.t).statement.t)}; - EndSubprogram(maybeSuffix ? &maybeSuffix->binding : nullptr); + const auto &stmt{std::get>(x.t)}; + const auto &maybeSuffix{ + std::get>(stmt.statement.t)}; + EndSubprogram(stmt.source, maybeSuffix ? 
&maybeSuffix->binding : nullptr); } bool SubprogramVisitor::Pre(const parser::SubroutineStmt &stmt) { @@ -3598,15 +3600,19 @@ bool SubprogramVisitor::BeginSubprogram(const parser::Name &name, } void SubprogramVisitor::EndSubprogram( + std::optional stmtSource, const std::optional *binding) { if (binding && *binding && currScope().symbol()) { // Finally process the BIND(C,NAME=name) now that symbols in the name // expression will resolve local names. auto flagRestorer{common::ScopedSet(inSpecificationPart_, false)}; + auto originalStmtSource{messageHandler().currStmtSource()}; + messageHandler().set_currStmtSource(stmtSource); BeginAttrs(); Walk(**binding); SetBindNameOn(*currScope().symbol()); currScope().symbol()->attrs() |= EndAttrs(); + messageHandler().set_currStmtSource(originalStmtSource); } PopScope(); } @@ -7438,27 +7444,31 @@ bool ResolveNamesVisitor::BeginScopeForNode(const ProgramTree &node) { } void ResolveNamesVisitor::EndScopeForNode(const ProgramTree &node) { - using BindingPtr = const std::optional *; - EndSubprogram(common::visit( + std::optional stmtSource; + const std::optional *binding{nullptr}; + common::visit( common::visitors{ - [](const parser::Statement *stmt) { + [&](const parser::Statement *stmt) { if (stmt) { + stmtSource = stmt->source; if (const auto &maybeSuffix{ std::get>( stmt->statement.t)}) { - return &maybeSuffix->binding; + binding = &maybeSuffix->binding; } } - return BindingPtr{}; }, - [](const parser::Statement *stmt) { - return stmt ? 
&std::get>( - stmt->statement.t) - : BindingPtr{}; + [&](const parser::Statement *stmt) { + if (stmt) { + stmtSource = stmt->source; + binding = &std::get>( + stmt->statement.t); + } }, - [](const auto *) { return BindingPtr{}; }, + [](const auto *) {}, }, - node.stmt())); + node.stmt()); + EndSubprogram(stmtSource, binding); } // Some analyses and checks, such as the processing of initializers of From 3d546191ad9d7d2ad2c7928204b9de51deafa675 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 25 May 2022 14:47:30 -0700 Subject: [PATCH 529/908] [gn build] Fix D126415 to only set `llvm_have_mallinfo2 = true` on linux Otherwise mac builds are broken, e.g. http://45.33.8.238/macm1/36056/step_4.txt ../../llvm/lib/Support/Unix/Process.inc:35:10: fatal error: 'malloc.h' file not found #include --- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 82eba6b7e55231..c60044b03049c1 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -50,7 +50,7 @@ declare_args() { # glibc is at least 2.33 which has mallinfo2. # TODO: remove this once nobody using the gn build is building against an old glibc. - llvm_have_mallinfo2 = true + llvm_have_mallinfo2 = current_os == "linux" } write_cmake_config("abi-breaking") { From c8e7c0e5dc1e0587bac387bc8d439f58ef7253be Mon Sep 17 00:00:00 2001 From: Alan Zhao Date: Wed, 25 May 2022 18:12:35 -0400 Subject: [PATCH 530/908] [NFC][llvm-ml] Rename rip-relative-addressing.asm to rip_relative_addressing.asm All the other tests here use underscores, so we should rename this file to be consistent. 
--- .../{rip-relative-addressing.asm => rip_relative_addressing.asm} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-ml/{rip-relative-addressing.asm => rip_relative_addressing.asm} (100%) diff --git a/llvm/test/tools/llvm-ml/rip-relative-addressing.asm b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm similarity index 100% rename from llvm/test/tools/llvm-ml/rip-relative-addressing.asm rename to llvm/test/tools/llvm-ml/rip_relative_addressing.asm From bef4da4a6aef8196f007f44e3e9c8e3419ffb623 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 25 May 2022 16:05:16 -0700 Subject: [PATCH 531/908] Skip testing of watchpoint hit-count/ignore-count on multithreaded Skip all watchpoint hit-count/ignore-count tests for multithreaded API tests for now on arm64 Darwin. On AArch64, insns that trigger a WP are rolled back and we are notified. lldb needs to disable the WP, insn step, re-enable it, then report it to the user. lldb only does this full step action for the "selected thread", and so when a program stops with multiple threads hitting a stop reason, some of them watchpoints, any non-selected-thread will not be completed in this way. But all threads with the initial watchpoint exception will have their hit-count/ignore-counts updated. When we resume execution, the other threads sitting at the instruction will again execute & trigger the WP exception again, repeating until we've gone through all of the threads.
This bug is being tracked in llvm.org/pr49433 and inside apple in rdar://93863107 --- .../hello_watchlocation/TestWatchLocation.py | 19 +++++++++++++++---- .../TestWatchLocationWithWatchSet.py | 13 +++++++++++-- .../TestConcurrentManyWatchpoints.py | 2 +- .../TestConcurrentNWatchNBreak.py | 2 +- .../TestConcurrentSignalNWatchNBreak.py | 2 +- .../TestConcurrentSignalWatch.py | 2 +- .../TestConcurrentSignalWatchBreak.py | 2 +- .../TestConcurrentTwoWatchpointThreads.py | 2 +- ...stConcurrentTwoWatchpointsOneBreakpoint.py | 2 +- ...currentTwoWatchpointsOneDelayBreakpoint.py | 2 +- .../TestConcurrentTwoWatchpointsOneSignal.py | 2 +- .../watchpoint/TestWatchpointIgnoreCount.py | 10 +++++++++- 12 files changed, 44 insertions(+), 16 deletions(-) diff --git a/lldb/test/API/commands/watchpoints/hello_watchlocation/TestWatchLocation.py b/lldb/test/API/commands/watchpoints/hello_watchlocation/TestWatchLocation.py index 16fb71fd113028..2859df3c47bd0e 100644 --- a/lldb/test/API/commands/watchpoints/hello_watchlocation/TestWatchLocation.py +++ b/lldb/test/API/commands/watchpoints/hello_watchlocation/TestWatchLocation.py @@ -31,12 +31,18 @@ def setUp(self): self.exe_name = self.testMethodName self.d = {'CXX_SOURCES': self.source, 'EXE': self.exe_name} + # on arm64 targets, lldb has incorrect hit-count / ignore-counts + # for watchpoints when they are hit with multiple threads at + # the same time. Tracked as llvm.org/pr49433 + # or rdar://93863107 inside Apple. 
+ def affected_by_radar_93863107(self): + return (self.getArchitecture() in ['arm64', 'arm64e']) and self.platformIsDarwin() + # Most of the MIPS boards provide only one H/W watchpoints, and S/W # watchpoints are not supported yet @expectedFailureAll(triple=re.compile('^mips')) # SystemZ and PowerPC also currently supports only one H/W watchpoint @expectedFailureAll(archs=['powerpc64le', 's390x']) - @skipIfDarwin @skipIfWindows # This test is flaky on Windows def test_hello_watchlocation(self): """Test watching a location with '-s size' option.""" @@ -103,8 +109,13 @@ def test_hello_watchlocation(self): substrs=[self.violating_func]) # Use the '-v' option to do verbose listing of the watchpoint. - # The hit count should now be 1. - self.expect("watchpoint list -v", - substrs=['hit_count = 1']) + # The hit count should now be the same as the number of threads that + # stopped on a watchpoint. + threads = lldbutil.get_stopped_threads( + self.process(), lldb.eStopReasonWatchpoint) + + if not self.affected_by_radar_93863107(): + self.expect("watchpoint list -v", + substrs=['hit_count = %d' % len(threads)]) self.runCmd("thread backtrace all") diff --git a/lldb/test/API/commands/watchpoints/watchpoint_set_command/TestWatchLocationWithWatchSet.py b/lldb/test/API/commands/watchpoints/watchpoint_set_command/TestWatchLocationWithWatchSet.py index 92ea56021cc1b9..23c4b3062662ce 100644 --- a/lldb/test/API/commands/watchpoints/watchpoint_set_command/TestWatchLocationWithWatchSet.py +++ b/lldb/test/API/commands/watchpoints/watchpoint_set_command/TestWatchLocationWithWatchSet.py @@ -15,6 +15,13 @@ class WatchLocationUsingWatchpointSetTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) NO_DEBUG_INFO_TESTCASE = True + # on arm64 targets, lldb has incorrect hit-count / ignore-counts + # for watchpoints when they are hit with multiple threads at + # the same time. Tracked as llvm.org/pr49433 + # or rdar://93863107 inside Apple. 
+ def affected_by_radar_93863107(self): + return (self.getArchitecture() in ['arm64', 'arm64e']) and self.platformIsDarwin() + def setUp(self): # Call super's setUp(). TestBase.setUp(self) @@ -102,7 +109,9 @@ def test_watchlocation_using_watchpoint_set(self): # stopped on a watchpoint. threads = lldbutil.get_stopped_threads( self.process(), lldb.eStopReasonWatchpoint) - self.expect("watchpoint list -v", - substrs=['hit_count = %d' % len(threads)]) + + if not self.affected_by_radar_93863107(): + self.expect("watchpoint list -v", + substrs=['hit_count = %d' % len(threads)]) self.runCmd("thread backtrace all") diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py index 18a683864c33b6..e466b17633da35 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py @@ -15,7 +15,7 @@ class ConcurrentManyWatchpoints(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) @skipIfOutOfTreeDebugserver def test(self): diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py index 2d53cfa4802a11..d6848de5fd3191 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py @@ -18,7 +18,7 @@ class ConcurrentNWatchNBreak(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + 
bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test with 5 watchpoint and breakpoint threads.""" diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py index 0645274ae7e55d..8409984c67ffa5 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py @@ -19,7 +19,7 @@ class ConcurrentSignalNWatchNBreak(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test one signal thread with 5 watchpoint and breakpoint threads.""" diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py index be59daace974f3..ac9442cd25c073 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py @@ -16,7 +16,7 @@ class ConcurrentSignalWatch(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test a watchpoint and a signal in multiple threads.""" diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py index 120b48514539f8..2e18c15f5ab022 100644 --- 
a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py @@ -17,7 +17,7 @@ class ConcurrentSignalWatchBreak(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test a signal/watchpoint/breakpoint in multiple threads.""" diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py index 7deeacfc67d512..9a7088bbc3a9f3 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py @@ -16,7 +16,7 @@ class ConcurrentTwoWatchpointThreads(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint. 
""" diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py index 0e8cbf37a19764..3eb9f30d8cf911 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py @@ -16,7 +16,7 @@ class ConcurrentTwoWatchpointsOneBreakpoint(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint and one breakpoint thread. """ diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py index a4baa5e9f4dd8a..c48e39554324ca 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py @@ -16,7 +16,7 @@ class ConcurrentTwoWatchpointsOneDelayBreakpoint(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint and one (1 second delay) breakpoint thread. 
""" diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py index 62e7cfb6f3b408..8aea699a463460 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py @@ -17,7 +17,7 @@ class ConcurrentTwoWatchpointsOneSignal(ConcurrentEventsBase): @skipIf( oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], archs=['arm64', 'arm64e', 'arm64_32', 'arm'], - bugnumber="rdar://81811539") + bugnumber="rdar://93863107") @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint and one signal thread. """ diff --git a/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py b/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py index d0fd39834134d0..dd0defd784503b 100644 --- a/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py +++ b/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py @@ -25,6 +25,13 @@ def setUp(self): self.line = line_number( self.source, '// Set break point at this line.') + # on arm64 targets, lldb has incorrect hit-count / ignore-counts + # for watchpoints when they are hit with multiple threads at + # the same time. Tracked as llvm.org/pr49433 + # or rdar://93863107 inside Apple. + def affected_by_radar_93863107(self): + return (self.getArchitecture() in ['arm64', 'arm64e']) and self.platformIsDarwin() + # Read-write watchpoints not supported on SystemZ @expectedFailureAll(archs=['s390x']) def test_set_watch_ignore_count(self): @@ -87,5 +94,6 @@ def test_set_watch_ignore_count(self): # Verify some vital statistics. 
self.assertTrue(watchpoint) self.assertEqual(watchpoint.GetWatchSize(), 4) - self.assertEqual(watchpoint.GetHitCount(), 2) + if not self.affected_by_radar_93863107(): + self.assertEqual(watchpoint.GetHitCount(), 2) print(watchpoint) From 338e76f8ee3857ef06eb13f062ad9df39f29d465 Mon Sep 17 00:00:00 2001 From: bixia1 Date: Wed, 25 May 2022 12:39:37 -0700 Subject: [PATCH 532/908] Lower complex.expm1 to standard dialect. Add a test. Reviewed By: pifon2a Differential Revision: https://reviews.llvm.org/D126409 --- .../ComplexToStandard/ComplexToStandard.cpp | 25 +++++++++++++++++++ .../convert-to-standard.mlir | 21 ++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index a676e0afe9a885..e1eca6181dff96 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -413,6 +413,30 @@ struct ExpOpConversion : public OpConversionPattern { } }; +struct Expm1OpConversion : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(complex::Expm1Op op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto type = adaptor.getComplex().getType().cast(); + auto elementType = type.getElementType().cast(); + + mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); + Value exp = b.create(adaptor.getComplex()); + + Value real = b.create(elementType, exp); + Value one = b.create(elementType, + b.getFloatAttr(elementType, 1)); + Value realMinusOne = b.create(real, one); + Value imag = b.create(elementType, exp); + + rewriter.replaceOpWithNewOp(op, type, realMinusOne, + imag); + return success(); + } +}; + struct LogOpConversion : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -718,6 +742,7 @@ void mlir::populateComplexToStandardConversionPatterns( CosOpConversion, 
DivOpConversion, ExpOpConversion, + Expm1OpConversion, LogOpConversion, Log1pOpConversion, MulOpConversion, diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index 8e7098f832bb8f..6f57e722b520ef 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -190,6 +190,27 @@ func.func @complex_exp(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex +// CHECK-LABEL: func.func @complex_expm1( +// CHECK-SAME: %[[ARG:.*]]: complex) -> complex { +func.func @complex_expm1(%arg: complex) -> complex { + %expm1 = complex.expm1 %arg: complex + return %expm1 : complex +} +// CHECK: %[[REAL_I:.*]] = complex.re %[[ARG]] : complex +// CHECK: %[[IMAG_I:.*]] = complex.im %[[ARG]] : complex +// CHECK: %[[EXP:.*]] = math.exp %[[REAL_I]] : f32 +// CHECK: %[[COS:.*]] = math.cos %[[IMAG_I]] : f32 +// CHECK: %[[RES_REAL:.*]] = arith.mulf %[[EXP]], %[[COS]] : f32 +// CHECK: %[[SIN:.*]] = math.sin %[[IMAG_I]] : f32 +// CHECK: %[[RES_IMAG:.*]] = arith.mulf %[[EXP]], %[[SIN]] : f32 +// CHECK: %[[RES_EXP:.*]] = complex.create %[[RES_REAL]], %[[RES_IMAG]] : complex +// CHECK: %[[REAL:.*]] = complex.re %[[RES_EXP]] : complex +// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[REAL_M1:.*]] = arith.subf %[[REAL]], %[[ONE]] : f32 +// CHECK: %[[IMAG:.*]] = complex.im %[[RES_EXP]] : complex +// CHECK: %[[RES:.*]] = complex.create %[[REAL_M1]], %[[IMAG]] : complex +// CHECK: return %[[RES]] : complex + // CHECK-LABEL: func @complex_log // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_log(%arg: complex) -> complex { From a14057d4bddbb78aed7beaf282e75137b2dc60b9 Mon Sep 17 00:00:00 2001 From: bixia1 Date: Tue, 24 May 2022 16:07:31 -0700 Subject: [PATCH 533/908] [mlir][sparse] Add more complex 
operations. Support complex operations sqrt, expm1, and tanh. Add tests. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126393 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 3 + .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 27 ++++++ .../SparseTensor/CPU/sparse_complex_ops.mlir | 88 +++++++++++++++++-- 3 files changed, 113 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index 039cc5ddefac50..db83ce054c0ff6 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -35,12 +35,15 @@ enum Kind { kCeilF, kFloorF, kSqrtF, + kSqrtC, kExpm1F, + kExpm1C, kLog1pF, kLog1pC, kSinF, kSinC, kTanhF, + kTanhC, kNegF, kNegC, kNegI, diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 3cf7dc60d33c33..80d2dbba187b84 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -41,12 +41,15 @@ TensorExp::TensorExp(Kind k, unsigned x, unsigned y, Value v, Operation *o) case kCeilF: case kFloorF: case kSqrtF: + case kSqrtC: case kExpm1F: + case kExpm1C: case kLog1pF: case kLog1pC: case kSinF: case kSinC: case kTanhF: + case kTanhC: case kNegF: case kNegC: case kNegI: @@ -284,12 +287,15 @@ bool Merger::isSingleCondition(unsigned t, unsigned e) const { case kCeilF: case kFloorF: case kSqrtF: + case kSqrtC: case kExpm1F: + case kExpm1C: case kLog1pF: case kLog1pC: case kSinF: case kSinC: case kTanhF: + case kTanhC: case kNegF: case kNegC: case kNegI: @@ -360,8 +366,10 @@ static const char *kindToOpSymbol(Kind kind) { case kFloorF: return "floor"; case kSqrtF: + case kSqrtC: return "sqrt"; case kExpm1F: + case kExpm1C: return "expm1"; case kLog1pF: case kLog1pC: @@ -370,6 +378,7 @@ static const char *kindToOpSymbol(Kind kind) { case kSinC: return "sin"; case kTanhF: + case 
kTanhC: return "tanh"; case kNegF: case kNegC: @@ -449,10 +458,13 @@ void Merger::dumpExp(unsigned e) const { case kCeilF: case kFloorF: case kSqrtF: + case kSqrtC: case kExpm1F: + case kExpm1C: case kLog1pF: case kSinF: case kTanhF: + case kTanhC: case kNegF: case kNegI: case kTruncF: @@ -555,12 +567,15 @@ unsigned Merger::buildLattices(unsigned e, unsigned i) { case kCRe: case kFloorF: case kSqrtF: + case kSqrtC: case kExpm1F: + case kExpm1C: case kLog1pF: case kLog1pC: case kSinF: case kSinC: case kTanhF: + case kTanhC: case kNegF: case kNegC: case kNegI: @@ -785,8 +800,12 @@ Optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { return addExp(kFloorF, e); if (isa(def)) return addExp(kSqrtF, e); + if (isa(def)) + return addExp(kSqrtC, e); if (isa(def)) return addExp(kExpm1F, e); + if (isa(def)) + return addExp(kExpm1C, e); if (isa(def)) return addExp(kLog1pF, e); if (isa(def)) @@ -797,6 +816,8 @@ Optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { return addExp(kSinC, e); if (isa(def)) return addExp(kTanhF, e); + if (isa(def)) + return addExp(kTanhC, e); if (isa(def)) return addExp(kNegF, e); // no negi in std if (isa(def)) @@ -952,8 +973,12 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, unsigned e, return rewriter.create(loc, v0); case kSqrtF: return rewriter.create(loc, v0); + case kSqrtC: + return rewriter.create(loc, v0); case kExpm1F: return rewriter.create(loc, v0); + case kExpm1C: + return rewriter.create(loc, v0); case kLog1pF: return rewriter.create(loc, v0); case kLog1pC: @@ -964,6 +989,8 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, unsigned e, return rewriter.create(loc, v0); case kTanhF: return rewriter.create(loc, v0); + case kTanhC: + return rewriter.create(loc, v0); case kNegF: return rewriter.create(loc, v0); case kNegC: diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir index 
74c30e6f5ff839..0fbb2b7800f760 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir @@ -59,6 +59,54 @@ module { return %0 : tensor, #SparseVector> } + func.func @complex_sqrt(%arga: tensor, #SparseVector>) + -> tensor, #SparseVector> { + %c0 = arith.constant 0 : index + %d = tensor.dim %arga, %c0 : tensor, #SparseVector> + %xv = sparse_tensor.init [%d] : tensor, #SparseVector> + %0 = linalg.generic #trait_op1 + ins(%arga: tensor, #SparseVector>) + outs(%xv: tensor, #SparseVector>) { + ^bb(%a: complex, %x: complex): + %1 = complex.sqrt %a : complex + linalg.yield %1 : complex + } -> tensor, #SparseVector> + return %0 : tensor, #SparseVector> + } + + func.func @complex_tanh(%arga: tensor, #SparseVector>) + -> tensor, #SparseVector> { + %c0 = arith.constant 0 : index + %d = tensor.dim %arga, %c0 : tensor, #SparseVector> + %xv = sparse_tensor.init [%d] : tensor, #SparseVector> + %0 = linalg.generic #trait_op1 + ins(%arga: tensor, #SparseVector>) + outs(%xv: tensor, #SparseVector>) { + ^bb(%a: complex, %x: complex): + %1 = complex.tanh %a : complex + linalg.yield %1 : complex + } -> tensor, #SparseVector> + return %0 : tensor, #SparseVector> + } + + func.func @clog1p_expm1(%arga: tensor, #SparseVector>) + -> tensor, #SparseVector> { + %c0 = arith.constant 0 : index + %d = tensor.dim %arga, %c0 : tensor, #SparseVector> + %xv = sparse_tensor.init [%d] : tensor, #SparseVector> + %0 = linalg.generic #trait_op1 + ins(%arga: tensor, #SparseVector>) + outs(%xv: tensor, #SparseVector>) { + ^bb(%a: complex, %x: complex): + %1 = complex.log1p %a : complex + // TODO(bixia): Enable this line after adding complex.expm1 to + // complex to standard lowering. 
+ // %2 = complex.expm1 %1 : complex + linalg.yield %1 : complex + } -> tensor, #SparseVector> + return %0 : tensor, #SparseVector> + } + func.func @cdiv(%arga: tensor, #SparseVector>) -> tensor, #SparseVector> { %c0 = arith.constant 0 : index @@ -131,9 +179,15 @@ module { tensor, #SparseVector>) -> tensor, #SparseVector> %1 = call @csin(%sv1) : (tensor, #SparseVector>) -> tensor, #SparseVector> - %2 = call @cdiv(%sv1) + %2 = call @complex_sqrt(%sv1) + : (tensor, #SparseVector>) -> tensor, #SparseVector> + %3 = call @complex_tanh(%sv2) + : (tensor, #SparseVector>) -> tensor, #SparseVector> + %4 = call @clog1p_expm1(%sv1) : (tensor, #SparseVector>) -> tensor, #SparseVector> - %3 = call @cabs(%sv1) + %5 = call @cdiv(%sv1) + : (tensor, #SparseVector>) -> tensor, #SparseVector> + %6 = call @cabs(%sv1) : (tensor, #SparseVector>) -> tensor // @@ -157,15 +211,36 @@ module { // CHECK-NEXT: -193.43 // CHECK-NEXT: 57.2184 call @dumpc(%1, %d3) : (tensor, #SparseVector>, index) -> () + // CHECK-NEXT: 0.433635 + // CHECK-NEXT: 2.30609 + // CHECK-NEXT: 2 + // CHECK-NEXT: 1 + // CHECK-NEXT: 2.53083 + // CHECK-NEXT: 1.18538 + call @dumpc(%2, %d3) : (tensor, #SparseVector>, index) -> () + // CHECK-NEXT: 0.761594 + // CHECK-NEXT: 0 + // CHECK-NEXT: -0.964028 + // CHECK-NEXT: 0 + // CHECK-NEXT: 0.995055 + // CHECK-NEXT: 0 + call @dumpc(%3, %d3) : (tensor, #SparseVector>, index) -> () + // CHECK-NEXT: 1.52361 + // CHECK-NEXT: 2.69061 + // CHECK-NEXT: 1.73287 + // CHECK-NEXT: 0.785398 + // CHECK-NEXT: 2.13833 + // CHECK-NEXT: 0.785398 + call @dumpc(%4, %d3) : (tensor, #SparseVector>, index) -> () // CHECK-NEXT: -2.565 // CHECK-NEXT: 1 // CHECK-NEXT: 1.5 // CHECK-NEXT: 2 // CHECK-NEXT: 2.5 // CHECK-NEXT: 3 - call @dumpc(%2, %d3) : (tensor, #SparseVector>, index) -> () + call @dumpc(%5, %d3) : (tensor, #SparseVector>, index) -> () // CHECK-NEXT: ( 5.50608, 5, 7.81025 ) - call @dumpf(%3) : (tensor) -> () + call @dumpf(%6) : (tensor) -> () // Release the resources. 
sparse_tensor.release %sv1 : tensor, #SparseVector> @@ -173,7 +248,10 @@ module { sparse_tensor.release %0 : tensor, #SparseVector> sparse_tensor.release %1 : tensor, #SparseVector> sparse_tensor.release %2 : tensor, #SparseVector> - sparse_tensor.release %3 : tensor + sparse_tensor.release %3 : tensor, #SparseVector> + sparse_tensor.release %4 : tensor, #SparseVector> + sparse_tensor.release %5 : tensor, #SparseVector> + sparse_tensor.release %6 : tensor return } } From 949c39efb1031903d512e131fba7bca620e17c75 Mon Sep 17 00:00:00 2001 From: V Donaldson Date: Wed, 25 May 2022 15:32:02 -0700 Subject: [PATCH 534/908] [flang] Update intrinsic module source files The f18 standard defines several intrinsic modules containing definitions and declarations for various constants, types, and procedures. This PR adds declarations for missing procedures in these modules. --- flang/module/__fortran_builtins.f90 | 4 +- flang/module/__fortran_ieee_exceptions.f90 | 186 ++--- flang/module/ieee_arithmetic.f90 | 767 +++++++++++++-------- flang/module/iso_fortran_env.f90 | 15 +- 4 files changed, 590 insertions(+), 382 deletions(-) diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90 index 4a4c55e44e0566..4ce4901c4505d2 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang/module/__fortran_builtins.f90 @@ -41,8 +41,8 @@ procedure(type(__builtin_c_ptr)) :: __builtin_c_loc - intrinsic :: __builtin_ieee_is_nan, __builtin_ieee_is_normal, & - __builtin_ieee_is_negative + intrinsic :: __builtin_ieee_is_nan, __builtin_ieee_is_negative, & + __builtin_ieee_is_normal intrinsic :: __builtin_ieee_next_after, __builtin_ieee_next_down, & __builtin_ieee_next_up intrinsic :: scale ! 
for ieee_scalb diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang/module/__fortran_ieee_exceptions.f90 index bd8782e4db5a64..7232bbf53cd616 100644 --- a/flang/module/__fortran_ieee_exceptions.f90 +++ b/flang/module/__fortran_ieee_exceptions.f90 @@ -40,90 +40,110 @@ private end type ieee_status_type - private :: ieee_support_flag_2, ieee_support_flag_3, & - ieee_support_flag_4, ieee_support_flag_8, ieee_support_flag_10, & - ieee_support_flag_16 - interface ieee_support_flag - module procedure :: ieee_support_flag, & - ieee_support_flag_2, ieee_support_flag_3, & - ieee_support_flag_4, ieee_support_flag_8, ieee_support_flag_10, & - ieee_support_flag_16 +! Define specifics with 1 LOGICAL or REAL argument for generic G. +#define SPECIFICS_L(G) \ + G(1) G(2) G(4) G(8) +#define SPECIFICS_R(G) \ + G(2) G(3) G(4) G(8) G(10) G(16) + +! Set PRIVATE accessibility for specifics with 1 LOGICAL or REAL argument for +! generic G. +#define PRIVATE_L(G) private :: \ + G##_l1, G##_l2, G##_l4, G##_l8 +#define PRIVATE_R(G) private :: \ + G##_a2, G##_a3, G##_a4, G##_a8, G##_a10, G##_a16 + + interface + elemental subroutine ieee_get_flag(flag, flag_value) + import ieee_flag_type + type(ieee_flag_type), intent(in) :: flag + logical, intent(out) :: flag_value + end subroutine ieee_get_flag + end interface + + interface + elemental subroutine ieee_get_halting_mode(flag, halting) + import ieee_flag_type + type(ieee_flag_type), intent(in) :: flag + logical, intent(out) :: halting + end subroutine ieee_get_halting_mode + end interface + + interface + subroutine ieee_get_modes(modes) + import ieee_modes_type + type(ieee_modes_type), intent(out) :: modes + end subroutine ieee_get_modes + end interface + + interface + subroutine ieee_get_status(status) + import ieee_status_type + type(ieee_status_type), intent(out) :: status + end subroutine ieee_get_status + end interface + +#define IEEE_SET_FLAG_L(FVKIND) \ + pure subroutine ieee_set_flag_l##FVKIND(flag,flag_value); \ + import 
ieee_flag_type; \ + type(ieee_flag_type), intent(in) :: flag(..); \ + logical(FVKIND), intent(in) :: flag_value(..); \ + end subroutine ieee_set_flag_l##FVKIND; + interface ieee_set_flag + SPECIFICS_L(IEEE_SET_FLAG_L) + end interface ieee_set_flag + private :: ieee_set_flag_1 + PRIVATE_L(IEEE_SET_FLAG) +#undef IEEE_SET_FLAG_L + +#define IEEE_SET_HALTING_MODE_L(HKIND) \ + pure subroutine ieee_set_halting_mode_l##HKIND(flag,halting); \ + import ieee_flag_type; \ + type(ieee_flag_type), intent(in) :: flag(..); \ + logical(HKIND), intent(in) :: halting(..); \ + end subroutine ieee_set_halting_mode_l##HKIND; + interface ieee_set_halting_mode + SPECIFICS_L(IEEE_SET_HALTING_MODE_L) + end interface ieee_set_halting_mode + private :: ieee_set_halting_mode_1 + PRIVATE_L(IEEE_SET_HALTING_MODE) +#undef IEEE_SET_HALTING_MODE_L + + interface + subroutine ieee_set_modes(modes) + import ieee_modes_type + type(ieee_modes_type), intent(in) :: modes + end subroutine ieee_set_modes + end interface + + interface + subroutine ieee_set_status(status) + import ieee_status_type + type(ieee_status_type), intent(in) :: status + end subroutine ieee_set_status end interface - contains - elemental subroutine ieee_get_flag(flag, flag_value) - type(ieee_flag_type), intent(in) :: flag - logical, intent(out) :: flag_value - end subroutine ieee_get_flag - - elemental subroutine ieee_get_halting_mode(flag, halting) - type(ieee_flag_type), intent(in) :: flag - logical, intent(out) :: halting - end subroutine ieee_get_halting_mode - - subroutine ieee_get_modes(modes) - type(ieee_modes_type), intent(out) :: modes - end subroutine ieee_get_modes - - subroutine ieee_get_status(status) - type(ieee_status_type), intent(out) :: status - end subroutine ieee_get_status - - pure subroutine ieee_set_flag(flag, flag_value) - type(ieee_flag_type), intent(in) :: flag - logical, intent(in) :: flag_value - end subroutine ieee_set_flag - - pure subroutine ieee_set_halting_mode(flag, halting) - type(ieee_flag_type), 
intent(in) :: flag - logical, intent(in) :: halting - end subroutine ieee_set_halting_mode - - subroutine ieee_set_modes(modes) - type(ieee_modes_type), intent(in) :: modes - end subroutine ieee_set_modes - - subroutine ieee_set_status(status) - type(ieee_status_type), intent(in) :: status - end subroutine ieee_set_status - - pure logical function ieee_support_flag(flag) - type(ieee_flag_type), intent(in) :: flag - ieee_support_flag = .true. - end function - pure logical function ieee_support_flag_2(flag, x) - type(ieee_flag_type), intent(in) :: flag - real(kind=2), intent(in) :: x(..) - ieee_support_flag_2 = .true. - end function - pure logical function ieee_support_flag_3(flag, x) - type(ieee_flag_type), intent(in) :: flag - real(kind=3), intent(in) :: x(..) - ieee_support_flag_3 = .true. - end function - pure logical function ieee_support_flag_4(flag, x) - type(ieee_flag_type), intent(in) :: flag - real(kind=4), intent(in) :: x(..) - ieee_support_flag_4 = .true. - end function - pure logical function ieee_support_flag_8(flag, x) - type(ieee_flag_type), intent(in) :: flag - real(kind=8), intent(in) :: x(..) - ieee_support_flag_8 = .true. - end function - pure logical function ieee_support_flag_10(flag, x) - type(ieee_flag_type), intent(in) :: flag - real(kind=10), intent(in) :: x(..) - ieee_support_flag_10 = .true. - end function - pure logical function ieee_support_flag_16(flag, x) - type(ieee_flag_type), intent(in) :: flag - real(kind=16), intent(in) :: x(..) - ieee_support_flag_16 = .true. 
- end function - - pure logical function ieee_support_halting(flag) - type(ieee_flag_type), intent(in) :: flag - end function ieee_support_halting +#define IEEE_SUPPORT_FLAG_R(XKIND) \ + logical function ieee_support_flag_a##XKIND(flag, x); \ + import ieee_flag_type; \ + type(ieee_flag_type), intent(in) :: flag; \ + real(XKIND), intent(in) :: x(..); \ + end function ieee_support_flag_a##XKIND; + interface ieee_support_flag + logical function ieee_support_flag(flag) + import ieee_flag_type + type(ieee_flag_type), intent(in) :: flag + end function ieee_support_flag + SPECIFICS_R(IEEE_SUPPORT_FLAG_R) + end interface ieee_support_flag + PRIVATE_R(IEEE_SUPPORT_FLAG) +#undef IEEE_SUPPORT_FLAG_R + + interface + pure logical function ieee_support_halting(flag) + import ieee_flag_type + type(ieee_flag_type), intent(in) :: flag + end function ieee_support_halting + end interface end module __Fortran_ieee_exceptions diff --git a/flang/module/ieee_arithmetic.f90 b/flang/module/ieee_arithmetic.f90 index 5fe57f782a815f..b2ac217aa86ae4 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang/module/ieee_arithmetic.f90 @@ -6,13 +6,18 @@ ! !===------------------------------------------------------------------------===! -! See Fortran 2018, clause 17.2 +! Fortran 2018 Clause 17 + module ieee_arithmetic + ! 17.1: "The module IEEE_ARITHMETIC behaves as if it contained a + ! USE statement for IEEE_EXCEPTIONS; everything that is public in + ! IEEE_EXCEPTIONS is public in IEEE_ARITHMETIC." 
+ use __Fortran_ieee_exceptions use __Fortran_builtins, only: & ieee_is_nan => __builtin_ieee_is_nan, & - ieee_is_normal => __builtin_ieee_is_normal, & ieee_is_negative => __builtin_ieee_is_negative, & + ieee_is_normal => __builtin_ieee_is_normal, & ieee_next_after => __builtin_ieee_next_after, & ieee_next_down => __builtin_ieee_next_down, & ieee_next_up => __builtin_ieee_next_up, & @@ -29,11 +34,6 @@ module ieee_arithmetic ieee_support_subnormal => __builtin_ieee_support_subnormal, & ieee_support_underflow_control => __builtin_ieee_support_underflow_control - ! 17.1: "The module IEEE_ARITHMETIC behaves as if it contained a USE statement - ! for IEEE_EXCEPTIONS; everything that is public in IEEE_EXCEPTIONS is public - ! in IEEE_ARITHMETIC." - use __Fortran_ieee_exceptions - implicit none type :: ieee_class_type @@ -72,291 +72,478 @@ module ieee_arithmetic ieee_other = ieee_round_type(6) interface operator(==) - module procedure class_eq - module procedure round_eq + elemental logical function ieee_class_eq(x, y) + import ieee_class_type + type(ieee_class_type), intent(in) :: x, y + end function ieee_class_eq + elemental logical function ieee_round_eq(x, y) + import ieee_round_type + type(ieee_round_type), intent(in) :: x, y + end function ieee_round_eq end interface operator(==) interface operator(/=) - module procedure class_ne - module procedure round_ne + elemental logical function ieee_class_ne(x, y) + import ieee_class_type + type(ieee_class_type), intent(in) :: x, y + end function ieee_class_ne + elemental logical function ieee_round_ne(x, y) + import ieee_round_type + type(ieee_round_type), intent(in) :: x, y + end function ieee_round_ne end interface operator(/=) - private :: class_eq, class_ne, round_eq, round_ne - - ! 
See Fortran 2018, 17.10 & 17.11 - generic :: ieee_class => ieee_class_a2, ieee_class_a3, ieee_class_a4, ieee_class_a8, ieee_class_a10, ieee_class_a16 - private :: ieee_class_a2, ieee_class_a3, ieee_class_a4, ieee_class_a8, ieee_class_a10, ieee_class_a16 - - generic :: ieee_copy_sign => ieee_copy_sign_a2, ieee_copy_sign_a3, ieee_copy_sign_a4, ieee_copy_sign_a8, ieee_copy_sign_a10, ieee_copy_sign_a16 - private :: ieee_copy_sign_a2, ieee_copy_sign_a3, ieee_copy_sign_a4, ieee_copy_sign_a8, ieee_copy_sign_a10, ieee_copy_sign_a16 - - generic :: ieee_is_finite => ieee_is_finite_a2, ieee_is_finite_a3, ieee_is_finite_a4, ieee_is_finite_a8, ieee_is_finite_a10, ieee_is_finite_a16 - private :: ieee_is_finite_a2, ieee_is_finite_a3, ieee_is_finite_a4, ieee_is_finite_a8, ieee_is_finite_a10, ieee_is_finite_a16 - - generic :: ieee_rem => & - ieee_rem_a2_a2, ieee_rem_a2_a3, ieee_rem_a2_a4, ieee_rem_a2_a8, ieee_rem_a2_a10, ieee_rem_a2_a16, & - ieee_rem_a3_a2, ieee_rem_a3_a3, ieee_rem_a3_a4, ieee_rem_a3_a8, ieee_rem_a3_a10, ieee_rem_a3_a16, & - ieee_rem_a4_a2, ieee_rem_a4_a3, ieee_rem_a4_a4, ieee_rem_a4_a8, ieee_rem_a4_a10, ieee_rem_a4_a16, & - ieee_rem_a8_a2, ieee_rem_a8_a3, ieee_rem_a8_a4, ieee_rem_a8_a8, ieee_rem_a8_a10, ieee_rem_a8_a16, & - ieee_rem_a10_a2, ieee_rem_a10_a3, ieee_rem_a10_a4, ieee_rem_a10_a8, ieee_rem_a10_a10, ieee_rem_a10_a16, & - ieee_rem_a16_a2, ieee_rem_a16_a3, ieee_rem_a16_a4, ieee_rem_a16_a8, ieee_rem_a16_a10, ieee_rem_a16_a16 - private :: & - ieee_rem_a2_a2, ieee_rem_a2_a3, ieee_rem_a2_a4, ieee_rem_a2_a8, ieee_rem_a2_a10, ieee_rem_a2_a16, & - ieee_rem_a3_a2, ieee_rem_a3_a3, ieee_rem_a3_a4, ieee_rem_a3_a8, ieee_rem_a3_a10, ieee_rem_a3_a16, & - ieee_rem_a4_a2, ieee_rem_a4_a3, ieee_rem_a4_a4, ieee_rem_a4_a8, ieee_rem_a4_a10, ieee_rem_a4_a16, & - ieee_rem_a8_a2, ieee_rem_a8_a3, ieee_rem_a8_a4, ieee_rem_a8_a8, ieee_rem_a8_a10, ieee_rem_a8_a16, & - ieee_rem_a10_a2, ieee_rem_a10_a3, ieee_rem_a10_a4, ieee_rem_a10_a8, ieee_rem_a10_a10, ieee_rem_a10_a16, & - 
ieee_rem_a16_a2, ieee_rem_a16_a3, ieee_rem_a16_a4, ieee_rem_a16_a8, ieee_rem_a16_a10, ieee_rem_a16_a16 - - generic :: ieee_support_rounding => ieee_support_rounding_, & - ieee_support_rounding_2, ieee_support_rounding_3, & - ieee_support_rounding_4, ieee_support_rounding_8, & - ieee_support_rounding_10, ieee_support_rounding_16 - private :: ieee_support_rounding_, & - ieee_support_rounding_2, ieee_support_rounding_3, & - ieee_support_rounding_4, ieee_support_rounding_8, & - ieee_support_rounding_10, ieee_support_rounding_16 - - ! TODO: more interfaces (_fma, &c.) - - private :: classify - - contains - - elemental logical function class_eq(x,y) - type(ieee_class_type), intent(in) :: x, y - class_eq = x%which == y%which - end function class_eq - - elemental logical function class_ne(x,y) - type(ieee_class_type), intent(in) :: x, y - class_ne = x%which /= y%which - end function class_ne - - elemental logical function round_eq(x,y) - type(ieee_round_type), intent(in) :: x, y - round_eq = x%mode == y%mode - end function round_eq - - elemental logical function round_ne(x,y) - type(ieee_round_type), intent(in) :: x, y - round_ne = x%mode /= y%mode - end function round_ne - - elemental type(ieee_class_type) function classify( & - expo,maxExpo,negative,significandNZ,quietBit) - integer, intent(in) :: expo, maxExpo - logical, intent(in) :: negative, significandNZ, quietBit - if (expo == 0) then - if (significandNZ) then - if (negative) then - classify = ieee_negative_denormal - else - classify = ieee_positive_denormal - end if - else - if (negative) then - classify = ieee_negative_zero - else - classify = ieee_positive_zero - end if - end if - else if (expo == maxExpo) then - if (significandNZ) then - if (quietBit) then - classify = ieee_quiet_nan - else - classify = ieee_signaling_nan - end if - else - if (negative) then - classify = ieee_negative_inf - else - classify = ieee_positive_inf - end if - end if - else - if (negative) then - classify = ieee_negative_normal - else 
- classify = ieee_positive_normal - end if - end if - end function classify - -#define _CLASSIFY(RKIND,IKIND,TOTALBITS,PREC,IMPLICIT) \ - type(ieee_class_type) elemental function ieee_class_a##RKIND(x); \ - real(kind=RKIND), intent(in) :: x; \ - integer(kind=IKIND) :: raw; \ - integer, parameter :: significand = PREC - IMPLICIT; \ - integer, parameter :: exponentBits = TOTALBITS - 1 - significand; \ - integer, parameter :: maxExpo = shiftl(1, exponentBits) - 1; \ - integer :: exponent, sign; \ - logical :: negative, nzSignificand, quiet; \ - raw = transfer(x, raw); \ - exponent = ibits(raw, significand, exponentBits); \ - negative = btest(raw, TOTALBITS - 1); \ - nzSignificand = ibits(raw, 0, significand) /= 0; \ - quiet = btest(raw, significand - 1); \ - ieee_class_a##RKIND = classify(exponent, maxExpo, negative, nzSignificand, quiet); \ - end function ieee_class_a##RKIND - _CLASSIFY(2,2,16,11,1) - _CLASSIFY(3,2,16,8,1) - _CLASSIFY(4,4,32,24,1) - _CLASSIFY(8,8,64,53,1) - _CLASSIFY(10,16,80,64,0) - _CLASSIFY(16,16,128,112,1) -#undef _CLASSIFY - - ! TODO: This might need to be an actual Operation instead -#define _COPYSIGN(RKIND,IKIND,BITS) \ - real(kind=RKIND) elemental function ieee_copy_sign_a##RKIND(x,y); \ - real(kind=RKIND), intent(in) :: x, y; \ - integer(kind=IKIND) :: xbits, ybits; \ - xbits = transfer(x, xbits); \ - ybits = transfer(y, ybits); \ - xbits = ior(ibclr(xbits, BITS-1), iand(ybits, shiftl(1_##IKIND, BITS-1))); \ - ieee_copy_sign_a##RKIND = transfer(xbits, x); \ - end function ieee_copy_sign_a##RKIND - _COPYSIGN(2,2,16) - _COPYSIGN(3,2,16) - _COPYSIGN(4,4,32) - _COPYSIGN(8,8,64) - _COPYSIGN(10,16,80) - _COPYSIGN(16,16,128) -#undef _COPYSIGN - -#define _IS_FINITE(KIND) \ - elemental function ieee_is_finite_a##KIND(x) result(res); \ - real(kind=KIND), intent(in) :: x; \ - logical :: res; \ - type(ieee_class_type) :: classification; \ - classification = ieee_class(x); \ - res = classification == ieee_negative_zero .or. 
classification == ieee_positive_zero \ - .or. classification == ieee_negative_denormal .or. classification == ieee_positive_denormal \ - .or. classification == ieee_negative_normal .or. classification == ieee_positive_normal; \ - end function - _IS_FINITE(2) - _IS_FINITE(3) - _IS_FINITE(4) - _IS_FINITE(8) - _IS_FINITE(10) - _IS_FINITE(16) -#undef _IS_FINITE - -#define _IS_NEGATIVE(KIND) \ - elemental function ieee_is_negative_a##KIND(x) result(res); \ - real(kind=KIND), intent(in) :: x; \ - logical :: res; \ - type(ieee_class_type) :: classification; \ - classification = ieee_class(x); \ - res = classification == ieee_negative_zero .or. classification == ieee_negative_denormal \ - .or. classification == ieee_negative_normal .or. classification == ieee_negative_inf; \ - end function - _IS_NEGATIVE(2) - _IS_NEGATIVE(3) - _IS_NEGATIVE(4) - _IS_NEGATIVE(8) - _IS_NEGATIVE(10) - _IS_NEGATIVE(16) -#undef _IS_NEGATIVE - -#define _IS_NORMAL(KIND) \ - elemental function ieee_is_normal_a##KIND(x) result(res); \ - real(kind=KIND), intent(in) :: x; \ - logical :: res; \ - type(ieee_class_type) :: classification; \ - classification = ieee_class(x); \ - res = classification == ieee_negative_normal .or. classification == ieee_positive_normal \ - .or. classification == ieee_negative_zero .or. classification == ieee_positive_zero; \ - end function - _IS_NORMAL(2) - _IS_NORMAL(3) - _IS_NORMAL(4) - _IS_NORMAL(8) - _IS_NORMAL(10) - _IS_NORMAL(16) -#undef _IS_NORMAL - -! 
TODO: handle edge cases from 17.11.31 -#define _REM(XKIND,YKIND) \ - elemental function ieee_rem_a##XKIND##_a##YKIND(x, y) result(res); \ - real(kind=XKIND), intent(in) :: x; \ - real(kind=YKIND), intent(in) :: y; \ - integer, parameter :: rkind = max(XKIND, YKIND); \ - real(kind=rkind) :: res, tmp; \ - tmp = anint(real(x, kind=rkind) / y); \ - res = x - y * tmp; \ - end function - _REM(2,2) - _REM(2,3) - _REM(2,4) - _REM(2,8) - _REM(2,10) - _REM(2,16) - _REM(3,2) - _REM(3,3) - _REM(3,4) - _REM(3,8) - _REM(3,10) - _REM(3,16) - _REM(4,2) - _REM(4,3) - _REM(4,4) - _REM(4,8) - _REM(4,10) - _REM(4,16) - _REM(8,2) - _REM(8,3) - _REM(8,4) - _REM(8,8) - _REM(8,10) - _REM(8,16) - _REM(10,2) - _REM(10,3) - _REM(10,4) - _REM(10,8) - _REM(10,10) - _REM(10,16) - _REM(16,2) - _REM(16,3) - _REM(16,4) - _REM(16,8) - _REM(16,10) - _REM(16,16) -#undef _REM - - pure logical function ieee_support_rounding_(round_type) - type(ieee_round_type), intent(in) :: round_type - ieee_support_rounding_ = .true. - end function - pure logical function ieee_support_rounding_2(round_type,x) - type(ieee_round_type), intent(in) :: round_type - real(kind=2), intent(in) :: x - ieee_support_rounding_2 = .true. - end function - pure logical function ieee_support_rounding_3(round_type,x) - type(ieee_round_type), intent(in) :: round_type - real(kind=3), intent(in) :: x - ieee_support_rounding_3 = .true. - end function - pure logical function ieee_support_rounding_4(round_type,x) - type(ieee_round_type), intent(in) :: round_type - real(kind=4), intent(in) :: x - ieee_support_rounding_4 = .true. - end function - pure logical function ieee_support_rounding_8(round_type,x) - type(ieee_round_type), intent(in) :: round_type - real(kind=8), intent(in) :: x - ieee_support_rounding_8 = .true. - end function - pure logical function ieee_support_rounding_10(round_type,x) - type(ieee_round_type), intent(in) :: round_type - real(kind=10), intent(in) :: x - ieee_support_rounding_10 = .true. 
- end function - pure logical function ieee_support_rounding_16(round_type,x) - type(ieee_round_type), intent(in) :: round_type - real(kind=16), intent(in) :: x - ieee_support_rounding_16 = .true. - end function + private :: ieee_class_eq, ieee_round_eq, ieee_class_ne, ieee_round_ne + +! Define specifics with 1 or 2 INTEGER, LOGICAL, or REAL arguments for +! generic G. +#define SPECIFICS_I(G) \ + G(1) G(2) G(4) G(8) G(16) +#define SPECIFICS_L(G) \ + G(1) G(2) G(4) G(8) +#define SPECIFICS_R(G) \ + G(2) G(3) G(4) G(8) G(10) G(16) +#define SPECIFICS_II(G) \ + G(1,1) G(1,2) G(1,4) G(1,8) G(1,16) \ + G(2,1) G(2,2) G(2,4) G(2,8) G(2,16) \ + G(4,1) G(4,2) G(4,4) G(4,8) G(4,16) \ + G(8,1) G(8,2) G(8,4) G(8,8) G(8,16) \ + G(16,1) G(16,2) G(16,4) G(16,8) G(16,16) +#define SPECIFICS_RI(G) \ + G(2,1) G(2,2) G(2,4) G(2,8) G(2,16) \ + G(3,1) G(3,2) G(3,4) G(3,8) G(3,16) \ + G(4,1) G(4,2) G(4,4) G(4,8) G(4,16) \ + G(8,1) G(8,2) G(8,4) G(8,8) G(8,16) \ + G(10,1) G(10,2) G(10,4) G(10,8) G(10,16) \ + G(16,1) G(16,2) G(16,4) G(16,8) G(16,16) +#define SPECIFICS_RR(G) \ + G(2,2) G(2,3) G(2,4) G(2,8) G(2,10) G(2,16) \ + G(3,2) G(3,3) G(3,4) G(3,8) G(3,10) G(3,16) \ + G(4,2) G(4,3) G(4,4) G(4,8) G(4,10) G(4,16) \ + G(8,2) G(8,3) G(8,4) G(8,8) G(8,10) G(8,16) \ + G(10,2) G(10,3) G(10,4) G(10,8) G(10,10) G(10,16) \ + G(16,2) G(16,3) G(16,4) G(16,8) G(16,10) G(16,16) + +! Set PRIVATE accessibility for specifics with 1 or 2 INTEGER, LOGICAL, or REAL +! arguments for generic G. 
+#define PRIVATE_I(G) private :: \ + G##_i1, G##_i2, G##_i4, G##_i8, G##_i16 +#define PRIVATE_L(G) private :: \ + G##_l1, G##_l2, G##_l4, G##_l8 +#define PRIVATE_R(G) private :: \ + G##_a2, G##_a3, G##_a4, G##_a8, G##_a10, G##_a16 +#define PRIVATE_II(G) private :: \ + G##_i1_i1, G##_i1_i2, G##_i1_i4, G##_i1_i8, G##_i1_i16, \ + G##_i2_i1, G##_i2_i2, G##_i2_i4, G##_i2_i8, G##_i2_i16, \ + G##_i4_i1, G##_i4_i2, G##_i4_i4, G##_i4_i8, G##_i4_i16, \ + G##_i8_i1, G##_i8_i2, G##_i8_i4, G##_i8_i8, G##_i8_i16, \ + G##_i16_i1, G##_i16_i2, G##_i16_i4, G##_i16_i8, G##_i16_i16 +#define PRIVATE_RI(G) private :: \ + G##_a2_i1, G##_a2_i2, G##_a2_i4, G##_a2_i8, G##_a2_i16, \ + G##_a3_i1, G##_a3_i2, G##_a3_i4, G##_a3_i8, G##_a3_i16, \ + G##_a4_i1, G##_a4_i2, G##_a4_i4, G##_a4_i8, G##_a4_i16, \ + G##_a8_i1, G##_a8_i2, G##_a8_i4, G##_a8_i8, G##_a8_i16, \ + G##_a10_i1, G##_a10_i2, G##_a10_i4, G##_a10_i8, G##_a10_i16, \ + G##_a16_i1, G##_a16_i2, G##_a16_i4, G##_a16_i8, G##_a16_i16 +#define PRIVATE_RR(G) private :: \ + G##_a2_a2, G##_a2_a3, G##_a2_a4, G##_a2_a8, G##_a2_a10, G##_a2_a16, \ + G##_a3_a2, G##_a3_a3, G##_a3_a4, G##_a3_a8, G##_a3_a10, G##_a3_a16, \ + G##_a4_a2, G##_a4_a3, G##_a4_a4, G##_a4_a8, G##_a4_a10, G##_a4_a16, \ + G##_a8_a2, G##_a8_a3, G##_a8_a4, G##_a8_a8, G##_a8_a10, G##_a8_a16, \ + G##_a10_a2, G##_a10_a3, G##_a10_a4, G##_a10_a8, G##_a10_a10, G##_a10_a16, \ + G##_a16_a2, G##_a16_a3, G##_a16_a4, G##_a16_a8, G##_a16_a10, G##_a16_a16 + +#define IEEE_CLASS_R(XKIND) \ + elemental type(ieee_class_type) function ieee_class_a##XKIND(x); \ + import ieee_class_type; \ + real(XKIND), intent(in) :: x; \ + end function ieee_class_a##XKIND; + interface ieee_class + SPECIFICS_R(IEEE_CLASS_R) + end interface ieee_class + PRIVATE_R(IEEE_CLASS) +#undef IEEE_CLASS_R + +#define IEEE_COPY_SIGN_RR(XKIND, YKIND) \ + elemental real(XKIND) function ieee_copy_sign_a##XKIND##_a##YKIND(x, y); \ + real(XKIND), intent(in) :: x; \ + real(YKIND), intent(in) :: y; \ + end function 
ieee_copy_sign_a##XKIND##_a##YKIND; + interface ieee_copy_sign + SPECIFICS_RR(IEEE_COPY_SIGN_RR) + end interface ieee_copy_sign + PRIVATE_RR(IEEE_COPY_SIGN) +#undef IEEE_COPY_SIGN_RR + +#define IEEE_FMA_R(AKIND) \ + elemental real(AKIND) function ieee_fma_a##AKIND(a, b, c); \ + real(AKIND), intent(in) :: a, b, c; \ + end function ieee_fma_a##AKIND; + interface ieee_fma + SPECIFICS_R(IEEE_FMA_R) + end interface ieee_fma + PRIVATE_R(IEEE_FMA) +#undef IEEE_FMA_R + +#define IEEE_GET_ROUNDING_MODE_I(RKIND) \ + subroutine ieee_get_rounding_mode_i##RKIND(round_value, radix); \ + import ieee_round_type; \ + type(ieee_round_type), intent(out) :: round_value; \ + integer(RKIND), intent(in) :: radix; \ + end subroutine ieee_get_rounding_mode_i##RKIND; + interface ieee_get_rounding_mode + subroutine ieee_get_rounding_mode(round_value) + import ieee_round_type + type(ieee_round_type), intent(out) :: round_value + end subroutine ieee_get_rounding_mode + SPECIFICS_I(IEEE_GET_ROUNDING_MODE_I) + end interface ieee_get_rounding_mode + PRIVATE_I(IEEE_GET_ROUNDING_MODE) +#undef IEEE_GET_ROUNDING_MODE_I + +#define IEEE_GET_UNDERFLOW_MODE_L(GKIND) \ + subroutine ieee_get_underflow_mode_l##GKIND(gradual); \ + logical(GKIND), intent(out) :: gradual; \ + end subroutine ieee_get_underflow_mode_l##GKIND; + interface ieee_get_underflow_mode + SPECIFICS_L(IEEE_GET_UNDERFLOW_MODE_L) + end interface ieee_get_underflow_mode + PRIVATE_L(IEEE_GET_UNDERFLOW_MODE) +#undef IEEE_GET_UNDERFLOW_MODE_L + +! When kind argument is present, kind(result) is value(kind), not kind(kind). +! That is not known here, so return integer(16). 
+#define IEEE_INT_R(AKIND) \ + elemental integer function ieee_int_a##AKIND(a, round); \ + import ieee_round_type; \ + real(AKIND), intent(in) :: a; \ + type(ieee_round_type), intent(in) :: round; \ + end function ieee_int_a##AKIND; +#define IEEE_INT_RI(AKIND, KKIND) \ + elemental integer(16) function ieee_int_a##AKIND##_i##KKIND(a, round, kind); \ + import ieee_round_type; \ + real(AKIND), intent(in) :: a; \ + type(ieee_round_type), intent(in) :: round; \ + integer(KKIND), intent(in) :: kind; \ + end function ieee_int_a##AKIND##_i##KKIND; + interface ieee_int + SPECIFICS_R(IEEE_INT_R) + SPECIFICS_RI(IEEE_INT_RI) + end interface ieee_int + PRIVATE_R(IEEE_INT) + PRIVATE_RI(IEEE_INT) +#undef IEEE_INT_R +#undef IEEE_INT_RI + +#define IEEE_IS_FINITE_R(XKIND) \ + elemental logical function ieee_is_finite_a##XKIND(x); \ + real(XKIND), intent(in) :: x; \ + end function ieee_is_finite_a##XKIND; + interface ieee_is_finite + SPECIFICS_R(IEEE_IS_FINITE_R) + end interface ieee_is_finite + PRIVATE_R(IEEE_IS_FINITE) +#undef IEEE_IS_FINITE_R + +#define IEEE_LOGB_R(XKIND) \ + elemental real(XKIND) function ieee_logb_a##XKIND(x); \ + real(XKIND), intent(in) :: x; \ + end function ieee_logb_a##XKIND; + interface ieee_logb + SPECIFICS_R(IEEE_LOGB_R) + end interface ieee_logb + PRIVATE_R(IEEE_LOGB) +#undef IEEE_LOGB_R + +#define IEEE_MAX_NUM_R(XKIND) \ + elemental real(XKIND) function ieee_max_num_a##XKIND(x, y); \ + real(XKIND), intent(in) :: x, y; \ + end function ieee_max_num_a##XKIND; + interface ieee_max_num + SPECIFICS_R(IEEE_MAX_NUM_R) + end interface ieee_max_num + PRIVATE_R(IEEE_MAX_NUM) +#undef IEEE_MAX_NUM_R + +#define IEEE_MAX_NUM_MAG_R(XKIND) \ + elemental real(XKIND) function ieee_max_num_mag_a##XKIND(x, y); \ + real(XKIND), intent(in) :: x, y; \ + end function ieee_max_num_mag_a##XKIND; + interface ieee_max_num_mag + SPECIFICS_R(IEEE_MAX_NUM_MAG_R) + end interface ieee_max_num_mag + PRIVATE_R(IEEE_MAX_NUM_MAG) +#undef IEEE_MAX_NUM_MAG_R + +#define IEEE_MIN_NUM_R(XKIND) 
\ + elemental real(XKIND) function ieee_min_num_a##XKIND(x, y); \ + real(XKIND), intent(in) :: x, y; \ + end function ieee_min_num_a##XKIND; + interface ieee_min_num + SPECIFICS_R(IEEE_MIN_NUM_R) + end interface ieee_min_num + PRIVATE_R(IEEE_MIN_NUM) +#undef IEEE_MIN_NUM_R + +#define IEEE_MIN_NUM_MAG_R(XKIND) \ + elemental real(XKIND) function ieee_min_num_mag_a##XKIND(x, y); \ + real(XKIND), intent(in) :: x, y; \ + end function ieee_min_num_mag_a##XKIND; + interface ieee_min_num_mag + SPECIFICS_R(IEEE_MIN_NUM_MAG_R) + end interface ieee_min_num_mag + PRIVATE_R(IEEE_MIN_NUM_MAG) +#undef IEEE_MIN_NUM_MAG_R + +#define IEEE_QUIET_EQ_R(AKIND) \ + elemental logical function ieee_quiet_eq_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_quiet_eq_a##AKIND; + interface ieee_quiet_eq + SPECIFICS_R(IEEE_QUIET_EQ_R) + end interface ieee_quiet_eq + PRIVATE_R(IEEE_QUIET_EQ) +#undef IEEE_QUIET_EQ_R + +#define IEEE_QUIET_GE_R(AKIND) \ + elemental logical function ieee_quiet_ge_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_quiet_ge_a##AKIND; + interface ieee_quiet_ge + SPECIFICS_R(IEEE_QUIET_GE_R) + end interface ieee_quiet_ge + PRIVATE_R(IEEE_QUIET_GE) +#undef IEEE_QUIET_GE_R + +#define IEEE_QUIET_GT_R(AKIND) \ + elemental logical function ieee_quiet_gt_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_quiet_gt_a##AKIND; + interface ieee_quiet_gt + SPECIFICS_R(IEEE_QUIET_GT_R) + end interface ieee_quiet_gt + PRIVATE_R(IEEE_QUIET_GT) +#undef IEEE_QUIET_GT_R + +#define IEEE_QUIET_LE_R(AKIND) \ + elemental logical function ieee_quiet_le_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_quiet_le_a##AKIND; + interface ieee_quiet_le + SPECIFICS_R(IEEE_QUIET_LE_R) + end interface ieee_quiet_le + PRIVATE_R(IEEE_QUIET_LE) +#undef IEEE_QUIET_LE_R + +#define IEEE_QUIET_LT_R(AKIND) \ + elemental logical function ieee_quiet_lt_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end 
function ieee_quiet_lt_a##AKIND; + interface ieee_quiet_lt + SPECIFICS_R(IEEE_QUIET_LT_R) + end interface ieee_quiet_lt + PRIVATE_R(IEEE_QUIET_LT) +#undef IEEE_QUIET_LT_R + +#define IEEE_QUIET_NE_R(AKIND) \ + elemental logical function ieee_quiet_ne_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_quiet_ne_a##AKIND; + interface ieee_quiet_ne + SPECIFICS_R(IEEE_QUIET_NE_R) + end interface ieee_quiet_ne + PRIVATE_R(IEEE_QUIET_NE) +#undef IEEE_QUIET_NE_R + +! When kind argument is present, kind(result) is value(kind), not kind(kind). +! That is not known here, so return real(16). +#define IEEE_REAL_I(AKIND) \ + elemental real function ieee_real_i##AKIND(a); \ + integer(AKIND), intent(in) :: a; \ + end function ieee_real_i##AKIND; +#define IEEE_REAL_R(AKIND) \ + elemental real function ieee_real_a##AKIND(a); \ + real(AKIND), intent(in) :: a; \ + end function ieee_real_a##AKIND; +#define IEEE_REAL_II(AKIND, KKIND) \ + elemental real(16) function ieee_real_i##AKIND##_i##KKIND(a, kind); \ + integer(AKIND), intent(in) :: a; \ + integer(KKIND), intent(in) :: kind; \ + end function ieee_real_i##AKIND##_i##KKIND; +#define IEEE_REAL_RI(AKIND, KKIND) \ + elemental real(16) function ieee_real_a##AKIND##_i##KKIND(a, kind); \ + real(AKIND), intent(in) :: a; \ + integer(KKIND), intent(in) :: kind; \ + end function ieee_real_a##AKIND##_i##KKIND; + interface ieee_real + SPECIFICS_I(IEEE_REAL_I) + SPECIFICS_R(IEEE_REAL_R) + SPECIFICS_II(IEEE_REAL_II) + SPECIFICS_RI(IEEE_REAL_RI) + end interface ieee_real + PRIVATE_I(IEEE_REAL) + PRIVATE_R(IEEE_REAL) + PRIVATE_II(IEEE_REAL) + PRIVATE_RI(IEEE_REAL) +#undef IEEE_REAL_I +#undef IEEE_REAL_R +#undef IEEE_REAL_II +#undef IEEE_REAL_RI + +#define IEEE_REM_RR(XKIND, YKIND) \ + elemental real(XKIND) function ieee_rem_a##XKIND##_a##YKIND(x, y); \ + real(XKIND), intent(in) :: x; \ + real(YKIND), intent(in) :: y; \ + end function ieee_rem_a##XKIND##_a##YKIND; + interface ieee_rem + SPECIFICS_RR(IEEE_REM_RR) + end interface 
ieee_rem + PRIVATE_RR(IEEE_REM) +#undef IEEE_REM_RR + +#define IEEE_RINT_R(XKIND) \ + elemental real(XKIND) function ieee_rint_a##XKIND(x, round); \ + import ieee_round_type; \ + real(XKIND), intent(in) :: x; \ + type(ieee_round_type), optional, intent(in) :: round; \ + end function ieee_rint_a##XKIND; + interface ieee_rint + SPECIFICS_R(IEEE_RINT_R) + end interface ieee_rint + PRIVATE_R(IEEE_RINT) +#undef IEEE_RINT_R + +#define IEEE_SET_ROUNDING_MODE_I(RKIND) \ + subroutine ieee_set_rounding_mode_i##RKIND(round_value, radix); \ + import ieee_round_type; \ + type(ieee_round_type), intent(in) :: round_value; \ + integer(RKIND), intent(in) :: radix; \ + end subroutine ieee_set_rounding_mode_i##RKIND; + interface ieee_set_rounding_mode + subroutine ieee_set_rounding_mode(round_value) + import ieee_round_type + type(ieee_round_type), intent(in) :: round_value + end subroutine ieee_set_rounding_mode + SPECIFICS_I(IEEE_SET_ROUNDING_MODE_I) + end interface ieee_set_rounding_mode + PRIVATE_I(IEEE_SET_ROUNDING_MODE) +#undef IEEE_SET_ROUNDING_MODE_I + +#define IEEE_SET_UNDERFLOW_MODE_L(GKIND) \ + subroutine ieee_set_underflow_mode_l##GKIND(gradual); \ + logical(GKIND), intent(in) :: gradual; \ + end subroutine ieee_set_underflow_mode_l##GKIND; + interface ieee_set_underflow_mode + SPECIFICS_L(IEEE_SET_UNDERFLOW_MODE_L) + end interface ieee_set_underflow_mode + PRIVATE_L(IEEE_SET_UNDERFLOW_MODE) +#undef IEEE_SET_UNDERFLOW_MODE_L + +#define IEEE_SIGNALING_EQ_R(AKIND) \ + elemental logical function ieee_signaling_eq_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_signaling_eq_a##AKIND; + interface ieee_signaling_eq + SPECIFICS_R(IEEE_SIGNALING_EQ_R) + end interface ieee_signaling_eq + PRIVATE_R(IEEE_SIGNALING_EQ) +#undef IEEE_SIGNALING_EQ_R + +#define IEEE_SIGNALING_GE_R(AKIND) \ + elemental logical function ieee_signaling_ge_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_signaling_ge_a##AKIND; + interface ieee_signaling_ge 
+ SPECIFICS_R(IEEE_SIGNALING_GE_R) + end interface ieee_signaling_ge + PRIVATE_R(IEEE_SIGNALING_GE) +#undef IEEE_SIGNALING_GE_R + +#define IEEE_SIGNALING_GT_R(AKIND) \ + elemental logical function ieee_signaling_gt_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_signaling_gt_a##AKIND; + interface ieee_signaling_gt + SPECIFICS_R(IEEE_SIGNALING_GT_R) + end interface ieee_signaling_gt + PRIVATE_R(IEEE_SIGNALING_GT) +#undef IEEE_SIGNALING_GT_R + +#define IEEE_SIGNALING_LE_R(AKIND) \ + elemental logical function ieee_signaling_le_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_signaling_le_a##AKIND; + interface ieee_signaling_le + SPECIFICS_R(IEEE_SIGNALING_LE_R) + end interface ieee_signaling_le + PRIVATE_R(IEEE_SIGNALING_LE) +#undef IEEE_SIGNALING_LE_R + +#define IEEE_SIGNALING_LT_R(AKIND) \ + elemental logical function ieee_signaling_lt_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_signaling_lt_a##AKIND; + interface ieee_signaling_lt + SPECIFICS_R(IEEE_SIGNALING_LT_R) + end interface ieee_signaling_lt + PRIVATE_R(IEEE_SIGNALING_LT) +#undef IEEE_SIGNALING_LT_R + +#define IEEE_SIGNALING_NE_R(AKIND) \ + elemental logical function ieee_signaling_ne_a##AKIND(a, b); \ + real(AKIND), intent(in) :: a, b; \ + end function ieee_signaling_ne_a##AKIND; + interface ieee_signaling_ne + SPECIFICS_R(IEEE_SIGNALING_NE_R) + end interface ieee_signaling_ne + PRIVATE_R(IEEE_SIGNALING_NE) +#undef IEEE_SIGNALING_NE_R + +#define IEEE_SIGNBIT_R(XKIND) \ + elemental logical function ieee_signbit_a##XKIND(x); \ + real(XKIND), intent(in) :: x; \ + end function ieee_signbit_a##XKIND; + interface ieee_signbit + SPECIFICS_R(IEEE_SIGNBIT_R) + end interface ieee_signbit + PRIVATE_R(IEEE_SIGNBIT) +#undef IEEE_SIGNBIT_R + +#define IEEE_SUPPORT_ROUNDING_R(XKIND) \ + pure logical function ieee_support_rounding_a##XKIND(round_value, x); \ + import ieee_round_type; \ + type(ieee_round_type), intent(in) :: round_value; \ + 
real(XKIND), intent(in) :: x(..); \ + end function ieee_support_rounding_a##XKIND; + interface ieee_support_rounding + logical function ieee_support_rounding(round_value) + import ieee_round_type + type(ieee_round_type), intent(in) :: round_value + end function ieee_support_rounding + SPECIFICS_R(IEEE_SUPPORT_ROUNDING_R) + end interface ieee_support_rounding + PRIVATE_R(IEEE_SUPPORT_ROUNDING) +#undef IEEE_SUPPORT_ROUNDING_R + +#define IEEE_UNORDERED_RR(XKIND, YKIND) \ + elemental logical function ieee_unordered_a##XKIND##_a##YKIND(x, y); \ + real(XKIND), intent(in) :: x; \ + real(YKIND), intent(in) :: y; \ + end function ieee_unordered_a##XKIND##_a##YKIND; + interface ieee_unordered + SPECIFICS_RR(IEEE_UNORDERED_RR) + end interface ieee_unordered + PRIVATE_RR(IEEE_UNORDERED) +#undef IEEE_UNORDERED_RR + +#define IEEE_VALUE_R(XKIND) \ + elemental real(XKIND) function ieee_value_a##XKIND(x, class); \ + import ieee_class_type; \ + real(XKIND), intent(in) :: x; \ + type(ieee_class_type), intent(in) :: class; \ + end function ieee_value_a##XKIND; + interface ieee_value + SPECIFICS_R(IEEE_VALUE_R) + end interface ieee_value + PRIVATE_R(IEEE_VALUE) +#undef IEEE_VALUE_R end module ieee_arithmetic diff --git a/flang/module/iso_fortran_env.f90 b/flang/module/iso_fortran_env.f90 index e85d2e2a7a36cf..abf7098aca5b27 100644 --- a/flang/module/iso_fortran_env.f90 +++ b/flang/module/iso_fortran_env.f90 @@ -144,13 +144,14 @@ module iso_fortran_env integer, parameter :: stat_unlocked = FORTRAN_RUNTIME_STAT_UNLOCKED integer, parameter :: stat_unlocked_failed_image = FORTRAN_RUNTIME_STAT_UNLOCKED_FAILED_IMAGE - contains + interface compiler_options + character(len=80) function compiler_options() + end function compiler_options + end interface compiler_options - character(len=80) function compiler_options() - compiler_options = 'COMPILER_OPTIONS() not yet implemented' - end function compiler_options + interface compiler_version + character(len=80) function compiler_version() + end 
function compiler_version + end interface compiler_version - character(len=80) function compiler_version() - compiler_version = 'f18 in development' - end function compiler_version end module iso_fortran_env From ddd692e9d5722b8b85a871d9a4188358274b5f6e Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 25 May 2022 16:44:58 -0700 Subject: [PATCH 535/908] [flang] Allow forward reference to ENTRY from generic interface The CreateEntry() function in name resolution needs to allow for the name of an alternate entry point already having been declared in the outer scope as the homonymous specific procedure of a generic interface; e.g., interface foo module procedure foo end interface subroutine bar entry foo end subroutine Differential Revision: https://reviews.llvm.org/D126436 --- flang/lib/Semantics/resolve-names.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 9898fe3da57d3d..f03d3fa24b990f 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3411,10 +3411,21 @@ void SubprogramVisitor::CreateEntry( if (outer.IsModule() && !attrs.test(Attr::PRIVATE)) { attrs.set(Attr::PUBLIC); } - Symbol &entrySymbol{MakeSymbol(outer, entryName.source, attrs)}; + Symbol *entrySymbol{FindInScope(outer, entryName.source)}; + if (entrySymbol) { + if (auto *generic{entrySymbol->detailsIf()}) { + if (auto *specific{generic->specific()}) { + // Forward reference to ENTRY from a generic interface + entrySymbol = specific; + entrySymbol->attrs() |= attrs; + } + } + } else { + entrySymbol = &MakeSymbol(outer, entryName.source, attrs); + } SubprogramDetails entryDetails; entryDetails.set_entryScope(currScope()); - entrySymbol.set(subpFlag); + entrySymbol->set(subpFlag); if (subpFlag == Symbol::Flag::Function) { Symbol *result{nullptr}; EntityDetails resultDetails; @@ -3443,11 +3454,12 @@ void 
SubprogramVisitor::CreateEntry( if (subpFlag == Symbol::Flag::Subroutine || (distinctResultName && !badResultName)) { Symbol &assoc{MakeSymbol(entryName.source)}; - assoc.set_details(HostAssocDetails{entrySymbol}); + assoc.set_details(HostAssocDetails{*entrySymbol}); assoc.set(Symbol::Flag::Subroutine); } - Resolve(entryName, entrySymbol); - entrySymbol.set_details(std::move(entryDetails)); + Resolve(entryName, *entrySymbol); + Details details{std::move(entryDetails)}; + entrySymbol->set_details(std::move(details)); } void SubprogramVisitor::PostEntryStmt(const parser::EntryStmt &stmt) { From 085acede578f1151b1b73b1050b2c42e0a6ce10d Mon Sep 17 00:00:00 2001 From: "Haocong.Lu" Date: Thu, 26 May 2022 09:54:52 +0800 Subject: [PATCH 536/908] [RISCV][NFC] Remove solved TODO for combining constant shifts Reviewed By: benshi001, asb Differential Revision: https://reviews.llvm.org/D126185 --- llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll index 22791dd99465e8..d02ab93a5aca52 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll @@ -1763,8 +1763,6 @@ define signext i32 @sext_slliw_zext(i32 zeroext %a) nounwind { ret i32 %1 } -; TODO: the constant shifts could be combined. 
- define zeroext i32 @zext_slliw_aext(i32 %a) nounwind { ; RV64-LABEL: zext_slliw_aext: ; RV64: # %bb.0: From 8aa6b05deb2ace6042274c9a33ff40523444f459 Mon Sep 17 00:00:00 2001 From: Lian Wang Date: Thu, 19 May 2022 07:11:18 +0000 Subject: [PATCH 537/908] [LegalizeTypes][VP] Add widen and split support for VP_TRUNCATE Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D125950 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 17 + .../RISCV/rvv/fixed-vector-trunc-vp.ll | 324 +++++++++++++++++- llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 178 +++++++++- 3 files changed, 513 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 69974df8d121a3..f274b50762489e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1038,6 +1038,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FTRUNC: case ISD::SINT_TO_FP: case ISD::TRUNCATE: + case ISD::VP_TRUNCATE: case ISD::UINT_TO_FP: case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); @@ -2664,6 +2665,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = SplitVecOp_TruncateHelper(N); break; @@ -2907,6 +2909,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Ch); + } else if (N->getNumOperands() == 3) { + assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi); } else { Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); @@ -3764,6 +3774,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT: case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: @@ -4234,6 +4245,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTEC == WidenEC) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); + if (N->getNumOperands() == 3) { + assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue Mask = + GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); + return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2)); + } return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll index 253e9700859e8e..8d7eb7406b5fbe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+m 
-riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s declare <2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<2 x i16>, <2 x i1>, i32) @@ -198,6 +198,20 @@ define <2 x i16> @vtrunc_nxv2i16_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) ret <2 x i16> %v } +declare <15 x i16> @llvm.vp.trunc.nxv15i16.nxv15i64(<15 x i64>, <15 x i1>, i32) + +define <15 x i16> @vtrunc_nxv15i16_nxv15i64(<15 x i64> %a, <15 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv15i16_nxv15i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <15 x i16> @llvm.vp.trunc.nxv15i16.nxv15i64(<15 x i64> %a, <15 x i1> %m, i32 %vl) + ret <15 x i16> %v +} + declare <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64>, <2 x i1>, i32) define <2 x i32> @vtrunc_nxv2i32_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) { @@ -219,3 +233,309 @@ define <2 x i32> @vtrunc_nxv2i32_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) %v = call <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) ret <2 x i32> %v } + +declare <128 x i32> @llvm.vp.trunc.nxv128i64.nxv128i32(<128 x i64>, <128 x i1>, i32) + +define <128 x i32> @vtrunc_nxv128i32_nxv128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv128i32_nxv128i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 56 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 
+; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu +; CHECK-NEXT: addi a3, a7, -64 +; CHECK-NEXT: vslidedown.vi v2, v0, 8 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: bltu a7, a3, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v3, v2, 4 +; CHECK-NEXT: addi a6, a4, -32 +; CHECK-NEXT: addi a3, a1, 640 +; CHECK-NEXT: mv a5, a2 +; CHECK-NEXT: bltu a4, a6, .LBB16_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: .LBB16_4: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v0, v3, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vle64.v v16, (a3) +; CHECK-NEXT: addi t0, a5, -16 +; CHECK-NEXT: addi a6, a1, 512 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: bltu a5, t0, .LBB16_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a3, t0 +; CHECK-NEXT: .LBB16_6: +; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, mu +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: vncvt.x.x.w v24, v16, v0.t +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 4 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a5, a3, .LBB16_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: li a5, 16 +; CHECK-NEXT: .LBB16_8: +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: li t0, 48 +; CHECK-NEXT: mul a6, a6, t0 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill 
+; CHECK-NEXT: bltu a7, a5, .LBB16_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: li a7, 64 +; CHECK-NEXT: .LBB16_10: +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v3, v1, 4 +; CHECK-NEXT: addi t0, a7, -32 +; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: bltu a7, t0, .LBB16_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: mv a6, t0 +; CHECK-NEXT: .LBB16_12: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v0, v3, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vle64.v v16, (a5) +; CHECK-NEXT: addi a5, a6, -16 +; CHECK-NEXT: mv t0, a2 +; CHECK-NEXT: bltu a6, a5, .LBB16_14 +; CHECK-NEXT: # %bb.13: +; CHECK-NEXT: mv t0, a5 +; CHECK-NEXT: .LBB16_14: +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v24, v16, v0.t +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs8r.v v24, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a6, a3, .LBB16_16 +; CHECK-NEXT: # %bb.15: +; CHECK-NEXT: li a6, 16 +; CHECK-NEXT: .LBB16_16: +; CHECK-NEXT: addi t0, a1, 384 +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: li t1, 40 +; CHECK-NEXT: mul a6, a6, t1 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a4, a5, .LBB16_18 +; CHECK-NEXT: # %bb.17: +; CHECK-NEXT: li a4, 32 +; CHECK-NEXT: .LBB16_18: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v0, v2, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vle64.v v24, (t0) +; CHECK-NEXT: addi t0, a4, -16 +; CHECK-NEXT: addi a6, a1, 256 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: bltu a4, t0, .LBB16_20 +; CHECK-NEXT: # %bb.19: +; CHECK-NEXT: 
mv a1, t0 +; CHECK-NEXT: .LBB16_20: +; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a4, a3, .LBB16_22 +; CHECK-NEXT: # %bb.21: +; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: .LBB16_22: +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vncvt.x.x.w v24, v8, v0.t +; CHECK-NEXT: bltu a7, a5, .LBB16_24 +; CHECK-NEXT: # %bb.23: +; CHECK-NEXT: li a7, 32 +; CHECK-NEXT: .LBB16_24: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: addi a1, a7, -16 +; CHECK-NEXT: vslidedown.vi v0, v1, 2 +; CHECK-NEXT: bltu a7, a1, .LBB16_26 +; CHECK-NEXT: # %bb.25: +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: .LBB16_26: +; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, mu +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 
40 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v24, v8, 16 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a7, a3, .LBB16_28 +; CHECK-NEXT: # %bb.27: +; CHECK-NEXT: li a7, 16 +; CHECK-NEXT: .LBB16_28: +; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, mu +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vse32.v v16, (a0) +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: addi a0, a0, 384 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 48 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 
+; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 56 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <128 x i32> @llvm.vp.trunc.nxv128i64.nxv128i32(<128 x i64> %a, <128 x i1> %m, i32 %vl) + ret <128 x i32> %v +} + +declare <32 x i32> @llvm.vp.trunc.nxv32i64.nxv32i32(<32 x i64>, <32 x i1>, i32) + +define <32 x i32> @vtrunc_nxv32i32_nxv32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv32i32_nxv32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: bltu a0, a2, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB17_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <32 x i32> @llvm.vp.trunc.nxv32i64.nxv32i32(<32 x i64> %a, <32 x i1> %m, i32 %vl) + ret <32 x i32> %v +} 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index fc5ed87865e419..28db08a23597b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -152,6 +152,41 @@ define @vtrunc_nxv2i16_nxv2i64_unmasked( %a ret %v } +declare @llvm.vp.trunc.nxv15i16.nxv15i64(, , i32) + +define @vtrunc_nxv15i16_nxv15i64( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv15i16_nxv15i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, mu +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: bltu a0, a3, .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v28, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v18, v28, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB12_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB12_4: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.trunc.nxv15i16.nxv15i64( %a, %m, i32 %vl) + ret %v +} + declare @llvm.vp.trunc.nxv2i64.nxv2i32(, , i32) define @vtrunc_nxv2i32_nxv2i64( %a, %m, i32 zeroext %vl) { @@ -189,18 +224,18 @@ define @vtrunc_nxv32i7_nxv32i32( %a, @vtrunc_nxv32i7_nxv32i32( %a, @llvm.vp.trunc.nxv32i7.nxv32i32( %a, %m, i32 %vl) ret %v } + +declare @llvm.vp.trunc.nxv32i8.nxv32i32(, , i32) + +define @vtrunc_nxv32i8_nxv32i32( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv32i8_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: 
srli a4, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: bltu a0, a3, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v28, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v18, v28, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB16_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB16_4: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.trunc.nxv32i8.nxv32i32( %a, %m, i32 %vl) + ret %v +} + +declare @llvm.vp.trunc.nxv32i64.nxv32i32(, , i32) + +define @vtrunc_nxv32i64_nxv32i32( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv32i64_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a4, a1, 1 +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: mv a5, a2 +; CHECK-NEXT: bltu a2, a4, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: li a6, 0 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, mu +; CHECK-NEXT: sub a7, a5, a1 +; CHECK-NEXT: vslidedown.vx v0, v24, a3 +; CHECK-NEXT: bltu a5, a7, .LBB17_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: srli a7, a1, 2 +; CHECK-NEXT: slli t0, a1, 3 +; 
CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v12, v16, v0.t +; CHECK-NEXT: bltu a5, a1, .LBB17_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: .LBB17_6: +; CHECK-NEXT: li a6, 0 +; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v1, v24, a7 +; CHECK-NEXT: add a7, a0, t0 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu +; CHECK-NEXT: sub a4, a2, a4 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 3 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: bltu a2, a4, .LBB17_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: mv a6, a4 +; CHECK-NEXT: .LBB17_8: +; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu +; CHECK-NEXT: vl8re64.v v16, (a7) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: sub a4, a6, a1 +; CHECK-NEXT: vslidedown.vx v0, v1, a3 +; CHECK-NEXT: bltu a6, a4, .LBB17_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: .LBB17_10: +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v20, v24, v0.t +; CHECK-NEXT: bltu a6, a1, .LBB17_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: .LBB17_12: +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call @llvm.vp.trunc.nxv32i64.nxv32i32( %a, %m, i32 %vl) + ret %v +} From 65fd1e91b0f864f6f97742f2ed5d0243a9dc8f55 Mon Sep 17 00:00:00 2001 From: Alan Zhao Date: Wed, 25 May 2022 18:06:08 -0400 Subject: [PATCH 538/908] [llvm-ml] Add support for the .S extension Even though MASM files typically have the .asm extension, there are some use cases [0] where they have the .S extension. MSVC ml assembles such files with no problems, so llvm-ml should as well. Additionally, fix the implementation of the /Ta flag and add a test for it. [0]: https://crrev.com/c/3668287 Reviewed By: epastor Differential Revision: https://reviews.llvm.org/D126425 --- llvm/test/tools/llvm-ml/invalid_file_extension.blah | 8 ++++++++ llvm/test/tools/llvm-ml/lit.local.cfg | 2 ++ llvm/test/tools/llvm-ml/valid_file_extension.S | 5 +++++ llvm/tools/llvm-ml/Opts.td | 4 ++-- llvm/tools/llvm-ml/llvm-ml.cpp | 6 ++++-- 5 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/invalid_file_extension.blah create mode 100644 llvm/test/tools/llvm-ml/valid_file_extension.S diff --git a/llvm/test/tools/llvm-ml/invalid_file_extension.blah b/llvm/test/tools/llvm-ml/invalid_file_extension.blah new file mode 100644 index 00000000000000..7b916910a12707 --- /dev/null +++ b/llvm/test/tools/llvm-ml/invalid_file_extension.blah @@ -0,0 +1,8 @@ +; RUN: not llvm-ml %s /Fo /dev/null 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID +; RUN: llvm-ml /Ta %s -m64 -filetype=s /Fo - | FileCheck %s --check-prefixes=CHECK-TA + +; CHECK-INVALID: error: invalid option '{{.*}}invalid_file_extension.blah' + +.code +foo: +; CHECK-TA: foo: diff --git a/llvm/test/tools/llvm-ml/lit.local.cfg b/llvm/test/tools/llvm-ml/lit.local.cfg index 5729c25cfd5be9..ffc3d7a1e0ab1d 100644 --- a/llvm/test/tools/llvm-ml/lit.local.cfg +++ b/llvm/test/tools/llvm-ml/lit.local.cfg @@ 
-3,3 +3,5 @@ if not ('X86' in config.root.targets): config.unsupported = True config.suffixes.add('.asm') +config.suffixes.add('.S') +config.suffixes.add('.blah') diff --git a/llvm/test/tools/llvm-ml/valid_file_extension.S b/llvm/test/tools/llvm-ml/valid_file_extension.S new file mode 100644 index 00000000000000..8c7e9435fed6c3 --- /dev/null +++ b/llvm/test/tools/llvm-ml/valid_file_extension.S @@ -0,0 +1,5 @@ +; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s + +.code +foo: +; CHECK: foo: diff --git a/llvm/tools/llvm-ml/Opts.td b/llvm/tools/llvm-ml/Opts.td index 9cd1e91ecbb556..88897a7aa7c8f1 100644 --- a/llvm/tools/llvm-ml/Opts.td +++ b/llvm/tools/llvm-ml/Opts.td @@ -31,7 +31,7 @@ class UnsupportedSeparate : Separate<["/", "-"], name>, def bitness : LLVMJoined<"m">, Values<"32,64">, HelpText<"Target platform (x86 or x86-64)">; def as_lex : LLVMFlag<"as-lex">, - HelpText<"Lex tokens from a .asm file without assembling">; + HelpText<"Lex tokens from an .asm or .S file without assembling">; def debug : LLVMFlag<"debug">, Flags<[HelpHidden]>, HelpText<"Enable debug output">; def debug_only : LLVMCommaJoined<"debug-only=">, Flags<[HelpHidden]>, @@ -82,7 +82,7 @@ def safeseh : MLFlag<"safeseh">, "32-bit.">; def assembly_file : MLJoinedOrSeparate<"Ta">, HelpText<"Assemble source file with name not ending with " - "the .asm extension">; + "the .asm or the .S extension">; def error_on_warning : MLFlag<"WX">, Alias; def parse_only : MLFlag<"Zs">, HelpText<"Run a syntax-check only">, Alias, AliasArgs<["null"]>; diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index 2fd218a0c907b8..2651132f23ccc2 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -208,7 +208,9 @@ int main(int Argc, char **Argv) { std::string InputFilename; for (auto *Arg : InputArgs.filtered(OPT_INPUT)) { std::string ArgString = Arg->getAsString(InputArgs); - if (ArgString == "-" || StringRef(ArgString).endswith(".asm")) { + StringRef 
ArgStringRef(ArgString); + if (ArgString == "-" || ArgStringRef.endswith(".asm") || + ArgStringRef.endswith(".S")) { if (!InputFilename.empty()) { WithColor::warning(errs(), ProgName) << "does not support multiple assembly files in one command; " @@ -234,7 +236,7 @@ int main(int Argc, char **Argv) { << "does not support multiple assembly files in one command; " << "ignoring '" << InputFilename << "'\n"; } - InputFilename = Arg->getAsString(InputArgs); + InputFilename = Arg->getValue(); } for (auto *Arg : InputArgs.filtered(OPT_unsupported_Group)) { From b271488e8b13a2b776c2e80769a192607521687e Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Wed, 11 May 2022 09:17:43 +0000 Subject: [PATCH 539/908] [RISCV] Replace ISD::FP_EXTEND and ISD::FP_ROUND with RVV VL op. This patch tries to solve the incoordination between the direct and intermediate cast caused by D123975. This patch replaces ISD::FP_EXTEND and ISD::FP_ROUND with RVV VL op in the lowering of FP scalable vector direct cast to unify with the intermediate cast. And it also changes the FP widenning pattern with the VL op. Differential Revision: https://reviews.llvm.org/D125364 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 - .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 111 ++++++++++++------ 2 files changed, 76 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 49c553af1435c3..d9829298c4ace8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4269,10 +4269,6 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, bool IsDirectConv = IsDirectExtend || IsDirectTrunc; - // For FP_ROUND/FP_EXTEND of scalable vectors, leave it to the pattern. - if (!VT.isFixedLengthVector() && !IsVP && IsDirectConv) - return Op; - // Prepare any fixed-length vector operands. 
MVT ContainerVT = VT; SDValue Mask, VL; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index ba2000e5a31280..2bb2df11f6d35b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -410,15 +410,25 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF { foreach vtiToWti = AllWidenableFloatVectors in { defvar vti = vtiToWti.Vti; defvar wti = vtiToWti.Wti; - def : Pat<(op (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs2))), - (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs1)))), + def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast(instruction_name#"_VV_"#vti.LMul.MX) vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; - def : Pat<(op (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs2))), - (wti.Vector (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1))))), + def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; - def : Pat<(op (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs2))), + def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; @@ -430,14 +440,18 @@ multiclass 
VPatWidenBinaryFPSDNode_WV_WF { defvar vti = vtiToWti.Vti; defvar wti = vtiToWti.Wti; def : Pat<(op (wti.Vector wti.RegClass:$rs2), - (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs1)))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast(instruction_name#"_WV_"#vti.LMul.MX) wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; def : Pat<(op (wti.Vector wti.RegClass:$rs2), - (wti.Vector (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1))))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; - def : Pat<(op (wti.Vector wti.RegClass:$rs2), + def : Pat<(op (wti.Vector wti.RegClass:$rs2), (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))), (!cast(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; @@ -453,14 +467,22 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF { foreach vtiToWti = AllWidenableFloatVectors in { defvar vti = vtiToWti.Vti; defvar wti = vtiToWti.Wti; - def : Pat<(fma (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs1))), - (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs2))), + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), (wti.Vector wti.RegClass:$rd)), (!cast(instruction_name#"_VV_"#vti.LMul.MX) wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fma (wti.Vector (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)))), - (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs2))), + 
def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), (XLenVT srcvalue))), + (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), (XLenVT srcvalue))), (wti.Vector wti.RegClass:$rd)), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, @@ -472,20 +494,30 @@ multiclass VPatWidenFPNegMulAccSDNode_VV_VF { foreach vtiToWti = AllWidenableFloatVectors in { defvar vti = vtiToWti.Vti; defvar wti = vtiToWti.Wti; - def : Pat<(fma (fneg (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs1)))), - (fpext_oneuse (vti.Vector vti.RegClass:$rs2)), + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), (fneg wti.RegClass:$rd)), (!cast(instruction_name#"_VV_"#vti.LMul.MX) wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fma (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1))), - (fneg (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs2)))), + def : Pat<(fma (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag), + (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag))), (fneg wti.RegClass:$rd)), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fma (fneg (wti.Vector (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1))))), - (fpext_oneuse (vti.Vector vti.RegClass:$rs2)), + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), 
VLOpFrag))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), (fneg wti.RegClass:$rd)), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, @@ -497,14 +529,20 @@ multiclass VPatWidenFPMulSacSDNode_VV_VF { foreach vtiToWti = AllWidenableFloatVectors in { defvar vti = vtiToWti.Vti; defvar wti = vtiToWti.Wti; - def : Pat<(fma (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs1))), - (fpext_oneuse (vti.Vector vti.RegClass:$rs2)), + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag)), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), (fneg wti.RegClass:$rd)), (!cast(instruction_name#"_VV_"#vti.LMul.MX) wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fma (wti.Vector (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)))), - (fpext_oneuse (vti.Vector vti.RegClass:$rs2)), + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), (fneg wti.RegClass:$rd)), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, @@ -516,20 +554,30 @@ multiclass VPatWidenFPNegMulSacSDNode_VV_VF { foreach vtiToWti = AllWidenableFloatVectors in { defvar vti = vtiToWti.Vti; defvar wti = vtiToWti.Wti; - def : Pat<(fma (fneg (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs1)))), - (fpext_oneuse (vti.Vector vti.RegClass:$rs2)), + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), VLOpFrag))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), 
VLOpFrag), wti.RegClass:$rd), (!cast(instruction_name#"_VV_"#vti.LMul.MX) wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fma (wti.Vector (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)))), - (fneg (wti.Vector (fpext_oneuse (vti.Vector vti.RegClass:$rs2)))), + def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag)), + (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag))), wti.RegClass:$rd), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(fma (fneg (wti.Vector (fpext_oneuse (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1))))), - (fpext_oneuse (vti.Vector vti.RegClass:$rs2)), + def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), + (vti.Mask true_mask), VLOpFrag))), + (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), + (vti.Mask true_mask), VLOpFrag), wti.RegClass:$rd), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, @@ -951,13 +999,6 @@ defm : VPatWConvertFP2ISDNode_V; defm : VPatWConvertFP2ISDNode_V; defm : VPatWConvertI2FPSDNode_V; defm : VPatWConvertI2FPSDNode_V; -foreach fvtiToFWti = AllWidenableFloatVectors in { - defvar fvti = fvtiToFWti.Vti; - defvar fwti = fvtiToFWti.Wti; - def : Pat<(fwti.Vector (fpextend (fvti.Vector fvti.RegClass:$rs1))), - (!cast("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX) - fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>; -} // 14.19. 
Narrowing Floating-Point/Integer Type-Convert Instructions defm : VPatNConvertFP2ISDNode_V; From 41aab93afc23e8f338c3617161e42d7ff54da74b Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Thu, 26 May 2022 10:16:54 +0800 Subject: [PATCH 540/908] [InstCombine] bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y)) This patch break foldBitCastBitwiseLogic limite the destination must have an integer element type, and eliminate one bitcast by doing the logic op in the type of the input that has an integer element type. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D126184 --- .../InstCombine/InstCombineCasts.cpp | 30 ++++++++++- .../InstCombine/bitcast-inseltpoison.ll | 11 ++-- llvm/test/Transforms/InstCombine/bitcast.ll | 51 +++++++++++++++---- 3 files changed, 71 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index d000d79d5e3834..9fb266bd60c75f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2377,8 +2377,8 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder) { Type *DestTy = BitCast.getType(); BinaryOperator *BO; - if (!DestTy->isIntOrIntVectorTy() || - !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || + + if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || !BO->isBitwiseLogicOp()) return nullptr; @@ -2388,6 +2388,32 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy()) return nullptr; + if (DestTy->isFPOrFPVectorTy()) { + Value *X, *Y; + // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y)) + if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) && + match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) { + if (X->getType()->isFPOrFPVectorTy() && + 
Y->getType()->isIntOrIntVectorTy()) { + Value *CastedOp = + Builder.CreateBitCast(BO->getOperand(0), Y->getType()); + Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y); + return CastInst::CreateBitOrPointerCast(NewBO, DestTy); + } + if (X->getType()->isIntOrIntVectorTy() && + Y->getType()->isFPOrFPVectorTy()) { + Value *CastedOp = + Builder.CreateBitCast(BO->getOperand(1), X->getType()); + Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X); + return CastInst::CreateBitOrPointerCast(NewBO, DestTy); + } + } + return nullptr; + } + + if (!DestTy->isIntOrIntVectorTy()) + return nullptr; + Value *X; if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy && !isa(X)) { diff --git a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll index 4e244a76b07dc0..95edb21f7340bd 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll @@ -130,16 +130,11 @@ define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) { ret <4 x i32> %bc3 } -; The destination must have an integer element type. -; FIXME: We can still eliminate one bitcast in this test by doing the logic op -; in the type of the input that has an integer element type. 
- define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) { ; CHECK-LABEL: @bitcasts_and_bitcast_to_fp( -; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[BC2]], [[BC1]] -; CHECK-NEXT: [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[BC3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[BC3]] ; %bc1 = bitcast <4 x float> %a to <2 x i64> diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index da2ff8e3d94457..eb30719c75f2c5 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -4,6 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" +declare void @use_vec(<2 x i64>) + ; Bitcasts between vectors and scalars are valid. ; PR4487 define i32 @test1(i64 %a) { @@ -130,16 +132,11 @@ define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) { ret <4 x i32> %bc3 } -; The destination must have an integer element type. -; FIXME: We can still eliminate one bitcast in this test by doing the logic op -; in the type of the input that has an integer element type. 
- define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) { ; CHECK-LABEL: @bitcasts_and_bitcast_to_fp( -; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[BC2]], [[BC1]] -; CHECK-NEXT: [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[BC3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[BC3]] ; %bc1 = bitcast <4 x float> %a to <2 x i64> @@ -149,17 +146,49 @@ define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) { ret <4 x float> %bc3 } -define <4 x float> @bitcasts_or_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) { +define <2 x double> @bitcasts_or_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) { ; CHECK-LABEL: @bitcasts_or_bitcast_to_fp( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[BC3:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x double> +; CHECK-NEXT: ret <2 x double> [[BC3]] +; + %bc1 = bitcast <4 x float> %a to <2 x i64> + %bc2 = bitcast <8 x i16> %b to <2 x i64> + %and = or <2 x i64> %bc1, %bc2 + %bc3 = bitcast <2 x i64> %and to <2 x double> + ret <2 x double> %bc3 +} + +define <4 x float> @bitcasts_xor_bitcast_to_fp(<2 x double> %a, <8 x i16> %b) { +; CHECK-LABEL: @bitcasts_xor_bitcast_to_fp( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i16> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[BC3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x float> +; CHECK-NEXT: ret <4 x float> [[BC3]] +; + %bc1 = bitcast <8 x i16> %b to <2 x i64> + %bc2 = bitcast <2 x double> %a to <2 x i64> + %xor = xor <2 x i64> %bc2, %bc1 + %bc3 = bitcast <2 x i64> %xor to 
<4 x float> + ret <4 x float> %bc3 +} + +; Negative test + +define <4 x float> @bitcasts_and_bitcast_to_fp_multiuse(<4 x float> %a, <8 x i16> %b) { +; CHECK-LABEL: @bitcasts_and_bitcast_to_fp_multiuse( ; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[AND:%.*]] = or <2 x i64> [[BC1]], [[BC2]] +; CHECK-NEXT: call void @use_vec(<2 x i64> [[BC2]]) +; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[BC2]], [[BC1]] ; CHECK-NEXT: [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[BC3]] ; %bc1 = bitcast <4 x float> %a to <2 x i64> %bc2 = bitcast <8 x i16> %b to <2 x i64> - %and = or <2 x i64> %bc1, %bc2 + call void @use_vec(<2 x i64> %bc2) + %and = and <2 x i64> %bc2, %bc1 %bc3 = bitcast <2 x i64> %and to <4 x float> ret <4 x float> %bc3 } From 1486a9c9fe8221d02a4482655e20bb95455c49cf Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Thu, 26 May 2022 11:07:18 +0800 Subject: [PATCH 541/908] [InstCombine] [NFC] refector foldXorOfICmps Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D126268 --- .../InstCombine/InstCombineAndOrXor.cpp | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ebe2a3f74d5dbb..30706cde8e7dab 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3054,27 +3054,26 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS && I.getOperand(1) == RHS && "Should be 'xor' with these operands"); - if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { - if (LHS->getOperand(0) == RHS->getOperand(1) && - LHS->getOperand(1) == RHS->getOperand(0)) - LHS->swapOperands(); - if 
(LHS->getOperand(0) == RHS->getOperand(0) && - LHS->getOperand(1) == RHS->getOperand(1)) { + ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); + Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); + Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); + + if (predicatesFoldable(PredL, PredR)) { + if (LHS0 == RHS1 && LHS1 == RHS0) { + std::swap(LHS0, LHS1); + PredL = ICmpInst::getSwappedPredicate(PredL); + } + if (LHS0 == RHS0 && LHS1 == RHS1) { // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) - Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); - unsigned Code = - getICmpCode(LHS->getPredicate()) ^ getICmpCode(RHS->getPredicate()); + unsigned Code = getICmpCode(PredL) ^ getICmpCode(PredR); bool IsSigned = LHS->isSigned() || RHS->isSigned(); - return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder); + return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder); } } // TODO: This can be generalized to compares of non-signbits using // decomposeBitTestICmp(). It could be enhanced more by using (something like) // foldLogOpOfMaskedICmps(). - ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); - Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); if ((LHS->hasOneUse() || RHS->hasOneUse()) && LHS0->getType() == RHS0->getType() && LHS0->getType()->isIntOrIntVectorTy()) { From 80ab16d0ed7535e752f4c90f6a89ccff108a22e2 Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Wed, 25 May 2022 23:31:17 -0400 Subject: [PATCH 542/908] [NFC][LoopCacheAnalysis] Update test cases to make sure the outputs follow the right order In this patch we change test cases from using "CHECK" to using "CHECK-NEXT", which is to ensure the order of loops output by loop cache analysis is correct. 
After D124725 we fixed the non-deterministic output order hence we did not use "CHECK-DAG" anymore, and now we should really use "CHECK-NEXT" to make sure the loops in the output loop vector follow the right order. Reviewed By: bmahjour, #loopoptwg Differential Revision: https://reviews.llvm.org/D124984 --- .../LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll | 12 ++++++------ .../LoopCacheAnalysis/PowerPC/loads-store.ll | 4 ++-- .../Analysis/LoopCacheAnalysis/PowerPC/matmul.ll | 4 ++-- .../Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll | 8 ++++---- .../LoopCacheAnalysis/PowerPC/multi-store.ll | 6 +++--- .../LoopCacheAnalysis/PowerPC/single-store.ll | 4 ++-- .../Analysis/LoopCacheAnalysis/PowerPC/stencil.ll | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll index bb868b5d435032..47ac25f68903d5 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll @@ -7,7 +7,7 @@ target triple = "powerpc64le-unknown-linux-gnu" ; The IR is copied from llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll ; CHECK: Loop 'for.body' has cost = 4186116 -; CHECK: Loop 'for.body4' has cost = 128898 +; CHECK-NEXT: Loop 'for.body4' has cost = 128898 ;; #define N 1024 ;; #define M 2048 @@ -49,7 +49,7 @@ for.end13: ; preds = %for.inc11 ; CHECK: Loop 'for.body' has cost = 4186116 -; CHECK: Loop 'for.body4' has cost = 128898 +; CHECK-NEXT: Loop 'for.body4' has cost = 128898 define void @t2([2048 x i32]* %a) { entry: @@ -84,10 +84,10 @@ for.end13: ; preds = %for.inc11 declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg) ; CHECK: Loop 'for.body' has cost = 2112128815104000000 -; CHECK: Loop 'for.body4' has cost = 16762927104000000 -; CHECK: Loop 'for.body8' has cost = 130960368000000 -; CHECK: Loop 'for.body12' 
has cost = 1047682944000 -; CHECK: Loop 'for.body16' has cost = 32260032000 +; CHECK-NEXT: Loop 'for.body4' has cost = 16762927104000000 +; CHECK-NEXT: Loop 'for.body8' has cost = 130960368000000 +; CHECK-NEXT: Loop 'for.body12' has cost = 1047682944000 +; CHECK-NEXT: Loop 'for.body16' has cost = 32260032000 ;; #define N 128 ;; #define M 2048 diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll index c5c9e7602e9baf..3eb21c25f4bfa4 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll @@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu" ; } ; CHECK: Loop 'for.i' has cost = 3000000 -; CHECK: Loop 'for.k' has cost = 2030000 -; CHECK: Loop 'for.j' has cost = 1060000 +; CHECK-NEXT: Loop 'for.k' has cost = 2030000 +; CHECK-NEXT: Loop 'for.j' has cost = 1060000 define void @foo(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) { entry: diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll index 6679b1980ddc61..08ceb569eac4ad 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll @@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu" ; } ; CHECK:Loop 'for.i' has cost = 2010000 -; CHECK:Loop 'for.k' has cost = 1040000 -; CHECK:Loop 'for.j' has cost = 70000 +; CHECK-NEXT:Loop 'for.k' has cost = 1040000 +; CHECK-NEXT:Loop 'for.j' has cost = 70000 define void @matmul(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) { entry: diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll index 2b317c02b7fb84..a39dff4460c466 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll @@ -15,10 +15,10 @@ 
target triple = "powerpc64le-unknown-linux-gnu" ; } ; CHECK: Loop 'k_loop' has cost = 10200000000000000 -; CHECK: Loop 'j_loop' has cost = 102000000000000 -; CHECK: Loop 'i_loop' has cost = 1020000000000 -; CHECK: Loop 'm_loop' has cost = 10700000000 -; CHECK: Loop 'l_loop' has cost = 1300000000 +; CHECK-NEXT: Loop 'j_loop' has cost = 102000000000000 +; CHECK-NEXT: Loop 'i_loop' has cost = 1020000000000 +; CHECK-NEXT: Loop 'm_loop' has cost = 10700000000 +; CHECK-NEXT: Loop 'l_loop' has cost = 1300000000 %_elem_type_of_double = type <{ double }> diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll index 89d959e9278403..d334bfe2c61375 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll @@ -3,9 +3,9 @@ target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64le-unknown-linux-gnu" -; CHECK-DAG: Loop 'for.j' has cost = 201000000 -; CHECK-DAG: Loop 'for.i' has cost = 102000000 -; CHECK-DAG: Loop 'for.k' has cost = 90000 +; CHECK: Loop 'for.j' has cost = 201000000 +; CHECK-NEXT: Loop 'for.i' has cost = 102000000 +; CHECK-NEXT: Loop 'for.k' has cost = 90000 ;; Test to make sure when we have multiple conflicting access patterns, the ;; chosen loop configuration favours the majority of those accesses. 
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll index 24d8e932a504b5..9a060e8d6249ec 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll @@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu" ; } ; CHECK: Loop 'for.i' has cost = 100000000 -; CHECK: Loop 'for.j' has cost = 1000000 -; CHECK: Loop 'for.k' has cost = 60000 +; CHECK-NEXT: Loop 'for.j' has cost = 1000000 +; CHECK-NEXT: Loop 'for.k' has cost = 60000 define void @foo(i64 %n, i64 %m, i64 %o, i32* %A) { entry: diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll index f2549f4c96f645..b799595f65a575 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll @@ -12,7 +12,7 @@ target triple = "powerpc64le-unknown-linux-gnu" ; } ; CHECK: Loop 'for.i' has cost = 20600 -; CHECK: Loop 'for.j' has cost = 800 +; CHECK-NEXT: Loop 'for.j' has cost = 800 define void @foo(i64 %n, i64 %m, i32* %A, i32* %B, i32* %C) { entry: From 52698a33d03118f684f0f92380e107b9049ec206 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 26 May 2022 00:37:51 +0200 Subject: [PATCH 543/908] [mlir][bufferization] Clean up imports and code comments Differential Revision: https://reviews.llvm.org/D126427 --- .../Bufferization/IR/BufferizableOpInterface.h | 9 +-------- .../Dialect/Bufferization/Transforms/Bufferize.h | 14 +++++--------- .../Bufferization/Transforms/OneShotAnalysis.h | 1 - 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index 9d3acb3ca0e428..eb3cff0722b04c 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ 
b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -9,20 +9,13 @@ #ifndef MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZABLEOPINTERFACE_H_ #define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZABLEOPINTERFACE_H_ -#include - -#include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/SetVector.h" namespace mlir { -class BlockAndValueMapping; -class DominanceInfo; +class OpBuilder; namespace bufferization { diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h index 9dc2e96d2f22e5..94fc36a7387cf6 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h @@ -1,4 +1,4 @@ -//===- Bufferize.h - Bufferization utilities --------------------*- C++ -*-===// +//===- Bufferize.h - Bufferization Utilities --------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,14 +9,10 @@ // We use the term "bufferize" to mean conversion from tensor types to // memref types. // -// Generally speaking, for each op that operates on tensor types, a conversion -// pattern needs to be written. The infrastructure in this file assists in -// defining these conversion patterns in a composable way. -// -// Bufferization conversion patterns should generally use the ordinary -// conversion pattern classes (e.g. OpConversionPattern). A TypeConverter -// (accessible with getTypeConverter()) is available if needed for converting -// types. +// Generally speaking, for each op that operates on tensor types, the +// `BufferizableOpInterface` needs to be implemented. 
This file contains the +// bufferization driver that is responsible for bufferizing the ops in the right +// order, etc. // //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h index 22a7e0c402ba61..ae2ca6eeccbcb2 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h @@ -10,7 +10,6 @@ #define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ONESHOTANALYSIS_H #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/IR/BuiltinOps.h" #include "llvm/ADT/EquivalenceClasses.h" namespace mlir { From ebc77f012fb81da9b707446bf1f5a8651f59739e Mon Sep 17 00:00:00 2001 From: River Riddle Date: Fri, 20 May 2022 18:20:13 -0700 Subject: [PATCH 544/908] [mlir:LSP] Link the test dialect into mlir-lsp-server This allows for more easily interacting with lit files that utilize the test dialect. 
--- mlir/tools/mlir-lsp-server/CMakeLists.txt | 1 + mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/mlir/tools/mlir-lsp-server/CMakeLists.txt b/mlir/tools/mlir-lsp-server/CMakeLists.txt index 204c7aeac2924c..14aa124676158c 100644 --- a/mlir/tools/mlir-lsp-server/CMakeLists.txt +++ b/mlir/tools/mlir-lsp-server/CMakeLists.txt @@ -21,6 +21,7 @@ if(MLIR_INCLUDE_TESTS) MLIRTestPass MLIRTestReducer MLIRTestRewrite + MLIRTestTransformDialect MLIRTestTransforms ) endif() diff --git a/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp b/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp index 05d6958e1342f6..ac62de0b4ab177 100644 --- a/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp +++ b/mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp @@ -13,8 +13,19 @@ using namespace mlir; +#ifdef MLIR_INCLUDE_TESTS +namespace test { +void registerTestDialect(DialectRegistry &); +void registerTestTransformDialectExtension(DialectRegistry &); +} // namespace test +#endif + int main(int argc, char **argv) { DialectRegistry registry; registerAllDialects(registry); +#ifdef MLIR_INCLUDE_TESTS + ::test::registerTestDialect(registry); + ::test::registerTestTransformDialectExtension(registry); +#endif return failed(MlirLspServerMain(argc, argv, registry)); } From b2cc40fd67f6a8d909a0b4b0e8fe0dd7c45562d7 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Tue, 24 May 2022 01:55:27 -0700 Subject: [PATCH 545/908] [mlir:Printer][NFC] Add utility methods for printing escaped/hex strings This simplifies quite a few cases where we manually duplicate the escaping logic. 
--- mlir/lib/IR/AsmPrinter.cpp | 65 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 62aab6dd9306d2..ed67c8eeb9b647 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -1433,6 +1433,13 @@ class AsmPrinter::Impl { void printDialectAttribute(Attribute attr); void printDialectType(Type type); + /// Print an escaped string, wrapped with "". + void printEscapedString(StringRef str); + + /// Print a hex string, wrapped with "". + void printHexString(StringRef str); + void printHexString(ArrayRef data); + /// This enum is used to represent the binding strength of the enclosing /// context that an AffineExprStorage is being printed in, so we can /// intelligently produce parens. @@ -1479,19 +1486,14 @@ void AsmPrinter::Impl::printLocationInternal(LocationAttr loc, bool pretty) { os << "unknown"; }) .Case([&](FileLineColLoc loc) { - if (pretty) { + if (pretty) os << loc.getFilename().getValue(); - } else { - os << "\""; - printEscapedString(loc.getFilename(), os); - os << "\""; - } + else + printEscapedString(loc.getFilename()); os << ':' << loc.getLine() << ':' << loc.getColumn(); }) .Case([&](NameLoc loc) { - os << '\"'; - printEscapedString(loc.getName(), os); - os << '\"'; + printEscapedString(loc.getName()); // Print the child if it isn't unknown. 
auto childLoc = loc.getChildLoc(); @@ -1800,9 +1802,7 @@ void AsmPrinter::Impl::printAttribute(Attribute attr, return; } else if (auto strAttr = attr.dyn_cast()) { - os << '"'; - printEscapedString(strAttr.getValue(), os); - os << '"'; + printEscapedString(strAttr.getValue()); } else if (auto arrayAttr = attr.dyn_cast()) { os << '['; @@ -1841,8 +1841,9 @@ void AsmPrinter::Impl::printAttribute(Attribute attr, if (printerFlags.shouldElideElementsAttr(opaqueAttr)) { printElidedElementsAttr(os); } else { - os << "opaque<" << opaqueAttr.getDialect() << ", \"0x" - << llvm::toHex(opaqueAttr.getValue()) << "\">"; + os << "opaque<" << opaqueAttr.getDialect() << ", "; + printHexString(opaqueAttr.getValue()); + os << ">"; } } else if (auto intOrFpEltAttr = attr.dyn_cast()) { @@ -1974,12 +1975,9 @@ void AsmPrinter::Impl::printDenseIntOrFPElementsAttr( MutableArrayRef convRawData(outDataVec); DenseIntOrFPElementsAttr::convertEndianOfArrayRefForBEmachine( rawData, convRawData, type); - os << '"' << "0x" - << llvm::toHex(StringRef(convRawData.data(), convRawData.size())) - << "\""; + printHexString(convRawData); } else { - os << '"' << "0x" - << llvm::toHex(StringRef(rawData.data(), rawData.size())) << "\""; + printHexString(rawData); } return; @@ -2030,11 +2028,7 @@ void AsmPrinter::Impl::printDenseIntOrFPElementsAttr( void AsmPrinter::Impl::printDenseStringElementsAttr( DenseStringElementsAttr attr) { ArrayRef data = attr.getRawStringData(); - auto printFn = [&](unsigned index) { - os << "\""; - printEscapedString(data[index], os); - os << "\""; - }; + auto printFn = [&](unsigned index) { printEscapedString(data[index]); }; printDenseElementsAttrImpl(attr.isSplat(), attr.getType(), os, printFn); } @@ -2240,6 +2234,19 @@ void AsmPrinter::Impl::printDialectType(Type type) { printDialectSymbol(os, "!", dialect.getNamespace(), typeName); } +void AsmPrinter::Impl::printEscapedString(StringRef str) { + os << "\""; + llvm::printEscapedString(str, os); + os << "\""; +} + +void 
AsmPrinter::Impl::printHexString(StringRef str) { + os << "\"0x" << llvm::toHex(str) << "\""; +} +void AsmPrinter::Impl::printHexString(ArrayRef data) { + printHexString(StringRef(data.data(), data.size())); +} + //===--------------------------------------------------------------------===// // AsmPrinter //===--------------------------------------------------------------------===// @@ -2722,7 +2729,8 @@ void OperationPrinter::printOperation(Operation *op) { // ambiguities. if (name.count('.') == 1) name.consume_front((defaultDialectStack.back() + ".").str()); - printEscapedString(name, os); + os << name; + // Print the rest of the op now. opPrinter(op, *this); return; @@ -2809,11 +2817,8 @@ void OperationPrinter::printUserIDs(Operation *user, bool prefixComma) { } void OperationPrinter::printGenericOp(Operation *op, bool printOpName) { - if (printOpName) { - os << '"'; - printEscapedString(op->getName().getStringRef(), os); - os << '"'; - } + if (printOpName) + printEscapedString(op->getName().getStringRef()); os << '('; interleaveComma(op->getOperands(), [&](Value value) { printValueID(value); }); os << ')'; From 6f3f8b669b61898727cdc9799c227ad549a5a4ef Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Thu, 26 May 2022 12:17:16 +0700 Subject: [PATCH 546/908] [GuardWidening] Update test with update_test_checks.py --- .../GuardWidening/range-check-merging.ll | 169 ++++++++++++++---- 1 file changed, 136 insertions(+), 33 deletions(-) diff --git a/llvm/test/Transforms/GuardWidening/range-check-merging.ll b/llvm/test/Transforms/GuardWidening/range-check-merging.ll index 6440dadce1e86a..a6756ed38c2c22 100644 --- a/llvm/test/Transforms/GuardWidening/range-check-merging.ll +++ b/llvm/test/Transforms/GuardWidening/range-check-merging.ll @@ -1,13 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -guard-widening < %s | FileCheck %s declare void @llvm.experimental.guard(i1,...) 
define void @f_0(i32 %x, i32* %length_buf) { ; CHECK-LABEL: @f_0( -; CHECK-NOT: @llvm.experimental.guard -; CHECK: %wide.chk2 = and i1 %chk3, %chk0 -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = add i32 [[X]], 2 +; CHECK-NEXT: [[CHK2:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2]], [[CHK0]] +; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X]], 3 +; CHECK-NEXT: [[CHK3:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3]], [[CHK0]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: ret void +; entry: %length = load i32, i32* %length_buf, !range !0 %chk0 = icmp ult i32 %x, %length @@ -29,10 +41,21 @@ entry: define void @f_1(i32 %x, i32* %length_buf) { ; CHECK-LABEL: @f_1( -; CHECK-NOT: llvm.experimental.guard -; CHECK: %wide.chk2 = and i1 %chk3, %chk0 -; CHECK: call void (i1, ...) 
@llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = add i32 [[X_INC1]], 2 +; CHECK-NEXT: [[CHK2:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2]], [[CHK0]] +; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X_INC2]], 3 +; CHECK-NEXT: [[CHK3:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3]], [[CHK0]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: ret void +; entry: %length = load i32, i32* %length_buf, !range !0 %chk0 = icmp ult i32 %x, %length @@ -54,10 +77,22 @@ entry: define void @f_2(i32 %a, i32* %length_buf) { ; CHECK-LABEL: @f_2( -; CHECK-NOT: llvm.experimental.guard -; CHECK: %wide.chk2 = and i1 %chk3, %chk0 -; CHECK: call void (i1, ...) 
@llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = and i32 [[A:%.*]], -256 +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = or i32 [[X]], 1 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = or i32 [[X]], 2 +; CHECK-NEXT: [[CHK2:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2]], [[CHK0]] +; CHECK-NEXT: [[X_INC3:%.*]] = or i32 [[X]], 3 +; CHECK-NEXT: [[CHK3:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3]], [[CHK0]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: ret void +; entry: %x = and i32 %a, 4294967040 ;; 4294967040 == 0xffffff00 %length = load i32, i32* %length_buf, !range !0 @@ -80,10 +115,22 @@ entry: define void @f_3(i32 %a, i32* %length_buf) { ; CHECK-LABEL: @f_3( -; CHECK-NOT: llvm.experimental.guard -; CHECK: %wide.chk2 = and i1 %chk3, %chk0 -; CHECK: call void (i1, ...) 
@llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = and i32 [[A:%.*]], -256 +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = or i32 [[X_INC1]], 2 +; CHECK-NEXT: [[CHK2:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2]], [[CHK0]] +; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X_INC2]], 3 +; CHECK-NEXT: [[CHK3:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3]], [[CHK0]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: ret void +; entry: %x = and i32 %a, 4294967040 ;; 4294967040 == 0xffffff00 %length = load i32, i32* %length_buf, !range !0 @@ -106,12 +153,23 @@ entry: define void @f_4(i32 %x, i32* %length_buf) { ; CHECK-LABEL: @f_4( -; CHECK-NOT: llvm.experimental.guard +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], -1024 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = add i32 [[X]], 2 +; CHECK-NEXT: [[CHK2:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2]], [[CHK1]] +; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X]], 3 +; CHECK-NEXT: [[CHK3:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3]], [[CHK1]] +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: ret void +; ; Note: we NOT guarding on "and i1 %chk3, %chk0", that would be incorrect. -; CHECK: %wide.chk2 = and i1 %chk3, %chk1 -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ] -; CHECK: ret void entry: %length = load i32, i32* %length_buf, !range !0 %chk0 = icmp ult i32 %x, %length @@ -133,10 +191,21 @@ entry: define void @f_5(i32 %x, i32* %length_buf) { ; CHECK-LABEL: @f_5( -; CHECK-NOT: llvm.experimental.guard -; CHECK: %wide.chk2 = and i1 %chk1, %chk2 -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = add i32 [[X_INC1]], -200 +; CHECK-NEXT: [[CHK2:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK1]], [[CHK2]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK1]], [[CHK2]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X_INC2]], 3 +; CHECK-NEXT: [[CHK3:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH]] +; CHECK-NEXT: ret void +; entry: %length = load i32, i32* %length_buf, !range !0 %chk0 = icmp ult i32 %x, %length @@ -170,11 +239,21 @@ entry: ; define void @f_6(i32 %x, i32* %length_buf) { ; CHECK-LABEL: @f_6( -; CHECK-NOT: llvm.experimental.guard -; CHECK: %wide.chk = and i1 %chk0, %chk1 -; CHECK: %wide.chk1 = and i1 %wide.chk, %chk2 -; CHECK: %wide.chk2 = and i1 %wide.chk1, %chk3 -; CHECK: call void (i1, ...) 
@llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], -2147483647 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = add i32 [[X]], 2 +; CHECK-NEXT: [[CHK2:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[WIDE_CHK]], [[CHK2]] +; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X]], 3 +; CHECK-NEXT: [[CHK3:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[WIDE_CHK1]], [[CHK3]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: ret void +; entry: %length = load i32, i32* %length_buf, !range !0 %chk0 = icmp ult i32 %x, %length @@ -197,11 +276,35 @@ entry: define void @f_7(i32 %x, i32* %length_buf) { ; CHECK-LABEL: @f_7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LENGTH_A:%.*]] = load volatile i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[LENGTH_B:%.*]] = load volatile i32, i32* [[LENGTH_BUF]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0_A:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH_A]] +; CHECK-NEXT: [[CHK0_B:%.*]] = icmp ult i32 [[X]], [[LENGTH_B]] +; CHECK-NEXT: [[CHK0:%.*]] = and i1 [[CHK0_A]], [[CHK0_B]] +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: [[CHK1_A:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH_A]] +; CHECK-NEXT: [[CHK1_B:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH_B]] +; CHECK-NEXT: [[CHK1:%.*]] = and i1 [[CHK1_A]], [[CHK1_B]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: [[X_INC2:%.*]] = add i32 [[X]], 2 +; CHECK-NEXT: [[CHK2_A:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH_A]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CHK2_A]], [[CHK0_A]] +; 
CHECK-NEXT: [[TMP1:%.*]] = and i1 [[CHK0_B]], [[TMP0]] +; CHECK-NEXT: [[CHK2_B:%.*]] = icmp ult i32 [[X_INC2]], [[LENGTH_B]] +; CHECK-NEXT: [[WIDE_CHK1:%.*]] = and i1 [[CHK2_B]], [[TMP1]] +; CHECK-NEXT: [[X_INC3:%.*]] = add i32 [[X]], 3 +; CHECK-NEXT: [[CHK3_B:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH_B]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CHK3_B]], [[CHK0_B]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[CHK0_A]], [[TMP2]] +; CHECK-NEXT: [[CHK3_A:%.*]] = icmp ult i32 [[X_INC3]], [[LENGTH_A]] +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[CHK3_A]], [[TMP3]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: [[CHK2:%.*]] = and i1 [[CHK2_A]], [[CHK2_B]] +; CHECK-NEXT: [[CHK3:%.*]] = and i1 [[CHK3_A]], [[CHK3_B]] +; CHECK-NEXT: ret void +; -; CHECK: [[COND_0:%[^ ]+]] = and i1 %chk3.b, %chk0.b -; CHECK: [[COND_1:%[^ ]+]] = and i1 %chk0.a, [[COND_0]] -; CHECK: [[COND_2:%[^ ]+]] = and i1 %chk3.a, [[COND_1]] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[COND_2]]) [ "deopt"() ] entry: %length_a = load volatile i32, i32* %length_buf, !range !0 From 0a838ad517073e0c866a91661a0c2eb0ad0f77a8 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Wed, 25 May 2022 13:37:06 +0700 Subject: [PATCH 547/908] [GuardWidening] Add test showing incorrect behavior with nuw/nsw flags when we move instruction over the guard we must clear nuw/nsw flags but we do not. 
--- .../GuardWidening/range-check-merging.ll | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/test/Transforms/GuardWidening/range-check-merging.ll b/llvm/test/Transforms/GuardWidening/range-check-merging.ll index a6756ed38c2c22..b4ab9b32ff48be 100644 --- a/llvm/test/Transforms/GuardWidening/range-check-merging.ll +++ b/llvm/test/Transforms/GuardWidening/range-check-merging.ll @@ -334,5 +334,29 @@ entry: ret void } +define void @f_8(i32 %x, i32* %length_buf) { +; Check that we clean nuw nsw flags +; CHECK-LABEL: @f_8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]] +; CHECK-NEXT: [[X_INC1:%.*]] = add nuw nsw i32 [[X]], 1 +; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] +; CHECK-NEXT: ret void +; +entry: + %length = load i32, i32* %length_buf, !range !0 + %chk0 = icmp ult i32 %x, %length + call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ] + + %x.inc1 = add nuw nsw i32 %x, 1 + %chk1 = icmp ult i32 %x.inc1, %length + call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ] + ret void +} + + !0 = !{i32 0, i32 2147483648} From fb67d683db46dfd88da09d99bd5a318b7cc81513 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Wed, 25 May 2022 16:29:51 -0400 Subject: [PATCH 548/908] [iwyu] Handle regressions in libLLVM header include Running iwyu-diff on LLVM codebase since 7030654296a0416bd9402a0278 detected a few regressions, fixing them. 
Differential Revision: https://reviews.llvm.org/D126417 --- llvm/include/llvm/FuzzMutate/FuzzerCLI.h | 1 - llvm/lib/Analysis/ModelUnderTrainingRunner.cpp | 1 - llvm/lib/CodeGen/GlobalISel/Utils.cpp | 1 - llvm/lib/CodeGen/TypePromotion.cpp | 1 - llvm/lib/Support/BinaryStreamWriter.cpp | 1 - llvm/lib/TableGen/Record.cpp | 1 - llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 1 - llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 2 +- llvm/lib/Transforms/Coroutines/CoroCleanup.cpp | 2 +- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 1 - llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp | 1 - 11 files changed, 2 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/FuzzMutate/FuzzerCLI.h b/llvm/include/llvm/FuzzMutate/FuzzerCLI.h index 7a4b1008efbe72..db0168d3e67583 100644 --- a/llvm/include/llvm/FuzzMutate/FuzzerCLI.h +++ b/llvm/include/llvm/FuzzMutate/FuzzerCLI.h @@ -16,7 +16,6 @@ #include "llvm/Support/DataTypes.h" #include -#include namespace llvm { diff --git a/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp index d3cbfeda3ca1d3..dc149f32627171 100644 --- a/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp +++ b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/TensorSpec.h" #include "llvm/Config/config.h" #if defined(LLVM_HAVE_TF_API) diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 425a155d262d29..7781761bc131d5 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include 
"llvm/CodeGen/MachineOptimizationRemarkEmitter.h" diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index c72088e853fe77..166a3c413f6adf 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -31,7 +31,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp index 2a2dd2e9ef084a..dc4ea200c7bea1 100644 --- a/llvm/lib/Support/BinaryStreamWriter.cpp +++ b/llvm/lib/Support/BinaryStreamWriter.cpp @@ -8,7 +8,6 @@ #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/BinaryStreamError.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/LEB128.h" diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 787ef801c9dce1..ef012332cd155f 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index a57656ffb38bbb..e4b547e17f64c5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -26,7 +26,6 @@ #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h 
b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 6c0763804cf473..10cbdaca477f33 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -15,7 +15,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" namespace llvm { diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index 56ba250ee5ac26..a18f7cd2240223 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -11,7 +11,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" +#include "llvm/IR/Function.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 13b4ccb72a9e69..72869f7e013c26 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -28,7 +28,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/OptimizedStructLayout.h" diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index 2cca995b005863..2f158e05df35f3 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -38,7 +38,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" From c2eccc67ce07e9cb374eb0ecdb3038fcb8be08cd Mon Sep 17 00:00:00 2001 From: Serguei 
Katkov Date: Wed, 25 May 2022 13:37:06 +0700 Subject: [PATCH 549/908] [GuardWidening] Remove nuw/nsw flags for hoisted instructions When we hoist instructions over a guard we must clear the nuw/nsw flags, because these flags might have been inferred from that guard and therefore hold only after the guard. An example of the bug caused by the current behavior: L is known to be in a range, say [0, 100) c1 = x u< L guard (c1) x1 = add x, 1 c2 = x1 u< L guard(c2) Based on guard(c1) we can say that x1 = add nuw nsw x, 1. After guard widening we get c1 = x u< L x1 = add nuw nsw x, 1 c2 = x1 u< L c = and c1, c2 guard(c) Now, based on the fact that x + 1 < L and x >= 0 (because x + 1 is nuw), we can prove that x + 1 u< L implies x u< L, so we can just remove c1: x1 = add nuw nsw x, 1 c2 = x1 u< L guard(c2) But that is not correct, because the value x == -1 will now pass the guard. Reviewed By: mkazantsev Subscribers: llvm-commits, nikic Differential Revision: https://reviews.llvm.org/D126354 --- llvm/lib/Transforms/Scalar/GuardWidening.cpp | 3 +++ llvm/test/Transforms/GuardWidening/range-check-merging.ll | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index 40c99e17ffea27..5032f3106d50cd 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -495,6 +495,9 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const { makeAvailableAt(Op, Loc); Inst->moveBefore(Loc); + // If we moved instruction before guard we must clean nuw, nsw flags. 
+ Inst->setHasNoUnsignedWrap(false); + Inst->setHasNoSignedWrap(false); } bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, diff --git a/llvm/test/Transforms/GuardWidening/range-check-merging.ll b/llvm/test/Transforms/GuardWidening/range-check-merging.ll index b4ab9b32ff48be..b2e229fd721107 100644 --- a/llvm/test/Transforms/GuardWidening/range-check-merging.ll +++ b/llvm/test/Transforms/GuardWidening/range-check-merging.ll @@ -340,7 +340,7 @@ define void @f_8(i32 %x, i32* %length_buf) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, i32* [[LENGTH_BUF:%.*]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[CHK0:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]] -; CHECK-NEXT: [[X_INC1:%.*]] = add nuw nsw i32 [[X]], 1 +; CHECK-NEXT: [[X_INC1:%.*]] = add i32 [[X]], 1 ; CHECK-NEXT: [[CHK1:%.*]] = icmp ult i32 [[X_INC1]], [[LENGTH]] ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[CHK0]], [[CHK1]] ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] From 1e9114984490b83d4665f12a11f84c83f50ca8f0 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Sun, 8 May 2022 22:34:43 -0700 Subject: [PATCH 550/908] Replace the custom linked list in LeaderTableEntry with TinyPtrVector. The purpose of the custom linked list was to optimize for the case of a single-element list. It turns out that TinyPtrVector handles the same basic scenario even better, reducing the size of LeaderTableEntry by 33%, and requiring only log2(N) allocations as the size of the list grows. The only downside is that we have to store the Value's and BasicBlock's in separate vectors, which is slightly awkward in a few cases. Fortunately that ends up being entirely encapsulated inside helper functions. 
Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D125205 --- llvm/include/llvm/Transforms/Scalar/GVN.h | 56 +++++++++-------------- llvm/lib/Transforms/Scalar/GVN.cpp | 48 ++++++++----------- 2 files changed, 40 insertions(+), 64 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index 16ab1a49016299..5dc5c4b3feb89a 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -19,6 +19,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" @@ -232,12 +233,10 @@ class GVNPass : public PassInfoMixin { /// A mapping from value numbers to lists of Value*'s that /// have that value number. Use findLeader to query it. struct LeaderTableEntry { - Value *Val; - const BasicBlock *BB; - LeaderTableEntry *Next; + TinyPtrVector Val; + TinyPtrVector BB; }; DenseMap LeaderTable; - BumpPtrAllocator TableAllocator; // Block-local map of equivalent values to their leader, does not // propagate to any successors. Entries added mid-block are applied @@ -266,44 +265,31 @@ class GVNPass : public PassInfoMixin { /// Push a new Value to the LeaderTable onto the list for its value number. void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; - if (!Curr.Val) { - Curr.Val = V; - Curr.BB = BB; - return; + if (Curr.Val.size() == 0) { + Curr.Val.push_back(V); + Curr.BB.push_back(BB); + } else { + Curr.Val.insert(Curr.Val.begin()+1, V); + Curr.BB.insert(Curr.BB.begin()+1, BB); } - - LeaderTableEntry *Node = TableAllocator.Allocate(); - Node->Val = V; - Node->BB = BB; - Node->Next = Curr.Next; - Curr.Next = Node; } /// Scan the list of values corresponding to a given /// value number, and remove the given instruction if encountered. 
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { - LeaderTableEntry *Prev = nullptr; - LeaderTableEntry *Curr = &LeaderTable[N]; - - while (Curr && (Curr->Val != I || Curr->BB != BB)) { - Prev = Curr; - Curr = Curr->Next; - } - - if (!Curr) - return; - - if (Prev) { - Prev->Next = Curr->Next; - } else { - if (!Curr->Next) { - Curr->Val = nullptr; - Curr->BB = nullptr; + LeaderTableEntry &entry = LeaderTable[N]; + assert(entry.BB.size() == entry.Val.size()); + auto VI = entry.Val.begin(); + auto VE = entry.Val.end(); + auto BI = entry.BB.begin(); + while (VI != VE) { + if (*VI == I && *BI == BB) { + VI = entry.Val.erase(VI); + BI = entry.BB.erase(BI); + VE = entry.Val.end(); } else { - LeaderTableEntry *Next = Curr->Next; - Curr->Val = Next->Val; - Curr->BB = Next->BB; - Curr->Next = Next->Next; + ++VI; + ++BI; } } } diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 34c1483a37e04d..184374d754637f 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2105,10 +2105,9 @@ GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) { /// defined in \p BB. bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, GVNPass &Gvn) { - LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; - while (Vals && Vals->BB == BB) - Vals = Vals->Next; - return !Vals; + const LeaderTableEntry &Entry = Gvn.LeaderTable[Num]; + return all_of(Entry.BB, + [BB](const BasicBlock *EntryBB) { return EntryBB == BB; }); } /// Wrap phiTranslateImpl to provide caching functionality. 
@@ -2130,12 +2129,11 @@ bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *PhiBlock, GVNPass &Gvn) { CallInst *Call = nullptr; - LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; - while (Vals) { - Call = dyn_cast(Vals->Val); + const LeaderTableEntry &Entry = Gvn.LeaderTable[Num]; + for (Value *Val : Entry.Val) { + Call = dyn_cast(Val); if (Call && Call->getParent() == PhiBlock) break; - Vals = Vals->Next; } if (AA->doesNotAccessMemory(Call)) @@ -2228,23 +2226,18 @@ void GVNPass::ValueTable::eraseTranslateCacheEntry( // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) { - LeaderTableEntry Vals = LeaderTable[num]; - if (!Vals.Val) return nullptr; + const LeaderTableEntry &Entry = LeaderTable[num]; + if (Entry.Val.empty()) + return nullptr; Value *Val = nullptr; - if (DT->dominates(Vals.BB, BB)) { - Val = Vals.Val; - if (isa(Val)) return Val; - } - - LeaderTableEntry* Next = Vals.Next; - while (Next) { - if (DT->dominates(Next->BB, BB)) { - if (isa(Next->Val)) return Next->Val; - if (!Val) Val = Next->Val; + for (size_t i = 0, e = Entry.Val.size(); i != e; ++i) { + if (DT->dominates(Entry.BB[i], BB)) { + if (isa(Entry.Val[i])) + return Entry.Val[i]; + if (!Val) + Val = Entry.Val[i]; } - - Next = Next->Next; } return Val; @@ -3033,7 +3026,6 @@ void GVNPass::cleanupGlobalSets() { VN.clear(); LeaderTable.clear(); BlockRPONumber.clear(); - TableAllocator.Reset(); ICF->clear(); InvalidBlockRPONumbers = true; } @@ -3046,12 +3038,10 @@ void GVNPass::verifyRemoved(const Instruction *Inst) const { // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. 
for (const auto &I : LeaderTable) { - const LeaderTableEntry *Node = &I.second; - assert(Node->Val != Inst && "Inst still in value numbering scope!"); - - while (Node->Next) { - Node = Node->Next; - assert(Node->Val != Inst && "Inst still in value numbering scope!"); + const LeaderTableEntry &Entry = I.second; + for (Value *Val : Entry.Val) { + (void)Val; + assert(Val != Inst && "Inst still in value numbering scope!"); } } } From a1ec3c5a8885d06f40859af6a310dbf32d479cdc Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Thu, 26 May 2022 00:00:16 -0700 Subject: [PATCH 551/908] [scudo] Link against libatomic on all MIPS targets Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D126418 --- compiler-rt/lib/scudo/standalone/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt index ae5c354768c89f..e293db18d30edb 100644 --- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt @@ -140,6 +140,10 @@ append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread SCUDO_LINK_FLAGS) append_list_if(FUCHSIA zircon SCUDO_LINK_LIBS) +if(COMPILER_RT_DEFAULT_TARGET_ARCH MATCHES "mips|mips64|mipsel|mips64el") + list(APPEND SCUDO_LINK_LIBS atomic) +endif() + if(COMPILER_RT_HAS_SCUDO_STANDALONE) add_compiler_rt_object_libraries(RTScudoStandalone ARCHS ${SCUDO_STANDALONE_SUPPORTED_ARCH} From 9ee15bba4744fb3b671428eec3cbec6a424dde07 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 26 May 2022 00:14:08 -0700 Subject: [PATCH 552/908] [MC] Lower case the first letter of EmitCOFF* EmitWin* EmitCV*. 
NFC --- llvm/include/llvm/MC/MCStreamer.h | 54 +++---- llvm/include/llvm/MC/MCWinCOFFStreamer.h | 16 +-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 6 +- .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 32 ++--- llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 6 +- llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 24 ++-- llvm/lib/MC/MCAsmStreamer.cpp | 132 +++++++++--------- llvm/lib/MC/MCCodeView.cpp | 4 +- llvm/lib/MC/MCNullStreamer.cpp | 4 +- llvm/lib/MC/MCParser/AsmParser.cpp | 8 +- llvm/lib/MC/MCParser/COFFAsmParser.cpp | 32 ++--- llvm/lib/MC/MCParser/COFFMasmParser.cpp | 8 +- llvm/lib/MC/MCParser/MasmParser.cpp | 8 +- llvm/lib/MC/MCStreamer.cpp | 58 ++++---- llvm/lib/MC/MCWinCOFFStreamer.cpp | 16 +-- llvm/lib/Object/RecordStreamer.h | 4 +- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 8 +- .../MCTargetDesc/AArch64WinCOFFStreamer.cpp | 16 +-- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 4 +- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 10 +- .../X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 20 +-- llvm/lib/Target/X86/X86AsmPrinter.cpp | 10 +- llvm/lib/Target/X86/X86MCInstLower.cpp | 14 +- llvm/tools/llvm-mca/CodeRegionGenerator.cpp | 4 +- .../unittests/CodeGen/AsmPrinterDwarfTest.cpp | 2 +- llvm/unittests/CodeGen/TestAsmPrinter.h | 2 +- 27 files changed, 251 insertions(+), 253 deletions(-) diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 87b5ac2dd72dad..69cbc62759bf74 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -249,9 +249,9 @@ class MCStreamer { return CurrentWinFrameInfo; } - virtual void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame); + virtual void emitWindowsUnwindTables(WinEH::FrameInfo *Frame); - virtual void EmitWindowsUnwindTables(); + virtual void emitWindowsUnwindTables(); virtual void emitRawTextImpl(StringRef String); @@ -557,35 +557,35 @@ class MCStreamer { /// Emit the storage class of the symbol. 
/// /// \param StorageClass - The storage class the symbol should have. - virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void emitCOFFSymbolStorageClass(int StorageClass); /// Emit the type of the symbol. /// /// \param Type - A COFF type identifier (see COFF::SymbolType in X86COFF.h) - virtual void EmitCOFFSymbolType(int Type); + virtual void emitCOFFSymbolType(int Type); /// Marks the end of the symbol definition. virtual void EndCOFFSymbolDef(); - virtual void EmitCOFFSafeSEH(MCSymbol const *Symbol); + virtual void emitCOFFSafeSEH(MCSymbol const *Symbol); /// Emits the symbol table index of a Symbol into the current section. - virtual void EmitCOFFSymbolIndex(MCSymbol const *Symbol); + virtual void emitCOFFSymbolIndex(MCSymbol const *Symbol); /// Emits a COFF section index. /// /// \param Symbol - Symbol the section number relocation should point to. - virtual void EmitCOFFSectionIndex(MCSymbol const *Symbol); + virtual void emitCOFFSectionIndex(MCSymbol const *Symbol); /// Emits a COFF section relative relocation. /// /// \param Symbol - Symbol the section relative relocation should point to. - virtual void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset); + virtual void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset); /// Emits a COFF image relative relocation. /// /// \param Symbol - Symbol the image relative relocation should point to. - virtual void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset); + virtual void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset); /// Emits an lcomm directive with XCOFF csect information. /// @@ -926,16 +926,16 @@ class MCStreamer { /// Associate a filename with a specified logical file number, and also /// specify that file's checksum information. This implements the '.cv_file 4 /// "foo.c"' assembler directive. Returns true on success. 
- virtual bool EmitCVFileDirective(unsigned FileNo, StringRef Filename, + virtual bool emitCVFileDirective(unsigned FileNo, StringRef Filename, ArrayRef Checksum, unsigned ChecksumKind); /// Introduces a function id for use with .cv_loc. - virtual bool EmitCVFuncIdDirective(unsigned FunctionId); + virtual bool emitCVFuncIdDirective(unsigned FunctionId); /// Introduces an inline call site id for use with .cv_loc. Includes /// extra information for inline line table generation. - virtual bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, + virtual bool emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, unsigned IAFile, unsigned IALine, unsigned IACol, SMLoc Loc); @@ -991,7 +991,7 @@ class MCStreamer { virtual void emitCVFileChecksumOffsetDirective(unsigned FileNo) {} /// This implements the CodeView '.cv_fpo_data' assembler directive. - virtual void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc = {}) {} + virtual void emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc = {}) {} /// Emit the absolute difference between two symbols. /// @@ -1030,28 +1030,28 @@ class MCStreamer { virtual void emitCFIWindowSave(); virtual void emitCFINegateRAState(); - virtual void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc()); - virtual void EmitWinCFIEndProc(SMLoc Loc = SMLoc()); + virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc()); + virtual void emitWinCFIEndProc(SMLoc Loc = SMLoc()); /// This is used on platforms, such as Windows on ARM64, that require function /// or funclet sizes to be emitted in .xdata before the End marker is emitted /// for the frame. We cannot use the End marker, as it is not set at the /// point of emitting .xdata, in order to indicate that the frame is active. 
- virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, + virtual void emitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc()); + virtual void emitWinCFIStartChained(SMLoc Loc = SMLoc()); + virtual void emitWinCFIEndChained(SMLoc Loc = SMLoc()); + virtual void emitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc()); + virtual void emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveReg(MCRegister Register, unsigned Offset, + virtual void emitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc()); + virtual void emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, + virtual void emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc()); - virtual void EmitWinCFIEndProlog(SMLoc Loc = SMLoc()); - virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, + virtual void emitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc()); + virtual void emitWinCFIEndProlog(SMLoc Loc = SMLoc()); + virtual void emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc = SMLoc()); - virtual void EmitWinEHHandlerData(SMLoc Loc = SMLoc()); + virtual void emitWinEHHandlerData(SMLoc Loc = SMLoc()); virtual void emitCGProfileEntry(const MCSymbolRefExpr *From, const MCSymbolRefExpr *To, uint64_t Count); diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h index af1ed6faf753c3..386b59ba9fad4d 100644 --- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h +++ 
b/llvm/include/llvm/MC/MCWinCOFFStreamer.h @@ -46,14 +46,14 @@ class MCWinCOFFStreamer : public MCObjectStreamer { bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override; void BeginCOFFSymbolDef(MCSymbol const *Symbol) override; - void EmitCOFFSymbolStorageClass(int StorageClass) override; - void EmitCOFFSymbolType(int Type) override; + void emitCOFFSymbolStorageClass(int StorageClass) override; + void emitCOFFSymbolType(int Type) override; void EndCOFFSymbolDef() override; - void EmitCOFFSafeSEH(MCSymbol const *Symbol) override; - void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override; - void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; - void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; - void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; + void emitCOFFSafeSEH(MCSymbol const *Symbol) override; + void emitCOFFSymbolIndex(MCSymbol const *Symbol) override; + void emitCOFFSectionIndex(MCSymbol const *Symbol) override; + void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; + void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, @@ -64,7 +64,7 @@ class MCWinCOFFStreamer : public MCObjectStreamer { void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; void emitIdent(StringRef IdentString) override; - void EmitWinEHHandlerData(SMLoc Loc) override; + void emitWinEHHandlerData(SMLoc Loc) override; void emitCGProfileEntry(const MCSymbolRefExpr *From, const MCSymbolRefExpr *To, uint64_t Count) override; void finishImpl() override; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index eeab65a911471e..1921d9350f95a7 100644 --- 
a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1812,10 +1812,10 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction); if (TM.getTargetTriple().isOSBinFormatCOFF()) { OutStreamer->BeginCOFFSymbolDef(Name); - OutStreamer->EmitCOFFSymbolStorageClass( + OutStreamer->emitCOFFSymbolStorageClass( GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer->EndCOFFSymbolDef(); } @@ -2626,7 +2626,7 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { - OutStreamer->EmitCOFFSecRel32(Label, Offset); + OutStreamer->emitCOFFSecRel32(Label, Offset); if (Size > 4) OutStreamer->emitZeros(Size - 4); return; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 2d481a0edced8c..719fec06aa3310 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -158,7 +158,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, if (MAI->needsDwarfSectionOffsetDirective()) { assert(!isDwarf64() && "emitting DWARF64 is not implemented for COFF targets"); - OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); + OutStreamer->emitCOFFSecRel32(Label, /*Offset=*/0); return; } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 94166922cbac72..26f500987ffb17 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -224,7 +224,7 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) 
{ break; } } - bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, + bool Success = OS.emitCVFileDirective(NextId, FullPath, ChecksumAsBytes, static_cast(CSKind)); (void)Success; assert(Success && ".cv_file directive failed"); @@ -245,7 +245,7 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt, .SiteFuncId; Site->SiteFuncId = NextFuncId++; - OS.EmitCVInlineSiteIdDirective( + OS.emitCVInlineSiteIdDirective( Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()), InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc()); Site->Inlinee = Inlinee; @@ -1073,9 +1073,9 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, OS.AddComment("PtrNext"); OS.emitInt32(0); OS.AddComment("Thunk section relative address"); - OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.emitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Thunk section index"); - OS.EmitCOFFSectionIndex(Fn); + OS.emitCOFFSectionIndex(Fn); OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2); OS.AddComment("Ordinal"); @@ -1125,7 +1125,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // Emit FPO data, but only on 32-bit x86. No other platforms use it. if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86) - OS.EmitCVFPOData(Fn); + OS.emitCVFPOData(Fn); // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); @@ -1153,9 +1153,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Function type index"); OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex()); OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.emitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Function section index"); - OS.EmitCOFFSectionIndex(Fn); + OS.emitCOFFSectionIndex(Fn); OS.AddComment("Flags"); OS.emitInt8(0); // Emit the function display name as a null-terminated string. 
@@ -1200,9 +1200,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, MCSymbol *Label = Annot.first; MDTuple *Strs = cast(Annot.second); MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION); - OS.EmitCOFFSecRel32(Label, /*Offset=*/0); + OS.emitCOFFSecRel32(Label, /*Offset=*/0); // FIXME: Make sure we don't overflow the max record size. - OS.EmitCOFFSectionIndex(Label); + OS.emitCOFFSectionIndex(Label); OS.emitInt16(Strs->getNumOperands()); for (Metadata *MD : Strs->operands()) { // MDStrings are null terminated, so we can do EmitBytes and get the @@ -1220,9 +1220,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, const DIType *DITy = std::get<2>(HeapAllocSite); MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); OS.AddComment("Call site offset"); - OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0); + OS.emitCOFFSecRel32(BeginLabel, /*Offset=*/0); OS.AddComment("Call site section index"); - OS.EmitCOFFSectionIndex(BeginLabel); + OS.emitCOFFSectionIndex(BeginLabel); OS.AddComment("Call instruction length"); OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); OS.AddComment("Type index"); @@ -1512,7 +1512,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { // FIXME: Set GuardCfg when it is implemented. CurFn->FrameProcOpts = FPO; - OS.EmitCVFuncIdDirective(CurFn->FuncId); + OS.emitCVFuncIdDirective(CurFn->FuncId); // Find the end of the function prolog. First known non-DBG_VALUE and // non-frame setup location marks the beginning of the function body. 
@@ -2884,9 +2884,9 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset + OS.emitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset OS.AddComment("Function section index"); - OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol + OS.emitCOFFSectionIndex(FI.Begin); // Func Symbol OS.AddComment("Lexical block name"); emitNullTerminatedSymbolName(OS, Block.Name); // Name endSymbolRecord(RecordEnd); @@ -3375,10 +3375,10 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end()) // Use the offset seen while collecting info on globals. Offset = CVGlobalVariableOffsets[DIGV]; - OS.EmitCOFFSecRel32(GVSym, Offset); + OS.emitCOFFSecRel32(GVSym, Offset); OS.AddComment("Segment"); - OS.EmitCOFFSectionIndex(GVSym); + OS.emitCOFFSectionIndex(GVSym); OS.AddComment("Name"); const unsigned LengthOfDataRecord = 12; emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord); diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 20375f049f4208..6204a79c82c6e6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -109,17 +109,17 @@ void WinCFGuard::endModule() { auto &OS = *Asm->OutStreamer; OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); for (const MCSymbol *S : GFIDsEntries) - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); // Emit the symbol index of each GIATs entry to form the .giats section. 
OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); for (const MCSymbol *S : GIATsEntries) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } // Emit the symbol index of each longjmp target to form the .gljmp section. OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); for (const MCSymbol *S : LongjmpTargets) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 5e500cf5fccc8d..e00cd83cbead34 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -49,13 +49,13 @@ void WinException::endModule() { const Module *M = MMI->getModule(); for (const Function &F : *M) if (F.hasFnAttribute("safeseh")) - OS.EmitCOFFSafeSEH(Asm->getSymbol(&F)); + OS.emitCOFFSafeSEH(Asm->getSymbol(&F)); if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) { // Emit the symbol index of each ehcont target. OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection()); for (const MCSymbol *S : EHContTargets) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } } } @@ -116,7 +116,7 @@ void WinException::beginFunction(const MachineFunction *MF) { void WinException::markFunctionEnd() { if (isAArch64 && CurrentFuncletEntry && (shouldEmitMoves || shouldEmitPersonality)) - Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd(); } /// endFunction - Gather and emit post-function exception information. @@ -206,8 +206,8 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // Describe our funclet symbol as a function with internal linkage. 
Asm->OutStreamer->BeginCOFFSymbolDef(Sym); - Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); - Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + Asm->OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + Asm->OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); Asm->OutStreamer->EndCOFFSymbolDef(); @@ -223,7 +223,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // Mark 'Sym' as starting our funclet. if (shouldEmitMoves || shouldEmitPersonality) { CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly(); - Asm->OutStreamer->EmitWinCFIStartProc(Sym); + Asm->OutStreamer->emitWinCFIStartProc(Sym); } if (shouldEmitPersonality) { @@ -242,7 +242,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // inliner doesn't allow inlining them, this isn't a major problem in // practice. if (!CurrentFuncletEntry->isCleanupFuncletEntry()) - Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + Asm->OutStreamer->emitWinEHHandler(PersHandlerSym, true, true); } } @@ -250,7 +250,7 @@ void WinException::endFunclet() { if (isAArch64 && CurrentFuncletEntry && (shouldEmitMoves || shouldEmitPersonality)) { Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); - Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd(); } endFuncletImpl(); } @@ -270,7 +270,7 @@ void WinException::endFuncletImpl() { if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && !CurrentFuncletEntry->isCleanupFuncletEntry()) { // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. 
@@ -281,14 +281,14 @@ void WinException::endFuncletImpl() { } else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() && !CurrentFuncletEntry->isEHFuncletEntry()) { // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // If this is the parent function in Win64 SEH, emit the LSDA immediately // following .seh_handlerdata. emitCSpecificHandlerTable(MF); } else if (shouldEmitPersonality || shouldEmitLSDA) { // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // In these cases, no further info is written to the .xdata section // right here, but is written by e.g. emitExceptionTable in endFunction() // above. @@ -302,7 +302,7 @@ void WinException::endFuncletImpl() { // writing to .xdata, and emit an .seh_endproc directive to mark the end of // the function. Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); - Asm->OutStreamer->EmitWinCFIEndProc(); + Asm->OutStreamer->emitWinCFIEndProc(); } // Let's make sure we don't try to end the same funclet twice. 
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 0f320bd07a751b..9042e2972aab77 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -180,14 +180,14 @@ class MCAsmStreamer final : public MCStreamer { void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override; void BeginCOFFSymbolDef(const MCSymbol *Symbol) override; - void EmitCOFFSymbolStorageClass(int StorageClass) override; - void EmitCOFFSymbolType(int Type) override; + void emitCOFFSymbolStorageClass(int StorageClass) override; + void emitCOFFSymbolType(int Type) override; void EndCOFFSymbolDef() override; - void EmitCOFFSafeSEH(MCSymbol const *Symbol) override; - void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override; - void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; - void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; - void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; + void emitCOFFSafeSEH(MCSymbol const *Symbol) override; + void emitCOFFSymbolIndex(MCSymbol const *Symbol) override; + void emitCOFFSectionIndex(MCSymbol const *Symbol) override; + void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; + void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; void emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size, MCSymbol *CsectSym, unsigned ByteAlign) override; @@ -277,11 +277,11 @@ class MCAsmStreamer final : public MCStreamer { StringRef FileName) override; MCSymbol *getDwarfLineTableSymbol(unsigned CUID) override; - bool EmitCVFileDirective(unsigned FileNo, StringRef Filename, + bool emitCVFileDirective(unsigned FileNo, StringRef Filename, ArrayRef Checksum, unsigned ChecksumKind) override; - bool EmitCVFuncIdDirective(unsigned FuncId) override; - bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, + bool emitCVFuncIdDirective(unsigned FuncId) override; + bool emitCVInlineSiteIdDirective(unsigned FunctionId, 
unsigned IAFunc, unsigned IAFile, unsigned IALine, unsigned IACol, SMLoc Loc) override; void emitCVLocDirective(unsigned FunctionId, unsigned FileNo, unsigned Line, @@ -317,7 +317,7 @@ class MCAsmStreamer final : public MCStreamer { void emitCVStringTableDirective() override; void emitCVFileChecksumsDirective() override; void emitCVFileChecksumOffsetDirective(unsigned FileNo) override; - void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override; + void emitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override; void emitIdent(StringRef IdentString) override; void emitCFIBKeyFrame() override; @@ -345,25 +345,25 @@ class MCAsmStreamer final : public MCStreamer { void emitCFINegateRAState() override; void emitCFIReturnColumn(int64_t Register) override; - void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override; - void EmitWinCFIEndProc(SMLoc Loc) override; - void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override; - void EmitWinCFIStartChained(SMLoc Loc) override; - void EmitWinCFIEndChained(SMLoc Loc) override; - void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) override; - void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, + void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override; + void emitWinCFIEndProc(SMLoc Loc) override; + void emitWinCFIFuncletOrFuncEnd(SMLoc Loc) override; + void emitWinCFIStartChained(SMLoc Loc) override; + void emitWinCFIEndChained(SMLoc Loc) override; + void emitWinCFIPushReg(MCRegister Register, SMLoc Loc) override; + void emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) override; - void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) override; - void EmitWinCFISaveReg(MCRegister Register, unsigned Offset, + void emitWinCFIAllocStack(unsigned Size, SMLoc Loc) override; + void emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) override; - void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, + void emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc 
Loc) override; - void EmitWinCFIPushFrame(bool Code, SMLoc Loc) override; - void EmitWinCFIEndProlog(SMLoc Loc) override; + void emitWinCFIPushFrame(bool Code, SMLoc Loc) override; + void emitWinCFIEndProlog(SMLoc Loc) override; - void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, + void emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc) override; - void EmitWinEHHandlerData(SMLoc Loc) override; + void emitWinEHHandlerData(SMLoc Loc) override; void emitCGProfileEntry(const MCSymbolRefExpr *From, const MCSymbolRefExpr *To, uint64_t Count) override; @@ -797,12 +797,12 @@ void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { EmitEOL(); } -void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) { +void MCAsmStreamer::emitCOFFSymbolStorageClass(int StorageClass) { OS << "\t.scl\t" << StorageClass << ';'; EmitEOL(); } -void MCAsmStreamer::EmitCOFFSymbolType (int Type) { +void MCAsmStreamer::emitCOFFSymbolType(int Type) { OS << "\t.type\t" << Type << ';'; EmitEOL(); } @@ -812,25 +812,25 @@ void MCAsmStreamer::EndCOFFSymbolDef() { EmitEOL(); } -void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { +void MCAsmStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) { OS << "\t.safeseh\t"; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { +void MCAsmStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) { OS << "\t.symidx\t"; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { +void MCAsmStreamer::emitCOFFSectionIndex(MCSymbol const *Symbol) { OS << "\t.secidx\t"; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) { +void MCAsmStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) { OS << "\t.secrel32\t"; Symbol->print(OS, MAI); if (Offset != 0) @@ -838,7 +838,7 @@ void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, 
uint64_t Offset) { EmitEOL(); } -void MCAsmStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) { +void MCAsmStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) { OS << "\t.rva\t"; Symbol->print(OS, MAI); if (Offset > 0) @@ -1654,7 +1654,7 @@ MCSymbol *MCAsmStreamer::getDwarfLineTableSymbol(unsigned CUID) { return MCStreamer::getDwarfLineTableSymbol(0); } -bool MCAsmStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename, +bool MCAsmStreamer::emitCVFileDirective(unsigned FileNo, StringRef Filename, ArrayRef Checksum, unsigned ChecksumKind) { if (!getContext().getCVContext().addFile(*this, FileNo, Filename, Checksum, @@ -1677,19 +1677,19 @@ bool MCAsmStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename, return true; } -bool MCAsmStreamer::EmitCVFuncIdDirective(unsigned FuncId) { +bool MCAsmStreamer::emitCVFuncIdDirective(unsigned FuncId) { OS << "\t.cv_func_id " << FuncId << '\n'; - return MCStreamer::EmitCVFuncIdDirective(FuncId); + return MCStreamer::emitCVFuncIdDirective(FuncId); } -bool MCAsmStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId, +bool MCAsmStreamer::emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, unsigned IAFile, unsigned IALine, unsigned IACol, SMLoc Loc) { OS << "\t.cv_inline_site_id " << FunctionId << " within " << IAFunc << " inlined_at " << IAFile << ' ' << IALine << ' ' << IACol << '\n'; - return MCStreamer::EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, + return MCStreamer::emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, IALine, IACol, Loc); } @@ -1806,7 +1806,7 @@ void MCAsmStreamer::emitCVFileChecksumOffsetDirective(unsigned FileNo) { EmitEOL(); } -void MCAsmStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) { +void MCAsmStreamer::emitCVFPOData(const MCSymbol *ProcSym, SMLoc L) { OS << "\t.cv_fpo_data\t"; ProcSym->print(OS, MAI); EmitEOL(); @@ -2027,45 +2027,45 @@ void MCAsmStreamer::emitCFIBKeyFrame() { EmitEOL(); } -void 
MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { - MCStreamer::EmitWinCFIStartProc(Symbol, Loc); +void MCAsmStreamer::emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { + MCStreamer::emitWinCFIStartProc(Symbol, Loc); OS << ".seh_proc "; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProc(SMLoc Loc) { - MCStreamer::EmitWinCFIEndProc(Loc); +void MCAsmStreamer::emitWinCFIEndProc(SMLoc Loc) { + MCStreamer::emitWinCFIEndProc(Loc); OS << "\t.seh_endproc"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) { - MCStreamer::EmitWinCFIFuncletOrFuncEnd(Loc); +void MCAsmStreamer::emitWinCFIFuncletOrFuncEnd(SMLoc Loc) { + MCStreamer::emitWinCFIFuncletOrFuncEnd(Loc); OS << "\t.seh_endfunclet"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIStartChained(SMLoc Loc) { - MCStreamer::EmitWinCFIStartChained(Loc); +void MCAsmStreamer::emitWinCFIStartChained(SMLoc Loc) { + MCStreamer::emitWinCFIStartChained(Loc); OS << "\t.seh_startchained"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndChained(SMLoc Loc) { - MCStreamer::EmitWinCFIEndChained(Loc); +void MCAsmStreamer::emitWinCFIEndChained(SMLoc Loc) { + MCStreamer::emitWinCFIEndChained(Loc); OS << "\t.seh_endchained"; EmitEOL(); } -void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, +void MCAsmStreamer::emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc) { - MCStreamer::EmitWinEHHandler(Sym, Unwind, Except, Loc); + MCStreamer::emitWinEHHandler(Sym, Unwind, Except, Loc); OS << "\t.seh_handler "; Sym->print(OS, MAI); @@ -2076,8 +2076,8 @@ void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, EmitEOL(); } -void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { - MCStreamer::EmitWinEHHandlerData(Loc); +void MCAsmStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); // Switch sections. 
Don't call SwitchSection directly, because that will // cause the section switch to be visible in the emitted assembly. @@ -2098,17 +2098,17 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { - MCStreamer::EmitWinCFIPushReg(Register, Loc); +void MCAsmStreamer::emitWinCFIPushReg(MCRegister Register, SMLoc Loc) { + MCStreamer::emitWinCFIPushReg(Register, Loc); OS << "\t.seh_pushreg "; InstPrinter->printRegName(OS, Register); EmitEOL(); } -void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, +void MCAsmStreamer::emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { - MCStreamer::EmitWinCFISetFrame(Register, Offset, Loc); + MCStreamer::emitWinCFISetFrame(Register, Offset, Loc); OS << "\t.seh_setframe "; InstPrinter->printRegName(OS, Register); @@ -2116,16 +2116,16 @@ void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, EmitEOL(); } -void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { - MCStreamer::EmitWinCFIAllocStack(Size, Loc); +void MCAsmStreamer::emitWinCFIAllocStack(unsigned Size, SMLoc Loc) { + MCStreamer::emitWinCFIAllocStack(Size, Loc); OS << "\t.seh_stackalloc " << Size; EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, +void MCAsmStreamer::emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { - MCStreamer::EmitWinCFISaveReg(Register, Offset, Loc); + MCStreamer::emitWinCFISaveReg(Register, Offset, Loc); OS << "\t.seh_savereg "; InstPrinter->printRegName(OS, Register); @@ -2133,9 +2133,9 @@ void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, +void MCAsmStreamer::emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { - MCStreamer::EmitWinCFISaveXMM(Register, Offset, Loc); + MCStreamer::emitWinCFISaveXMM(Register, 
Offset, Loc); OS << "\t.seh_savexmm "; InstPrinter->printRegName(OS, Register); @@ -2143,8 +2143,8 @@ void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { - MCStreamer::EmitWinCFIPushFrame(Code, Loc); +void MCAsmStreamer::emitWinCFIPushFrame(bool Code, SMLoc Loc) { + MCStreamer::emitWinCFIPushFrame(Code, Loc); OS << "\t.seh_pushframe"; if (Code) @@ -2152,8 +2152,8 @@ void MCAsmStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProlog(SMLoc Loc) { - MCStreamer::EmitWinCFIEndProlog(Loc); +void MCAsmStreamer::emitWinCFIEndProlog(SMLoc Loc) { + MCStreamer::emitWinCFIEndProlog(Loc); OS << "\t.seh_endprologue"; EmitEOL(); diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp index d676f1094fbe9b..375d54696cb26c 100644 --- a/llvm/lib/MC/MCCodeView.cpp +++ b/llvm/lib/MC/MCCodeView.cpp @@ -335,8 +335,8 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, OS.emitInt32(uint32_t(DebugSubsectionKind::Lines)); OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4); OS.emitLabel(LineBegin); - OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0); - OS.EmitCOFFSectionIndex(FuncBegin); + OS.emitCOFFSecRel32(FuncBegin, /*Offset=*/0); + OS.emitCOFFSectionIndex(FuncBegin); // Actual line info. 
std::vector Locs = getFunctionLineEntries(FuncId); diff --git a/llvm/lib/MC/MCNullStreamer.cpp b/llvm/lib/MC/MCNullStreamer.cpp index 8de79ca1c77e6e..2af01081200aae 100644 --- a/llvm/lib/MC/MCNullStreamer.cpp +++ b/llvm/lib/MC/MCNullStreamer.cpp @@ -43,8 +43,8 @@ namespace { SMLoc Loc = SMLoc()) override {} void emitGPRel32Value(const MCExpr *Value) override {} void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} - void EmitCOFFSymbolStorageClass(int StorageClass) override {} - void EmitCOFFSymbolType(int Type) override {} + void emitCOFFSymbolStorageClass(int StorageClass) override {} + void emitCOFFSymbolType(int Type) override {} void EndCOFFSymbolDef() override {} void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, MCSymbolAttr Linkage, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 8869faef30dfee..7b98e434360ff9 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3760,7 +3760,7 @@ bool AsmParser::parseDirectiveCVFile() { ArrayRef ChecksumAsBytes(reinterpret_cast(CKMem), Checksum.size()); - if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes, + if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes, static_cast(ChecksumKind))) return Error(FileNumberLoc, "file number already allocated"); @@ -3801,7 +3801,7 @@ bool AsmParser::parseDirectiveCVFuncId() { "unexpected token in '.cv_func_id' directive")) return true; - if (!getStreamer().EmitCVFuncIdDirective(FunctionId)) + if (!getStreamer().emitCVFuncIdDirective(FunctionId)) return Error(FunctionIdLoc, "function id already allocated"); return false; @@ -3861,7 +3861,7 @@ bool AsmParser::parseDirectiveCVInlineSiteId() { "unexpected token in '.cv_inline_site_id' directive")) return true; - if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, + if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, IALine, IACol, FunctionIdLoc)) 
return Error(FunctionIdLoc, "function id already allocated"); @@ -4159,7 +4159,7 @@ bool AsmParser::parseDirectiveCVFPOData() { if (parseEOL()) return true; MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); - getStreamer().EmitCVFPOData(ProcSym, DirLoc); + getStreamer().emitCVFPOData(ProcSym, DirLoc); return false; } diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp index 824ac4188b1866..9b3b8708fdf561 100644 --- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp @@ -431,7 +431,7 @@ bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass); + getStreamer().emitCOFFSymbolStorageClass(SymbolStorageClass); return false; } @@ -444,7 +444,7 @@ bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitCOFFSymbolType(Type); + getStreamer().emitCOFFSymbolType(Type); return false; } @@ -479,7 +479,7 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) { MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitCOFFSecRel32(Symbol, Offset); + getStreamer().emitCOFFSecRel32(Symbol, Offset); return false; } @@ -505,7 +505,7 @@ bool COFFAsmParser::ParseDirectiveRVA(StringRef, SMLoc) { MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - getStreamer().EmitCOFFImgRel32(Symbol, Offset); + getStreamer().emitCOFFImgRel32(Symbol, Offset); return false; }; @@ -525,7 +525,7 @@ bool COFFAsmParser::ParseDirectiveSafeSEH(StringRef, SMLoc) { MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitCOFFSafeSEH(Symbol); + getStreamer().emitCOFFSafeSEH(Symbol); return false; } @@ -540,7 +540,7 @@ bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) { MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); Lex(); - 
getStreamer().EmitCOFFSectionIndex(Symbol); + getStreamer().emitCOFFSectionIndex(Symbol); return false; } @@ -555,7 +555,7 @@ bool COFFAsmParser::ParseDirectiveSymIdx(StringRef, SMLoc) { MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitCOFFSymbolIndex(Symbol); + getStreamer().emitCOFFSymbolIndex(Symbol); return false; } @@ -618,31 +618,31 @@ bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc Loc) { MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitWinCFIStartProc(Symbol, Loc); + getStreamer().emitWinCFIStartProc(Symbol, Loc); return false; } bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIEndProc(Loc); + getStreamer().emitWinCFIEndProc(Loc); return false; } bool COFFAsmParser::ParseSEHDirectiveEndFuncletOrFunc(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIFuncletOrFuncEnd(Loc); + getStreamer().emitWinCFIFuncletOrFuncEnd(Loc); return false; } bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIStartChained(Loc); + getStreamer().emitWinCFIStartChained(Loc); return false; } bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIEndChained(Loc); + getStreamer().emitWinCFIEndChained(Loc); return false; } @@ -668,13 +668,13 @@ bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc Loc) { MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitWinEHHandler(handler, unwind, except, Loc); + getStreamer().emitWinEHHandler(handler, unwind, except, Loc); return false; } bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinEHHandlerData(); + getStreamer().emitWinEHHandlerData(); return false; } @@ -687,13 +687,13 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) { return TokError("unexpected token in 
directive"); Lex(); - getStreamer().EmitWinCFIAllocStack(Size, Loc); + getStreamer().emitWinCFIAllocStack(Size, Loc); return false; } bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIEndProlog(Loc); + getStreamer().emitWinCFIEndProlog(Loc); return false; } diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index 022090fe1ca96b..0653808a190ba9 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -336,7 +336,7 @@ bool COFFMasmParser::ParseDirectiveProc(StringRef Directive, SMLoc Loc) { getTok().getString().equals_insensitive("frame")) { Lex(); Framed = true; - getStreamer().EmitWinCFIStartProc(Sym, Loc); + getStreamer().emitWinCFIStartProc(Sym, Loc); } getStreamer().emitLabel(Sym, Loc); @@ -357,7 +357,7 @@ bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) { CurrentProcedure + "'"); if (CurrentProcedureFramed) { - getStreamer().EmitWinCFIEndProc(Loc); + getStreamer().emitWinCFIEndProc(Loc); } CurrentProcedure = ""; CurrentProcedureFramed = false; @@ -391,13 +391,13 @@ bool COFFMasmParser::ParseSEHDirectiveAllocStack(StringRef Directive, return Error(SizeLoc, "expected integer size"); if (Size % 8 != 0) return Error(SizeLoc, "stack size must be a multiple of 8"); - getStreamer().EmitWinCFIAllocStack(static_cast(Size), Loc); + getStreamer().emitWinCFIAllocStack(static_cast(Size), Loc); return false; } bool COFFMasmParser::ParseSEHDirectiveEndProlog(StringRef Directive, SMLoc Loc) { - getStreamer().EmitWinCFIEndProlog(Loc); + getStreamer().emitWinCFIEndProlog(Loc); return false; } diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 0ba1076f20cf19..96bb27661440ca 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -5094,7 +5094,7 @@ bool MasmParser::parseDirectiveCVFile() { ArrayRef 
ChecksumAsBytes(reinterpret_cast(CKMem), Checksum.size()); - if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes, + if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes, static_cast(ChecksumKind))) return Error(FileNumberLoc, "file number already allocated"); @@ -5135,7 +5135,7 @@ bool MasmParser::parseDirectiveCVFuncId() { "unexpected token in '.cv_func_id' directive")) return true; - if (!getStreamer().EmitCVFuncIdDirective(FunctionId)) + if (!getStreamer().emitCVFuncIdDirective(FunctionId)) return Error(FunctionIdLoc, "function id already allocated"); return false; @@ -5195,7 +5195,7 @@ bool MasmParser::parseDirectiveCVInlineSiteId() { "unexpected token in '.cv_inline_site_id' directive")) return true; - if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, + if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, IALine, IACol, FunctionIdLoc)) return Error(FunctionIdLoc, "function id already allocated"); @@ -5495,7 +5495,7 @@ bool MasmParser::parseDirectiveCVFPOData() { if (parseEOL("unexpected tokens")) return addErrorSuffix(" in '.cv_fpo_data' directive"); MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); - getStreamer().EmitCVFPOData(ProcSym, DirLoc); + getStreamer().emitCVFPOData(ProcSym, DirLoc); return false; } diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 700f273513b30d..1fe60fa410703a 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -187,7 +187,7 @@ void MCStreamer::emitSymbolValue(const MCSymbol *Sym, unsigned Size, if (!IsSectionRelative) emitValueImpl(MCSymbolRefExpr::create(Sym, getContext()), Size); else - EmitCOFFSecRel32(Sym, /*Offset=*/0); + emitCOFFSecRel32(Sym, /*Offset=*/0); } void MCStreamer::emitDTPRel64Value(const MCExpr *Value) { @@ -284,18 +284,18 @@ MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() { return &DwarfFrameInfos.back(); } -bool 
MCStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename, +bool MCStreamer::emitCVFileDirective(unsigned FileNo, StringRef Filename, ArrayRef Checksum, unsigned ChecksumKind) { return getContext().getCVContext().addFile(*this, FileNo, Filename, Checksum, ChecksumKind); } -bool MCStreamer::EmitCVFuncIdDirective(unsigned FunctionId) { +bool MCStreamer::emitCVFuncIdDirective(unsigned FunctionId) { return getContext().getCVContext().recordFunctionId(FunctionId); } -bool MCStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId, +bool MCStreamer::emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc, unsigned IAFile, unsigned IALine, unsigned IACol, SMLoc Loc) { @@ -699,7 +699,7 @@ WinEH::FrameInfo *MCStreamer::EnsureValidWinFrameInfo(SMLoc Loc) { return CurrentWinFrameInfo; } -void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { +void MCStreamer::emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { const MCAsmInfo *MAI = Context.getAsmInfo(); if (!MAI->usesWindowsCFI()) return getContext().reportError( @@ -717,7 +717,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } -void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) { +void MCStreamer::emitWinCFIEndProc(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -731,11 +731,11 @@ void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) { for (size_t I = CurrentProcWinFrameInfoStartIndex, E = WinFrameInfos.size(); I != E; ++I) - EmitWindowsUnwindTables(WinFrameInfos[I].get()); + emitWindowsUnwindTables(WinFrameInfos[I].get()); SwitchSection(CurFrame->TextSection); } -void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) { +void MCStreamer::emitWinCFIFuncletOrFuncEnd(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -746,7 +746,7 @@ void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) { 
CurFrame->FuncletOrFuncEnd = Label; } -void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) { +void MCStreamer::emitWinCFIStartChained(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -759,7 +759,7 @@ void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) { CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } -void MCStreamer::EmitWinCFIEndChained(SMLoc Loc) { +void MCStreamer::emitWinCFIEndChained(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -773,7 +773,7 @@ void MCStreamer::EmitWinCFIEndChained(SMLoc Loc) { CurrentWinFrameInfo = const_cast(CurFrame->ChainedParent); } -void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, +void MCStreamer::emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -790,7 +790,7 @@ void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, CurFrame->HandlesExceptions = true; } -void MCStreamer::EmitWinEHHandlerData(SMLoc Loc) { +void MCStreamer::emitWinEHHandlerData(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -854,7 +854,7 @@ static unsigned encodeSEHRegNum(MCContext &Ctx, MCRegister Reg) { return Ctx.getRegisterInfo()->getSEHRegNum(Reg); } -void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { +void MCStreamer::emitWinCFIPushReg(MCRegister Register, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -866,7 +866,7 @@ void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, +void MCStreamer::emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -888,7 
+888,7 @@ void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { +void MCStreamer::emitWinCFIAllocStack(unsigned Size, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -905,7 +905,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, +void MCStreamer::emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -922,7 +922,7 @@ void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, +void MCStreamer::emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -937,7 +937,7 @@ void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { +void MCStreamer::emitWinCFIPushFrame(bool Code, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -951,7 +951,7 @@ void MCStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) { +void MCStreamer::emitWinCFIEndProlog(SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; @@ -961,15 +961,15 @@ void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) { CurFrame->PrologEnd = Label; } -void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {} +void MCStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) {} -void MCStreamer::EmitCOFFSymbolIndex(MCSymbol 
const *Symbol) {} +void MCStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) {} -void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {} +void MCStreamer::emitCOFFSectionIndex(MCSymbol const *Symbol) {} -void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {} +void MCStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {} -void MCStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {} +void MCStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {} /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is @@ -988,11 +988,9 @@ void MCStreamer::emitRawText(const Twine &T) { emitRawTextImpl(T.toStringRef(Str)); } -void MCStreamer::EmitWindowsUnwindTables() { -} +void MCStreamer::emitWindowsUnwindTables() {} -void MCStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { -} +void MCStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) {} void MCStreamer::Finish(SMLoc EndLoc) { if ((!DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End) || @@ -1156,10 +1154,10 @@ void MCStreamer::emitFileDirective(StringRef Filename) {} void MCStreamer::emitFileDirective(StringRef Filename, StringRef CompilerVerion, StringRef TimeStamp, StringRef Description) { } -void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { +void MCStreamer::emitCOFFSymbolStorageClass(int StorageClass) { llvm_unreachable("this directive only supported on COFF targets"); } -void MCStreamer::EmitCOFFSymbolType(int Type) { +void MCStreamer::emitCOFFSymbolType(int Type) { llvm_unreachable("this directive only supported on COFF targets"); } void MCStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size, diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index bf0d7d2931e0a0..0fcc7f5170b94c 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -141,7 
+141,7 @@ void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *S) { CurSymbol = Symbol; } -void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { +void MCWinCOFFStreamer::emitCOFFSymbolStorageClass(int StorageClass) { if (!CurSymbol) { Error("storage class specified outside of symbol definition"); return; @@ -157,7 +157,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { cast(CurSymbol)->setClass((uint16_t)StorageClass); } -void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { +void MCWinCOFFStreamer::emitCOFFSymbolType(int Type) { if (!CurSymbol) { Error("symbol type specified outside of a symbol definition"); return; @@ -178,7 +178,7 @@ void MCWinCOFFStreamer::EndCOFFSymbolDef() { CurSymbol = nullptr; } -void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { +void MCWinCOFFStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) { // SafeSEH is a feature specific to 32-bit x86. It does not exist (and is // unnecessary) on all platforms which use table-based exception dispatch. 
if (getContext().getTargetTriple().getArch() != Triple::x86) @@ -204,7 +204,7 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { << COFF::SCT_COMPLEX_TYPE_SHIFT); } -void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { +void MCWinCOFFStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) { MCSection *Sec = getCurrentSectionOnly(); getAssembler().registerSection(*Sec); if (Sec->getAlignment() < 4) @@ -215,7 +215,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { getAssembler().registerSymbol(*Symbol); } -void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) { +void MCWinCOFFStreamer::emitCOFFSectionIndex(const MCSymbol *Symbol) { visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); @@ -224,7 +224,7 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) { DF->getContents().resize(DF->getContents().size() + 2, 0); } -void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol, +void MCWinCOFFStreamer::emitCOFFSecRel32(const MCSymbol *Symbol, uint64_t Offset) { visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); @@ -242,7 +242,7 @@ void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol, DF->getContents().resize(DF->getContents().size() + 4, 0); } -void MCWinCOFFStreamer::EmitCOFFImgRel32(const MCSymbol *Symbol, +void MCWinCOFFStreamer::emitCOFFImgRel32(const MCSymbol *Symbol, int64_t Offset) { visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); @@ -333,7 +333,7 @@ void MCWinCOFFStreamer::emitIdent(StringRef IdentString) { llvm_unreachable("not implemented"); } -void MCWinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { +void MCWinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { llvm_unreachable("not implemented"); } diff --git a/llvm/lib/Object/RecordStreamer.h b/llvm/lib/Object/RecordStreamer.h index 957d80f33bf4fc..80ff5f04441645 
100644 --- a/llvm/lib/Object/RecordStreamer.h +++ b/llvm/lib/Object/RecordStreamer.h @@ -58,8 +58,8 @@ class RecordStreamer : public MCStreamer { // but the default implementation of these methods crashes, so we override // them with versions that do nothing. void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} - void EmitCOFFSymbolStorageClass(int StorageClass) override {} - void EmitCOFFSymbolType(int Type) override {} + void emitCOFFSymbolStorageClass(int StorageClass) override {} + void emitCOFFSymbolType(int Type) override {} void EndCOFFSymbolDef() override {} /// Record .symver aliases for later processing. diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index de71c9318948e7..eadc792be3d08e 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -144,8 +144,8 @@ class AArch64AsmPrinter : public AsmPrinter { COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; OutStreamer->BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer->EmitCOFFSymbolStorageClass(Scl); - OutStreamer->EmitCOFFSymbolType(Type); + OutStreamer->emitCOFFSymbolStorageClass(Scl); + OutStreamer->emitCOFFSymbolType(Type); OutStreamer->EndCOFFSymbolDef(); } @@ -205,8 +205,8 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { // compiler features bitfield read by link.exe. 
MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00")); OutStreamer->BeginCOFFSymbolDef(S); - OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); OutStreamer->EndCOFFSymbolDef(); int64_t Feat00Flags = 0; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index cad262077e5acc..975ed41809d741 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -27,14 +27,14 @@ class AArch64WinCOFFStreamer : public MCWinCOFFStreamer { std::unique_ptr OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {} - void EmitWinEHHandlerData(SMLoc Loc) override; - void EmitWindowsUnwindTables() override; - void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; + void emitWinEHHandlerData(SMLoc Loc) override; + void emitWindowsUnwindTables() override; + void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; void finishImpl() override; }; -void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { - MCStreamer::EmitWinEHHandlerData(Loc); +void AArch64WinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); // We have to emit the unwind info now, because this directive // actually switches to the .xdata section! 
@@ -42,11 +42,11 @@ void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { /* HandlerData = */ true); } -void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { +void AArch64WinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) { EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); } -void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() { +void AArch64WinCOFFStreamer::emitWindowsUnwindTables() { if (!getNumWinFrameInfos()) return; EHStreamer.Emit(*this); @@ -54,7 +54,7 @@ void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() { void AArch64WinCOFFStreamer::finishImpl() { emitFrames(nullptr); - EmitWindowsUnwindTables(); + emitWindowsUnwindTables(); MCWinCOFFStreamer::finishImpl(); } diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 0874bae7d46147..e7b1aeb6825c17 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -162,8 +162,8 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; OutStreamer->BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer->EmitCOFFSymbolStorageClass(Scl); - OutStreamer->EmitCOFFSymbolType(Type); + OutStreamer->emitCOFFSymbolStorageClass(Scl); + OutStreamer->emitCOFFSymbolType(Type); OutStreamer->EndCOFFSymbolDef(); } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 31e4744ca9ec4b..a21ed9bcede40e 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -5097,7 +5097,7 @@ bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFIPushReg(Reg, Loc); + getStreamer().emitWinCFIPushReg(Reg, Loc); return false; } @@ -5117,7 +5117,7 @@ bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { return 
TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); + getStreamer().emitWinCFISetFrame(Reg, Off, Loc); return false; } @@ -5137,7 +5137,7 @@ bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); + getStreamer().emitWinCFISaveReg(Reg, Off, Loc); return false; } @@ -5157,7 +5157,7 @@ bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); + getStreamer().emitWinCFISaveXMM(Reg, Off, Loc); return false; } @@ -5178,7 +5178,7 @@ bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { return TokError("unexpected token in directive"); getParser().Lex(); - getStreamer().EmitWinCFIPushFrame(Code, Loc); + getStreamer().emitWinCFIPushFrame(Code, Loc); return false; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index a319f6948ee96f..36945d1f67468f 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -26,15 +26,15 @@ class X86WinCOFFStreamer : public MCWinCOFFStreamer { std::unique_ptr OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {} - void EmitWinEHHandlerData(SMLoc Loc) override; - void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; - void EmitWindowsUnwindTables() override; - void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override; + void emitWinEHHandlerData(SMLoc Loc) override; + void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; + void emitWindowsUnwindTables() override; + void emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override; void finishImpl() override; }; -void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { - 
MCStreamer::EmitWinEHHandlerData(Loc); +void X86WinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); // We have to emit the unwind info now, because this directive // actually switches to the .xdata section. @@ -42,17 +42,17 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { EHStreamer.EmitUnwindInfo(*this, CurFrame, /* HandlerData = */ true); } -void X86WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { +void X86WinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) { EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); } -void X86WinCOFFStreamer::EmitWindowsUnwindTables() { +void X86WinCOFFStreamer::emitWindowsUnwindTables() { if (!getNumWinFrameInfos()) return; EHStreamer.Emit(*this); } -void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { +void X86WinCOFFStreamer::emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { X86TargetStreamer *XTS = static_cast(getTargetStreamer()); XTS->emitFPOData(ProcSym, Loc); @@ -60,7 +60,7 @@ void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { void X86WinCOFFStreamer::finishImpl() { emitFrames(nullptr); - EmitWindowsUnwindTables(); + emitWindowsUnwindTables(); MCWinCOFFStreamer::finishImpl(); } diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 46f258a7163b48..7e2819f952379c 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -71,10 +71,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (Subtarget->isTargetCOFF()) { bool Local = MF.getFunction().hasLocalLinkage(); OutStreamer->BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer->EmitCOFFSymbolStorageClass( + OutStreamer->emitCOFFSymbolStorageClass( Local ? 
COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION - << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer->EndCOFFSymbolDef(); } @@ -679,8 +679,8 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) { // compiler features bitfield read by link.exe. MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00")); OutStreamer->BeginCOFFSymbolDef(S); - OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); OutStreamer->EndCOFFSymbolDef(); int64_t Feat00Flags = 0; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 3ecde632436db6..bf583b7c7ff2bd 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1992,34 +1992,34 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { // Otherwise, use the .seh_ directives for all other Windows platforms. 
switch (MI->getOpcode()) { case X86::SEH_PushReg: - OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm()); + OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm()); break; case X86::SEH_SaveReg: - OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(), + OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; case X86::SEH_SaveXMM: - OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(), + OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; case X86::SEH_StackAlloc: - OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm()); + OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm()); break; case X86::SEH_SetFrame: - OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(), + OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; case X86::SEH_PushFrame: - OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm()); + OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm()); break; case X86::SEH_EndPrologue: - OutStreamer->EmitWinCFIEndProlog(); + OutStreamer->emitWinCFIEndProlog(); break; default: diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp index 5b65557e777620..0227f0feaeaf2c 100644 --- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp +++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp @@ -64,8 +64,8 @@ class MCStreamerWrapper final : public MCStreamer { SMLoc Loc = SMLoc()) override {} void emitGPRel32Value(const MCExpr *Value) override {} void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} - void EmitCOFFSymbolStorageClass(int StorageClass) override {} - void EmitCOFFSymbolType(int Type) override {} + void emitCOFFSymbolStorageClass(int StorageClass) override {} + void emitCOFFSymbolType(int Type) override {} void EndCOFFSymbolDef() override {} ArrayRef GetInstructionSequence(unsigned Index) const { diff --git 
a/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp index 56c24d2c5ef36a..9aebebfd250889 100644 --- a/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp +++ b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp @@ -73,7 +73,7 @@ TEST_F(AsmPrinterEmitDwarfSymbolReferenceTest, COFF) { if (!init("x86_64-pc-windows", /*DwarfVersion=*/4, dwarf::DWARF32)) GTEST_SKIP(); - EXPECT_CALL(TestPrinter->getMS(), EmitCOFFSecRel32(Val, 0)); + EXPECT_CALL(TestPrinter->getMS(), emitCOFFSecRel32(Val, 0)); TestPrinter->getAP()->emitDwarfSymbolReference(Val, false); } diff --git a/llvm/unittests/CodeGen/TestAsmPrinter.h b/llvm/unittests/CodeGen/TestAsmPrinter.h index ce1d636b8ddd6e..1414b8229ed3b0 100644 --- a/llvm/unittests/CodeGen/TestAsmPrinter.h +++ b/llvm/unittests/CodeGen/TestAsmPrinter.h @@ -44,7 +44,7 @@ class MockMCStreamer : public MCStreamer { void(const MCExpr *Value, unsigned Size, SMLoc Loc)); MOCK_METHOD3(emitAbsoluteSymbolDiff, void(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size)); - MOCK_METHOD2(EmitCOFFSecRel32, void(MCSymbol const *Symbol, uint64_t Offset)); + MOCK_METHOD2(emitCOFFSecRel32, void(MCSymbol const *Symbol, uint64_t Offset)); }; class TestAsmPrinter { From 8ee35c5558aa86e3be5f2882f1db030d6281578f Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 26 May 2022 00:19:57 -0700 Subject: [PATCH 553/908] Update three tests to realpath paths that we compare to dyld paths I get to my work directory through a symlink, so the pathnames the tests get for their build artifacts etc are via that symlink. There are three tests which compare those symlink paths to a directory received from dyld on macOS, which is the actual real pathname. These tests have always failed for me on my desktop but I finally sat down to figure out why. Easy quick fix.
--- lldb/test/API/commands/platform/sdk/TestPlatformSDK.py | 1 + lldb/test/API/macosx/function-starts/TestFunctionStarts.py | 2 +- lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py b/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py index 91ad5a446123e0..9178e3cdac4f73 100644 --- a/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py +++ b/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py @@ -58,6 +58,7 @@ def test_macos_sdk(self): # Create a fake 'SDK' directory. test_home = os.path.join(self.getBuildDir(), 'fake_home.noindex') + test_home = os.path.realpath(test_home) macos_version = platform.mac_ver()[0] sdk_dir = os.path.join(test_home, 'Library', 'Developer', 'Xcode', 'macOS DeviceSupport', macos_version) diff --git a/lldb/test/API/macosx/function-starts/TestFunctionStarts.py b/lldb/test/API/macosx/function-starts/TestFunctionStarts.py index 72688d80e7cd45..21b494f185dc5e 100644 --- a/lldb/test/API/macosx/function-starts/TestFunctionStarts.py +++ b/lldb/test/API/macosx/function-starts/TestFunctionStarts.py @@ -35,7 +35,7 @@ def do_function_starts(self, in_memory): """Run the binary, stop at our unstripped function, make sure the caller has synthetic symbols""" - exe = self.getBuildArtifact(exe_name) + exe = os.path.realpath(self.getBuildArtifact(exe_name)) # Now strip the binary, but leave externals so we can break on dont_strip_me. 
self.runBuildCommand(["strip", "-u", "-x", "-S", exe]) diff --git a/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py b/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py index 840e970410121f..beb62f1ebe1a58 100644 --- a/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py +++ b/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py @@ -72,7 +72,7 @@ def check_simulator_ostype(self, sdk, platform_name, arch=platform.machine()): 'ARCH': arch, 'ARCH_CFLAGS': '-target {} {}'.format(triple, version_min), }) - exe_path = self.getBuildArtifact(exe_name) + exe_path = os.path.realpath(self.getBuildArtifact(exe_name)) cmd = [ 'xcrun', 'simctl', 'spawn', '-s', deviceUDID, exe_path, 'print-pid', 'sleep:10' From c274b6e5830ea88d3f55d6dc1d2b99e38cf6595e Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 26 May 2022 00:30:12 -0700 Subject: [PATCH 554/908] Defer source path remap tilde expansion until source file use When reading source path remappings out of a dSYM, lldb currently does tilde expansion -- expanding the tilde-username and checking that the destination pathname exists, for each dSYM with the path remappings. This cost happens during lldb's initial process launch / load, an especially perf-sensitive time. Inside Apple, we have dSYMs with source path remappings pointing to NFS directories where these extra stats for every dSYM can be very expensive if the network is slow. This patch instead keeps the source path mapping in the original tilde-username terms and does the tilde expansion when we need to read a specific source file from one of the modules. We'll be stat'ing all of those inodes to load the source file anyway, so the fact that we do the tilde expansion on every source file we load, it doesn't cost us significantly. 
Differential Revision: https://reviews.llvm.org/D126435 rdar://77091379 --- lldb/source/Core/SourceManager.cpp | 17 ++++++++++++++--- .../SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp | 12 ------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index 8599c0a6a2284b..d4e1791aa3e46a 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -49,6 +49,13 @@ using namespace lldb_private; static inline bool is_newline_char(char ch) { return ch == '\n' || ch == '\r'; } +static void resolve_tilde(FileSpec &file_spec) { + if (!FileSystem::Instance().Exists(file_spec) && + file_spec.GetDirectory().GetCString()[0] == '~') { + FileSystem::Instance().Resolve(file_spec); + } +} + // SourceManager constructor SourceManager::SourceManager(const TargetSP &target_sp) : m_last_line(0), m_last_count(0), m_default_set(false), @@ -66,10 +73,13 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { if (!file_spec) return nullptr; + FileSpec resolved_fspec = file_spec; + resolve_tilde(resolved_fspec); + DebuggerSP debugger_sp(m_debugger_wp.lock()); FileSP file_sp; if (debugger_sp && debugger_sp->GetUseSourceCache()) - file_sp = debugger_sp->GetSourceFileCache().FindSourceFile(file_spec); + file_sp = debugger_sp->GetSourceFileCache().FindSourceFile(resolved_fspec); TargetSP target_sp(m_target_wp.lock()); @@ -87,9 +97,9 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { // If file_sp is no good or it points to a non-existent file, reset it. 
if (!file_sp || !FileSystem::Instance().Exists(file_sp->GetFileSpec())) { if (target_sp) - file_sp = std::make_shared(file_spec, target_sp.get()); + file_sp = std::make_shared(resolved_fspec, target_sp.get()); else - file_sp = std::make_shared(file_spec, debugger_sp); + file_sp = std::make_shared(resolved_fspec, debugger_sp); if (debugger_sp && debugger_sp->GetUseSourceCache()) debugger_sp->GetSourceFileCache().AddSourceFile(file_sp); @@ -441,6 +451,7 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec, } } } + resolve_tilde(m_file_spec); // Try remapping if m_file_spec does not correspond to an existing file. if (!FileSystem::Instance().Exists(m_file_spec)) { // Check target specific source remappings (i.e., the diff --git a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp index b99ade50d857bd..ea1aa7e0587104 100644 --- a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp +++ b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp @@ -237,13 +237,6 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, !original_DBGSourcePath_value.empty()) { DBGSourcePath = original_DBGSourcePath_value; } - if (DBGSourcePath[0] == '~') { - FileSpec resolved_source_path( - DBGSourcePath.c_str()); - FileSystem::Instance().Resolve( - resolved_source_path); - DBGSourcePath = resolved_source_path.GetPath(); - } module_sp->GetSourceMappingList().Append( key.GetStringRef(), DBGSourcePath, true); // With version 2 of DBGSourcePathRemapping, we @@ -275,11 +268,6 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, DBGBuildSourcePath); plist.GetValueAsString("DBGSourcePath", DBGSourcePath); if (!DBGBuildSourcePath.empty() && !DBGSourcePath.empty()) { - if (DBGSourcePath[0] == '~') { - FileSpec resolved_source_path(DBGSourcePath.c_str()); - FileSystem::Instance().Resolve(resolved_source_path); - DBGSourcePath = 
resolved_source_path.GetPath(); - } module_sp->GetSourceMappingList().Append( DBGBuildSourcePath, DBGSourcePath, true); } From 56ac85a20fb98393d033582ee4dc86993843726d Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 26 May 2022 00:46:54 -0700 Subject: [PATCH 555/908] Revert "Defer source path remap tilde expansion until source file use" This reverts commit c274b6e5830ea88d3f55d6dc1d2b99e38cf6595e. The x86_64 debian bot got a failure with this patch, https://lab.llvm.org/buildbot#builders/68/builds/33078 where SymbolFile/DWARF/x86/DW_TAG_variable-DW_AT_decl_file-DW_AT_abstract_origin-crosscu1.s is crashing here - #2 0x0000000000425a9f SignalHandler(int) Signals.cpp:0:0 #3 0x00007f57160e9140 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x14140) #4 0x00007f570d911e43 lldb_private::SourceManager::GetFile(lldb_private::FileSpec const&) crtstuff.c:0:0 #5 0x00007f570d914270 lldb_private::SourceManager::DisplaySourceLinesWithLineNumbers(lldb_private::FileSpec const&, unsigned int, unsigned int, unsigned int, unsigned int, char const*, lldb_private::Stream*, lldb_private::SymbolContextList const*) crtstuff.c:0:0 #6 0x00007f570da662c8 lldb_private::StackFrame::GetStatus(lldb_private::Stream&, bool, bool, bool, char const*) crtstuff.c:0:0 I don't get a failure here on my mac, I'll review this method more closely tomorrow.
--- lldb/source/Core/SourceManager.cpp | 17 +++-------------- .../SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp | 12 ++++++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index d4e1791aa3e46a..8599c0a6a2284b 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -49,13 +49,6 @@ using namespace lldb_private; static inline bool is_newline_char(char ch) { return ch == '\n' || ch == '\r'; } -static void resolve_tilde(FileSpec &file_spec) { - if (!FileSystem::Instance().Exists(file_spec) && - file_spec.GetDirectory().GetCString()[0] == '~') { - FileSystem::Instance().Resolve(file_spec); - } -} - // SourceManager constructor SourceManager::SourceManager(const TargetSP &target_sp) : m_last_line(0), m_last_count(0), m_default_set(false), @@ -73,13 +66,10 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { if (!file_spec) return nullptr; - FileSpec resolved_fspec = file_spec; - resolve_tilde(resolved_fspec); - DebuggerSP debugger_sp(m_debugger_wp.lock()); FileSP file_sp; if (debugger_sp && debugger_sp->GetUseSourceCache()) - file_sp = debugger_sp->GetSourceFileCache().FindSourceFile(resolved_fspec); + file_sp = debugger_sp->GetSourceFileCache().FindSourceFile(file_spec); TargetSP target_sp(m_target_wp.lock()); @@ -97,9 +87,9 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { // If file_sp is no good or it points to a non-existent file, reset it. 
if (!file_sp || !FileSystem::Instance().Exists(file_sp->GetFileSpec())) { if (target_sp) - file_sp = std::make_shared(resolved_fspec, target_sp.get()); + file_sp = std::make_shared(file_spec, target_sp.get()); else - file_sp = std::make_shared(resolved_fspec, debugger_sp); + file_sp = std::make_shared(file_spec, debugger_sp); if (debugger_sp && debugger_sp->GetUseSourceCache()) debugger_sp->GetSourceFileCache().AddSourceFile(file_sp); @@ -451,7 +441,6 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec, } } } - resolve_tilde(m_file_spec); // Try remapping if m_file_spec does not correspond to an existing file. if (!FileSystem::Instance().Exists(m_file_spec)) { // Check target specific source remappings (i.e., the diff --git a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp index ea1aa7e0587104..b99ade50d857bd 100644 --- a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp +++ b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp @@ -237,6 +237,13 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, !original_DBGSourcePath_value.empty()) { DBGSourcePath = original_DBGSourcePath_value; } + if (DBGSourcePath[0] == '~') { + FileSpec resolved_source_path( + DBGSourcePath.c_str()); + FileSystem::Instance().Resolve( + resolved_source_path); + DBGSourcePath = resolved_source_path.GetPath(); + } module_sp->GetSourceMappingList().Append( key.GetStringRef(), DBGSourcePath, true); // With version 2 of DBGSourcePathRemapping, we @@ -268,6 +275,11 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, DBGBuildSourcePath); plist.GetValueAsString("DBGSourcePath", DBGSourcePath); if (!DBGBuildSourcePath.empty() && !DBGSourcePath.empty()) { + if (DBGSourcePath[0] == '~') { + FileSpec resolved_source_path(DBGSourcePath.c_str()); + FileSystem::Instance().Resolve(resolved_source_path); + DBGSourcePath = 
resolved_source_path.GetPath(); + } module_sp->GetSourceMappingList().Append( DBGBuildSourcePath, DBGSourcePath, true); } From 390c0ac28db3557cad391b99ede20cd168c2dc30 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 May 2022 08:53:33 +0100 Subject: [PATCH 556/908] [LV] Fix indentation in tryToCreateWidenRecipe (NFC). --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index beddc3ee251738..1a0dea02b340d7 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8572,12 +8572,12 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV); } - // Record the incoming value from the backedge, so we can add the incoming - // value from the backedge after all recipes have been created. - recordRecipeOf(cast( - Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()))); - PhisToFix.push_back(PhiRecipe); - return toVPRecipeResult(PhiRecipe); + // Record the incoming value from the backedge, so we can add the incoming + // value from the backedge after all recipes have been created. + recordRecipeOf(cast( + Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()))); + PhisToFix.push_back(PhiRecipe); + return toVPRecipeResult(PhiRecipe); } if (isa(Instr) && From e45087fd53d5946f47efa7bb7be088c668c386a1 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Fri, 20 May 2022 10:47:15 +0800 Subject: [PATCH 557/908] [RISCV] Fix state persistence bugs (PR55548) We didn't implement RISCVELFStreamer::reset, which caused some very strange section output for the attribute section. See D15950 for how ARM implements this.
Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D125905 --- .../RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 12 ++++++++++++ .../RISCV/MCTargetDesc/RISCVELFStreamer.h | 2 ++ .../RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 1 + .../RISCV/MCTargetDesc/RISCVTargetStreamer.h | 1 + llvm/test/MC/RISCV/twice.ll | 17 +++++++++++++++++ 5 files changed, 33 insertions(+) create mode 100644 llvm/test/MC/RISCV/twice.ll diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 88c417e974eb85..52e6af1224f596 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -180,6 +180,11 @@ void RISCVTargetELFStreamer::finish() { MCA.setELFHeaderEFlags(EFlags); } +void RISCVTargetELFStreamer::reset() { + AttributeSection = nullptr; + Contents.clear(); +} + namespace { class RISCVELFStreamer : public MCELFStreamer { static std::pair getRelocPairForSize(unsigned Size) { @@ -226,6 +231,13 @@ class RISCVELFStreamer : public MCELFStreamer { : !B.getName().empty()); } + void reset() override { + MCTargetStreamer &TS = *getTargetStreamer(); + RISCVTargetStreamer &RTS = static_cast(TS); + RTS.reset(); + MCELFStreamer::reset(); + } + public: RISCVELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index eb2759f2c217ff..7ca2f5ab562349 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -92,6 +92,8 @@ class RISCVTargetELFStreamer : public RISCVTargetStreamer { void finishAttributeSection() override; size_t calculateContentSize() const; + void reset() override; + public: MCELFStreamer &getStreamer(); RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); diff --git 
a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 349116d6b106a4..5f9ed77d07cf58 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -22,6 +22,7 @@ using namespace llvm; RISCVTargetStreamer::RISCVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} void RISCVTargetStreamer::finish() { finishAttributeSection(); } +void RISCVTargetStreamer::reset() {} void RISCVTargetStreamer::emitDirectiveOptionPush() {} void RISCVTargetStreamer::emitDirectiveOptionPop() {} diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h index 6f09016139b035..0d35d0b698a9f5 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h @@ -23,6 +23,7 @@ class RISCVTargetStreamer : public MCTargetStreamer { public: RISCVTargetStreamer(MCStreamer &S); void finish() override; + virtual void reset(); virtual void emitDirectiveOptionPush(); virtual void emitDirectiveOptionPop(); diff --git a/llvm/test/MC/RISCV/twice.ll b/llvm/test/MC/RISCV/twice.ll new file mode 100644 index 00000000000000..c95f344f3a2cfb --- /dev/null +++ b/llvm/test/MC/RISCV/twice.ll @@ -0,0 +1,17 @@ +; Check for state persistence bugs in the RISC-V MC backend +; This should neither fail (in the comparison that the second object +; is bit-identical to the first) nor crash. Either failure would most +; likely indicate some state that is not properly reset in the +; appropriate ::reset method. 
+; RUN: llc -compile-twice -filetype=obj -mtriple=riscv64 %s -o - \ +; RUN: | llvm-objdump --section-headers - \ +; RUN: | FileCheck %s + +; CHECK: Sections: +; CHECK-NEXT: Idx Name Size VMA Type +; CHECK-NEXT: 0 +; CHECK-NEXT: 1 .strtab +; CHECK-NEXT: 2 .text +; CHECK-NEXT: 3 .note.GNU-stack +; CHECK-NEXT: 4 .riscv.attributes +; CHECK-NEXT: 5 .symtab From f96aa493f0459d16a81e76c9d4f2915057cbcf7a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 May 2022 09:46:23 +0100 Subject: [PATCH 558/908] [SimpleLoopUnswitch] Always skip trivial select and set condition. When updating the branch instruction outside the loop during non-trivial unswitching, always skip trivial selects and update the condition. Otherwise we might create invalid IR, because the trivial select is inside the loop, while the condition is outside the loop. Fixes #55697. --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 5 +-- .../nontrivial-unswitch-trivial-select.ll | 35 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index b48b628485970c..198c4d76b30026 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2232,11 +2232,12 @@ static void unswitchNontrivialInvariants( BasicBlock *ClonedPH = ClonedPHs.begin()->second; BI->setSuccessor(ClonedSucc, ClonedPH); BI->setSuccessor(1 - ClonedSucc, LoopPH); + Value *Cond = skipTrivialSelect(BI->getCondition()); if (InsertFreeze) { - auto Cond = skipTrivialSelect(BI->getCondition()); if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) - BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI)); + Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI); } + BI->setCondition(Cond); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } else { assert(SI && "Must either be a branch or switch!"); diff --git 
a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll index acf6802cfdabab..5280aa7d3e2847 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll @@ -82,3 +82,38 @@ loop.latch: exit: ret void } + +; Test case for PR55697. +define i32 @unswitch_trivial_select_cmp_outside(i32 %x) { +; CHECK-LABEL: @unswitch_trivial_select_cmp_outside( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 100 +; CHECK-NEXT: br i1 [[C]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[LOOP_US]] ] +; CHECK-NEXT: br label [[LOOP_US]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 false, i1 true, i1 false +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[LOOP]] ] +; CHECK-NEXT: ret i32 [[LCSSA]] +; +entry: + %c = icmp ult i32 %x, 100 + br label %loop + +loop: + %p = phi i32 [ 0, %entry ], [ 35, %loop ] + %spec.select = select i1 %c, i1 true, i1 false + br i1 %spec.select, label %loop, label %exit + +exit: + %lcssa = phi i32 [ %p, %loop ] + ret i32 %lcssa +} From 569d6630204d5146aee7c0f40872cf529181d9ef Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 25 May 2022 11:09:43 +0200 Subject: [PATCH 559/908] [libc++] Implement ranges::equal Reviewed By: var-const, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D123681 --- libcxx/docs/Status/RangesAlgorithms.csv | 2 +- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__algorithm/ranges_equal.h | 115 ++++++ 
libcxx/include/algorithm | 15 + libcxx/include/module.modulemap | 1 + ...obust_against_copying_comparators.pass.cpp | 4 +- ...obust_against_copying_projections.pass.cpp | 4 +- libcxx/test/libcxx/private_headers.verify.cpp | 1 + .../alg.equal/ranges.equal.pass.cpp | 375 ++++++++++++++++++ 9 files changed, 513 insertions(+), 5 deletions(-) create mode 100644 libcxx/include/__algorithm/ranges_equal.h create mode 100644 libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp diff --git a/libcxx/docs/Status/RangesAlgorithms.csv b/libcxx/docs/Status/RangesAlgorithms.csv index 1a5989db42bf26..c757abd2201d20 100644 --- a/libcxx/docs/Status/RangesAlgorithms.csv +++ b/libcxx/docs/Status/RangesAlgorithms.csv @@ -8,7 +8,7 @@ Search,find_if_not,Nikolas Klauser,`D121248 `_ Search,find_first_of,Not assigned,n/a,Not started Search,adjacent_find,Not assigned,n/a,Not started Search,mismatch,Nikolas Klauser,`D117817 `_,✅ -Search,equal,Not assigned,n/a,Not started +Search,equal,Nikolas Klauser,n/a,✅ Search,lexicographical_compare,Not assigned,n/a,Not started Search,partition_point,Christopher Di Bella,`D105794 `_,Under review Search,lower_bound,Christopher Di Bella,`D105795 `_,Under review diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 68482bf716f1fd..65261da1258654 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -72,6 +72,7 @@ set(files __algorithm/ranges_copy_n.h __algorithm/ranges_count.h __algorithm/ranges_count_if.h + __algorithm/ranges_equal.h __algorithm/ranges_fill.h __algorithm/ranges_fill_n.h __algorithm/ranges_find.h diff --git a/libcxx/include/__algorithm/ranges_equal.h b/libcxx/include/__algorithm/ranges_equal.h new file mode 100644 index 00000000000000..c5f2d23322165e --- /dev/null +++ b/libcxx/include/__algorithm/ranges_equal.h @@ -0,0 +1,115 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_EQUAL_H +#define _LIBCPP___ALGORITHM_RANGES_EQUAL_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/indirectly_comparable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __equal { +struct __fn { +private: + template + _LIBCPP_HIDE_FROM_ABI constexpr static + bool __equal_impl(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2) { + while (__first1 != __last1 && __first2 != __last2) { + if (!std::invoke(__pred, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) + return false; + ++__first1; + ++__first2; + } + return __first1 == __last1 && __first2 == __last2; + } + +public: + + template _Sent1, + input_iterator _Iter2, sentinel_for<_Iter2> _Sent2, + class _Pred = ranges::equal_to, + class _Proj1 = identity, + class _Proj2 = identity> + requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred __pred = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) { + if (__last1 - __first1 != __last2 - __first2) + return false; + } + return 
__equal_impl(std::move(__first1), std::move(__last1), + std::move(__first2), std::move(__last2), + __pred, + __proj1, + __proj2); + } + + template + requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Range1&& __range1, + _Range2&& __range2, + _Pred __pred = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + if constexpr (sized_range<_Range1> && sized_range<_Range2>) { + if (ranges::distance(__range1) != ranges::distance(__range2)) + return false; + } + return __equal_impl(ranges::begin(__range1), ranges::end(__range1), + ranges::begin(__range2), ranges::end(__range2), + __pred, + __proj1, + __proj2); + return false; + } +}; +} // namespace __equal + +inline namespace __cpo { + inline constexpr auto equal = __equal::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_EQUAL_H diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index ba8ac23e4bddc0..aa2191464a8301 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -291,6 +291,20 @@ namespace ranges { template O> constexpr O ranges::fill_n(O first, iter_difference_t n, const T& value); // since C++20 + + template S1, input_iterator I2, sentinel_for S2, + class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> + requires indirectly_comparable + constexpr bool ranges::equal(I1 first1, S1 last1, I2 first2, S2 last2, + Pred pred = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template + requires indirectly_comparable, iterator_t, Pred, Proj1, Proj2> + constexpr bool ranges::equal(R1&& r1, R2&& r2, Pred pred = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + } constexpr bool // constexpr in C++20 @@ -1013,6 +1027,7 @@ template #include <__algorithm/ranges_copy_n.h> #include <__algorithm/ranges_count.h> 
#include <__algorithm/ranges_count_if.h> +#include <__algorithm/ranges_equal.h> #include <__algorithm/ranges_fill.h> #include <__algorithm/ranges_fill_n.h> #include <__algorithm/ranges_find.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index d3a4b45c4d96d5..cf394de39a2d05 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -304,6 +304,7 @@ module std [system] { module ranges_copy_n { private header "__algorithm/ranges_copy_n.h" } module ranges_count { private header "__algorithm/ranges_count.h" } module ranges_count_if { private header "__algorithm/ranges_count_if.h" } + module ranges_equal { private header "__algorithm/ranges_equal.h" } module ranges_fill { private header "__algorithm/ranges_fill.h" } module ranges_fill_n { private header "__algorithm/ranges_fill_n.h" } module ranges_find { private header "__algorithm/ranges_find.h" } diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp index c6a8f003592199..d5b7dd11f710e1 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp @@ -106,8 +106,8 @@ constexpr bool all_the_algorithms() #if TEST_STD_VER > 20 //(void)std::ranges::ends_with(first, last, first2, last2, Equal(&copies)); assert(copies == 0); #endif - //(void)std::ranges::equal(first, last, first2, last2, Equal(&copies)); assert(copies == 0); - //(void)std::ranges::equal(a, b, Equal(&copies)); assert(copies == 0); + (void)std::ranges::equal(first, last, first2, last2, Equal(&copies)); assert(copies == 0); + (void)std::ranges::equal(a, b, Equal(&copies)); assert(copies == 0); //(void)std::ranges::equal_range(first, last, value, Less(&copies)); assert(copies == 0); //(void)std::ranges::equal_range(a, value, Less(&copies)); assert(copies == 0); 
//(void)std::ranges::find_end(first, last, first2, mid2, Equal(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp index ffb4faef12b525..8d6753d5620824 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp @@ -90,8 +90,8 @@ constexpr bool all_the_algorithms() #if TEST_STD_VER > 20 //(void)std::ranges::ends_with(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); #endif - //(void)std::ranges::equal(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::equal(a, b, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); + (void)std::ranges::equal(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); + (void)std::ranges::equal(a, b, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); //(void)std::ranges::equal_range(first, last, value, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::equal_range(a, value, Less(), Proj(&copies)); assert(copies == 0); (void)std::ranges::find(first, last, value, Proj(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index 16186899f3810f..b83308c7b8ead7 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -109,6 +109,7 @@ END-SCRIPT #include <__algorithm/ranges_copy_n.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_copy_n.h'}} #include <__algorithm/ranges_count.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_count.h'}} #include <__algorithm/ranges_count_if.h> // 
expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_count_if.h'}} +#include <__algorithm/ranges_equal.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_equal.h'}} #include <__algorithm/ranges_fill.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_fill.h'}} #include <__algorithm/ranges_fill_n.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_fill_n.h'}} #include <__algorithm/ranges_find.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_find.h'}} diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp new file mode 100644 index 00000000000000..c7becd880a4417 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp @@ -0,0 +1,375 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// template S1, input_iterator I2, sentinel_for S2, +// class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> +// requires indirectly_comparable +// constexpr bool ranges::equal(I1 first1, S1 last1, I2 first2, S2 last2, +// Pred pred = {}, +// Proj1 proj1 = {}, Proj2 proj2 = {}); +// template +// requires indirectly_comparable, iterator_t, Pred, Proj1, Proj2> +// constexpr bool ranges::equal(R1&& r1, R2&& r2, Pred pred = {}, +// Proj1 proj1 = {}, Proj2 proj2 = {}); + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +template , + class Iter2 = Iter1, class Sent2 = sentinel_wrapper> +concept HasEqualIt = requires (Iter1 first1, Sent1 last1, Iter2 first2, Sent2 last2) { + std::ranges::equal(first1, last1, first2, last2); +}; + +static_assert(HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); +static_assert(!HasEqualIt); + +template +concept HasEqualR = requires (Range1 range1, Range2 range2) { + std::ranges::equal(range1, range2); +}; + +static_assert(HasEqualR, UncheckedRange>); +static_assert(!HasEqualR>); +static_assert(!HasEqualR>); +static_assert(!HasEqualR>); +static_assert(!HasEqualR>); +static_assert(!HasEqualR>); +static_assert(!HasEqualR, InputRangeNotDerivedFrom>); +static_assert(!HasEqualR, InputRangeNotIndirectlyReadable>); +static_assert(!HasEqualR, InputRangeNotInputOrOutputIterator>); +static_assert(!HasEqualR, InputRangeNotSentinelSemiregular>); 
+static_assert(!HasEqualR, InputRangeNotSentinelEqualityComparableWith>); +static_assert(!HasEqualR, UncheckedRange>); + +template +constexpr void test_iterators() { + { // simple test + { + int a[] = {1, 2, 3, 4}; + int b[] = {1, 2, 3, 4}; + std::same_as decltype(auto) ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), + Iter2(b), Sent2(Iter2(b + 4))); + assert(ret); + } + { + int a[] = {1, 2, 3, 4}; + int b[] = {1, 2, 3, 4}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); + std::same_as decltype(auto) ret = std::ranges::equal(range1, range2); + assert(ret); + } + } + + { // check that the predicate is used (return true) + { + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 4, 5}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), + [](int l, int r) { return l != r; }); + assert(ret); + } + { + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 4, 5}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); + auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; }); + assert(ret); + } + } + + { // check that the predicate is used (return false) + { + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 3, 5}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), + [](int l, int r) { return l != r; }); + assert(!ret); + } + { + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 3, 5}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); + auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; }); + assert(!ret); + } + } + + { // check that the projections are used + { + int a[] = {1, 2, 3, 4, 5}; + int b[] = {6, 10, 14, 18, 22}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 5)), + Iter2(b), 
Sent2(Iter2(b + 5)), + {}, + [](int i) { return i * 4; }, + [](int i) { return i - 2; }); + assert(ret); + } + { + int a[] = {1, 2, 3, 4, 5}; + int b[] = {6, 10, 14, 18, 22}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 5))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 5))); + auto ret = std::ranges::equal(range1, + range2, + {}, + [](int i) { return i * 4; }, + [](int i) { return i - 2; }); + assert(ret); + } + } + + { // check that different sized ranges work + { + int a[] = {4, 3, 2, 1}; + int b[] = {4, 3, 2, 1, 5, 6, 7}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 7))); + assert(!ret); + } + { + int a[] = {4, 3, 2, 1}; + int b[] = {4, 3, 2, 1, 5, 6, 7}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 7))); + auto ret = std::ranges::equal(range1, range2); + assert(!ret); + } + } + + { // check that two ranges with the same size but different values are different + { + int a[] = {4, 6, 34, 76, 5}; + int b[] = {4, 6, 34, 67, 5}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 5)), Iter2(b), Sent2(Iter2(b + 5))); + assert(!ret); + } + { + int a[] = {4, 6, 34, 76, 5}; + int b[] = {4, 6, 34, 67, 5}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 5))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 5))); + auto ret = std::ranges::equal(range1, range2); + assert(!ret); + } + } + + { // check that two empty ranges work + { + int a[] = {}; + int b[] = {}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a)), Iter2(b), Sent2(Iter2(b))); + assert(ret); + } + { + int a[] = {}; + int b[] = {}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b))); + auto ret = std::ranges::equal(range1, range2); + assert(ret); + } + } + + { // check that it works with the first range empty + { + int 
a[] = {}; + int b[] = {1, 2}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a)), Iter2(b), Sent2(Iter2(b + 2))); + assert(!ret); + } + { + int a[] = {}; + int b[] = {1, 2}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 2))); + auto ret = std::ranges::equal(range1, range2); + assert(!ret); + } + } + + { // check that it works with the second range empty + { + int a[] = {1, 2}; + int b[] = {}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 2)), Iter2(b), Sent2(Iter2(b))); + assert(!ret); + } + { + int a[] = {1, 2}; + int b[] = {}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 2))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b))); + auto ret = std::ranges::equal(range1, range2); + assert(!ret); + } + } +} + +template +constexpr void test_iterators2() { + test_iterators, sentinel_wrapper>>(); + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + test_iterators(); +} + +constexpr bool test() { + test_iterators2, sentinel_wrapper>>(); + test_iterators2, sentinel_wrapper>>(); + test_iterators2>(); + test_iterators2>(); + test_iterators2>(); + test_iterators2>(); + test_iterators2(); + test_iterators2(); + + { // check that std::invoke is used + struct S { + constexpr S(int i_) : i(i_) {} + constexpr bool equal(int o) { return i == o; } + constexpr S& identity() { return *this; } + int i; + }; + { + S a[] = {7, 8, 9}; + S b[] = {7, 8, 9}; + auto ret = std::ranges::equal(a, a + 3, b, b + 3, &S::equal, &S::identity, &S::i); + assert(ret); + } + { + S a[] = {7, 8, 9}; + S b[] = {7, 8, 9}; + auto ret = std::ranges::equal(a, b, &S::equal, &S::identity, &S::i); + assert(ret); + } + } + + { // check that the complexity requirements are met + { // different size + { + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; + int predCount = 0; + int projCount = 0; 
+ auto pred = [&](int l, int r) { ++predCount; return l == r; }; + auto proj = [&](int i) { ++projCount; return i; }; + auto ret = std::ranges::equal(a, a + 3, b, b + 4, pred, proj, proj); + assert(!ret); + assert(predCount == 0); + assert(projCount == 0); + } + { + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; + int predCount = 0; + int projCount = 0; + auto pred = [&](int l, int r) { ++predCount; return l == r; }; + auto proj = [&](int i) { ++projCount; return i; }; + auto ret = std::ranges::equal(a, b, pred, proj, proj); + assert(!ret); + assert(predCount == 0); + assert(projCount == 0); + } + } + + { // not a sized sentinel + { + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; + int predCount = 0; + int projCount = 0; + auto pred = [&](int l, int r) { ++predCount; return l == r; }; + auto proj = [&](int i) { ++projCount; return i; }; + auto ret = std::ranges::equal(a, sentinel_wrapper(a + 3), b, sentinel_wrapper(b + 4), pred, proj, proj); + assert(!ret); + assert(predCount <= 4); + assert(projCount <= 7); + } + { + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; + int predCount = 0; + int projCount = 0; + auto pred = [&](int l, int r) { ++predCount; return l == r; }; + auto proj = [&](int i) { ++projCount; return i; }; + auto range1 = std::ranges::subrange(a, sentinel_wrapper(a + 3)); + auto range2 = std::ranges::subrange(b, sentinel_wrapper(b + 4)); + auto ret = std::ranges::equal(range1, range2, pred, proj, proj); + assert(!ret); + assert(predCount <= 4); + assert(projCount <= 7); + } + } + + { // same size + { + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3}; + int predCount = 0; + int projCount = 0; + auto pred = [&](int l, int r) { ++predCount; return l == r; }; + auto proj = [&](int i) { ++projCount; return i; }; + auto ret = std::ranges::equal(a, a + 3, b, b + 3, pred, proj, proj); + assert(ret); + assert(predCount == 3); + assert(projCount == 6); + } + { + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3}; + int predCount = 0; + int projCount = 0; + auto pred = 
[&](int l, int r) { ++predCount; return l == r; }; + auto proj = [&](int i) { ++projCount; return i; }; + auto ret = std::ranges::equal(a, b, pred, proj, proj); + assert(ret); + assert(predCount == 3); + assert(projCount == 6); + } + } + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} From 51f63589aec0a3960918bfad1146ebd20f213950 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 May 2022 08:51:13 +0000 Subject: [PATCH 560/908] [gn build] Port 569d6630204d --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 4e7861d3a8b2bc..f3e3e89bdd5c08 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -128,6 +128,7 @@ if (current_toolchain == default_toolchain) { "__algorithm/ranges_copy_n.h", "__algorithm/ranges_count.h", "__algorithm/ranges_count_if.h", + "__algorithm/ranges_equal.h", "__algorithm/ranges_fill.h", "__algorithm/ranges_fill_n.h", "__algorithm/ranges_find.h", From 2c9983f530512ce5be99b53c8cd895f1d2a3a04a Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 26 May 2022 10:00:33 +0100 Subject: [PATCH 561/908] [RISCV][NFC] Add braces to 'else' to match braced 'if' --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 2fc29d0d067b6a..0ee92d3bb20b82 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -613,8 +613,9 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, } else { InstrInfo.setAVLReg(VLOp.getReg()); } - } else + } else { InstrInfo.setAVLReg(RISCV::NoRegister); + } InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, 
MaskRegOp, StoreOp, ScalarMovOp); From 75631438e333225bc8d738c901627a940b3232c3 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 26 May 2022 10:16:21 +0100 Subject: [PATCH 562/908] [AArch64] Costmodel tests for llvm.vscale intrinsics. NFC These show that the cost of a @llvm.vscale is indeed 1, not 10. --- .../Analysis/CostModel/AArch64/sve-vscale.ll | 23 ++++ .../LoopVectorize/AArch64/sve-fneg.ll | 101 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-vscale.ll create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-vscale.ll b/llvm/test/Analysis/CostModel/AArch64/sve-vscale.ll new file mode 100644 index 00000000000000..39fbadcdf909c5 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-vscale.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -passes='print' 2>&1 -disable-output < %s | FileCheck %s + +define i32 @vscale32() { +; CHECK-LABEL: 'vscale32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = call i32 @llvm.vscale.i32() +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %c +; + %c = call i32 @llvm.vscale.i32() + ret i32 %c +} + +define i64 @vscale64() { +; CHECK-LABEL: 'vscale64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = call i64 @llvm.vscale.i64() +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %c +; + %c = call i64 @llvm.vscale.i64() + ret i64 %c +} + +declare i32 @llvm.vscale.i32() +declare i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll new file mode 100644 index 00000000000000..2fc3b300fd0b3c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -0,0 
+1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-vectorize -dce -mtriple aarch64-linux-gnu -mattr=+sve < %s -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; This should be vscale x 8 vectorized, maybe with some interleaving. + +define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef readonly %s, i32 noundef %n) { +; CHECK-LABEL: @fneg( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S2:%.*]] = ptrtoint ptr [[S:%.*]] to i64 +; CHECK-NEXT: [[D1:%.*]] = ptrtoint ptr [[D:%.*]] to i64 +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[D1]], [[S2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i32 [[TMP12]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 2 +; CHECK-NEXT: [[TMP14:%.*]] = fneg [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP15:%.*]] = fneg [[WIDE_LOAD3]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i32 0 +; CHECK-NEXT: store [[TMP14]], ptr [[TMP17]], align 2 +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i32 [[TMP19]] +; CHECK-NEXT: store [[TMP15]], ptr [[TMP20]], align 2 +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; 
CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[FNEG:%.*]] = fneg half [[TMP24]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store half [[FNEG]], ptr [[ARRAYIDX2]], align 2 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds half, ptr %s, i64 %indvars.iv + %0 = load half, ptr %arrayidx, align 2 + %fneg = fneg half %0 + %arrayidx2 = getelementptr inbounds half, ptr %d, i64 %indvars.iv + store half %fneg, ptr %arrayidx2, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} From 38eb4fe74b3843ab0d7fc1eb95260caa8fbe96d7 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 26 May 2022 09:27:23 +0000 Subject: [PATCH 563/908] [llvm][DWARF] Move test using X86 triple into X86 tests Fixes failure seen when building without X86 backend: https://lab.llvm.org/buildbot/#/builders/171/builds/15124 --- 
llvm/test/CodeGen/{Generic => X86}/dwarf-aranges-zero-size.ll | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/{Generic => X86}/dwarf-aranges-zero-size.ll (100%) diff --git a/llvm/test/CodeGen/Generic/dwarf-aranges-zero-size.ll b/llvm/test/CodeGen/X86/dwarf-aranges-zero-size.ll similarity index 100% rename from llvm/test/CodeGen/Generic/dwarf-aranges-zero-size.ll rename to llvm/test/CodeGen/X86/dwarf-aranges-zero-size.ll From a9a012086a917dff367bb63de2d63782b23111fc Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 May 2022 10:35:38 +0100 Subject: [PATCH 564/908] [AArch64] Add additional tests for sinking free shuffles for FMAs. --- .../AArch64/sink-free-instructions.ll | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll index 244d2c35bbacbc..5d7a26f6578455 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll @@ -585,6 +585,43 @@ if.else: ret <4 x float> %r.3 } +define <4 x float> @sink_shufflevector_first_arg_fma_v4f3(i1 %c, <8 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @sink_shufflevector_first_arg_fma_v4f3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S0]], <4 x float> [[B:%.*]], <4 x float> [[B]]) +; CHECK-NEXT: 
[[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S1]], <4 x float> [[R_0]], <4 x float> [[B]]) +; CHECK-NEXT: ret <4 x float> [[R_1]] +; CHECK: if.else: +; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S2]], <4 x float> [[B]], <4 x float> [[B]]) +; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S3]], <4 x float> [[R_2]], <4 x float> [[B]]) +; CHECK-NEXT: ret <4 x float> [[R_3]] +; +entry: + %s0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> zeroinitializer + %s1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + %s2 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + %s3 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + br i1 %c, label %if.then, label %if.else + +if.then: + %r.0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s0, <4 x float> %b, <4 x float> %b) + %r.1 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s1, <4 x float> %r.0, <4 x float> %b) + ret <4 x float> %r.1 + +if.else: + %r.2 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s2, <4 x float> %b, <4 x float> %b) + %r.3 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s3, <4 x float> %r.2, <4 x float> %b) + ret <4 x float> %r.3 +} + + + declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x double> @sink_shufflevector_fma_v2f64(i1 %c, <2 x double> %a, <2 x double> %b) { @@ -639,8 +676,8 @@ if.else: declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>) -define <5 x float> @do_not_sink_shufflevector_fma_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) { -; CHECK-LABEL: @do_not_sink_shufflevector_fma_v5f32( +define <5 x float> @sink_shufflevector_fma_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) { +; CHECK-LABEL: @sink_shufflevector_fma_v5f32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> 
zeroinitializer ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> From b45e04685894a9d8467a291e2d9e8298330a5ab7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 May 2022 11:13:13 +0100 Subject: [PATCH 565/908] [X86] Add non-uniform vector tests for 'one bit diff' comparison fold --- llvm/test/CodeGen/X86/setcc-logic.ll | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll index 3e7d7d32e0e768..af9335aecab100 100644 --- a/llvm/test/CodeGen/X86/setcc-logic.ll +++ b/llvm/test/CodeGen/X86/setcc-logic.ll @@ -523,6 +523,21 @@ define i1 @or_icmps_const_1bit_diff(i8 %x) { ret i1 %r } +define <4 x i32> @or_icmps_const_1bit_diff_vec(<4 x i32> %x) { +; CHECK-LABEL: or_icmps_const_1bit_diff_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [43,45,43,45] +; CHECK-NEXT: pcmpeqd %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: retq + %a = icmp eq <4 x i32> %x, + %b = icmp eq <4 x i32> %x, + %t = or <4 x i1> %a, %b + %r = sext <4 x i1> %t to <4 x i32> + ret <4 x i32> %r +} + define i1 @and_icmps_const_1bit_diff(i32 %x) { ; CHECK-LABEL: and_icmps_const_1bit_diff: ; CHECK: # %bb.0: @@ -536,6 +551,24 @@ define i1 @and_icmps_const_1bit_diff(i32 %x) { ret i1 %r } +define <4 x i32> @and_icmps_const_1bit_diff_vec(<4 x i32> %x) { +; CHECK-LABEL: and_icmps_const_1bit_diff_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [44,60,44,60] +; CHECK-NEXT: pcmpeqd %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 +; CHECK-NEXT: pxor %xmm0, %xmm2 +; CHECK-NEXT: pandn %xmm2, %xmm1 +; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: retq + %a = icmp ne <4 x i32> %x, + %b = icmp ne <4 x i32> %x, + %t = and <4 x i1> %a, %b + %r = sext <4 x i1> %t to <4 x i32> + ret <4 x i32> %r +} + ; Negative test - 
extra use prevents optimization define i1 @or_icmps_const_1bit_diff_extra_use(i8 %x, i8* %p) { From 73c3dff1b3e42c278f4c675418b0cb5e8784174b Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Wed, 25 May 2022 15:57:22 +0200 Subject: [PATCH 566/908] [mlir] Use-after-free checker for the Transform dialect The Transform dialect uses the side effect modeling mechanism to record the effects of the transform ops on the mapping between Transform IR values and Payload IR ops. Introduce a checker pass that warns if a Transform IR value is used after it has been freed (consumed). This pass is mostly intended as a debugging aid in addition to the verification/assertion mechanisms in the transform interpreter. It reports all potential use-after-free situations. The implementation makes a series of simplifying assumptions to be simple and conservative. A more advanced implementation would rely on the data flow-like analysis associated with a side-effect resource rather than a value, which is currently not supported by the analysis infrastructure. 
Reviewed By: springerm Differential Revision: https://reviews.llvm.org/D126381 --- .../mlir/Dialect/Transform/CMakeLists.txt | 1 + .../mlir/Dialect/Transform/IR/TransformOps.h | 1 + .../mlir/Dialect/Transform/IR/TransformOps.td | 6 +- .../Transform/Transforms/CMakeLists.txt | 5 + .../Dialect/Transform/Transforms/Passes.h | 26 ++ .../Dialect/Transform/Transforms/Passes.td | 36 ++ mlir/include/mlir/InitAllPasses.h | 2 + mlir/lib/Dialect/Transform/CMakeLists.txt | 1 + .../lib/Dialect/Transform/IR/TransformOps.cpp | 30 ++ .../Transform/Transforms/CMakeLists.txt | 11 + .../Transform/Transforms/CheckUses.cpp | 402 ++++++++++++++++++ .../Transform/check-use-after-free.mlir | 169 ++++++++ .../TestTransformDialectExtension.cpp | 15 + .../TestTransformDialectExtension.td | 16 + .../llvm-project-overlay/mlir/BUILD.bazel | 44 ++ 15 files changed, 764 insertions(+), 1 deletion(-) create mode 100644 mlir/include/mlir/Dialect/Transform/Transforms/CMakeLists.txt create mode 100644 mlir/include/mlir/Dialect/Transform/Transforms/Passes.h create mode 100644 mlir/include/mlir/Dialect/Transform/Transforms/Passes.td create mode 100644 mlir/lib/Dialect/Transform/Transforms/CMakeLists.txt create mode 100644 mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp create mode 100644 mlir/test/Dialect/Transform/check-use-after-free.mlir diff --git a/mlir/include/mlir/Dialect/Transform/CMakeLists.txt b/mlir/include/mlir/Dialect/Transform/CMakeLists.txt index f33061b2d87cff..9f57627c321fb0 100644 --- a/mlir/include/mlir/Dialect/Transform/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Transform/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(IR) +add_subdirectory(Transforms) diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h index 9714b77ad9683c..70ef6771478fc3 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h @@ -14,6 +14,7 @@ #include 
"mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" #define GET_OP_CLASSES #include "mlir/Dialect/Transform/IR/TransformOps.h.inc" diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 2548492767df3e..0f1db4549e04f8 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -9,6 +9,7 @@ #ifndef MLIR_DIALECT_TRANSFORM_IR_TRANSFORMOPS #define MLIR_DIALECT_TRANSFORM_IR_TRANSFORMOPS +include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/IR/OpAsmInterface.td" include "mlir/IR/SymbolInterfaces.td" include "mlir/Dialect/PDL/IR/PDLTypes.td" @@ -48,7 +49,10 @@ def PDLMatchOp : TransformDialectOp<"pdl_match", } def SequenceOp : TransformDialectOp<"sequence", - [DeclareOpInterfaceMethods, + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, OpAsmOpInterface, PossibleTopLevelTransformOpTrait, SingleBlockImplicitTerminator<"::mlir::transform::YieldOp">]> { diff --git a/mlir/include/mlir/Dialect/Transform/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/Transform/Transforms/CMakeLists.txt new file mode 100644 index 00000000000000..3a399e6e0513e2 --- /dev/null +++ b/mlir/include/mlir/Dialect/Transform/Transforms/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Transform) +add_public_tablegen_target(MLIRTransformDialectTransformsIncGen) + +add_mlir_doc(Passes TransformPasses ./ -gen-pass-doc) diff --git a/mlir/include/mlir/Dialect/Transform/Transforms/Passes.h b/mlir/include/mlir/Dialect/Transform/Transforms/Passes.h new file mode 100644 index 00000000000000..a567db6bc8e7a4 --- /dev/null +++ b/mlir/include/mlir/Dialect/Transform/Transforms/Passes.h @@ -0,0 +1,26 @@ +//===- CheckUses.h - Expensive transform value validity 
checks --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_TRANSFORM_TRANSFORMS_PASSES_H +#define MLIR_DIALECT_TRANSFORM_TRANSFORMS_PASSES_H + +#include "mlir/Pass/Pass.h" +#include + +namespace mlir { +class Pass; + +namespace transform { +std::unique_ptr createCheckUsesPass(); + +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Transform/Transforms/Passes.h.inc" +} // namespace transform +} // namespace mlir + +#endif // MLIR_DIALECT_TRANSFORM_TRANSFORMS_PASSES_H diff --git a/mlir/include/mlir/Dialect/Transform/Transforms/Passes.td b/mlir/include/mlir/Dialect/Transform/Transforms/Passes.td new file mode 100644 index 00000000000000..1d7d8652c65015 --- /dev/null +++ b/mlir/include/mlir/Dialect/Transform/Transforms/Passes.td @@ -0,0 +1,36 @@ +//===-- Passes.td - Transform dialect pass definitions -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_TRANSFORM_TRANSFORMS_PASSES +#define MLIR_DIALECT_TRANSFORM_TRANSFORMS_PASSES + +include "mlir/Pass/PassBase.td" + +def CheckUses : Pass<"transform-dialect-check-uses"> { + let summary = "warn about potential use-after-free in the transform dialect"; + let description = [{ + This pass analyzes operations from the transform dialect and its extensions + and warns if a transform IR value may be used by an operation after it was + "freed" by some other operation, as described by side effects on the + `TransformMappingResource`. 
This statically detects situations that lead to + errors when interpreting the Transform IR. + + The pass is capable of handling branching control flow and reports all + _potential_ use-after-free situations, e.g., a may-use-after-free is + reported if at least one of the control flow paths between the definition of + a value and its use contains an operation with a "free" effect on the + `TransformMappingResource`. It does not currently perform an SCCP-style data + flow analysis to prove that some branches are not taken, however, SCCP and + other control flow simplifications can be performed on the transform IR + prior to this pass provided that transform ops implement the relevant + control flow interfaces. + }]; + let constructor = "::mlir::transform::createCheckUsesPass()"; +} + +#endif // MLIR_DIALECT_TRANSFORM_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h index ccbb74d5a7b26f..d6aacb4f6da83e 100644 --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -32,6 +32,7 @@ #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" #include "mlir/Dialect/Tensor/Transforms/Passes.h" #include "mlir/Dialect/Tosa/Transforms/Passes.h" +#include "mlir/Dialect/Transform/Transforms/Passes.h" #include "mlir/Dialect/Vector/Transforms/Passes.h" #include "mlir/Transforms/Passes.h" @@ -72,6 +73,7 @@ inline void registerAllPasses() { spirv::registerSPIRVPasses(); tensor::registerTensorPasses(); tosa::registerTosaOptPasses(); + transform::registerTransformPasses(); vector::registerVectorPasses(); // Dialect pipelines diff --git a/mlir/lib/Dialect/Transform/CMakeLists.txt b/mlir/lib/Dialect/Transform/CMakeLists.txt index f33061b2d87cff..9f57627c321fb0 100644 --- a/mlir/lib/Dialect/Transform/CMakeLists.txt +++ b/mlir/lib/Dialect/Transform/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(IR) +add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp 
b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 3dcf41447b77b0..fa3ba7e21d7de3 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -13,6 +13,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Rewrite/FrozenRewritePatternSet.h" #include "mlir/Rewrite/PatternApplicator.h" #include "llvm/ADT/ScopeExit.h" @@ -289,6 +290,35 @@ void transform::SequenceOp::getEffects( } } +OperandRange transform::SequenceOp::getSuccessorEntryOperands(unsigned index) { + assert(index == 0 && "unexpected region index"); + if (getOperation()->getNumOperands() == 1) + return getOperation()->getOperands(); + return OperandRange(getOperation()->operand_end(), + getOperation()->operand_end()); +} + +void transform::SequenceOp::getSuccessorRegions( + Optional index, ArrayRef operands, + SmallVectorImpl ®ions) { + if (!index.hasValue()) { + Region *bodyRegion = &getBody(); + regions.emplace_back(bodyRegion, !operands.empty() + ? 
bodyRegion->getArguments() + : Block::BlockArgListType()); + return; + } + + assert(*index == 0 && "unexpected region index"); + regions.emplace_back(getOperation()->getResults()); +} + +void transform::SequenceOp::getRegionInvocationBounds( + ArrayRef operands, SmallVectorImpl &bounds) { + (void)operands; + bounds.emplace_back(1, 1); +} + //===----------------------------------------------------------------------===// // WithPDLPatternsOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Transform/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Transform/Transforms/CMakeLists.txt new file mode 100644 index 00000000000000..68ad95bae3dbc5 --- /dev/null +++ b/mlir/lib/Dialect/Transform/Transforms/CMakeLists.txt @@ -0,0 +1,11 @@ +add_mlir_dialect_library(MLIRTransformDialectTransforms + CheckUses.cpp + + DEPENDS + MLIRTransformDialectTransformsIncGen + + LINK_LIBS PUBLIC + MLIRTransformDialect + MLIRIR + MLIRPass +) diff --git a/mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp b/mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp new file mode 100644 index 00000000000000..a325a62067f9ac --- /dev/null +++ b/mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp @@ -0,0 +1,402 @@ +//===- CheckUses.cpp - Expensive transform value validity checks ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a pass that performs expensive opt-in checks for Transform +// dialect values being potentially used after they have been consumed. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/DataFlowAnalysis.h" +#include "mlir/Dialect/Transform/IR/TransformInterfaces.h" +#include "mlir/Dialect/Transform/Transforms/Passes.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/Pass/Pass.h" +#include "llvm/ADT/SetOperations.h" + +using namespace mlir; + +namespace { + +/// Returns a reference to a cached set of blocks that are reachable from the +/// given block via edges computed by the `getNextNodes` function. For example, +/// if `getNextNodes` returns successors of a block, this will return the set of +/// reachable blocks; if it returns predecessors of a block, this will return +/// the set of blocks from which the given block can be reached. The block is +/// considered reachable form itself only if there is a cycle. +template +const llvm::SmallPtrSet & +getReachableImpl(Block *block, FnTy getNextNodes, + DenseMap> &cache) { + auto it = cache.find(block); + if (it != cache.end()) + return it->getSecond(); + + llvm::SmallPtrSet &reachable = cache[block]; + SmallVector worklist; + worklist.push_back(block); + while (!worklist.empty()) { + Block *current = worklist.pop_back_val(); + for (Block *predecessor : getNextNodes(current)) { + // The block is reachable from its transitive predecessors. Only add + // them to the worklist if they weren't already visited. + if (reachable.insert(predecessor).second) + worklist.push_back(predecessor); + } + } + return reachable; +} + +/// An analysis that identifies whether a value allocated by a Transform op may +/// be used by another such op after it may have been freed by a third op on +/// some control flow path. 
This is conceptually similar to a data flow +/// analysis, but relies on side effects related to particular values that +/// currently cannot be modeled by the MLIR data flow analysis framework (also, +/// the lattice element would be rather expensive as it would need to include +/// live and/or freed values for each operation). +/// +/// This analysis is conservatively pessimisic: it will consider that a value +/// may be freed if it is freed on any possible control flow path between its +/// allocation and a relevant use, even if the control never actually flows +/// through the operation that frees the value. It also does not differentiate +/// between may- (freed on at least one control flow path) and must-free (freed +/// on all possible control flow paths) because it would require expensive graph +/// algorithms. +/// +/// It is intended as an additional non-blocking verification or debugging aid +/// for ops in the Transform dialect. It leverages the requirement for Transform +/// dialect ops to implement the MemoryEffectsOpInterface, and expects the +/// values in the Transform IR to have an allocation effect on the +/// TransformMappingResource when defined. +class TransformOpMemFreeAnalysis { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TransformOpMemFreeAnalysis) + + /// Computes the analysis for Transform ops nested in the given operation. + explicit TransformOpMemFreeAnalysis(Operation *root) { + root->walk([&](Operation *op) { + if (isa(op)) { + collectFreedValues(op); + return WalkResult::skip(); + } + return WalkResult::advance(); + }); + } + + /// A list of operations that may be deleting a value. Non-empty list + /// contextually converts to boolean "true" value. + class PotentialDeleters { + public: + /// Creates an empty list that corresponds to the value being live. + static PotentialDeleters live() { return PotentialDeleters({}); } + + /// Creates a list from the operations that may be deleting the value. 
+ static PotentialDeleters maybeFreed(ArrayRef deleters) { + return PotentialDeleters(deleters); + } + + /// Converts to "true" if there are operations that may be deleting the + /// value. + explicit operator bool() const { return !deleters.empty(); } + + /// Concatenates the lists of operations that may be deleting the value. The + /// value is known to be live if the reuslting list is still empty. + PotentialDeleters &operator|=(const PotentialDeleters &other) { + llvm::append_range(deleters, other.deleters); + return *this; + } + + /// Returns the list of ops that may be deleting the value. + ArrayRef getOps() const { return deleters; } + + private: + /// Constructs the list from the given operations. + explicit PotentialDeleters(ArrayRef ops) { + llvm::append_range(deleters, ops); + } + + /// The list of operations that may be deleting the value. + SmallVector deleters; + }; + + /// Returns the list of operations that may be deleting the operand value on + /// any control flow path between the definition of the value and its use as + /// the given operand. For the purposes of this analysis, the value is + /// considered to be allocated at its definition point and never re-allocated. + PotentialDeleters isUseLive(OpOperand &operand) { + const llvm::SmallPtrSet &deleters = freedBy[operand.get()]; + if (deleters.empty()) + return live(); + +#ifndef NDEBUG + // Check that the definition point actually allcoates the value. + Operation *valueSource = + operand.get().isa() + ? operand.get().getDefiningOp() + : operand.get().getParentBlock()->getParentOp(); + auto iface = cast(valueSource); + SmallVector instances; + iface.getEffectsOnResource(transform::TransformMappingResource::get(), + instances); + assert(hasEffect(instances, operand.get()) && + "expected the op defining the value to have an allocation effect " + "on it"); +#endif + + // Collect ancestors of the use operation. 
+ Block *defBlock = operand.get().getParentBlock(); + SmallVector ancestors; + Operation *ancestor = operand.getOwner(); + do { + ancestors.push_back(ancestor); + if (ancestor->getParentRegion() == defBlock->getParent()) + break; + ancestor = ancestor->getParentOp(); + } while (true); + std::reverse(ancestors.begin(), ancestors.end()); + + // Consider the control flow from the definition point of the value to its + // use point. If the use is located in some nested region, consider the path + // from the entry block of the region to the use. + for (Operation *ancestor : ancestors) { + // The block should be considered partially if it is the block that + // contains the definition (allocation) of the value being used, and the + // value is defined in the middle of the block, i.e., is not a block + // argument. + bool isOutermost = ancestor == ancestors.front(); + bool isFromBlockPartial = isOutermost && operand.get().isa(); + + // Check if the value may be freed by operations between its definition + // (allocation) point in its block and the terminator of the block or the + // ancestor of the use if it is located in the same block. This is only + // done for partial blocks here, full blocks will be considered below + // similarly to other blocks. + if (isFromBlockPartial) { + bool defUseSameBlock = ancestor->getBlock() == defBlock; + // Consider all ops from the def to its block terminator, except the + // when the use is in the same block, in which case only consider the + // ops until the user. + if (PotentialDeleters potentialDeleters = isFreedInBlockAfter( + operand.get().getDefiningOp(), operand.get(), + defUseSameBlock ? ancestor : nullptr)) + return potentialDeleters; + } + + // Check if the value may be freed by opeations preceding the ancestor in + // its block. Skip the check for partial blocks that contain both the + // definition and the use point, as this has been already checked above. 
+ if (!isFromBlockPartial || ancestor->getBlock() != defBlock) { + if (PotentialDeleters potentialDeleters = + isFreedInBlockBefore(ancestor, operand.get())) + return potentialDeleters; + } + + // Check if the value may be freed by operations in any of the blocks + // between the definition point (in the outermost region) or the entry + // block of the region (in other regions) and the operand or its ancestor + // in the region. This includes the entire "form" block if (1) the block + // has not been considered as partial above and (2) the block can be + // reached again through some control-flow loop. This includes the entire + // "to" block if it can be reached form itself through some control-flow + // cycle, regardless of whether it has been visited before. + Block *ancestorBlock = ancestor->getBlock(); + Block *from = + isOutermost ? defBlock : &ancestorBlock->getParent()->front(); + if (PotentialDeleters potentialDeleters = + isMaybeFreedOnPaths(from, ancestorBlock, operand.get(), + /*alwaysIncludeFrom=*/!isFromBlockPartial)) + return potentialDeleters; + } + return live(); + } + +private: + /// Make PotentialDeleters constructors available with shorter names. + static PotentialDeleters maybeFreed(ArrayRef deleters) { + return PotentialDeleters::maybeFreed(deleters); + } + static PotentialDeleters live() { return PotentialDeleters::live(); } + + /// Returns the list of operations that may be deleting the given value betwen + /// the first and last operations, non-inclusive. `getNext` indicates the + /// direction of the traversal. 
+ PotentialDeleters + isFreedBetween(Value value, Operation *first, Operation *last, + llvm::function_ref getNext) const { + auto it = freedBy.find(value); + if (it == freedBy.end()) + return live(); + const llvm::SmallPtrSet &deleters = it->getSecond(); + for (Operation *op = getNext(first); op != last; op = getNext(op)) { + if (deleters.contains(op)) + return maybeFreed(op); + } + return live(); + } + + /// Returns the list of operations that may be deleting the given value + /// between `root` and `before`. `root` is expected to be in the same + /// block as `before` and precede it. If `before` is null, consider all + /// operations until the end of the block including the terminator. + PotentialDeleters isFreedInBlockAfter(Operation *root, Value value, + Operation *before = nullptr) const { + return isFreedBetween(value, root, before, + [](Operation *op) { return op->getNextNode(); }); + } + + /// Returns the list of operations that may be deleting the given value + /// between the entry of the block and the `root` operation. + PotentialDeleters isFreedInBlockBefore(Operation *root, Value value) const { + return isFreedBetween(value, root, nullptr, + [](Operation *op) { return op->getPrevNode(); }); + } + + /// Returns the list of operations that may be deleting the given value on + /// any of the control flow paths between the "from" and the "to" block. The + /// operations from any block visited on any control flow path are + /// considered. The "from" block is considered if there is a control flow + /// cycle going through it, i.e., if there is a possibility that all + /// operations in this block are visited or if the `alwaysIncludeFrom` flag is + /// set. The "to" block is considered only if there is a control flow cycle + /// going through it. 
+ PotentialDeleters isMaybeFreedOnPaths(Block *from, Block *to, Value value, + bool alwaysIncludeFrom) { + // Find all blocks that lie on any path between "from" and "to", i.e., the + // intersection of blocks reachable from "from" and blocks from which "to" + // is reachable. + const llvm::SmallPtrSet &sources = getReachableFrom(to); + if (!sources.contains(from)) + return live(); + + llvm::SmallPtrSet reachable(getReachable(from)); + llvm::set_intersect(reachable, sources); + + // If requested, include the "from" block that may not be present in the set + // of visited blocks when there is no cycle going through it. + if (alwaysIncludeFrom) + reachable.insert(from); + + // Look up the deleters once with `find`: `operator[]` would insert an empty + // entry into `freedBy` for every queried value and repeat the lookup for + // each visited operation. A value without recorded deleters is live. + auto it = freedBy.find(value); + if (it == freedBy.end()) + return live(); + const auto &deleters = it->getSecond(); + + // Join potential deleters from all blocks as we don't know here which of + // the paths through the control flow is taken. + PotentialDeleters potentialDeleters = live(); + for (Block *block : reachable) { + for (Operation &op : *block) { + if (deleters.contains(&op)) + potentialDeleters |= maybeFreed(&op); + } + } + return potentialDeleters; + } + + /// Returns the set of blocks that are reachable from the given block. + /// A block is considered reachable from itself if there is a + /// cycle in the control-flow graph that involves the block. + const llvm::SmallPtrSet &getReachable(Block *block) { + return getReachableImpl( + block, [](Block *b) { return b->getSuccessors(); }, reachableCache); + } + + /// Returns the set of blocks from which the given block is + /// reachable. + const llvm::SmallPtrSet &getReachableFrom(Block *block) { + return getReachableImpl( + block, [](Block *b) { return b->getPredecessors(); }, + reachableFromCache); + } + + /// Returns true if `instances` contains an effect of `EffectTy` on `value`. 
+ template + static bool hasEffect(ArrayRef instances, + Value value) { + return llvm::any_of(instances, + [&](const MemoryEffects::EffectInstance &instance) { + return instance.getValue() == value && + isa(instance.getEffect()); + }); + } + + /// Records the values that are being freed by an operation or any of its + /// children in `freedBy`. + void collectFreedValues(Operation *root) { + SmallVector instances; + root->walk([&](Operation *child) { + // TODO: extend this to conservatively handle operations with undeclared + // side effects as maybe freeing the operands. + auto iface = cast(child); + instances.clear(); + iface.getEffectsOnResource(transform::TransformMappingResource::get(), + instances); + for (Value operand : child->getOperands()) { + if (hasEffect(instances, operand)) { + // All parents of the operation that frees a value should be + // considered as potentially freeing the value as well. + // + // TODO: differentiate between must-free/may-free as well as between + // this op having the effect and children having the effect. This may + // require some analysis of all control flow paths through the nested + // regions as well as a mechanism to separate proper side effects from + // those obtained by nesting. + Operation *parent = child; + do { + freedBy[operand].insert(parent); + if (parent == root) + break; + parent = parent->getParentOp(); + } while (true); + } + } + }); + } + + /// The mapping from a value to operations that have a Free memory effect on + /// the TransformMappingResource and associated with this value, or to + /// Transform operations transitively containing such operations. + DenseMap> freedBy; + + /// Caches for sets of reachable blocks. + DenseMap> reachableCache; + DenseMap> reachableFromCache; +}; + +#define GEN_PASS_CLASSES +#include "mlir/Dialect/Transform/Transforms/Passes.h.inc" + +/// A simple pass that warns about any use of a value by a transform operation +/// that may be using the value after it has been freed. 
+class CheckUsesPass : public CheckUsesBase { +public: + void runOnOperation() override { + auto &analysis = getAnalysis(); + + getOperation()->walk([&](Operation *child) { + for (OpOperand &operand : child->getOpOperands()) { + TransformOpMemFreeAnalysis::PotentialDeleters deleters = + analysis.isUseLive(operand); + if (!deleters) + continue; + + InFlightDiagnostic diag = child->emitWarning() + << "operand #" << operand.getOperandNumber() + << " may be used after free"; + diag.attachNote(operand.get().getLoc()) << "allocated here"; + for (Operation *d : deleters.getOps()) { + diag.attachNote(d->getLoc()) << "freed here"; + } + } + }); + } +}; + +} // namespace + +namespace mlir { +namespace transform { +std::unique_ptr createCheckUsesPass() { + return std::make_unique(); +} +} // namespace transform +} // namespace mlir diff --git a/mlir/test/Dialect/Transform/check-use-after-free.mlir b/mlir/test/Dialect/Transform/check-use-after-free.mlir new file mode 100644 index 00000000000000..6ef865a5bab76c --- /dev/null +++ b/mlir/test/Dialect/Transform/check-use-after-free.mlir @@ -0,0 +1,169 @@ +// RUN: mlir-opt %s --transform-dialect-check-uses --split-input-file --verify-diagnostics + +func.func @use_after_free_branching_control_flow() { + // expected-note @below {{allocated here}} + %0 = transform.test_produce_param_or_forward_operand 42 + transform.test_transform_op_with_regions { + "transform.test_branching_transform_op_terminator"() : () -> () + }, + { + ^bb0: + "transform.test_branching_transform_op_terminator"()[^bb1, ^bb2] : () -> () + ^bb1: + // expected-note @below {{freed here}} + transform.test_consume_operand_if_matches_param_or_fail %0[42] + "transform.test_branching_transform_op_terminator"()[^bb3] : () -> () + ^bb2: + "transform.test_branching_transform_op_terminator"()[^bb3] : () -> () + ^bb3: + // expected-warning @below {{operand #0 may be used after free}} + transform.sequence %0 { + ^bb0(%arg0: !pdl.operation): + } + 
"transform.test_branching_transform_op_terminator"() : () -> () + } + return +} + +// ----- + +func.func @use_after_free_in_nested_op() { + // expected-note @below {{allocated here}} + %0 = transform.test_produce_param_or_forward_operand 42 + // expected-note @below {{freed here}} + transform.test_transform_op_with_regions { + "transform.test_branching_transform_op_terminator"() : () -> () + }, + { + ^bb0: + "transform.test_branching_transform_op_terminator"()[^bb1, ^bb2] : () -> () + ^bb1: + transform.test_consume_operand_if_matches_param_or_fail %0[42] + "transform.test_branching_transform_op_terminator"()[^bb3] : () -> () + ^bb2: + "transform.test_branching_transform_op_terminator"()[^bb3] : () -> () + ^bb3: + "transform.test_branching_transform_op_terminator"() : () -> () + } + // expected-warning @below {{operand #0 may be used after free}} + transform.sequence %0 { + ^bb0(%arg0: !pdl.operation): + } + return +} + +// ----- + +func.func @use_after_free_recursive_side_effects() { + transform.sequence { + ^bb0(%arg0: !pdl.operation): + // expected-note @below {{allocated here}} + %0 = transform.sequence %arg0 attributes { ord = 1 } { + ^bb1(%arg1: !pdl.operation): + yield %arg1 : !pdl.operation + } : !pdl.operation + transform.sequence %0 attributes { ord = 2 } { + ^bb2(%arg2: !pdl.operation): + } + transform.sequence %0 attributes { ord = 3 } { + ^bb3(%arg3: !pdl.operation): + } + + // `transform.sequence` has recursive side effects so it has the same "free" + // as the child op it contains. 
+ // expected-note @below {{freed here}} + transform.sequence %0 attributes { ord = 4 } { + ^bb4(%arg4: !pdl.operation): + test_consume_operand_if_matches_param_or_fail %0[42] + } + // expected-warning @below {{operand #0 may be used after free}} + transform.sequence %0 attributes { ord = 5 } { + ^bb3(%arg3: !pdl.operation): + } + } + return +} + +// ----- + +func.func @use_after_free() { + transform.sequence { + ^bb0(%arg0: !pdl.operation): + // expected-note @below {{allocated here}} + %0 = transform.sequence %arg0 attributes { ord = 1 } { + ^bb1(%arg1: !pdl.operation): + yield %arg1 : !pdl.operation + } : !pdl.operation + transform.sequence %0 attributes { ord = 2 } { + ^bb2(%arg2: !pdl.operation): + } + transform.sequence %0 attributes { ord = 3 } { + ^bb3(%arg3: !pdl.operation): + } + + // expected-note @below {{freed here}} + test_consume_operand_if_matches_param_or_fail %0[42] + // expected-warning @below {{operand #0 may be used after free}} + transform.sequence %0 attributes { ord = 5 } { + ^bb3(%arg3: !pdl.operation): + } + } + return +} + +// ----- + +// In the case of a control flow cycle, the operation that uses the value may +// precede the one that frees it in the same block. Both operations should +// be reported as use-after-free. 
+func.func @use_after_free_self_cycle() { + // expected-note @below {{allocated here}} + %0 = transform.test_produce_param_or_forward_operand 42 + transform.test_transform_op_with_regions { + "transform.test_branching_transform_op_terminator"() : () -> () + }, + { + ^bb0: + "transform.test_branching_transform_op_terminator"()[^bb1] : () -> () + ^bb1: + // expected-warning @below {{operand #0 may be used after free}} + transform.sequence %0 { + ^bb0(%arg0: !pdl.operation): + } + // expected-warning @below {{operand #0 may be used after free}} + // expected-note @below {{freed here}} + transform.test_consume_operand_if_matches_param_or_fail %0[42] + "transform.test_branching_transform_op_terminator"()[^bb1, ^bb2] : () -> () + ^bb2: + "transform.test_branching_transform_op_terminator"() : () -> () + } + return +} + + +// ----- + +// Check that the "free" that happens in a cycle is also reported as potential +// use-after-free. +func.func @use_after_free_cycle() { + // expected-note @below {{allocated here}} + %0 = transform.test_produce_param_or_forward_operand 42 + transform.test_transform_op_with_regions { + "transform.test_branching_transform_op_terminator"() : () -> () + }, + { + ^bb0: + "transform.test_branching_transform_op_terminator"()[^bb1, ^bb2] : () -> () + ^bb1: + // expected-warning @below {{operand #0 may be used after free}} + // expected-note @below {{freed here}} + transform.test_consume_operand_if_matches_param_or_fail %0[42] + "transform.test_branching_transform_op_terminator"()[^bb2, ^bb3] : () -> () + ^bb2: + "transform.test_branching_transform_op_terminator"()[^bb1] : () -> () + ^bb3: + "transform.test_branching_transform_op_terminator"() : () -> () + } + return +} + diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp index c9687fa2d5bd68..39a3afce95fc7d 100644 --- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp +++ 
b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp @@ -184,6 +184,21 @@ LogicalResult mlir::test::TestRemoveTestExtensionOp::apply( state.removeExtension(); return success(); } +LogicalResult mlir::test::TestTransformOpWithRegions::apply( + transform::TransformResults &results, transform::TransformState &state) { + return success(); +} + +void mlir::test::TestTransformOpWithRegions::getEffects( + SmallVectorImpl &effects) {} + +LogicalResult mlir::test::TestBranchingTransformOpTerminator::apply( + transform::TransformResults &results, transform::TransformState &state) { + return success(); +} + +void mlir::test::TestBranchingTransformOpTerminator::getEffects( + SmallVectorImpl &effects) {} namespace { /// Test extension of the Transform dialect. Registers additional ops and diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td index d33f7907f27f2f..8623b8a18aaa80 100644 --- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td +++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td @@ -92,5 +92,21 @@ def TestRemoveTestExtensionOp let cppNamespace = "::mlir::test"; } +def TestTransformOpWithRegions + : Op, + DeclareOpInterfaceMethods]> { + let regions = (region AnyRegion:$first, AnyRegion:$second); + let assemblyFormat = "attr-dict-with-keyword regions"; + let cppNamespace = "::mlir::test"; +} + +def TestBranchingTransformOpTerminator + : Op, + DeclareOpInterfaceMethods]> { + let successors = (successor VariadicSuccessor:$succ); + let cppNamespace = "::mlir::test"; +} #endif // MLIR_TESTTRANSFORMDIALECTEXTENSION_TD diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 00fec62b2ac6c5..61171f32174be0 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6266,6 +6266,7 @@ cc_library( ":TosaDialect", 
":TosaToLinalg", ":TransformDialect", + ":TransformDialectTransforms", ":Transforms", ":TransformsPassIncGen", ":VectorOps", @@ -7876,6 +7877,7 @@ td_library( name = "TransformDialectTdFiles", srcs = glob(["include/mlir/Dialect/Transform/IR/*.td"]), deps = [ + ":ControlFlowInterfacesTdFiles", ":OpBaseTdFiles", ":PDLDialectTdFiles", ":SideEffectInterfacesTdFiles", @@ -7949,6 +7951,7 @@ cc_library( srcs = glob(["lib/Dialect/Transform/IR/*.cpp"]), hdrs = glob(["include/mlir/Dialect/Transform/IR/*.h"]), deps = [ + ":ControlFlowInterfaces", ":IR", ":PDLDialect", ":PDLInterpDialect", @@ -7962,6 +7965,47 @@ cc_library( ], ) +td_library( + name = "TransformDialectTransformsTdFiles", + srcs = glob(["include/mlir/Dialect/Transform/Transforms/*.td"]), + deps = [ + ":PassBaseTdFiles", + ":TransformDialectTdFiles", + ], +) + +gentbl_cc_library( + name = "TransformDialectTransformsIncGen", + strip_include_prefix = "include", + tbl_outs = [ + ( + [ + "-gen-pass-decls", + "-name=Transform", + ], + "include/mlir/Dialect/Transform/Transforms/Passes.h.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/Transform/Transforms/Passes.td", + deps = [":TransformDialectTransformsTdFiles"], +) + +cc_library( + name = "TransformDialectTransforms", + srcs = glob(["lib/Dialect/Transform/Transforms/*.cpp"]), + hdrs = glob(["include/mlir/Dialect/Transform/Transforms/*.h"]), + deps = [ + ":Analysis", + ":IR", + ":Pass", + ":SideEffectInterfaces", + ":TransformDialect", + ":TransformDialectTransformsIncGen", + "//llvm:Support", + ], +) + td_library( name = "ComplexOpsTdFiles", srcs = [ From ad1d60c3befd606d6864b367f939238e50fb0f7e Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Thu, 26 May 2022 09:48:01 +0100 Subject: [PATCH 567/908] [FileCheck] Catch missspelled directives. 
Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D125604 --- clang/test/CodeGenCXX/attr-mustprogress.cpp | 2 +- .../CodeGenCXX/eh-aggregate-copy-destroy.cpp | 2 +- .../CodeGenCXX/inheriting-constructor.cpp | 2 +- clang/test/CodeGenObjC/non-runtime-protocol.m | 2 +- .../master_taskloop_private_codegen.cpp | 2 +- .../master_taskloop_simd_private_codegen.cpp | 2 +- ...rallel_master_taskloop_private_codegen.cpp | 2 +- ...l_master_taskloop_simd_private_codegen.cpp | 2 +- clang/test/OpenMP/task_private_codegen.cpp | 2 +- .../taskgroup_task_reduction_codegen.cpp | 4 +- .../test/OpenMP/taskloop_private_codegen.cpp | 2 +- .../OpenMP/taskloop_simd_private_codegen.cpp | 2 +- flang/test/Fir/convert-to-llvm.fir | 4 +- flang/test/Lower/Intrinsics/not.f90 | 3 +- llvm/include/llvm/FileCheck/FileCheck.h | 1 + llvm/lib/FileCheck/FileCheck.cpp | 28 ++++++++- .../Analysis/MemorySSA/phi-translation.ll | 2 +- .../Analysis/RegionInfo/infinite_loop_4.ll | 2 +- .../AMDGPU/divergence-driven-bfe-isel.ll | 4 +- llvm/test/CodeGen/AMDGPU/hoist-cond.ll | 2 +- .../CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll | 2 +- llvm/test/CodeGen/AMDGPU/mode-register.mir | 2 +- llvm/test/CodeGen/AMDGPU/smrd.ll | 12 ++-- llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll | 2 +- llvm/test/CodeGen/AVR/atomics/fence.ll | 2 +- .../BPF/CORE/offset-reloc-middle-chain.ll | 24 +++---- llvm/test/CodeGen/WebAssembly/libcalls.ll | 3 +- llvm/test/DebugInfo/NVPTX/debug-info.ll | 2 +- llvm/test/FileCheck/missspelled-directive.txt | 28 +++++++++ llvm/test/MC/AMDGPU/data.s | 2 +- llvm/test/MC/AsmParser/directive_file-g.s | 2 +- .../MC/PowerPC/ppc64-reloc-directive-pcrel.s | 16 ++--- llvm/test/MC/WebAssembly/unnamed-data.ll | 2 +- .../test/Transforms/Inline/inline-strictfp.ll | 14 ++--- .../LoopVectorize/X86/gather-vs-interleave.ll | 2 +- llvm/test/Transforms/MergeFunc/alias.ll | 2 +- llvm/test/Transforms/PGOProfile/PR41279.ll | 4 +- .../test/Transforms/PGOProfile/memop_clone.ll | 2 +- .../PGOProfile/memop_size_from_strlen.ll 
| 2 +- llvm/test/tools/llvm-dwp/X86/tu_units_v5.s | 2 +- .../tools/llvm-dwp/X86/type_dedup_v5.test | 2 +- .../llvm-objdump/MachO/disassemble-all.test | 62 +++++++++---------- .../COFF/unwind-arm64-windows.test | 2 +- mlir/test/Dialect/Affine/loop-coalescing.mlir | 2 +- .../fuse-with-reshape-by-collapsing.mlir | 4 +- .../Dialect/Linalg/tile-and-fuse-no-fuse.mlir | 2 +- mlir/test/Dialect/MemRef/canonicalize.mlir | 4 +- mlir/test/Dialect/SPIRV/IR/memory-ops.mlir | 6 +- .../vector-transfer-full-partial-split.mlir | 4 +- mlir/test/IR/dynamic.mlir | 4 +- mlir/test/mlir-tblgen/op-decl-and-defs.td | 14 ++--- polly/test/ScopDetect/dot-scops-npm.ll | 58 ++++++++--------- 52 files changed, 208 insertions(+), 155 deletions(-) create mode 100644 llvm/test/FileCheck/missspelled-directive.txt diff --git a/clang/test/CodeGenCXX/attr-mustprogress.cpp b/clang/test/CodeGenCXX/attr-mustprogress.cpp index 592ebd537cc731..4dac0a4b93b0d0 100644 --- a/clang/test/CodeGenCXX/attr-mustprogress.cpp +++ b/clang/test/CodeGenCXX/attr-mustprogress.cpp @@ -125,7 +125,7 @@ void F() { // CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] // CHECK-NEXT: br i1 [[CMP]], label %for.body, label %for.end // CHECK: for.body: -// CXX98_NOT: br {{.*}} !llvm.loop +// CXX98-NOT: br {{.*}} !llvm.loop // CXX11-NEXT: br label %for.cond, !llvm.loop [[LOOP6:!.*]] // FINITE-NEXT: br label %for.cond, !llvm.loop [[LOOP6:!.*]] // CHECK: for.end: diff --git a/clang/test/CodeGenCXX/eh-aggregate-copy-destroy.cpp b/clang/test/CodeGenCXX/eh-aggregate-copy-destroy.cpp index 9f0f36c8756180..37d5b8654eee0a 100644 --- a/clang/test/CodeGenCXX/eh-aggregate-copy-destroy.cpp +++ b/clang/test/CodeGenCXX/eh-aggregate-copy-destroy.cpp @@ -23,7 +23,7 @@ struct Container { int main () { try { Container c1; - // CHECK_LABEL: main + // CHECK-LABEL: main // CHECK-NOT: call void @_ZN9ThrowCopyC1ERKS_ // CHECK: invoke void @_ZN9ThrowCopyC1ERKS_ // CHECK98: invoke void @_ZN12ImplicitCopyD1Ev diff --git 
a/clang/test/CodeGenCXX/inheriting-constructor.cpp b/clang/test/CodeGenCXX/inheriting-constructor.cpp index dffd13d9bfbcd8..751604fad194cd 100644 --- a/clang/test/CodeGenCXX/inheriting-constructor.cpp +++ b/clang/test/CodeGenCXX/inheriting-constructor.cpp @@ -93,7 +93,7 @@ namespace noninline_virt { C c(1, 2, &c); // Complete object ctor forwards to A ctor, then calls B's base inheriting // constructor, which takes no arguments other than the this pointer and VTT. - // ITANIUM_LABEL: define linkonce_odr void @_ZN14noninline_virt1CCI1NS_1AEEiO1QPvU17pass_object_size0( + // ITANIUM-LABEL: define linkonce_odr void @_ZN14noninline_virt1CCI1NS_1AEEiO1QPvU17pass_object_size0( // ITANIUM: call void @_ZN14noninline_virt1AC2EiO1QPvU17pass_object_size0({{.*}} %{{.*}}, i32 %{{.*}}, %{{.*}}* {{.*}}, i8* %{{.*}}, i{{32|64}} %{{.*}}) // ITANIUM: call void @_ZN14noninline_virt1BCI2NS_1AEEiO1QPvU17pass_object_size0(%{{.*}}* {{[^,]*}} %{{.*}}, i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @_ZTTN14noninline_virt1CE, i64 0, i64 1)) // ITANIUM: store {{.*}} @_ZTVN14noninline_virt1CE diff --git a/clang/test/CodeGenObjC/non-runtime-protocol.m b/clang/test/CodeGenObjC/non-runtime-protocol.m index b29c3d18b93e99..a6d0d5f1b0caae 100644 --- a/clang/test/CodeGenObjC/non-runtime-protocol.m +++ b/clang/test/CodeGenObjC/non-runtime-protocol.m @@ -61,7 +61,7 @@ @implementation Implementer // NONFRAGILE-NOT: _OBJC_CLASS_PROTOCOLS_$_NonRuntimeImplementer // FRAGILE-NOT: OBJC_CLASS_NAME_.{{.*}}"Runtime\00" // FRAGILE-NOT: OBJC_PROTOCOL_NonRuntime -// FRAGILE_NOT: OBJC_PROTOCOLS_NonRuntimeImplementer +// FRAGILE-NOT: OBJC_PROTOCOLS_NonRuntimeImplementer // GNU-NOT: private unnamed_addr constant {{.*}} c"NonRuntimeProtocol\00" // GNU-NOT: @.objc_protocol {{.*}} // GNU2-NOT: private unnamed_addr constant {{.*}} c"NonRuntimeProtocol\00" diff --git a/clang/test/OpenMP/master_taskloop_private_codegen.cpp b/clang/test/OpenMP/master_taskloop_private_codegen.cpp index 8df3db4c06c5a7..5be1eb1a89c677 
100644 --- a/clang/test/OpenMP/master_taskloop_private_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_private_codegen.cpp @@ -251,7 +251,7 @@ int main() { // CHECK-DAG: [[PRIV_T_VAR]] // CHECK-DAG: [[PRIV_S_ARR]] // CHECK-DAG: [[PRIV_VEC]] -// CHECK_DAG: [[PRIV_SIVAR]] +// CHECK-DAG: [[PRIV_SIVAR]] // CHECK: ret diff --git a/clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp index bb53cfe3d0cf06..e2d0db1a783a13 100644 --- a/clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp @@ -244,7 +244,7 @@ int main() { // CHECK-DAG: [[PRIV_T_VAR]] // CHECK-DAG: [[PRIV_S_ARR]] // CHECK-DAG: [[PRIV_VEC]] -// CHECK_DAG: [[PRIV_SIVAR]] +// CHECK-DAG: [[PRIV_SIVAR]] // CHECK: ret diff --git a/clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp index a8941a937a16bd..47c2001c4f9b99 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp @@ -243,7 +243,7 @@ int main() { // CHECK-DAG: [[PRIV_T_VAR]] // CHECK-DAG: [[PRIV_S_ARR]] // CHECK-DAG: [[PRIV_VEC]] -// CHECK_DAG: [[PRIV_SIVAR]] +// CHECK-DAG: [[PRIV_SIVAR]] // CHECK: ret diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp index 4700688d8e7f1c..3936b7bec3c0fe 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp @@ -243,7 +243,7 @@ int main() { // CHECK-DAG: [[PRIV_T_VAR]] // CHECK-DAG: [[PRIV_S_ARR]] // CHECK-DAG: [[PRIV_VEC]] -// CHECK_DAG: [[PRIV_SIVAR]] +// CHECK-DAG: [[PRIV_SIVAR]] // CHECK: ret diff --git a/clang/test/OpenMP/task_private_codegen.cpp b/clang/test/OpenMP/task_private_codegen.cpp index 
c648fc5550411e..5bdf11b9fc3258 100644 --- a/clang/test/OpenMP/task_private_codegen.cpp +++ b/clang/test/OpenMP/task_private_codegen.cpp @@ -244,7 +244,7 @@ int main() { // CHECK-DAG: [[PRIV_T_VAR]] // CHECK-DAG: [[PRIV_S_ARR]] // CHECK-DAG: [[PRIV_VEC]] -// CHECK_DAG: [[PRIV_SIVAR]] +// CHECK-DAG: [[PRIV_SIVAR]] // CHECK: ret diff --git a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp index ca20d5d8ec3218..9db61efed4c4c5 100644 --- a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp @@ -220,7 +220,7 @@ int main(int argc, char **argv) { // CHECK-DAG: call {{.+}}(%struct.S* {{.+}}) // CHECK-DAG: br i1 % // CHECK-DAG: ret void -// CHECK_DAG: } +// CHECK-DAG: } // CHECK-DAG: define internal void @[[VLAINIT]](i8* noalias noundef %{{.+}}, i8* noalias noundef %{{.+}}) // CHECK-DAG: call i32 @__kmpc_global_thread_num(%struct.ident_t* {{[^,]+}}) @@ -240,7 +240,7 @@ int main(int argc, char **argv) { // CHECK-DAG: add nsw i32 % // CHECK-DAG: trunc i32 %{{.+}} to i16 // CHECK-DAG: store i16 % -// CHECK_DAG: br i1 % +// CHECK-DAG: br i1 % // CHECK-DAG: ret void // CHECK-DAG: } #endif diff --git a/clang/test/OpenMP/taskloop_private_codegen.cpp b/clang/test/OpenMP/taskloop_private_codegen.cpp index e9feaedcaf88b8..9589a7bafd1bf0 100644 --- a/clang/test/OpenMP/taskloop_private_codegen.cpp +++ b/clang/test/OpenMP/taskloop_private_codegen.cpp @@ -244,7 +244,7 @@ int main() { // CHECK-DAG: [[PRIV_T_VAR]] // CHECK-DAG: [[PRIV_S_ARR]] // CHECK-DAG: [[PRIV_VEC]] -// CHECK_DAG: [[PRIV_SIVAR]] +// CHECK-DAG: [[PRIV_SIVAR]] // CHECK: ret diff --git a/clang/test/OpenMP/taskloop_simd_private_codegen.cpp b/clang/test/OpenMP/taskloop_simd_private_codegen.cpp index 041da5b073310a..73fdcb8dd2b1e5 100644 --- a/clang/test/OpenMP/taskloop_simd_private_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_private_codegen.cpp @@ -244,7 +244,7 @@ int main() { // CHECK-DAG: 
[[PRIV_T_VAR]] // CHECK-DAG: [[PRIV_S_ARR]] // CHECK-DAG: [[PRIV_VEC]] -// CHECK_DAG: [[PRIV_SIVAR]] +// CHECK-DAG: [[PRIV_SIVAR]] // CHECK: ret diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 2a1fd185aa9431..fa1b8940d83833 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -774,7 +774,7 @@ func.func @test_constc4() -> !fir.complex<4> { } // CHECK-LABEL: @test_constc4 -// CHECK_SAME: () -> !llvm.struct<(f32, f32)> +// CHECK-SAME: () -> !llvm.struct<(f32, f32)> // CHECK-DAG: [[rp:%.*]] = llvm.mlir.constant(1.400000e+00 : f32) : f32 // CHECK-DAG: [[ip:%.*]] = llvm.mlir.constant(2.300000e+00 : f32) : f32 // CHECK: [[undef:%.*]] = llvm.mlir.undef : !llvm.struct<(f32, f32)> @@ -788,7 +788,7 @@ func.func @test_constc8() -> !fir.complex<8> { } // CHECK-LABEL: @test_constc8 -// CHECK_SAME: () -> !llvm.struct<(f64, f64)> +// CHECK-SAME: () -> !llvm.struct<(f64, f64)> // CHECK-DAG: [[rp:%.*]] = llvm.mlir.constant(1.800000e+00 : f64) : f64 // CHECK-DAG: [[ip:%.*]] = llvm.mlir.constant(2.300000e+00 : f64) : f64 // CHECK: [[undef:%.*]] = llvm.mlir.undef : !llvm.struct<(f64, f64)> diff --git a/flang/test/Lower/Intrinsics/not.f90 b/flang/test/Lower/Intrinsics/not.f90 index e900e211261b1c..1c735af41c31dd 100644 --- a/flang/test/Lower/Intrinsics/not.f90 +++ b/flang/test/Lower/Intrinsics/not.f90 @@ -1,10 +1,9 @@ ! RUN: bbc -emit-fir %s -o - | FileCheck %s -! CHECK-LABEL: not_test subroutine not_test integer :: source integer :: destination - ! CHECK_LABEL: not_test + ! CHECK-LABEL: not_test ! CHECK: %[[dest:.*]] = fir.alloca i32 {bindc_name = "destination", uniq_name = "_QFnot_testEdestination"} ! CHECK: %[[source:.*]] = fir.alloca i32 {bindc_name = "source", uniq_name = "_QFnot_testEsource"} ! 
CHECK: %[[loaded_source:.*]] = fir.load %[[source]] : !fir.ref diff --git a/llvm/include/llvm/FileCheck/FileCheck.h b/llvm/include/llvm/FileCheck/FileCheck.h index ca75b25daf10ef..d6d8dc531e1001 100644 --- a/llvm/include/llvm/FileCheck/FileCheck.h +++ b/llvm/include/llvm/FileCheck/FileCheck.h @@ -48,6 +48,7 @@ namespace Check { enum FileCheckKind { CheckNone = 0, + CheckMisspelled, CheckPlain, CheckNext, CheckSame, diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp index 6186af444e7368..bf13b6c325ecfb 100644 --- a/llvm/lib/FileCheck/FileCheck.cpp +++ b/llvm/lib/FileCheck/FileCheck.cpp @@ -1651,6 +1651,8 @@ std::string Check::FileCheckType::getDescription(StringRef Prefix) const { switch (Kind) { case Check::CheckNone: return "invalid"; + case Check::CheckMisspelled: + return "misspelled"; case Check::CheckPlain: if (Count > 1) return WithModifiers("-COUNT"); @@ -1680,7 +1682,8 @@ std::string Check::FileCheckType::getDescription(StringRef Prefix) const { } static std::pair -FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { +FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix, + bool &Misspelled) { if (Buffer.size() <= Prefix.size()) return {Check::CheckNone, StringRef()}; @@ -1722,7 +1725,9 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { if (Rest.front() == '{') return ConsumeModifiers(Check::CheckPlain); - if (!Rest.consume_front("-")) + if (Rest.consume_front("_")) + Misspelled = true; + else if (!Rest.consume_front("-")) return {Check::CheckNone, StringRef()}; if (Rest.consume_front("COUNT-")) { @@ -1766,6 +1771,15 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { return {Check::CheckNone, Rest}; } +static std::pair +FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { + bool Misspelled = false; + auto Res = FindCheckType(Req, Buffer, Prefix, Misspelled); + if (Res.first != 
Check::CheckNone && Misspelled) + return {Check::CheckMisspelled, Res.second}; + return Res; +} + // From the given position, find the next character after the word. static size_t SkipWord(StringRef Str, size_t Loc) { while (Loc < Str.size() && IsPartOfWord(Str[Loc])) @@ -1939,6 +1953,16 @@ bool FileCheck::readCheckFile( Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) : AfterSuffix; + // Complain about misspelled directives. + if (CheckTy == Check::CheckMisspelled) { + StringRef UsedDirective(UsedPrefix.data(), + AfterSuffix.data() - UsedPrefix.data()); + SM.PrintMessage(SMLoc::getFromPointer(UsedDirective.data()), + SourceMgr::DK_Error, + "misspelled directive '" + UsedDirective + "'"); + return true; + } + // Complain about useful-looking but unsupported suffixes. if (CheckTy == Check::CheckBadNot) { SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll index 7a7206a8a9e1fd..e813d31b070f73 100644 --- a/llvm/test/Analysis/MemorySSA/phi-translation.ll +++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll @@ -195,7 +195,7 @@ if.end: define i32 @use_not_optimized_due_to_backedge(i32* nocapture %m_i_strides, i32* nocapture readonly %eval_left_dims) { entry: ; CHECK: 1 = MemoryDef(liveOnEntry) -; CHECK_NEXT: store i32 1, i32* %m_i_strides, align 4 +; CHECK-NEXT: store i32 1, i32* %m_i_strides, align 4 store i32 1, i32* %m_i_strides, align 4 br label %for.body diff --git a/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll b/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll index 611427e561f420..e1aea085a610d5 100644 --- a/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll +++ b/llvm/test/Analysis/RegionInfo/infinite_loop_4.ll @@ -40,7 +40,7 @@ define void @normal_condition() nounwind { ; CHECK-NOT: => ; CHECK: [0] 0 => ; CHECK-NEXT: [1] 2 => 10 -; CHECK_NEXT: [2] 5 => 6 +; CHECK-NEXT: [2] 5 => 6 ; STAT: 3 region - The # of 
regions ; STAT: 1 region - The # of simple regions diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll index ead58214bdf991..e40fdb7ba471be 100644 --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; GCN_LABEL: @bfe_uniform +; GCN-LABEL: @bfe_uniform ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x40010 define amdgpu_kernel void @bfe_uniform(i32 %val, i32 addrspace(1)* %out) { %hibits = lshr i32 %val, 16 @@ -9,7 +9,7 @@ define amdgpu_kernel void @bfe_uniform(i32 %val, i32 addrspace(1)* %out) { ret void } -; GCN_LABEL: @bfe_divergent +; GCN-LABEL: @bfe_divergent ; GCN: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 4 define amdgpu_kernel void @bfe_divergent(i32 %val, i32 addrspace(1)* %out) { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll index c7da460cfd2639..cc734e0453ba37 100644 --- a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll @@ -7,7 +7,7 @@ ; CHECK: v_cmp_{{..}}_u32_e{{32|64}} [[COND:s\[[0-9]+:[0-9]+\]|vcc]] ; CHECK: BB0_1: ; CHECK-NOT: v_cmp -; CHECK_NOT: v_cndmask +; CHECK-NOT: v_cndmask ; CHECK: s_and_saveexec_b64 s[{{[0-9]+:[0-9]+}}], [[COND]] ; CHECK: ; %bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll index 34f9cd9df0b538..6c8e4a68e292eb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll @@ -37,7 +37,7 @@ define amdgpu_ps float @test2() #0 { ; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec ; CHECK-DAG: s_wqm_b64 exec, exec ; CHECK-DAG: s_xor_b64 [[HELPER:s\[[0-9]+:[0-9]+\]]], [[LIVE]], -1 -; CHECK_DAG: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[HELPER]] 
+; CHECK-DAG: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[HELPER]] ; CHECK: ; %dead define amdgpu_ps float @test3(i32 %in) #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/mode-register.mir b/llvm/test/CodeGen/AMDGPU/mode-register.mir index 382050bc3888f4..04b7f3cd773412 100644 --- a/llvm/test/CodeGen/AMDGPU/mode-register.mir +++ b/llvm/test/CodeGen/AMDGPU/mode-register.mir @@ -278,7 +278,7 @@ body: | # check that mode is propagated back to start of loop and through a block that # neither sets or uses the mode. # CHECK-LABEL: name: loop_indirect -# CHECK_NOT: S_SETREG_IMM32_B32 +# CHECK-NOT: S_SETREG_IMM32_B32 # CHECK-LABEL: bb.3: # CHECK: S_SETREG_IMM32_B32 3, 2177 # CHECK: V_INTERP_P1LL_F16 diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index fba92c69be724f..0782b2f4ba35c6 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -396,12 +396,12 @@ main_body: ; ; GCN: s_buffer_load_dwordx2 ; SICIVI: s_mov_b32 m0 -; SICIVI_DAG: v_interp_p1_f32 -; SICIVI_DAG: v_interp_p1_f32 -; SICIVI_DAG: v_interp_p1_f32 -; SICIVI_DAG: v_interp_p2_f32 -; SICIVI_DAG: v_interp_p2_f32 -; SICIVI_DAG: v_interp_p2_f32 +; SICIVI-DAG: v_interp_p1_f32 +; SICIVI-DAG: v_interp_p1_f32 +; SICIVI-DAG: v_interp_p1_f32 +; SICIVI-DAG: v_interp_p2_f32 +; SICIVI-DAG: v_interp_p2_f32 +; SICIVI-DAG: v_interp_p2_f32 ; ; extractelement does not result in movrels anymore for vectors gitting 8 dwords ; SICIVI-NOT: s_mov_b32 m0 diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll index cace190d15e6c7..67baec1810f282 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll @@ -4,7 +4,7 @@ @y = global i64 20, align 8 @z = global i64 20, align 8 -; CHECK_LABEL: main: +; CHECK-LABEL: main: ; CHECK: ldr [[R2:r[0-9]+]], [[[R1:r[0-9]+]]] ; CHECK-NEXT: ldr [[R1]], [[[R1]], #4] ; CHECK: mov [[R4:r[0-9]+]], [[R1]] diff --git a/llvm/test/CodeGen/AVR/atomics/fence.ll 
b/llvm/test/CodeGen/AVR/atomics/fence.ll index b4cd215f3a26af..4a0701bcff9bcb 100644 --- a/llvm/test/CodeGen/AVR/atomics/fence.ll +++ b/llvm/test/CodeGen/AVR/atomics/fence.ll @@ -3,7 +3,7 @@ ; Checks that atomic fences are simply removed from IR. ; AVR is always singlethreaded so fences do nothing. -; CHECK_LABEL: atomic_fence8 +; CHECK-LABEL: atomic_fence8 ; CHECK: ; %bb.0: ; CHECK-NEXT: ret define void @atomic_fence8() { diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-middle-chain.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-middle-chain.ll index fa2fbeb60d7369..c7471958a903c0 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-middle-chain.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-middle-chain.ll @@ -56,18 +56,18 @@ entry: ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 29 # Field reloc section string offset=29 ; CHECK-NEXT: .long 3 -; CHECK_NEXT: .long .Ltmp{{[0-9]+}} -; CHECK_NEXT: .long 2 -; CHECK_NEXT: .long 72 -; CHECK_NEXT: .long 0 -; CHECK_NEXT: .long .Ltmp{{[0-9]+}} -; CHECK_NEXT: .long 2 -; CHECK_NEXT: .long 76 -; CHECK_NEXT: .long 0 -; CHECK_NEXT: .long .Ltmp{{[0-9]+}} -; CHECK_NEXT: .long 2 -; CHECK_NEXT: .long 82 -; CHECK_NEXT: .long 0 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 72 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 76 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 82 +; CHECK-NEXT: .long 0 ; Function Attrs: nounwind readnone declare %struct.s1* @llvm.preserve.struct.access.index.p0s_struct.s1s.p0s_struct.r1s(%struct.r1*, i32, i32) #1 diff --git a/llvm/test/CodeGen/WebAssembly/libcalls.ll b/llvm/test/CodeGen/WebAssembly/libcalls.ll index 336368be15e693..efa4041c891420 100644 --- a/llvm/test/CodeGen/WebAssembly/libcalls.ll +++ b/llvm/test/CodeGen/WebAssembly/libcalls.ll @@ -49,7 +49,8 @@ define fp128 @fp128libcalls(fp128 %x, fp128 %y, i32 %z) { ; CHECK-LABEL: i128libcalls: 
define i128 @i128libcalls(i128 %x, i128 %y) { ; Basic ops should be expanded - ; CHECK_NOT: call + ; CHECK: .local + ; CHECK-NOT: call %a = add i128 %x, %y ; CHECK: call __multi3 %b = mul i128 %a, %y diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 73842031fc585b..825435014383f8 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -101,7 +101,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-DAG: .file {{[0-9]+}} "{{.*}}/usr/include{{/|\\\\}}stdlib-bsearch.h" ; CHECK-DAG: .file {{[0-9]+}} "{{.*}}clang/include{{/|\\\\}}stddef.h" ; CHECK-DAG: .file {{[0-9]+}} "{{.*}}/usr/local/cuda/include{{/|\\\\}}math_functions.hpp" -; CHECK_DAG: .file {{[0-9]+}} "{{.*}}clang/include{{/|\\\\}}__clang_cuda_cmath.h" +; CHECK-DAG: .file {{[0-9]+}} "{{.*}}clang/include{{/|\\\\}}__clang_cuda_cmath.h" ; CHECK-DAG: .file {{[0-9]+}} "{{.*}}/usr/local/cuda/include{{/|\\\\}}device_functions.hpp" ; CHECK-DAG: .file [[DEBUG_INFO_CU]] "{{.*}}debug-info.cu" ; CHECK-DAG: .file [[BUILTUIN_VARS_H]] "{{.*}}clang/include{{/|\\\\}}__clang_cuda_builtin_vars.h" diff --git a/llvm/test/FileCheck/missspelled-directive.txt b/llvm/test/FileCheck/missspelled-directive.txt new file mode 100644 index 00000000000000..bb6255823941e0 --- /dev/null +++ b/llvm/test/FileCheck/missspelled-directive.txt @@ -0,0 +1,28 @@ +RUN: not FileCheck --check-prefix=P1 %s 2>&1 | FileCheck --check-prefix=CHECK1 %s +RUN: not FileCheck --check-prefix=P2 %s 2>&1 | FileCheck --check-prefix=CHECK2 %s +RUN: not FileCheck --check-prefix=P3 %s 2>&1 | FileCheck --check-prefix=CHECK3 %s +RUN: not FileCheck --check-prefix=P4 %s 2>&1 | FileCheck --check-prefix=CHECK4 %s +RUN: not FileCheck --check-prefix=P5 %s 2>&1 | FileCheck --check-prefix=CHECK5 %s +RUN: not FileCheck --check-prefix=P6 %s 2>&1 | FileCheck --check-prefix=CHECK6 %s +RUN: not FileCheck --check-prefix=P7 %s 2>&1 | FileCheck --check-prefix=CHECK7 %s + +P1_LABEL: foo +CHECK1: error: 
misspelled directive 'P1_LABEL:' + +P2_NEXT: foo +CHECK2: error: misspelled directive 'P2_NEXT:' + +P3_NOT: foo +CHECK3: error: misspelled directive 'P3_NOT:' + +P4_COUNT-2: foo +CHECK4: error: misspelled directive 'P4_COUNT-2:' + +P5_SAME: foo +CHECK5: error: misspelled directive 'P5_SAME:' + +P6_EMPTY: foo +CHECK6: error: misspelled directive 'P6_EMPTY:' + +P7_DAG: foo +CHECK7: error: misspelled directive 'P7_DAG:' diff --git a/llvm/test/MC/AMDGPU/data.s b/llvm/test/MC/AMDGPU/data.s index e799553dd68967..3bc273c080e964 100644 --- a/llvm/test/MC/AMDGPU/data.s +++ b/llvm/test/MC/AMDGPU/data.s @@ -16,7 +16,7 @@ // CHECK: v_mov_b32 // CHECK: v_mov_b32 // CHECK: .long 0xabadc0de -// CHECK_SAME: : ABADC0DE +// CHECK-SAME: : ABADC0DE // CHECK: s_endpgm // CHECK: .long 0xabadc0d1 // CHECK: .long 0xabadc0d2 diff --git a/llvm/test/MC/AsmParser/directive_file-g.s b/llvm/test/MC/AsmParser/directive_file-g.s index 3b769f12626a4c..fd270c8765e037 100644 --- a/llvm/test/MC/AsmParser/directive_file-g.s +++ b/llvm/test/MC/AsmParser/directive_file-g.s @@ -16,7 +16,7 @@ foo: ## gcc does generate a DW_TAG_compile_unit in this case, with or without ## -g on the command line, but we do not. 
# CHECK-EMPTY: -# CHECK_NEXT: .debug_line +# CHECK-NEXT: .debug_line # CHECK: file_names[ 1]: # CHECK-NEXT: name: "a.c" # CHECK-NEXT: dir_index: 0 diff --git a/llvm/test/MC/PowerPC/ppc64-reloc-directive-pcrel.s b/llvm/test/MC/PowerPC/ppc64-reloc-directive-pcrel.s index c17445f0a50221..a31ea568fd8d57 100644 --- a/llvm/test/MC/PowerPC/ppc64-reloc-directive-pcrel.s +++ b/llvm/test/MC/PowerPC/ppc64-reloc-directive-pcrel.s @@ -42,7 +42,7 @@ SingleInsnBetween: blr .long 0 .quad 0 -# CHECK_LABEL: SingleInsnBetween +# CHECK-LABEL: SingleInsnBetween # CHECK: pld 3, 0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0xc @@ -68,7 +68,7 @@ MultiInsnBetween: blr .long 0 .quad 0 -# CHECK_LABEL: MultiInsnBetween +# CHECK-LABEL: MultiInsnBetween # CHECK: pld 3, 0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0x1c @@ -98,7 +98,7 @@ PrefixInsnBetween: blr .long 0 .quad 0 -# CHECK_LABEL: PrefixInsnBetween +# CHECK-LABEL: PrefixInsnBetween # CHECK: pld 3, 0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0x28 @@ -130,7 +130,7 @@ SpaceBetween: blr .long 0 .quad 0 -# CHECK_LABEL: SpaceBetween +# CHECK-LABEL: SpaceBetween # CHECK: pld 3, 0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0x50 @@ -202,7 +202,7 @@ VarLabelSingleInsnBetween: blr .long 0 .quad 0 -# CHECK_LABEL: VarLabelSingleInsnBetween +# CHECK-LABEL: VarLabelSingleInsnBetween # CHECK: pld 3, 0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0xc @@ -227,7 +227,7 @@ VarLabelMultiInsnBetween: blr .long 0 .quad 0 -# CHECK_LABEL: VarLabelMultiInsnBetween +# CHECK-LABEL: VarLabelMultiInsnBetween # CHECK: pld 3, 0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0x1c @@ -257,7 +257,7 @@ VarLabelPrefixInsnBetween: blr .long 0 .quad 0 -# CHECK_LABEL: VarLabelPrefixInsnBetween +# CHECK-LABEL: VarLabelPrefixInsnBetween # CHECK: pld 3, 
0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0x24 @@ -288,7 +288,7 @@ VarLabelSpaceBetween: blr .long 0 .quad 0 -# CHECK_LABEL: VarLabelSpaceBetween +# CHECK-LABEL: VarLabelSpaceBetween # CHECK: pld 3, 0(0), 1 # CHECK-NEXT: R_PPC64_GOT_PCREL34 vec # CHECK-NEXT: R_PPC64_PCREL_OPT *ABS*+0x4c diff --git a/llvm/test/MC/WebAssembly/unnamed-data.ll b/llvm/test/MC/WebAssembly/unnamed-data.ll index f1766d14f8480b..1fe6df2f77afc6 100644 --- a/llvm/test/MC/WebAssembly/unnamed-data.ll +++ b/llvm/test/MC/WebAssembly/unnamed-data.ll @@ -87,4 +87,4 @@ target triple = "wasm32-unknown-unknown" ; CHECK-NEXT: Name: .data.b ; CHECK-NEXT: Alignment: 3 ; CHECK-NEXT: Flags: [ ] -; CHECK_NEXT: ... +; CHECK-NEXT: ... diff --git a/llvm/test/Transforms/Inline/inline-strictfp.ll b/llvm/test/Transforms/Inline/inline-strictfp.ll index 3028409fc167ea..f6566fc54abc21 100644 --- a/llvm/test/Transforms/Inline/inline-strictfp.ll +++ b/llvm/test/Transforms/Inline/inline-strictfp.ll @@ -14,7 +14,7 @@ entry: %0 = call float @inlined_01(float %a) #0 %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret float %add -; CHECK_LABEL: @host_02 +; CHECK-LABEL: @host_02 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 } @@ -33,7 +33,7 @@ entry: %0 = call float @inlined_03(float %a) #0 %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret float %add -; CHECK_LABEL: @host_04 +; CHECK-LABEL: @host_04 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.downward", metadata 
!"fpexcept.maytrap") #0 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 } @@ -52,7 +52,7 @@ entry: %0 = call float @inlined_05(float %a) %add = fadd float %0, 2.000000e+00 ret float %add -; CHECK_LABEL: @host_06 +; CHECK-LABEL: @host_06 ; CHECK: call float @inlined_05(float %a) ; CHECK: fadd float %0, 2.000000e+00 } @@ -75,7 +75,7 @@ entry: %0 = call float @inlined_07(float %a) #0 %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret float %add -; CHECK_LABEL: @host_08 +; CHECK-LABEL: @host_08 ; CHECK: call float @func_ext(float {{.*}}) #0 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -96,7 +96,7 @@ entry: %0 = call double @inlined_09(float %a) #0 %add = call double @llvm.experimental.constrained.fadd.f64(double %0, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret double %add -; CHECK_LABEL: @host_10 +; CHECK-LABEL: @host_10 ; CHECK: call double @llvm.experimental.constrained.fpext.f64.f32(float {{.*}}, metadata !"fpexcept.ignore") #0 ; CHECK: call double @llvm.experimental.constrained.fadd.f64(double {{.*}}, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 } @@ -113,7 +113,7 @@ entry: %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 %cmp = call i1 @inlined_11(float %a, float %b) #0 ret i1 %cmp -; CHECK_LABEL: @host_12 +; CHECK-LABEL: @host_12 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata 
!"fpexcept.strict") #0 ; CHECK: call i1 @llvm.experimental.constrained.fcmp.f32(float {{.*}}, metadata !"oeq", metadata !"fpexcept.ignore") #0 } @@ -130,7 +130,7 @@ entry: %0 = call float @inlined_13(float %a) #0 %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret float %add -; CHECK_LABEL: @host_14 +; CHECK-LABEL: @host_14 ; CHECK: call float @llvm.experimental.constrained.ceil.f32(float %a, metadata !"fpexcept.ignore") #0 ; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 } diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll b/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll index f38782cb7e597a..e27702e0fcac10 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll @@ -19,7 +19,7 @@ target triple = "x86_64-unknown-linux-gnu" @B = global [10240 x i64] zeroinitializer, align 16 -; CHECK_LABEL: strided_load_i64 +; CHECK-LABEL: strided_load_i64 ; CHECK: masked.gather define void @strided_load_i64() { br label %1 diff --git a/llvm/test/Transforms/MergeFunc/alias.ll b/llvm/test/Transforms/MergeFunc/alias.ll index ee1c7af5e817f9..1de6ede7a355b5 100644 --- a/llvm/test/Transforms/MergeFunc/alias.ll +++ b/llvm/test/Transforms/MergeFunc/alias.ll @@ -14,7 +14,7 @@ ; A strong backing function had to be created for the weak-weak pair ; CHECK: define private void @0(i32* %a) unnamed_addr -; CHECK_NEXT: call void @dummy4() +; CHECK-NEXT: call void @dummy4() ; These internal functions are dropped in favor of the external ones diff --git a/llvm/test/Transforms/PGOProfile/PR41279.ll b/llvm/test/Transforms/PGOProfile/PR41279.ll index 777dd8ffc53f48..ff762d26e12896 100644 --- a/llvm/test/Transforms/PGOProfile/PR41279.ll +++ 
b/llvm/test/Transforms/PGOProfile/PR41279.ll @@ -77,6 +77,6 @@ define void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFram ; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}} ; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}} ; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 8} -; USE_DAG: ![[BW_ENTRY1]] = !{!"branch_weights", i32 5, i32 3} -; USE_DAG: ![[BW_ENTRY2]] = !{!"branch_weights", i32 2, i32 1} +; USE-DAG: ![[BW_ENTRY1]] = !{!"branch_weights", i32 5, i32 3} +; USE-DAG: ![[BW_ENTRY2]] = !{!"branch_weights", i32 2, i32 1} diff --git a/llvm/test/Transforms/PGOProfile/memop_clone.ll b/llvm/test/Transforms/PGOProfile/memop_clone.ll index 0381b8f47dbae2..3aa6f56a0817c1 100644 --- a/llvm/test/Transforms/PGOProfile/memop_clone.ll +++ b/llvm/test/Transforms/PGOProfile/memop_clone.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -S | FileCheck %s define i32 @test(i8* %a, i8* %b) !prof !1 { -; CHECK_LABEL: test +; CHECK-LABEL: test ; CHECK: MemOP.Case.3: ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %a, i32 3, i1 false) ; CHECK: MemOP.Case.2: diff --git a/llvm/test/Transforms/PGOProfile/memop_size_from_strlen.ll b/llvm/test/Transforms/PGOProfile/memop_size_from_strlen.ll index ce053faff58fe6..3dd28d2f804792 100644 --- a/llvm/test/Transforms/PGOProfile/memop_size_from_strlen.ll +++ b/llvm/test/Transforms/PGOProfile/memop_size_from_strlen.ll @@ -3,7 +3,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) declare i32 @strlen(i8* nocapture) -; CHECK_LABEL: test +; CHECK-LABEL: test ; CHECK: %1 = zext i32 %c to i64 ; CHECK: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_test, i32 0, i32 0), i64 {{[0-9]+}}, i64 %1, i32 1, i32 0) diff --git a/llvm/test/tools/llvm-dwp/X86/tu_units_v5.s b/llvm/test/tools/llvm-dwp/X86/tu_units_v5.s index 161e20f19f64d7..643628182acbaf 100644 
--- a/llvm/test/tools/llvm-dwp/X86/tu_units_v5.s +++ b/llvm/test/tools/llvm-dwp/X86/tu_units_v5.s @@ -11,7 +11,7 @@ # CHECK-DAG: .debug_info.dwo contents: # CHECK: 0x00000000: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID1:.*]], type_offset = 0x0019 (next unit at 0x0000001b) # CHECK: 0x0000001b: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID2:.*]], type_offset = 0x0019 (next unit at 0x00000036) -# CHECK_DAG: .debug_tu_index contents: +# CHECK-DAG: .debug_tu_index contents: # CHECK: version = 5, units = 2, slots = 4 # CHECK: Index Signature INFO ABBREV # CHECK: 1 [[TUID1]] [0x00000000, 0x0000001b) [0x00000000, 0x00000010) diff --git a/llvm/test/tools/llvm-dwp/X86/type_dedup_v5.test b/llvm/test/tools/llvm-dwp/X86/type_dedup_v5.test index c72c6d2b764f5a..b43a75bcd0df32 100644 --- a/llvm/test/tools/llvm-dwp/X86/type_dedup_v5.test +++ b/llvm/test/tools/llvm-dwp/X86/type_dedup_v5.test @@ -7,5 +7,5 @@ # RUN: llvm-dwp %t-a.dwo %t-b.dwo -o %t.dwp # RUN: llvm-dwarfdump -debug-tu-index %t.dwp | FileCheck %s -# CHECK_DAG: .debug_tu_index contents: +# CHECK-DAG: .debug_tu_index contents: # CHECK: version = 5, units = 1, slots = 2 diff --git a/llvm/test/tools/llvm-objdump/MachO/disassemble-all.test b/llvm/test/tools/llvm-objdump/MachO/disassemble-all.test index 05d4a5d9ca9e68..f3770bf6ed7262 100644 --- a/llvm/test/tools/llvm-objdump/MachO/disassemble-all.test +++ b/llvm/test/tools/llvm-objdump/MachO/disassemble-all.test @@ -1,39 +1,39 @@ // RUN: llvm-objdump --macho -d --full-leading-addr --print-imm-hex --no-show-raw-insn %p/Inputs/macho-multiple-text | FileCheck %s --check-prefix=TEXT TEXT: (__TEXT,__text) section -TEXT_NEXT: _main: -TEXT_NEXT: 0000000100000f60 pushq %rbp -TEXT_NEXT: 0000000100000f61 movq %rsp, %rbp -TEXT_NEXT: 
0000000100000f64 subq $0x10, %rsp -TEXT_NEXT: 0000000100000f68 movl $0x0, -0x4(%rbp) -TEXT_NEXT: 0000000100000f6f callq _hello -TEXT_NEXT: 0000000100000f74 xorl %eax, %eax -TEXT_NEXT: 0000000100000f76 addq $0x10, %rsp -TEXT_NEXT: 0000000100000f7a popq %rbp -TEXT_NEXT: 0000000100000f7b retq +TEXT-NEXT: _main: +TEXT-NEXT: 0000000100000f60 pushq %rbp +TEXT-NEXT: 0000000100000f61 movq %rsp, %rbp +TEXT-NEXT: 0000000100000f64 subq $0x10, %rsp +TEXT-NEXT: 0000000100000f68 movl $0x0, -0x4(%rbp) +TEXT-NEXT: 0000000100000f6f callq _hello +TEXT-NEXT: 0000000100000f74 xorl %eax, %eax +TEXT-NEXT: 0000000100000f76 addq $0x10, %rsp +TEXT-NEXT: 0000000100000f7a popq %rbp +TEXT-NEXT: 0000000100000f7b retq // RUN: llvm-objdump --macho -D --full-leading-addr --print-imm-hex --no-show-raw-insn %p/Inputs/macho-multiple-text | FileCheck %s --check-prefix=ALL ALL: (__TEXT,__text) section -ALL_NEXT: _main: -ALL_NEXT: 0000000100000f60 pushq %rbp -ALL_NEXT: 0000000100000f61 movq %rsp, %rbp -ALL_NEXT: 0000000100000f64 subq $0x10, %rsp -ALL_NEXT: 0000000100000f68 movl $0x0, -0x4(%rbp) -ALL_NEXT: 0000000100000f6f callq _hello -ALL_NEXT: 0000000100000f74 xorl %eax, %eax -ALL_NEXT: 0000000100000f76 addq $0x10, %rsp -ALL_NEXT: 0000000100000f7a popq %rbp -ALL_NEXT: 0000000100000f7b retq +ALL-NEXT: _main: +ALL-NEXT: 0000000100000f60 pushq %rbp +ALL-NEXT: 0000000100000f61 movq %rsp, %rbp +ALL-NEXT: 0000000100000f64 subq $0x10, %rsp +ALL-NEXT: 0000000100000f68 movl $0x0, -0x4(%rbp) +ALL-NEXT: 0000000100000f6f callq _hello +ALL-NEXT: 0000000100000f74 xorl %eax, %eax +ALL-NEXT: 0000000100000f76 addq $0x10, %rsp +ALL-NEXT: 0000000100000f7a popq %rbp +ALL-NEXT: 0000000100000f7b retq ALL: (__TEXT_EXEC,__text) section -ALL_NEXT: _hello: -ALL_NEXT: 0000000100001000 pushq %rbp -ALL_NEXT: 0000000100001001 movq %rsp, %rbp -ALL_NEXT: 0000000100001004 subq $0x10, %rsp -ALL_NEXT: 0000000100001008 leaq -0x71(%rip), %rdi ## literal pool for: "hello, world!\n" -ALL_NEXT: 000000010000100f movb $0x0, %al -ALL_NEXT: 
0000000100001011 callq 0x100000f7c ## symbol stub for: _printf -ALL_NEXT: 0000000100001016 movl %eax, -0x4(%rbp) -ALL_NEXT: 0000000100001019 addq $0x10, %rsp -ALL_NEXT: 000000010000101d popq %rbp -ALL_NEXT: 000000010000101e retq +ALL-NEXT: _hello: +ALL-NEXT: 0000000100001000 pushq %rbp +ALL-NEXT: 0000000100001001 movq %rsp, %rbp +ALL-NEXT: 0000000100001004 subq $0x10, %rsp +ALL-NEXT: 0000000100001008 leaq -0x71(%rip), %rdi ## literal pool for: "hello, world!\n" +ALL-NEXT: 000000010000100f movb $0x0, %al +ALL-NEXT: 0000000100001011 callq 0x100000f7c ## symbol stub for: _printf +ALL-NEXT: 0000000100001016 movl %eax, -0x4(%rbp) +ALL-NEXT: 0000000100001019 addq $0x10, %rsp +ALL-NEXT: 000000010000101d popq %rbp +ALL-NEXT: 000000010000101e retq diff --git a/llvm/test/tools/llvm-readobj/COFF/unwind-arm64-windows.test b/llvm/test/tools/llvm-readobj/COFF/unwind-arm64-windows.test index 17d05163346c6a..f118816506ee37 100644 --- a/llvm/test/tools/llvm-readobj/COFF/unwind-arm64-windows.test +++ b/llvm/test/tools/llvm-readobj/COFF/unwind-arm64-windows.test @@ -27,7 +27,7 @@ UNWIND1-NEXT: 0xd2c2 ; ldr x30, [sp, #16] UNWIND1-NEXT: 0x28 ; ldp x19, x20, [sp], #64 UNWIND1-NEXT: 0xe4 ; end UNWIND1-NEXT: ] -UNWIND1_NEXT: } +UNWIND1-NEXT: } UNWIND2: ExceptionData { diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index 1f191a14ed25df..664dfa39cb5e0e 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ -182,7 +182,7 @@ func.func @two_bands() { // The inner pair of loops is coalesced separately. 
// CHECK: scf.for scf.for %k = %i to %j step %c1 { - // CHECK_NOT: scf.for + // CHECK-NOT: scf.for scf.for %l = %i to %j step %c1 { "foo"() : () -> () } diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir index a526bf7ded8d3c..fd0bd88276365b 100644 --- a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir +++ b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir @@ -39,7 +39,7 @@ func.func @fuse_by_collapsing(%arg0 : tensor<2x12x5x336x9xi32>, // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK_SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: outs(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2], [3], [4, 5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] @@ -153,7 +153,7 @@ func.func @fuse_by_collapsing_change_reshape_order(%arg0 : tensor<9x56x2x60x6xi3 // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK_SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: outs(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2, 3], [4], [5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir index 7509fdb866e435..463d205ec2a149 100644 --- a/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir @@ -23,7 +23,7 @@ func.func @tile_sizes_zero(%arg0 : tensor, %arg1 : tensor) -> // ----- -// UNARY_LABEL: 
@shape_only( +// UNARY-LABEL: @shape_only( func.func @shape_only(%arg0 : tensor, %arg1 : tensor) -> tensor { %cst = arith.constant 0.0 : f32 diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index 8ebd9f39ccbd6d..138a0f4474084a 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -429,7 +429,7 @@ func.func @collapse_after_memref_cast_type_change(%arg0 : memref) // CHECK-LABEL: func @collapse_after_memref_cast( // CHECK-SAME: %[[INPUT:.*]]: memref) -> memref { // CHECK: %[[COLLAPSED:.*]] = memref.collapse_shape %[[INPUT]] -// CHECK_SAME: {{\[\[}}0], [1, 2, 3]] : memref into memref +// CHECK-SAME: {{\[\[}}0], [1, 2, 3]] : memref into memref // CHECK: return %[[COLLAPSED]] : memref func.func @collapse_after_memref_cast(%arg0 : memref) -> memref { %dynamic = memref.cast %arg0: memref to memref @@ -442,7 +442,7 @@ func.func @collapse_after_memref_cast(%arg0 : memref) -> memref) -> memref { // CHECK: %[[COLLAPSED:.*]] = memref.collapse_shape %[[INPUT]] -// CHECK_SAME: {{\[\[}}0, 1, 2], [3]] : memref<1x1x1x?xi64> into memref<1x?xi64> +// CHECK-SAME: {{\[\[}}0, 1, 2], [3]] : memref<1x1x1x?xi64> into memref<1x?xi64> // CHECK: %[[DYNAMIC:.*]] = memref.cast %[[COLLAPSED]] : // CHECK-SAME: memref<1x?xi64> to memref // CHECK: return %[[DYNAMIC]] : memref diff --git a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir index 0ff8235e52fdc8..b1a152f1837bc4 100644 --- a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir @@ -342,7 +342,7 @@ func.func @aligned_load_incorrect_attributes() -> () { spv.module Logical GLSL450 { spv.GlobalVariable @var0 : !spv.ptr spv.GlobalVariable @var1 : !spv.ptr>, UniformConstant> - // CHECK_LABEL: @simple_load + // CHECK-LABEL: @simple_load spv.func @simple_load() -> () "None" { // CHECK: spv.Load "Input" {{%.*}} : f32 %0 = spv.mlir.addressof @var0 : !spv.ptr @@ -367,7 
+367,7 @@ func.func @simple_store(%arg0 : f32) -> () { return } -// CHECK_LABEL: @volatile_store +// CHECK-LABEL: @volatile_store func.func @volatile_store(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr // CHECK: spv.Store "Function" %0, %arg0 ["Volatile"] : f32 @@ -375,7 +375,7 @@ func.func @volatile_store(%arg0 : f32) -> () { return } -// CHECK_LABEL: @aligned_store +// CHECK-LABEL: @aligned_store func.func @aligned_store(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr // CHECK: spv.Store "Function" %0, %arg0 ["Aligned", 4] : f32 diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir index b4abb17717f3aa..34f7f7095d4d8c 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -54,7 +54,7 @@ func.func @split_vector_transfer_read_2d(%A: memref, %i: index, %j: ind // CHECK-SAME: memref, index, index // CHECK: } // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst - // CHECK_SAME: {in_bounds = [true, true]} : memref, vector<4x8xf32> + // CHECK-SAME: {in_bounds = [true, true]} : memref, vector<4x8xf32> // LINALG-DAG: %[[c0:.*]] = arith.constant 0 : index // LINALG-DAG: %[[c4:.*]] = arith.constant 4 : index @@ -89,7 +89,7 @@ func.func @split_vector_transfer_read_2d(%A: memref, %i: index, %j: ind // LINALG-SAME: memref, index, index // LINALG: } // LINALG: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst - // LINALG_SAME: {in_bounds = [true, true]} : memref, vector<4x8xf32> + // LINALG-SAME: {in_bounds = [true, true]} : memref, vector<4x8xf32> %1 = vector.transfer_read %A[%i, %j], %f0 : memref, vector<4x8xf32> // LINALG: return %[[res]] : vector<4x8xf32> diff --git a/mlir/test/IR/dynamic.mlir b/mlir/test/IR/dynamic.mlir index 992cc588d4707d..677fd9894dc85a 100644 --- a/mlir/test/IR/dynamic.mlir +++ 
b/mlir/test/IR/dynamic.mlir @@ -11,7 +11,7 @@ func.func @succeededDynamicTypeVerifier() { "unregistered_op"() : () -> !test.dynamic_singleton // CHECK-NEXT: "unregistered_op"() : () -> !test.dynamic_pair "unregistered_op"() : () -> !test.dynamic_pair - // CHECK_NEXT: %{{.*}} = "unregistered_op"() : () -> !test.dynamic_pair, !test.dynamic_singleton> + // CHECK-NEXT: %{{.*}} = "unregistered_op"() : () -> !test.dynamic_pair, !test.dynamic_singleton> "unregistered_op"() : () -> !test.dynamic_pair, !test.dynamic_singleton> return } @@ -53,7 +53,7 @@ func.func @succeededDynamicAttributeVerifier() { "unregistered_op"() {test_attr = #test.dynamic_singleton} : () -> () // CHECK-NEXT: "unregistered_op"() {test_attr = #test.dynamic_pair<3 : i32, 5 : i32>} : () -> () "unregistered_op"() {test_attr = #test.dynamic_pair<3 : i32, 5 : i32>} : () -> () - // CHECK_NEXT: "unregistered_op"() {test_attr = #test.dynamic_pair<3 : i32, 5 : i32>} : () -> () + // CHECK-NEXT: "unregistered_op"() {test_attr = #test.dynamic_pair<#test.dynamic_pair<3 : i32, 5 : i32>, f64>} : () -> () "unregistered_op"() {test_attr = #test.dynamic_pair<#test.dynamic_pair<3 : i32, 5 : i32>, f64>} : () -> () return } diff --git a/mlir/test/mlir-tblgen/op-decl-and-defs.td b/mlir/test/mlir-tblgen/op-decl-and-defs.td index 1883b16f6fbaf3..cab95c3294673b 100644 --- a/mlir/test/mlir-tblgen/op-decl-and-defs.td +++ b/mlir/test/mlir-tblgen/op-decl-and-defs.td @@ -199,7 +199,7 @@ def NS_HCollectiveParamsOp : NS_Op<"op_collective_params", []> { let results = (outs AnyType:$b); } -// CHECK_LABEL: class HCollectiveParamsOp : +// CHECK-LABEL: class HCollectiveParamsOp : // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type b, ::mlir::Value a); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::Value a); // CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, 
::mlir::TypeRange resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}) @@ -212,7 +212,7 @@ def NS_HCollectiveParamsSuppress0Op : NS_Op<"op_collective_suppress0", []> { let results = (outs Variadic:$b); } -// CHECK_LABEL: class HCollectiveParamsSuppress0Op : +// CHECK-LABEL: class HCollectiveParamsSuppress0Op : // CHECK-NOT: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange b, ::mlir::ValueRange a); // CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}); @@ -224,7 +224,7 @@ def NS_HCollectiveParamsSuppress1Op : NS_Op<"op_collective_suppress1", []> { let results = (outs I32:$b); } -// CHECK_LABEL: class HCollectiveParamsSuppress1Op : +// CHECK-LABEL: class HCollectiveParamsSuppress1Op : // CHECK-NOT: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange b, ::mlir::ValueRange a); // CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}); @@ -237,7 +237,7 @@ def NS_HCollectiveParamsSuppress2Op : NS_Op<"op_collective_suppress2", [SameVari let arguments = (ins Variadic:$a); let results = (outs Variadic:$b, Variadic:$c); } -// CHECK_LABEL: class HCollectiveParamsSuppress2Op : +// CHECK-LABEL: class HCollectiveParamsSuppress2Op : // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange b, ::mlir::TypeRange c, ::mlir::ValueRange a); // CHECK-NOT: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange b, ::mlir::ValueRange a); // CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, 
::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}); @@ -247,7 +247,7 @@ def NS_IOp : NS_Op<"op_with_same_operands_and_result_types_trait", [SameOperands let arguments = (ins AnyType:$a, AnyType:$b); let results = (outs AnyType:$r); } -// CHECK_LABEL: class IOp : +// CHECK-LABEL: class IOp : // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type r, ::mlir::Value a, ::mlir::Value b); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Value a, ::mlir::Value b); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::Value a, ::mlir::Value b); @@ -259,7 +259,7 @@ def NS_JOp : NS_Op<"op_with_InferTypeOpInterface_interface", [DeclareOpInterface let arguments = (ins AnyType:$a, AnyType:$b); let results = (outs AnyType:$r); } -// CHECK_LABEL: class JOp : +// CHECK-LABEL: class JOp : // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type r, ::mlir::Value a, ::mlir::Value b); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Value a, ::mlir::Value b); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::Value a, ::mlir::Value b); @@ -292,7 +292,7 @@ def NS_LOp : NS_Op<"op_with_same_operands_and_result_types_unwrapped_attr", [Sam let arguments = (ins AnyType:$a, AnyType:$b, I32Attr:$attr1); let results = (outs AnyType:$r); } -// CHECK_LABEL: class LOp : +// CHECK-LABEL: class LOp : // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type r, ::mlir::Value a, ::mlir::Value b, ::mlir::IntegerAttr attr1); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Value a, ::mlir::Value b, 
::mlir::IntegerAttr attr1); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::TypeRange resultTypes, ::mlir::Value a, ::mlir::Value b, ::mlir::IntegerAttr attr1); diff --git a/polly/test/ScopDetect/dot-scops-npm.ll b/polly/test/ScopDetect/dot-scops-npm.ll index 16211f5b564238..3911d0fb97cff5 100644 --- a/polly/test/ScopDetect/dot-scops-npm.ll +++ b/polly/test/ScopDetect/dot-scops-npm.ll @@ -30,35 +30,35 @@ define void @func(i32 %n, i32 %m, double* noalias nonnull %A) { ; CHECK-NEXT: Node0x[[OUTER_EXIT]] -> Node0x[[RETURN_ID:.*]]; ; CHECK-NEXT: Node0x[[RETURN_ID]] [shape=record,label="{return: ; CHECK-NEXT: colorscheme = "paired12" -; CHECK_NEXT: subgraph cluster_0x[[:.*]] { -; CHECK_NEXT: label = ""; -; CHECK_NEXT: style = solid; -; CHECK_NEXT: color = 1 -; CHECK_NEXT: subgraph cluster_0x[[:.*]] { -; CHECK_NEXT: label = ""; -; CHECK_NEXT: style = filled; -; CHECK_NEXT: color = 3 subgraph cluster_0x7152c40 { -; CHECK_NEXT: label = ""; -; CHECK_NEXT: style = solid; -; CHECK_NEXT: color = 5 -; CHECK_NEXT: subgraph cluster_0x[[:.*]] { -; CHECK_NEXT: label = ""; -; CHECK_NEXT: style = solid; -; CHECK_NEXT: color = 7 -; CHECK_NEXT: Node0x[[INNER_FOR_ID]]; -; CHECK_NEXT: Node0x[[BABY1_ID]]; -; CHECK_NEXT: Node0x[[INNER_INC_ID]]; -; CHECK_NEXT: } -; CHECK_NEXT: Node0x[[OUTER_FOR_ID]]; -; CHECK_NEXT: Node0x[[INNER_EXIT_ID]]; -; CHECK_NEXT: Node0x[[OUTER_INC_ID]]; -; CHECK_NEXT: } -; CHECK_NEXT: Node0x[[OUTER_EXIT]]; -; CHECK_NEXT: } -; CHECK_NEXT: Node0x[[EntryID]]; -; CHECK_NEXT: Node0x[[RETURN_ID]]; -; CHECK_NEXT: } -; CHECK_NEXT: } +; CHECK-NEXT: subgraph cluster_0x{{.*}} { +; CHECK-NEXT: label = ""; +; CHECK-NEXT: style = solid; +; CHECK-NEXT: color = 1 +; CHECK-NEXT: subgraph cluster_0x{{.*}} { +; CHECK-NEXT: label = ""; +; CHECK-NEXT: style = filled; +; CHECK-NEXT: color = 3 subgraph cluster_0x{{.*}} { +; CHECK-NEXT: label = ""; +; CHECK-NEXT: style = solid; +; CHECK-NEXT: color = 5 +; CHECK-NEXT: subgraph 
cluster_0x{{.*}} { +; CHECK-NEXT: label = ""; +; CHECK-NEXT: style = solid; +; CHECK-NEXT: color = 7 +; CHECK-NEXT: Node0x[[INNER_FOR_ID]]; +; CHECK-NEXT: Node0x[[BABY1_ID]]; +; CHECK-NEXT: Node0x[[INNER_INC_ID]]; +; CHECK-NEXT: } +; CHECK-NEXT: Node0x[[OUTER_FOR_ID]]; +; CHECK-NEXT: Node0x[[INNER_EXIT_ID]]; +; CHECK-NEXT: Node0x[[OUTER_INC_ID]]; +; CHECK-NEXT: } +; CHECK-NEXT: Node0x[[OUTER_EXIT]]; +; CHECK-NEXT: } +; CHECK-NEXT: Node0x[[EntryID]]; +; CHECK-NEXT: Node0x[[RETURN_ID]]; +; CHECK-NEXT: } +; CHECK-NEXT: } entry: br label %outer.for From d79275238f9fb11fac31d42a846fe80fca2306d9 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Tue, 19 Apr 2022 03:40:17 -0400 Subject: [PATCH 568/908] [MachineSink] replace MachineLoop with MachineCycle reapply 62a9b36fcf728b104ea87e6eb84c0be69b779df7 and fix module build failure: 1: remove MachineCycleInfoWrapperPass in MachinePassRegistry.def MachineCycleInfoWrapperPass is an analysis pass, should not be there. 2: move the definition for MachineCycleInfoPrinterPass to cpp file. Otherwise, there is a module conflict for MachineCycleInfoWrapperPass in MachinePassRegistry.def and MachineCycleAnalysis.h after 62a9b36fcf728b104ea87e6eb84c0be69b779df7. MachineCycle can handle irreducible loops. Natural loop analysis (MachineLoop) cannot return the correct loop depth if the loop is irreducible. And MachineSink is sensitive to the loop depth, see MachineSinking::isProfitableToSinkTo(). This patch tries to use MachineCycle so that we can handle irreducible loops better. 
Reviewed By: sameerds, MatzeB Differential Revision: https://reviews.llvm.org/D123995 --- llvm/include/llvm/ADT/GenericCycleImpl.h | 50 +++++ llvm/include/llvm/ADT/GenericCycleInfo.h | 15 ++ .../llvm/CodeGen/MachineCycleAnalysis.h | 25 +++ .../llvm/CodeGen/MachinePassRegistry.def | 1 - llvm/include/llvm/CodeGen/MachineSSAContext.h | 2 + llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 110 ++++++---- llvm/lib/CodeGen/MachineSink.cpp | 190 +++++++++--------- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + llvm/test/CodeGen/AArch64/loop-sink-limit.mir | 4 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 + llvm/test/CodeGen/ARM/O3-pipeline.ll | 1 + llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + llvm/test/CodeGen/X86/opt-pipeline.ll | 1 + llvm/test/CodeGen/X86/pr38795.ll | 93 ++++----- llvm/test/CodeGen/X86/switch-phi-const.ll | 4 +- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 36 ++-- 17 files changed, 340 insertions(+), 199 deletions(-) diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index 17f3a793c985b2..ea2847f8c8ee62 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -66,6 +66,44 @@ void GenericCycle::getExitBlocks( } } +template +auto GenericCycle::getCyclePreheader() const -> BlockT * { + BlockT *Predecessor = getCyclePredecessor(); + if (!Predecessor) + return nullptr; + + assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!"); + + if (succ_size(Predecessor) != 1) + return nullptr; + + // Make sure we are allowed to hoist instructions into the predecessor. + if (!Predecessor->isLegalToHoistInto()) + return nullptr; + + return Predecessor; +} + +template +auto GenericCycle::getCyclePredecessor() const -> BlockT * { + if (!isReducible()) + return nullptr; + + BlockT *Out = nullptr; + + // Loop over the predecessors of the header node... 
+ BlockT *Header = getHeader(); + for (const auto Pred : predecessors(Header)) { + if (!contains(Pred)) { + if (Out && Out != Pred) + return nullptr; + Out = Pred; + } + } + + return Out; +} + /// \brief Helper class for computing cycle information. template class GenericCycleInfoCompute { using BlockT = typename ContextT::BlockT; @@ -326,6 +364,18 @@ auto GenericCycleInfo::getCycle(const BlockT *Block) const return nullptr; } +/// \brief get the depth for the cycle which containing a given block. +/// +/// \returns the depth for the innermost cycle containing \p Block or 0 if it is +/// not contained in any cycle. +template +unsigned GenericCycleInfo::getCycleDepth(const BlockT *Block) const { + CycleT *Cycle = getCycle(Block); + if (!Cycle) + return 0; + return Cycle->getDepth(); +} + #ifndef NDEBUG /// \brief Validate the internal consistency of the cycle tree. /// diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index 5c2190411c1e04..970664b8571544 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -100,6 +100,10 @@ template class GenericCycle { BlockT *getHeader() const { return Entries[0]; } + const SmallVectorImpl & getEntries() const { + return Entries; + } + /// \brief Return whether \p Block is an entry block of the cycle. bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); } @@ -124,6 +128,16 @@ template class GenericCycle { /// branched to. void getExitBlocks(SmallVectorImpl &TmpStorage) const; + /// Return the preheader block for this cycle. Pre-header is well-defined for + /// reducible cycle in docs/LoopTerminology.rst as: the only one entering + /// block and its only edge is to the entry block. Return null for irreducible + /// cycles. + BlockT *getCyclePreheader() const; + + /// If the cycle has exactly one entry with exactly one predecessor, return + /// it, otherwise return nullptr. 
+ BlockT *getCyclePredecessor() const; + /// Iteration over child cycles. //@{ using const_child_iterator_base = @@ -239,6 +253,7 @@ template class GenericCycleInfo { const ContextT &getSSAContext() const { return Context; } CycleT *getCycle(const BlockT *Block) const; + unsigned getCycleDepth(const BlockT *Block) const; CycleT *getTopLevelParentCycle(const BlockT *Block) const; /// Move \p Child to \p NewParent by manipulating Children vectors. diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h index 54d9860e54c9a5..3f89f2076d5037 100644 --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -16,6 +16,8 @@ #include "llvm/ADT/GenericCycleInfo.h" #include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" namespace llvm { @@ -25,6 +27,29 @@ extern template class GenericCycle; using MachineCycleInfo = GenericCycleInfo; using MachineCycle = MachineCycleInfo::CycleT; +/// Legacy analysis pass which computes a \ref MachineCycleInfo. +class MachineCycleInfoWrapperPass : public MachineFunctionPass { + MachineFunction *F = nullptr; + MachineCycleInfo CI; + +public: + static char ID; + + MachineCycleInfoWrapperPass(); + + MachineCycleInfo &getCycleInfo() { return CI; } + const MachineCycleInfo &getCycleInfo() const { return CI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; +}; + +// TODO: add this function to GenericCycle template after implementing IR +// version. 
+bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I); + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def index 5b98254cacc99f..7748055f5d35a4 100644 --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -199,6 +199,5 @@ DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ()) DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ()) DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-cycles", MachineCycleInfoWrapperPass, ()) DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass, ()) #undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h index c945b8fefc80d3..f59d7cf8a52295 100644 --- a/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -28,6 +28,8 @@ template class DominatorTreeBase; inline auto successors(MachineBasicBlock *BB) { return BB->successors(); } inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); } +inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); } +inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); } template <> class GenericSSAContext { const MachineRegisterInfo *RegInfo = nullptr; diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index 42a5e2b7af01ba..6871ac35b300f0 100644 --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -8,50 +8,15 @@ #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" -#include 
"llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineSSAContext.h" -#include "llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; template class llvm::GenericCycleInfo; template class llvm::GenericCycle; -namespace { - -/// Legacy analysis pass which computes a \ref MachineCycleInfo. -class MachineCycleInfoWrapperPass : public MachineFunctionPass { - MachineFunction *F = nullptr; - MachineCycleInfo CI; - -public: - static char ID; - - MachineCycleInfoWrapperPass(); - - MachineCycleInfo &getCycleInfo() { return CI; } - const MachineCycleInfo &getCycleInfo() const { return CI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - -class MachineCycleInfoPrinterPass : public MachineFunctionPass { -public: - static char ID; - - MachineCycleInfoPrinterPass(); - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; - -} // namespace - char MachineCycleInfoWrapperPass::ID = 0; MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() @@ -87,6 +52,16 @@ void MachineCycleInfoWrapperPass::releaseMemory() { F = nullptr; } +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + char MachineCycleInfoPrinterPass::ID = 0; MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass() @@ -111,3 +86,62 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) { CI.print(errs()); return false; } + +bool llvm::isCycleInvariant(const MachineCycle *Cycle, 
MachineInstr &I) { + MachineFunction *MF = I.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetInstrInfo *TII = ST.getInstrInfo(); + + // The instruction is cycle invariant if all of its operands are. + for (const MachineOperand &MO : I.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // An instruction that uses or defines a physical register can't e.g. be + // hoisted, so mark this as not invariant. + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && + !TII->isIgnorableUse(MO)) + return false; + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead can't be moved. + return false; + } else if (any_of(Cycle->getEntries(), + [&](const MachineBasicBlock *Block) { + return Block->isLiveIn(Reg); + })) { + // If the reg is live into any header of the cycle we can't hoist an + // instruction which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); + + // If the cycle contains the definition of an operand, then the instruction + // isn't cycle invariant. + if (Cycle->contains(MRI->getVRegDef(Reg)->getParent())) + return false; + } + + // If we got this far, the instruction is cycle invariant! 
+ return true; +} diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 966b012725d81f..797433e7a2809c 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,18 +96,18 @@ static cl::opt SinkLoadBlocksThreshold( cl::init(20), cl::Hidden); static cl::opt -SinkInstsIntoLoop("sink-insts-to-avoid-spills", - cl::desc("Sink instructions into loops to avoid " - "register spills"), - cl::init(false), cl::Hidden); - -static cl::opt SinkIntoLoopLimit( - "machine-sink-loop-limit", - cl::desc("The maximum number of instructions considered for loop sinking."), + SinkInstsIntoCycle("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into cycles to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt SinkIntoCycleLimit( + "machine-sink-cycle-limit", + cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); +STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -119,7 +120,7 @@ namespace { MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree - MachineLoopInfo *LI; + MachineCycleInfo *CI; MachineBlockFrequencyInfo *MBFI; const 
MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -180,8 +181,9 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -232,9 +234,9 @@ namespace { MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, - SmallVectorImpl &Candidates); - bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, + SmallVectorImpl &Candidates); + bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -261,7 +263,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) @@ -378,26 +380,27 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { return false; } -void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, +void MachineSinking::FindCycleSinkCandidates( + MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { - 
LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + if (!isCycleInvariant(Cycle, MI)) { + LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) @@ -409,7 +412,7 @@ void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *B if (!MRI->hasOneDef(MO.getReg())) continue; - LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } @@ -425,22 +428,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); - LI = &getAnalysis(); + CI = &getAnalysis().getCycleInfo(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); - // MachineSink currently uses MachineLoopInfo, which only recognizes natural - // loops. As such, we could sink instructions into irreducible cycles, which - // would be non-profitable. - // WARNING: The current implementation of hasStoreBetween() is incorrect for - // sinking into irreducible cycles (PR53990), this bailout is currently - // necessary for correctness, not just profitability. 
- ReversePostOrderTraversal RPOT(&*MF.begin()); - if (containsIrreducibleCFG(RPOT, *LI)) - return false; - bool EverMadeChange = false; while (true) { @@ -473,32 +466,33 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } - if (SinkInstsIntoLoop) { - SmallVector Loops(LI->begin(), LI->end()); - for (auto *L : Loops) { - MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (SinkInstsIntoCycle) { + SmallVector Cycles(CI->toplevel_begin(), + CI->toplevel_end()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); if (!Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); continue; } SmallVector Candidates; - FindLoopSinkCandidates(L, Preheader, Candidates); + FindCycleSinkCandidates(Cycle, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoLoopLimit) { - LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " "be analysed."); break; } - if (!SinkIntoLoop(L, *I)) + if (!SinkIntoCycle(Cycle, *I)) break; EverMadeChange = true; - ++NumLoopSunk; + ++NumCycleSunk; } } } @@ -520,12 +514,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an - // unreachable loop there may be nowhere to stop. + // unreachable cycle there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; - // Cache all successors, sorted by frequency info and loop depth. 
+ // Cache all successors, sorted by frequency info and cycle depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. @@ -644,13 +638,16 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; - // Avoid breaking back edge. From == To means backedge for single BB loop. + // Avoid breaking back edge. From == To means backedge for single BB cycle. if (!SplitEdges || FromBB == ToBB) return false; - // Check for backedges of more "complex" loops. - if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && - LI->isLoopHeader(ToBB)) + MachineCycle *FromCycle = CI->getCycle(FromBB); + MachineCycle *ToCycle = CI->getCycle(ToBB); + + // Check for backedges of more "complex" cycles. + if (FromCycle == ToCycle && FromCycle && + (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -753,9 +750,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (!PDT->dominates(SuccToSinkTo, MBB)) return true; - // It is profitable to sink an instruction from a deeper loop to a shallower - // loop, even if the latter post-dominates the former (PR21115). - if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + // It is profitable to sink an instruction from a deeper cycle to a shallower + // cycle, even if the latter post-dominates the former (PR21115). + if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. 
@@ -776,11 +773,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - MachineLoop *ML = LI->getLoopFor(MBB); + MachineCycle *MCycle = CI->getCycle(MBB); - // If the instruction is not inside a loop, it is not profitable to sink MI to + // If the instruction is not inside a cycle, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. - if (!ML) + if (!MCycle) return false; auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { @@ -798,7 +795,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, return false; }; - // If this instruction is inside a loop and sinking this instruction can make + // If this instruction is inside a Cycle and sinking this instruction can make // more registers live range shorten, it is still prifitable. for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. @@ -826,14 +823,15 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, return false; } else { MachineInstr *DefMI = MRI->getVRegDef(Reg); - // DefMI is defined outside of loop. There should be no live range - // impact for this operand. Defination outside of loop means: - // 1: defination is outside of loop. - // 2: defination is in this loop, but it is a PHI in the loop header. - if (LI->getLoopFor(DefMI->getParent()) != ML || - (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + MachineCycle *Cycle = CI->getCycle(DefMI->getParent()); + // DefMI is defined outside of cycle. There should be no live range + // impact for this operand. Defination outside of cycle means: + // 1: defination is outside of cycle. + // 2: defination is in this cycle, but it is a PHI in the cycle header. 
+ if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() && + Cycle->getHeader() == DefMI->getParent())) continue; - // The DefMI is defined inside the loop. + // The DefMI is defined inside the cycle. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { @@ -843,8 +841,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, } } - // If MI is in loop and all its operands are alive across the whole loop or if - // no operand sinking make register pressure set exceed limit, it is + // If MI is in cycle and all its operands are alive across the whole cycle or + // if no operand sinking make register pressure set exceed limit, it is // profitable to sink MI. return true; } @@ -876,14 +874,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccs.push_back(DTChild->getBlock()); } - // Sort Successors according to their loop depth or block frequency info. + // Sort Successors according to their cycle depth or block frequency info. llvm::stable_sort( AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? LHSFreq < RHSFreq - : LI->getLoopDepth(L) < LI->getLoopDepth(R); + : CI->getCycleDepth(L) < CI->getCycleDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); @@ -898,7 +896,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); - // Loop over all the operands of the specified instruction. If there is + // loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. 
// SuccToSinkTo - This is the successor to sink this instruction to, once we @@ -945,7 +943,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies - // higher priority, otherwise prioritize smaller loop depths. + // higher priority, otherwise prioritize smaller cycle depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; @@ -968,7 +966,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, } // It is not possible to sink an instruction into its own block. This can - // happen with loops. + // happen with cycles. if (MBB == SuccToSinkTo) return nullptr; @@ -1222,68 +1220,70 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. -bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = L->getLoopPreheader(); - assert(Preheader && "Loop sink needs a preheader block"); +/// Sink instructions into cycles if profitable. This especially tries to +/// prevent register spills caused by register pressure if there is little to no +/// overhead moving instructions into cycles. 
+bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); - if (!L->contains(&MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop + // the instruction (and thus possibly executing it on every cycle // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. 
if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); CanSink = false; break; } - LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); return false; } if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { - LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); return false; } - LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, I); @@ -1407,9 +1407,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, TryBreak = true; } - // Don't sink instructions into a loop. 
- if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { - LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + // Don't sink instructions into a cycle. + if (!TryBreak && CI->getCycle(SuccToSinkTo) && + (!CI->getCycle(SuccToSinkTo)->isReducible() || + CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { + LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n"); TryBreak = true; } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 0f532f4fe3240d..2f205f9ab37356 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -132,6 +132,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir index bdb99f296fcabe..0a886925ffab49 100644 --- a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir +++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir @@ -1,10 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK1 # # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK2 --- | diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index e23bea234be9fa..684774b18308e0 
100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -296,6 +296,7 @@ ; GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Common Subexpression Elimination ; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Machine Cycle Info Analysis ; GCN-O1-NEXT: Machine code sinking ; GCN-O1-NEXT: Peephole Optimizations ; GCN-O1-NEXT: Remove dead machine instructions @@ -574,6 +575,7 @@ ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Common Subexpression Elimination ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction +; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Machine code sinking ; GCN-O1-OPTS-NEXT: Peephole Optimizations ; GCN-O1-OPTS-NEXT: Remove dead machine instructions @@ -861,6 +863,7 @@ ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Common Subexpression Elimination ; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Machine Cycle Info Analysis ; GCN-O2-NEXT: Machine code sinking ; GCN-O2-NEXT: Peephole Optimizations ; GCN-O2-NEXT: Remove dead machine instructions @@ -1161,6 +1164,7 @@ ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Common Subexpression Elimination ; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Machine Cycle Info Analysis ; GCN-O3-NEXT: Machine code sinking ; GCN-O3-NEXT: Peephole Optimizations ; GCN-O3-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index d2507bf6365b6d..098422a4a770d5 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -91,6 +91,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: 
Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index d67fed77d35699..eff5184e9c84c8 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -111,6 +111,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index c1f69756b921d3..d0bbe886f67968 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -90,6 +90,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index e83266f72e488f..8552ebc348cbb9 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -107,6 +107,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index b526e4f471b1a2..d805dcad8b6e64 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,14 +32,13 @@ define 
dso_local void @fn() { ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_15: # %for.inc +; CHECK-NEXT: .LBB0_16: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_19 Depth 2 +; CHECK-NEXT: # Child Loop BB0_20 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -56,7 +55,7 @@ define dso_local void @fn() { ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_9 +; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -65,10 +64,12 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -77,82 +78,82 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; CHECK-NEXT: jmp .LBB0_6 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_9: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_10 -; 
CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: movl %edi, %esi -; CHECK-NEXT: movl $0, %edi -; CHECK-NEXT: movb %cl, %dl -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.7: # %af +; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: .LBB0_11: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_8 -; CHECK-NEXT: .LBB0_16: # %if.end39 +; CHECK-NEXT: jne .LBB0_12 +; CHECK-NEXT: .LBB0_17: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_18 -; CHECK-NEXT: # %bb.17: # %if.then41 +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.18: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_18: # %for.end46 +; CHECK-NEXT: .LBB0_19: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_20 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_19: # %for.cond47 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movb %dl, %dh +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: # %bb.20: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 +; 
CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: # %bb.21: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: .LBB0_10: # %ae +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: .LBB0_9: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: # %bb.12: # %if.end26 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_15 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: # %bb.14: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_15 -; CHECK-NEXT: # %bb.14: # %if.then31 +; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: # %bb.15: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_15 +; CHECK-NEXT: jmp .LBB0_16 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl +; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: jmp .LBB0_7 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll index e28169217c01e7..981a60d09545f7 100644 --- 
a/llvm/test/CodeGen/X86/switch-phi-const.ll +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -93,12 +93,12 @@ define void @switch_trunc_phi_const(i32 %x) { ; CHECK: # %bb.0: # %bb0 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: movzbl %dil, %ecx -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 ; CHECK-NEXT: decl %ecx ; CHECK-NEXT: cmpl $54, %ecx ; CHECK-NEXT: ja .LBB1_8 ; CHECK-NEXT: # %bb.1: # %bb0 +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 ; CHECK-NEXT: jmpq *.LJTI1_0(,%rcx,8) ; CHECK-NEXT: .LBB1_8: # %default ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index b44895293b4117..0f8bb837f82a51 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,8 +1377,6 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1388,20 +1386,24 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_4 -; ENABLE-NEXT: ## %bb.3: ## %for.body4.i +; ENABLE-NEXT: je LBB16_3 +; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something +; ENABLE-NEXT: jmp LBB16_5 +; ENABLE-NEXT: LBB16_3: +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_4: ## %for.inc +; ENABLE-NEXT: LBB16_5: ## %for.inc ; ENABLE-NEXT: 
## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_4 -; ENABLE-NEXT: ## %bb.5: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_5 +; ENABLE-NEXT: ## %bb.6: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1418,8 +1420,6 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,20 +1429,24 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_4 -; DISABLE-NEXT: ## %bb.3: ## %for.body4.i +; DISABLE-NEXT: je LBB16_3 +; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something +; DISABLE-NEXT: jmp LBB16_5 +; DISABLE-NEXT: LBB16_3: +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_4: ## %for.inc +; DISABLE-NEXT: LBB16_5: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_4 -; DISABLE-NEXT: ## %bb.5: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_5 +; DISABLE-NEXT: ## %bb.6: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx From 8894c05b0d351f998b1836542ba791247394bd12 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Thu, 26 May 2022 12:20:15 +0100 Subject: [PATCH 569/908] [FileCheck] GetCheckTypeAbbreviation() to handle the misspelled 
case. Also fix directives not covered by D125604. --- llvm/utils/FileCheck/FileCheck.cpp | 2 ++ mlir/test/python/ir/integer_set.py | 2 +- polly/test/GPGPU/non-read-only-scalars.ll | 18 +++++++++--------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index ad647a57564ee4..7ab84d88d6a3ca 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -369,6 +369,8 @@ static std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) { return "bad-not"; case Check::CheckBadCount: return "bad-count"; + case Check::CheckMisspelled: + return "misspelled"; case Check::CheckNone: llvm_unreachable("invalid FileCheckType"); } diff --git a/mlir/test/python/ir/integer_set.py b/mlir/test/python/ir/integer_set.py index b916d9ab386e9b..d9f158c0d29ce0 100644 --- a/mlir/test/python/ir/integer_set.py +++ b/mlir/test/python/ir/integer_set.py @@ -126,7 +126,7 @@ def testIntegerSetProperties(): print(" == 0" if cstr.is_eq else " >= 0") -# CHECK_LABEL: TEST: testHash +# TODO-LABEL: TEST: testHash @run def testHash(): with Context(): diff --git a/polly/test/GPGPU/non-read-only-scalars.ll b/polly/test/GPGPU/non-read-only-scalars.ll index 022fa920adb672..5036644ab3d9a4 100644 --- a/polly/test/GPGPU/non-read-only-scalars.ll +++ b/polly/test/GPGPU/non-read-only-scalars.ll @@ -68,15 +68,15 @@ ; CODE-NEXT: Stmt_bb17(); ; CODE: # kernel2 -; CODE_NEXT: { -; CODE_NEXT: read(); -; CODE_NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) { -; CODE_NEXT: Stmt_bb18(c0); -; CODE_NEXT: if (c0 <= 31) -; CODE_NEXT: Stmt_bb20(c0); -; CODE_NEXT: } -; CODE_NEXT: write(); -; CODE_NEXT: } +; TODO-NEXT: { +; TODO-NEXT: read(); +; TODO-NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) { +; TODO-NEXT: Stmt_bb18(c0); +; TODO-NEXT: if (c0 <= 31) +; TODO-NEXT: Stmt_bb20(c0); +; TODO-NEXT: } +; TODO-NEXT: write(); +; TODO-NEXT: } ; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_1(i8 addrspace(1)* 
%MemRef_sum_0__phi) From 7b617eef8018d4cd001b36ad28f014e21270fde5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 May 2022 12:34:02 +0100 Subject: [PATCH 570/908] [DAG] Cleanup "and/or of cmp with single bit diff" fold to use ISD::matchBinaryPredicate Prep work as I'm investigating some cases where TLI::convertSetCCLogicToBitwiseLogic should accept vectors. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 61d23aeec06eaa..2c1f7d990aadd5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5418,29 +5418,27 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, } // Turn compare of constants whose difference is 1 bit into add+and+setcc. - // TODO - support non-uniform vector amounts. if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) { // Match a shared variable operand and 2 non-opaque constant operands. - ConstantSDNode *C0 = isConstOrConstSplat(LR); - ConstantSDNode *C1 = isConstOrConstSplat(RR); - if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) { + auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) { + // The difference of the constants must be a single bit. const APInt &CMax = APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue()); const APInt &CMin = APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue()); - // The difference of the constants must be a single bit. 
- if ((CMax - CMin).isPowerOf2()) { - // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> - // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq - SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); - SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); - SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); - SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); - SDValue Mask = DAG.getNOT(DL, Diff, OpVT); - SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); - SDValue Zero = DAG.getConstant(0, DL, OpVT); - return DAG.getSetCC(DL, VT, And, Zero, CC0); - } + return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2(); + }; + if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) { + // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> + // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq + SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); + SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); + SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); + SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); + SDValue Mask = DAG.getNOT(DL, Diff, OpVT); + SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); + SDValue Zero = DAG.getConstant(0, DL, OpVT); + return DAG.getSetCC(DL, VT, And, Zero, CC0); } } } From 85750de685f543443ec38c407392d9d7cdc19451 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 26 May 2022 07:40:10 -0400 Subject: [PATCH 571/908] Use the canonical type when matching a generic selection association This ensures that a deduced type like __auto_type matches the correct association instead of matching all associations. 
This addresses a regression from e4a42c5b64d044ae28d9483b0ebd12038d5b5917 Fixes #55702 --- clang/lib/Sema/SemaExpr.cpp | 6 +++++- clang/test/Sema/auto-type.c | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 9d9cb99085f331..3e8bd63e89ae90 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1753,10 +1753,14 @@ Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, SmallVector CompatIndices; unsigned DefaultIndex = -1U; + // Look at the canonical type of the controlling expression in case it was a + // deduced type like __auto_type. However, when issuing diagnostics, use the + // type the user wrote in source rather than the canonical one. for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; - else if (Context.typesAreCompatible(ControllingExpr->getType(), + else if (Context.typesAreCompatible( + ControllingExpr->getType().getCanonicalType(), Types[i]->getType())) CompatIndices.push_back(i); } diff --git a/clang/test/Sema/auto-type.c b/clang/test/Sema/auto-type.c index 4e85eecc6b0575..1170c687c96aa7 100644 --- a/clang/test/Sema/auto-type.c +++ b/clang/test/Sema/auto-type.c @@ -78,3 +78,13 @@ void Issue53652(void) { __typeof__(i) : 0, // expected-note {{compatible type 'typeof (i)' (aka 'int') specified here}} __typeof__(a) : 1); // expected-error {{type 'typeof (a)' (aka 'int') in generic association compatible with previously specified type 'typeof (i)' (aka 'int')}} } + +void Issue55702(void) { + // A controlling expression which uses __auto_type should not be + // automatically compatible with every association; we should be using the + // canonical type for that comparison. 
+ void *ptr = 0; + __auto_type v = ptr; + (void)_Generic(v, long double : 0, double : 0, default : 1); // OK + _Static_assert(_Generic(v, long double : 0, default : 1) == 1, "fail"); +} From b0ccf38b0187e91fa36e5b7c2a2eecd9b5c00f01 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Thu, 26 May 2022 12:41:55 +0100 Subject: [PATCH 572/908] [AMDGPU][GFX9] Support base+soffset+offset SMEM loads. Resolves part of https://github.com/llvm/llvm-project/issues/38652 Reviewed By: dp Differential Revision: https://reviews.llvm.org/D125700 --- llvm/lib/Target/AMDGPU/SMInstructions.td | 57 +++++++++++++++---- llvm/test/MC/AMDGPU/gfx9_asm_smem.s | 39 +++++++++++++ llvm/test/MC/AMDGPU/smem.s | 4 +- .../MC/Disassembler/AMDGPU/gfx8_dasm_all.txt | 10 ++++ .../MC/Disassembler/AMDGPU/gfx9_dasm_all.txt | 41 +++++++++++++ 5 files changed, 137 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 35a6726e15b2c2..aef940ad8e03bc 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -522,38 +522,71 @@ def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>; //===----------------------------------------------------------------------===// -// VI +// VI and GFX9. //===----------------------------------------------------------------------===// class SMEM_Real_vi op, SM_Pseudo ps> : SM_Real , SIMCInstr , Enc64 { - let AssemblerPredicate = isGFX8GFX9; + field bit IsGFX9Specific = false; + let AssemblerPredicate = !if(IsGFX9Specific, isGFX9Only, isGFX8GFX9); let DecoderNamespace = "GFX8"; let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); + // Note that for GFX9 instructions with immediate offsets, soffset_en + // must be defined, whereas in GFX8 it's undefined in all cases, + // meaning GFX9 is not perfectly backward-compatible with GFX8, despite + // documentation suggesting otherwise. 
+ field bit SOffsetEn = !if(IsGFX9Specific, + !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)), + ?); + let Inst{14} = SOffsetEn; + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); + + // imm + // TODO: Shall not be defined if the instruction has no offset nor + // soffset. let Inst{17} = ps.has_offset; + let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed. // Offset value is corrected accordingly when offset is encoded/decoded. - let Inst{38-32} = !if(ps.has_offset, offset{6-0}, !if(ps.has_soffset, soffset{6-0}, ?)); - let Inst{52-39} = !if(ps.has_offset, offset{20-7}, ?); + // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics. + field bits<21> Offset; + let Offset{6-0} = !if(ps.has_offset, offset{6-0}, + !if(ps.has_soffset, soffset{6-0}, ?)); + let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?); + let Inst{52-32} = Offset; + + // soffset + let Inst{63-57} = !if(!and(IsGFX9Specific, ps.has_soffset), soffset{6-0}, ?); } -multiclass SM_Real_Loads_vi op, string ps, - SM_Load_Pseudo immPs = !cast(ps#_IMM), - SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { - def _IMM_vi : SMEM_Real_vi { - let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } - def _SGPR_vi : SMEM_Real_vi { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); +class SMEM_Real_Load_vi op, string ps, dag offsets> + : SMEM_Real_vi(ps)> { + RegisterClass BaseClass = !cast(ps).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +} + +multiclass SM_Real_Loads_vi op, string ps> { + def _IMM_vi : SMEM_Real_Load_vi ; + def _SGPR_vi : SMEM_Real_Load_vi ; + let IsGFX9Specific = true in { + // The alternative GFX9 SGPR encoding using soffset to encode the + // offset register. Not available in assembler and goes to the GFX9 + // encoding family to avoid conflicts with the primary SGPR variant. 
+ let SOffsetEn = 1, Offset = ?, Subtarget = SIEncodingFamily.GFX9, + AsmVariantName = "NonParsable" in + def _SGPR_alt_gfx9 : SMEM_Real_Load_vi ; + def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi < + op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>; } } diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s index d61dac88f457e3..64918dcceb3e5c 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s @@ -60,6 +60,9 @@ s_load_dword s5, s[2:3], m0 s_load_dword s5, s[2:3], 0x0 // CHECK: [0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00] +s_load_dword s5, s[2:3], s7 offset:0x12345 +// CHECK: [0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x0e] + s_load_dword s5, s[2:3], s0 glc // CHECK: [0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00] @@ -120,6 +123,9 @@ s_load_dwordx2 s[10:11], s[2:3], m0 s_load_dwordx2 s[10:11], s[2:3], 0x0 // CHECK: [0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 +// CHECK: [0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx2 s[10:11], s[2:3], s0 glc // CHECK: [0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00] @@ -174,6 +180,9 @@ s_load_dwordx4 s[20:23], s[2:3], m0 s_load_dwordx4 s[20:23], s[2:3], 0x0 // CHECK: [0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx4 s[20:23], s[2:3], s0 glc // CHECK: [0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00] @@ -228,6 +237,9 @@ s_load_dwordx8 s[20:27], s[2:3], m0 s_load_dwordx8 s[20:27], s[2:3], 0x0 // CHECK: [0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx8 s[20:27], s[2:3], s0 glc // CHECK: [0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00] @@ -282,6 +294,9 @@ s_load_dwordx16 s[20:35], s[2:3], m0 s_load_dwordx16 s[20:35], s[2:3], 0x0 // CHECK: [0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx16 s[20:35], 
s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx16 s[20:35], s[2:3], s0 glc // CHECK: [0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00] @@ -345,6 +360,9 @@ s_scratch_load_dword s5, s[2:3], m0 s_scratch_load_dword s5, s[2:3], 0x0 // CHECK: [0x41,0x01,0x16,0xc0,0x00,0x00,0x00,0x00] +s_scratch_load_dword s5, s[2:3], s0 offset:0x12345 +// CHECK: [0x41,0x41,0x16,0xc0,0x45,0x23,0x01,0x00] + s_scratch_load_dword s5, s[2:3], s0 glc // CHECK: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00] @@ -405,6 +423,9 @@ s_scratch_load_dwordx2 s[10:11], s[2:3], m0 s_scratch_load_dwordx2 s[10:11], s[2:3], 0x0 // CHECK: [0x81,0x02,0x1a,0xc0,0x00,0x00,0x00,0x00] +s_scratch_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 +// CHECK: [0x81,0x42,0x1a,0xc0,0x45,0x23,0x01,0x00] + s_scratch_load_dwordx2 s[10:11], s[2:3], s0 glc // CHECK: [0x81,0x02,0x19,0xc0,0x00,0x00,0x00,0x00] @@ -459,6 +480,9 @@ s_scratch_load_dwordx4 s[20:23], s[2:3], m0 s_scratch_load_dwordx4 s[20:23], s[2:3], 0x0 // CHECK: [0x01,0x05,0x1e,0xc0,0x00,0x00,0x00,0x00] +s_scratch_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x1e,0xc0,0x45,0x23,0x01,0x00] + s_scratch_load_dwordx4 s[20:23], s[2:3], s0 glc // CHECK: [0x01,0x05,0x1d,0xc0,0x00,0x00,0x00,0x00] @@ -516,6 +540,9 @@ s_buffer_load_dword s5, s[4:7], m0 s_buffer_load_dword s5, s[4:7], 0x0 // CHECK: [0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 +// CHECK: [0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dword s5, s[4:7], s0 glc // CHECK: [0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00] @@ -570,6 +597,9 @@ s_buffer_load_dwordx2 s[10:11], s[4:7], m0 s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0 // CHECK: [0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 +// CHECK: [0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx2 s[10:11], s[4:7], s0 glc // CHECK: [0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00] @@ -618,6 +648,9 
@@ s_buffer_load_dwordx4 s[20:23], s[4:7], m0 s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0 // CHECK: [0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 +// CHECK: [0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx4 s[20:23], s[4:7], s0 glc // CHECK: [0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00] @@ -666,6 +699,9 @@ s_buffer_load_dwordx8 s[20:27], s[4:7], m0 s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0 // CHECK: [0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 +// CHECK: [0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx8 s[20:27], s[4:7], s0 glc // CHECK: [0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00] @@ -714,6 +750,9 @@ s_buffer_load_dwordx16 s[20:35], s[4:7], m0 s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0 // CHECK: [0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 +// CHECK: [0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx16 s[20:35], s[4:7], s0 glc // CHECK: [0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index 4ef8e91b01f2d4..2c6ae01f5112ab 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -673,7 +673,7 @@ s_atc_probe_buffer 0x1, s[8:11], 0x1FFFFF s_load_dword s1, s[2:3], s0 offset:0x1FFFFF // NOSICI: error: operands are not valid for this GPU or mode // NOVI: error: operands are not valid for this GPU or mode -// NOGFX9: error: operands are not valid for this GPU or mode +// NOGFX9: error: expected a 21-bit signed offset // NOGFX10: error: expected a 21-bit signed offset s_store_dword s1, s[2:3], 0x1FFFFF @@ -684,7 +684,7 @@ s_store_dword s1, s[2:3], 0x1FFFFF s_buffer_load_dword s10, s[92:95], s0 offset:-1 // NOSICI: error: operands are not valid for this GPU or mode // NOVI: error: operands are not valid for this GPU or mode -// NOGFX9: error: operands are not valid for this GPU or mode 
+// NOGFX9: error: expected a 20-bit unsigned offset // NOGFX10: error: expected a 20-bit unsigned offset s_buffer_store_dword s10, s[92:95], 0x1FFFFF diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt index 764b8053a818c6..5a7fa762763308 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt @@ -9588,6 +9588,11 @@ # CHECK: s_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00 +# Make sure that raising the GFX9 soffset_en bit doesn't affect GFX8 +# decoding. +# CHECK: s_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00] +0x41,0x41,0x00,0xc0,0x65,0x00,0x00,0x00 + # CHECK: s_load_dword s5, s[2:3], flat_scratch_lo ; encoding: [0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00 @@ -9621,6 +9626,11 @@ # CHECK: s_load_dword s5, s[2:3], 0x7ffff ; encoding: [0x41,0x01,0x02,0xc0,0xff,0xff,0x07,0x00] 0x41,0x01,0x02,0xc0,0xff,0xff,0x07,0x00 +# Make sure that raising the GFX9 soffset_en bit doesn't affect GFX8 +# decoding. 
+# CHECK: s_load_dword s5, s[2:3], 0x7ffff ; encoding: [0x41,0x01,0x02,0xc0,0xff,0xff,0x07,0x00] +0x41,0x41,0x02,0xc0,0xff,0xff,0x07,0x00 + # CHECK: s_load_dword s5, s[2:3], s2 glc ; encoding: [0x41,0x01,0x01,0xc0,0x02,0x00,0x00,0x00] 0x41,0x01,0x01,0xc0,0x02,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt index ae2789703b37aa..f5346734706ded 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -10056,6 +10056,12 @@ # CHECK: s_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00 +# The SGRP variants can alternatively be encoded with imm=0, soffset_en=1 +# and the offset register encoded in the soffset field with the offset +# field being disregarded. +# CHECK: s_load_dword s5, s[2:3], s64 ; encoding: [0x41,0x41,0x00,0xc0,0x00,0x00,0x00,0x80] +0x41,0x41,0x00,0xc0,0x65,0x00,0x00,0x80 + # CHECK: s_load_dword s5, s[2:3], flat_scratch_lo ; encoding: [0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00 @@ -10074,6 +10080,14 @@ # CHECK: s_load_dword s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00] 0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dword s5, s[2:3], s0 offset:0x12345 ; encoding: [0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x00] +0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x00 + +# SP3 prefers to decode instructions with imm=1 and soffset_en=1 to the +# form with the 'offset:' modifier, even if the offset is 0. 
+# CHECK: s_load_dword s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x41,0x02,0xc0,0x00,0x00,0x00,0x00] +0x41,0x41,0x02,0xc0,0x00,0x00,0x00,0x00 + # CHECK: s_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00] 0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00 @@ -10125,6 +10139,9 @@ # CHECK: s_load_dwordx2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00] 0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 ; encoding: [0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00] +0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_load_dwordx2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00] 0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00 @@ -10170,6 +10187,9 @@ # CHECK: s_load_dwordx4 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00] +0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_load_dwordx4 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00 @@ -10215,6 +10235,9 @@ # CHECK: s_load_dwordx8 s[20:27], s[2:3], 0x0 ; encoding: [0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00] +0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_load_dwordx8 s[20:27], s[2:3], s0 glc ; encoding: [0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00 @@ -10260,6 +10283,9 @@ # CHECK: s_load_dwordx16 s[20:35], s[2:3], 0x0 ; encoding: [0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00] +0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00 + 
# CHECK: s_load_dwordx16 s[20:35], s[2:3], s0 glc ; encoding: [0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00 @@ -10308,6 +10334,9 @@ # CHECK: s_buffer_load_dword s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00] 0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 ; encoding: [0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00] +0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dword s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00] 0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00 @@ -10353,6 +10382,9 @@ # CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00] 0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 ; encoding: [0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00] +0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00] 0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00 @@ -10392,6 +10424,9 @@ # CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00] +0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00 @@ -10431,6 +10466,9 @@ # CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00] +0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], s0 glc ; encoding: 
[0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00 @@ -10470,6 +10508,9 @@ # CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00] +0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], s0 glc ; encoding: [0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00 From ca3d962548b9699d69157d102e071607d27ba126 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Wed, 25 May 2022 21:27:29 +0200 Subject: [PATCH 573/908] [analyzer] Return from reAssume if State is posteriorly overconstrained Depends on D124758. That patch introduced serious regression in the run-time in some special cases. This fixes that. Differential Revision: https://reviews.llvm.org/D126406 --- .../Core/PathSensitive/ProgramState.h | 18 ++++-- .../Core/RangeConstraintManager.cpp | 11 ++++ clang/test/Analysis/runtime-regression.c | 55 +++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 clang/test/Analysis/runtime-regression.c diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h index cb6540ff7e7c43..f36ba70babc71f 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -55,6 +55,8 @@ template struct ProgramStateTrait { } }; +class RangeSet; + /// \class ProgramState /// ProgramState - This class encapsulates: /// @@ -79,7 +81,6 @@ class ProgramState : public llvm::FoldingSetNode { friend class ExplodedGraph; friend class ExplodedNode; friend class NodeBuilder; - friend class ConstraintManager; ProgramStateManager *stateMgr; Environment Env; // Maps a Stmt to its 
current SVal. @@ -113,6 +114,16 @@ class ProgramState : public llvm::FoldingSetNode { // posteriorly over-constrained. These parents are handled with special care: // we do not allow transitions to exploded nodes with such states. bool PosteriorlyOverconstrained = false; + // Make internal constraint solver entities friends so they can access the + // overconstrained-related functions. We want to keep this API inaccessible + // for Checkers. + friend class ConstraintManager; + friend ProgramStateRef reAssume(ProgramStateRef State, + const RangeSet *Constraint, SVal TheValue); + bool isPosteriorlyOverconstrained() const { + return PosteriorlyOverconstrained; + } + ProgramStateRef cloneAsPosteriorlyOverconstrained() const; unsigned refCount; @@ -122,11 +133,6 @@ class ProgramState : public llvm::FoldingSetNode { void setStore(const StoreRef &storeRef); - ProgramStateRef cloneAsPosteriorlyOverconstrained() const; - bool isPosteriorlyOverconstrained() const { - return PosteriorlyOverconstrained; - } - public: /// This ctor is used when creating the first ProgramState object. ProgramState(ProgramStateManager *mgr, const Environment& env, diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index e0ceac51d14f34..3d8d8aeafaec68 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -2530,10 +2530,19 @@ EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { return State; } +// We must declare reAssume in clang::ento, otherwise we could not declare that +// as a friend in ProgramState. More precisely, the call of reAssume would be +// ambiguous (one in the global namespace and an other which is declared in +// ProgramState is in clang::ento). +namespace clang { +namespace ento { // Re-evaluate an SVal with top-level `State->assume` logic. 
LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) { + assert(State); + if (State->isPosteriorlyOverconstrained()) + return nullptr; if (!Constraint) return State; @@ -2556,6 +2565,8 @@ LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(), Constraint->getMaxValue(), true); } +} // namespace ento +} // namespace clang // Iterate over all symbols and try to simplify them. Once a symbol is // simplified then we check if we can merge the simplified symbol's equivalence diff --git a/clang/test/Analysis/runtime-regression.c b/clang/test/Analysis/runtime-regression.c new file mode 100644 index 00000000000000..55988e9df5ee0f --- /dev/null +++ b/clang/test/Analysis/runtime-regression.c @@ -0,0 +1,55 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2 \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -triple x86_64-unknown-linux-gnu \ +// RUN: -verify + +// This test is here to check if there is no significant run-time regression +// related to the assume machinery. The analysis should finish in less than 10 +// seconds. + +// expected-no-diagnostics + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long uint64_t; + +int filter_slice_word(int sat_linesize, int sigma, int radius, uint64_t *sat, + uint64_t *square_sat, int width, int height, + int src_linesize, int dst_linesize, const uint16_t *src, + uint16_t *dst, int jobnr, int nb_jobs) { + const int starty = height * jobnr / nb_jobs; + const int endy = height * (jobnr + 1) / nb_jobs; + + for (int y = starty; y < endy; y++) { + + int lower_y = y - radius < 0 ? 0 : y - radius; + int higher_y = y + radius + 1 > height ? height : y + radius + 1; + int dist_y = higher_y - lower_y; + + for (int x = 0; x < width; x++) { + + int lower_x = x - radius < 0 ? 
0 : x - radius; + int higher_x = x + radius + 1 > width ? width : x + radius + 1; + int count = dist_y * (higher_x - lower_x); + + // The below hunk caused significant regression in run-time. +#if 1 + uint64_t sum = sat[higher_y * sat_linesize + higher_x] - + sat[higher_y * sat_linesize + lower_x] - + sat[lower_y * sat_linesize + higher_x] + + sat[lower_y * sat_linesize + lower_x]; + uint64_t square_sum = square_sat[higher_y * sat_linesize + higher_x] - + square_sat[higher_y * sat_linesize + lower_x] - + square_sat[lower_y * sat_linesize + higher_x] + + square_sat[lower_y * sat_linesize + lower_x]; + uint64_t mean = sum / count; + uint64_t var = (square_sum - sum * sum / count) / count; + dst[y * dst_linesize + x] = + (sigma * mean + var * src[y * src_linesize + x]) / (sigma + var); +#endif + + } + } + return 0; +} From b5b2aec1ff51197343dede9741ea377f8314d41b Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Tue, 10 May 2022 17:13:07 +0200 Subject: [PATCH 574/908] [analyzer] Add UnarySymExpr This patch adds a new descendant to the SymExpr hierarchy. This way, now we can assign constraints to symbolic unary expressions. Only the unary minus and bitwise negation are handled. 
Differential Revision: https://reviews.llvm.org/D125318 --- .../StaticAnalyzer/Checkers/SValExplainer.h | 5 ++ .../Core/PathSensitive/SValBuilder.h | 6 +++ .../Core/PathSensitive/SymbolManager.h | 52 +++++++++++++++++++ .../Core/PathSensitive/Symbols.def | 2 + .../Checkers/ExprInspectionChecker.cpp | 8 +++ clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 20 +++++++ .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 20 +++++++ .../lib/StaticAnalyzer/Core/SymbolManager.cpp | 32 ++++++++++++ clang/test/Analysis/explain-svals.cpp | 1 + clang/test/Analysis/expr-inspection.cpp | 2 + clang/test/Analysis/unary-sym-expr.c | 38 ++++++++++++++ 11 files changed, 186 insertions(+) create mode 100644 clang/test/Analysis/unary-sym-expr.c diff --git a/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h b/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h index ad64cfc742dcaa..4c9c8239113e3d 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h +++ b/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h @@ -135,6 +135,11 @@ class SValExplainer : public FullSValVisitor { " (" + Visit(S->getRHS()) + ")"; } + std::string VisitUnarySymExpr(const UnarySymExpr *S) { + return std::string(UnaryOperator::getOpcodeStr(S->getOpcode())) + " (" + + Visit(S->getOperand()) + ")"; + } + // TODO: SymbolCast doesn't appear in practice. // Add the relevant code once it does. 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index d0bbf1750d7e5f..2154a1dedeeebb 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -153,6 +153,9 @@ class SValBuilder { SVal makeSymExprValNN(BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy); + SVal evalUnaryOp(ProgramStateRef state, UnaryOperator::Opcode opc, + SVal operand, QualType type); + SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type); @@ -350,6 +353,9 @@ class SValBuilder { nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType type); + NonLoc makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op, + QualType type); + /// Create a NonLoc value for cast. nonloc::SymbolVal makeNonLoc(const SymExpr *operand, QualType fromTy, QualType toTy); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index c71cb88f5574ca..3ba3aa5c01566e 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -309,6 +309,55 @@ class SymbolCast : public SymExpr { } }; +/// Represents a symbolic expression involving a unary operator. +class UnarySymExpr : public SymExpr { + const SymExpr *Operand; + UnaryOperator::Opcode Op; + QualType T; + +public: + UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T) + : SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) { + // Note, some unary operators are modeled as a binary operator. E.g. ++x is + // modeled as x + 1. 
+ assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression"); + // Unary expressions are results of arithmetic. Pointer arithmetic is not + // handled by unary expressions, but it is instead handled by applying + // sub-regions to regions. + assert(isValidTypeForSymbol(T) && "non-valid type for unary symbol"); + assert(!Loc::isLocType(T) && "unary symbol should be nonloc"); + } + + unsigned computeComplexity() const override { + if (Complexity == 0) + Complexity = 1 + Operand->computeComplexity(); + return Complexity; + } + + const SymExpr *getOperand() const { return Operand; } + UnaryOperator::Opcode getOpcode() const { return Op; } + QualType getType() const override { return T; } + + void dumpToStream(raw_ostream &os) const override; + + static void Profile(llvm::FoldingSetNodeID &ID, const SymExpr *In, + UnaryOperator::Opcode Op, QualType T) { + ID.AddInteger((unsigned)UnarySymExprKind); + ID.AddPointer(In); + ID.AddInteger(Op); + ID.Add(T); + } + + void Profile(llvm::FoldingSetNodeID &ID) override { + Profile(ID, Operand, Op, T); + } + + // Implement isa support. 
+ static bool classof(const SymExpr *SE) { + return SE->getKind() == UnarySymExprKind; + } +}; + /// Represents a symbolic expression involving a binary operator class BinarySymExpr : public SymExpr { BinaryOperator::Opcode Op; @@ -486,6 +535,9 @@ class SymbolManager { const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t); + const UnarySymExpr *getUnarySymExpr(const SymExpr *operand, + UnaryOperator::Opcode op, QualType t); + QualType getType(const SymExpr *SE) const { return SE->getType(); } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def index 7163a16263ab8d..b93f8e2501559d 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def @@ -33,6 +33,8 @@ #define SYMBOL_RANGE(Id, First, Last) #endif +SYMBOL(UnarySymExpr, SymExpr) + ABSTRACT_SYMBOL(BinarySymExpr, SymExpr) SYMBOL(IntSymExpr, BinarySymExpr) SYMBOL(SymIntExpr, BinarySymExpr) diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp index ea72d1dbb317c6..1c33648b2b3212 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -474,6 +474,14 @@ class SymbolExpressor return None; } + Optional VisitUnarySymExpr(const UnarySymExpr *S) { + if (Optional Str = lookup(S)) + return Str; + if (Optional Str = Visit(S->getOperand())) + return (UnaryOperator::getOpcodeStr(S->getOpcode()) + *Str).str(); + return None; + } + Optional VisitSymbolCast(const SymbolCast *S) { if (Optional Str = lookup(S)) return Str; diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index ea66bb3780f616..ae590a3b733876 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ 
b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -102,6 +102,13 @@ nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs, return nonloc::SymbolVal(SymMgr.getSymSymExpr(lhs, op, rhs, type)); } +NonLoc SValBuilder::makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op, + QualType type) { + assert(operand); + assert(!Loc::isLocType(type)); + return nonloc::SymbolVal(SymMgr.getUnarySymExpr(operand, op, type)); +} + nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *operand, QualType fromTy, QualType toTy) { assert(operand); @@ -434,6 +441,19 @@ SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op, return UnknownVal(); } +SVal SValBuilder::evalUnaryOp(ProgramStateRef state, UnaryOperator::Opcode opc, + SVal operand, QualType type) { + auto OpN = operand.getAs(); + if (!OpN) + return UnknownVal(); + + if (opc == UO_Minus) + return evalMinus(*OpN); + if (opc == UO_Not) + return evalComplement(*OpN); + llvm_unreachable("Unexpected unary operator"); +} + SVal SValBuilder::evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type) { if (lhs.isUndef() || rhs.isUndef()) diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 03a751845fe1e8..cfc8816a0ecbaf 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -90,6 +90,9 @@ SVal SimpleSValBuilder::evalMinus(NonLoc val) { switch (val.getSubKind()) { case nonloc::ConcreteIntKind: return val.castAs().evalMinus(*this); + case nonloc::SymbolValKind: + return makeNonLoc(val.castAs().getSymbol(), UO_Minus, + val.getType(Context)); default: return UnknownVal(); } @@ -99,6 +102,9 @@ SVal SimpleSValBuilder::evalComplement(NonLoc X) { switch (X.getSubKind()) { case nonloc::ConcreteIntKind: return X.castAs().evalComplement(*this); + case nonloc::SymbolValKind: + return makeNonLoc(X.castAs().getSymbol(), UO_Not, + X.getType(Context)); 
default: return UnknownVal(); } @@ -1343,6 +1349,20 @@ SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) { S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); } + // FIXME add VisitSymbolCast + + SVal VisitUnarySymExpr(const UnarySymExpr *S) { + auto I = Cached.find(S); + if (I != Cached.end()) + return I->second; + SVal Op = getConstOrVisit(S->getOperand()); + if (isUnchanged(S->getOperand(), Op)) + return skip(S); + + return cache( + S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType())); + } + SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); } SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); } diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index 59964ad5ab3b7f..2227bd324adc4c 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -70,6 +70,16 @@ void SymbolCast::dumpToStream(raw_ostream &os) const { os << ')'; } +void UnarySymExpr::dumpToStream(raw_ostream &os) const { + os << UnaryOperator::getOpcodeStr(Op); + bool Binary = isa(Operand); + if (Binary) + os << '('; + Operand->dumpToStream(os); + if (Binary) + os << ')'; +} + void SymbolConjured::dumpToStream(raw_ostream &os) const { os << getKindStr() << getSymbolID() << '{' << T << ", LC" << LCtx->getID(); if (S) @@ -134,6 +144,9 @@ void SymExpr::symbol_iterator::expand() { case SymExpr::SymbolCastKind: itr.push_back(cast(SE)->getOperand()); return; + case SymExpr::UnarySymExprKind: + itr.push_back(cast(SE)->getOperand()); + return; case SymExpr::SymIntExprKind: itr.push_back(cast(SE)->getLHS()); return; @@ -306,6 +319,22 @@ const SymSymExpr *SymbolManager::getSymSymExpr(const SymExpr *lhs, return cast(data); } +const UnarySymExpr *SymbolManager::getUnarySymExpr(const SymExpr *Operand, + UnaryOperator::Opcode Opc, + QualType T) { + llvm::FoldingSetNodeID ID; + UnarySymExpr::Profile(ID, Operand, Opc, T); + void 
*InsertPos; + SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); + if (!data) { + data = (UnarySymExpr *)BPAlloc.Allocate(); + new (data) UnarySymExpr(Operand, Opc, T); + DataSet.InsertNode(data, InsertPos); + } + + return cast(data); +} + QualType SymbolConjured::getType() const { return T; } @@ -465,6 +494,9 @@ bool SymbolReaper::isLive(SymbolRef sym) { case SymExpr::SymbolCastKind: KnownLive = isLive(cast(sym)->getOperand()); break; + case SymExpr::UnarySymExprKind: + KnownLive = isLive(cast(sym)->getOperand()); + break; } if (KnownLive) diff --git a/clang/test/Analysis/explain-svals.cpp b/clang/test/Analysis/explain-svals.cpp index 44cd66b491ea7c..30368b6976cc23 100644 --- a/clang/test/Analysis/explain-svals.cpp +++ b/clang/test/Analysis/explain-svals.cpp @@ -72,6 +72,7 @@ void test_3(S s) { void test_4(int x, int y) { int z; static int stat; + clang_analyzer_explain(-x); // expected-warning-re{{{{^\- \(argument 'x'\)$}}}} clang_analyzer_explain(x + 1); // expected-warning-re{{{{^\(argument 'x'\) \+ 1$}}}} clang_analyzer_explain(1 + y); // expected-warning-re{{{{^\(argument 'y'\) \+ 1$}}}} clang_analyzer_explain(x + y); // expected-warning-re{{{{^\(argument 'x'\) \+ \(argument 'y'\)$}}}} diff --git a/clang/test/Analysis/expr-inspection.cpp b/clang/test/Analysis/expr-inspection.cpp index bf8ce5f7681ed0..3e65305032927a 100644 --- a/clang/test/Analysis/expr-inspection.cpp +++ b/clang/test/Analysis/expr-inspection.cpp @@ -18,6 +18,8 @@ void foo(int x, unsigned y) { clang_analyzer_express(x); // expected-warning{{Unable to express}} clang_analyzer_denote(x, "$x"); + clang_analyzer_express(-x); // expected-warning{{-$x}} + clang_analyzer_denote(y, "$y"); clang_analyzer_express(x + y); // expected-warning{{$x + $y}} diff --git a/clang/test/Analysis/unary-sym-expr.c b/clang/test/Analysis/unary-sym-expr.c new file mode 100644 index 00000000000000..14b3219de3723f --- /dev/null +++ b/clang/test/Analysis/unary-sym-expr.c @@ -0,0 +1,38 @@ +// RUN: 
%clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=false \ +// RUN: -verify + +void clang_analyzer_eval(int); +void clang_analyzer_dump(int); + +int test(int x, int y) { + + clang_analyzer_dump(-x); // expected-warning{{-reg_$0}} + clang_analyzer_dump(~x); // expected-warning{{~reg_$0}} + int z = x + y; + clang_analyzer_dump(-z); // expected-warning{{-((reg_$0) + (reg_$1))}} + clang_analyzer_dump(-(x + y)); // expected-warning{{-((reg_$0) + (reg_$1))}} + clang_analyzer_dump(-x + y); // expected-warning{{(-reg_$0) + (reg_$1)}} + + if (-x == 0) { + clang_analyzer_eval(-x == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(-x > 0); // expected-warning{{FALSE}} + clang_analyzer_eval(-x < 0); // expected-warning{{FALSE}} + } + if (~y == 0) { + clang_analyzer_eval(~y == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(~y > 0); // expected-warning{{FALSE}} + clang_analyzer_eval(~y < 0); // expected-warning{{FALSE}} + } + (void)(x); + return 42; +} + +void test_svalbuilder_simplification(int x, int y) { + if (x + y != 3) + return; + clang_analyzer_eval(-(x + y) == -3); // expected-warning{{TRUE}} + // FIXME Commutativity is not supported yet. 
+ clang_analyzer_eval(-(y + x) == -3); // expected-warning{{UNKNOWN}} +} From 88abc50398eb26d536518270b91939ce18687305 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Wed, 11 May 2022 17:40:25 +0200 Subject: [PATCH 575/908] [analyzer][solver] Handle UnarySymExpr in RangeConstraintSolver Fixes https://github.com/llvm/llvm-project/issues/55241 Differential Revision: https://reviews.llvm.org/D125395 --- .../Core/RangeConstraintManager.cpp | 39 +++--- .../test/Analysis/constraint_manager_negate.c | 113 ++++++++++++++++++ .../constraint_manager_negate_difference.c | 99 ++++++++------- clang/test/Analysis/unary-sym-expr-no-crash.c | 23 ++++ clang/test/Analysis/unary-sym-expr.c | 9 ++ 5 files changed, 214 insertions(+), 69 deletions(-) create mode 100644 clang/test/Analysis/constraint_manager_negate.c create mode 100644 clang/test/Analysis/unary-sym-expr-no-crash.c diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 3d8d8aeafaec68..bf535a8d02057c 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1443,30 +1443,35 @@ class SymbolicRangeInferrer return RangeFactory.deletePoint(Domain, IntType.getZeroValue()); } - // FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to - // obtain the negated symbolic expression instead of constructing the - // symbol manually. This will allow us to support finding ranges of not - // only negated SymSymExpr-type expressions, but also of other, simpler - // expressions which we currently do not know how to negate. Optional getRangeForNegatedSub(SymbolRef Sym) { - if (const SymSymExpr *SSE = dyn_cast(Sym)) { + // Do not negate if the type cannot be meaningfully negated. 
+ if (!Sym->getType()->isUnsignedIntegerOrEnumerationType() && + !Sym->getType()->isSignedIntegerOrEnumerationType()) + return llvm::None; + + const RangeSet *NegatedRange = nullptr; + SymbolManager &SymMgr = State->getSymbolManager(); + if (const auto *USE = dyn_cast(Sym)) { + if (USE->getOpcode() == UO_Minus) { + // Just get the operand when we negate a symbol that is already negated. + // -(-a) == a + NegatedRange = getConstraint(State, USE->getOperand()); + } + } else if (const SymSymExpr *SSE = dyn_cast(Sym)) { if (SSE->getOpcode() == BO_Sub) { QualType T = Sym->getType(); - - // Do not negate unsigned ranges - if (!T->isUnsignedIntegerOrEnumerationType() && - !T->isSignedIntegerOrEnumerationType()) - return llvm::None; - - SymbolManager &SymMgr = State->getSymbolManager(); SymbolRef NegatedSym = SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T); - - if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym)) { - return RangeFactory.negate(*NegatedRange); - } + NegatedRange = getConstraint(State, NegatedSym); } + } else { + SymbolRef NegatedSym = + SymMgr.getUnarySymExpr(Sym, UO_Minus, Sym->getType()); + NegatedRange = getConstraint(State, NegatedSym); } + + if (NegatedRange) + return RangeFactory.negate(*NegatedRange); return llvm::None; } diff --git a/clang/test/Analysis/constraint_manager_negate.c b/clang/test/Analysis/constraint_manager_negate.c new file mode 100644 index 00000000000000..d955973e1911ed --- /dev/null +++ b/clang/test/Analysis/constraint_manager_negate.c @@ -0,0 +1,113 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection,core.builtin \ +// RUN: -analyzer-config eagerly-assume=false \ +// RUN: -verify %s + +void clang_analyzer_eval(int); +void clang_analyzer_dump(int); + +void exit(int); + +#define UINT_MIN (0U) +#define UINT_MAX (~UINT_MIN) +#define UINT_MID (UINT_MAX / 2 + 1) +#define INT_MAX (UINT_MAX & (UINT_MAX >> 1)) +#define INT_MIN (UINT_MAX & ~(UINT_MAX >> 1)) + +extern void abort() 
__attribute__((__noreturn__)); +#define assert(expr) ((expr) ? (void)(0) : abort()) + +void negate_positive_range(int a) { + assert(-a > 0); + // -a: [1, INT_MAX] + // a: [INT_MIN + 1, -1] + clang_analyzer_eval(a < 0); // expected-warning{{TRUE}} + clang_analyzer_eval(a > INT_MIN); // expected-warning{{TRUE}} +} + +void negate_positive_range2(int a) { + assert(a > 0); + // a: [1, INT_MAX] + // -a: [INT_MIN + 1, -1] + clang_analyzer_eval(-a < 0); // expected-warning{{TRUE}} + clang_analyzer_eval(-a > INT_MIN); // expected-warning{{TRUE}} +} + +// INT_MIN equals 0b100...00. +// Its two's complement is 0b011...11 + 1 = 0b100...00 (itself). +_Static_assert(INT_MIN == -INT_MIN, ""); +void negate_int_min(int a) { + assert(a == INT_MIN); + clang_analyzer_eval(-a == INT_MIN); // expected-warning{{TRUE}} +} + +void negate_mixed(int a) { + assert(a > 0 || a == INT_MIN); + clang_analyzer_eval(-a <= 0); // expected-warning{{TRUE}} +} + +void effective_range(int a) { + assert(a >= 0); + assert(-a >= 0); + clang_analyzer_eval(a == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(-a == 0); // expected-warning{{TRUE}} +} + +_Static_assert(-INT_MIN == INT_MIN, ""); +void effective_range_2(int a) { + assert(a <= 0); + assert(-a <= 0); + clang_analyzer_eval(a == 0 || a == INT_MIN); // expected-warning{{TRUE}} +} + +void negate_symexpr(int a, int b) { + assert(3 <= a * b && a * b <= 5); + clang_analyzer_eval(-(a * b) >= -5); // expected-warning{{TRUE}} + clang_analyzer_eval(-(a * b) <= -3); // expected-warning{{TRUE}} +} + +void negate_unsigned_min(unsigned a) { + assert(a == UINT_MIN); + clang_analyzer_eval(-a == UINT_MIN); // expected-warning{{TRUE}} + clang_analyzer_eval(-a != UINT_MIN); // expected-warning{{FALSE}} + clang_analyzer_eval(-a > UINT_MIN); // expected-warning{{FALSE}} + clang_analyzer_eval(-a < UINT_MIN); // expected-warning{{FALSE}} +} + +_Static_assert(7u - 3u != 3u - 7u, ""); +_Static_assert(-4u == UINT_MAX - 3u, ""); +void negate_unsigned_4(unsigned a) { +
assert(a == 4u); + clang_analyzer_eval(-a == 4u); // expected-warning{{FALSE}} + clang_analyzer_eval(-a != 4u); // expected-warning{{TRUE}} + clang_analyzer_eval(-a == UINT_MAX - 3u); // expected-warning{{TRUE}} +} + +// UINT_MID equals 0b100...00. +// Its two's complement is 0b011...11 + 1 = 0b100...00 (itself). +_Static_assert(UINT_MID == -UINT_MID, ""); +void negate_unsigned_mid(unsigned a) { + assert(a == UINT_MID); + clang_analyzer_eval(-a == UINT_MID); // expected-warning{{TRUE}} + clang_analyzer_eval(-a != UINT_MID); // expected-warning{{FALSE}} +} + +void negate_unsigned_mid2(unsigned a) { + assert(UINT_MIN < a && a < UINT_MID); + // a: [UINT_MIN+1, UINT_MID-1] + // -a: [UINT_MID+1, UINT_MAX] + clang_analyzer_eval(-a > UINT_MID); // expected-warning{{TRUE}} + clang_analyzer_eval(-a <= UINT_MAX); // expected-warning{{TRUE}} +} + +_Static_assert(1u - 2u == UINT_MAX, ""); +_Static_assert(2u - 1u == 1, ""); +void negate_unsigned_max(unsigned a) { + assert(a == UINT_MAX); + clang_analyzer_eval(-a == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(-a != 1); // expected-warning{{FALSE}} +} +void negate_unsigned_one(unsigned a) { + assert(a == 1); + clang_analyzer_eval(-a == UINT_MAX); // expected-warning{{TRUE}} + clang_analyzer_eval(-a < UINT_MAX); // expected-warning{{FALSE}} +} diff --git a/clang/test/Analysis/constraint_manager_negate_difference.c b/clang/test/Analysis/constraint_manager_negate_difference.c index a33c5ca81c26aa..0cac576a31ecc0 100644 --- a/clang/test/Analysis/constraint_manager_negate_difference.c +++ b/clang/test/Analysis/constraint_manager_negate_difference.c @@ -1,4 +1,6 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection,core.builtin -analyzer-config aggressive-binary-operation-simplification=true -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection,core.builtin \ +// RUN: -analyzer-config eagerly-assume=false \ +// RUN: -verify %s void clang_analyzer_eval(int); @@ -10,11 +12,8 @@ void
exit(int); #define INT_MAX (UINT_MAX & (UINT_MAX >> 1)) #define INT_MIN (UINT_MAX & ~(UINT_MAX >> 1)) -extern void __assert_fail (__const char *__assertion, __const char *__file, - unsigned int __line, __const char *__function) - __attribute__ ((__noreturn__)); -#define assert(expr) \ - ((expr) ? (void)(0) : __assert_fail (#expr, __FILE__, __LINE__, __func__)) +extern void abort() __attribute__((__noreturn__)); +#define assert(expr) ((expr) ? (void)(0) : abort()) void assert_in_range(int x) { assert(x <= ((int)INT_MAX / 4)); @@ -33,69 +32,60 @@ void assert_in_range_2(int m, int n) { void equal(int m, int n) { assert_in_range_2(m, n); - if (m != n) - return; + assert(m == n); assert_in_wide_range(m - n); clang_analyzer_eval(n == m); // expected-warning{{TRUE}} } void non_equal(int m, int n) { assert_in_range_2(m, n); - if (m == n) - return; + assert(m != n); assert_in_wide_range(m - n); clang_analyzer_eval(n != m); // expected-warning{{TRUE}} } void less_or_equal(int m, int n) { assert_in_range_2(m, n); - if (m < n) - return; + assert(m >= n); assert_in_wide_range(m - n); clang_analyzer_eval(n <= m); // expected-warning{{TRUE}} } void less(int m, int n) { assert_in_range_2(m, n); - if (m <= n) - return; + assert(m > n); assert_in_wide_range(m - n); clang_analyzer_eval(n < m); // expected-warning{{TRUE}} } void greater_or_equal(int m, int n) { assert_in_range_2(m, n); - if (m > n) - return; + assert(m <= n); assert_in_wide_range(m - n); clang_analyzer_eval(n >= m); // expected-warning{{TRUE}} } void greater(int m, int n) { assert_in_range_2(m, n); - if (m >= n) - return; + assert(m < n); assert_in_wide_range(m - n); clang_analyzer_eval(n > m); // expected-warning{{TRUE}} } void negate_positive_range(int m, int n) { - if (m - n <= 0) - return; + assert(m - n > 0); clang_analyzer_eval(n - m < 0); // expected-warning{{TRUE}} clang_analyzer_eval(n - m > INT_MIN); // expected-warning{{TRUE}} - clang_analyzer_eval(n - m == INT_MIN); // expected-warning{{FALSE}} } 
+_Static_assert(INT_MIN == -INT_MIN, ""); void negate_int_min(int m, int n) { - if (m - n != INT_MIN) - return; + assert(m - n == INT_MIN); clang_analyzer_eval(n - m == INT_MIN); // expected-warning{{TRUE}} } void negate_mixed(int m, int n) { - if (m -n > INT_MIN && m - n <= 0) - return; + assert(m - n > 0 || m - n == INT_MIN); clang_analyzer_eval(n - m <= 0); // expected-warning{{TRUE}} } @@ -106,54 +96,59 @@ void effective_range(int m, int n) { clang_analyzer_eval(n - m == 0); // expected-warning{{TRUE}} } +_Static_assert(INT_MIN == -INT_MIN, ""); void effective_range_2(int m, int n) { assert(m - n <= 0); assert(n - m <= 0); - clang_analyzer_eval(m - n == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} - clang_analyzer_eval(n - m == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(m - n == 0 || m - n == INT_MIN); // expected-warning{{TRUE}} } void negate_unsigned_min(unsigned m, unsigned n) { - if (m - n == UINT_MIN) { - clang_analyzer_eval(n - m == UINT_MIN); // expected-warning{{TRUE}} - clang_analyzer_eval(n - m != UINT_MIN); // expected-warning{{FALSE}} - clang_analyzer_eval(n - m > UINT_MIN); // expected-warning{{FALSE}} - clang_analyzer_eval(n - m < UINT_MIN); // expected-warning{{FALSE}} - } + assert(m - n == UINT_MIN); + clang_analyzer_eval(n - m == UINT_MIN); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m != UINT_MIN); // expected-warning{{FALSE}} + clang_analyzer_eval(n - m > UINT_MIN); // expected-warning{{FALSE}} + clang_analyzer_eval(n - m < UINT_MIN); // expected-warning{{FALSE}} } +_Static_assert(7u - 3u != 3u - 7u, ""); +void negate_unsigned_4(unsigned m, unsigned n) { + assert(m - n == 4u); + clang_analyzer_eval(n - m == 4u); // expected-warning{{FALSE}} + clang_analyzer_eval(n - m != 4u); // expected-warning{{TRUE}} +} + +_Static_assert(UINT_MID == -UINT_MID, ""); void negate_unsigned_mid(unsigned m, unsigned n) { - if (m - n == UINT_MID) { - clang_analyzer_eval(n - m == UINT_MID); // 
expected-warning{{TRUE}} - clang_analyzer_eval(n - m != UINT_MID); // expected-warning{{FALSE}} - } + assert(m - n == UINT_MID); + clang_analyzer_eval(n - m == UINT_MID); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m != UINT_MID); // expected-warning{{FALSE}} } void negate_unsigned_mid2(unsigned m, unsigned n) { - if (m - n < UINT_MID && m - n > UINT_MIN) { - clang_analyzer_eval(n - m > UINT_MID); // expected-warning{{TRUE}} - clang_analyzer_eval(n - m < UINT_MID); // expected-warning{{FALSE}} - } + assert(UINT_MIN < m - n && m - n < UINT_MID); + clang_analyzer_eval(n - m > UINT_MID); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m <= UINT_MAX); // expected-warning{{TRUE}} } +_Static_assert(1u - 2u == UINT_MAX, ""); +_Static_assert(2u - 1u == 1, ""); void negate_unsigned_max(unsigned m, unsigned n) { - if (m - n == UINT_MAX) { - clang_analyzer_eval(n - m == 1); // expected-warning{{TRUE}} - clang_analyzer_eval(n - m != 1); // expected-warning{{FALSE}} - } + assert(m - n == UINT_MAX); + clang_analyzer_eval(n - m == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m != 1); // expected-warning{{FALSE}} } void negate_unsigned_one(unsigned m, unsigned n) { - if (m - n == 1) { - clang_analyzer_eval(n - m == UINT_MAX); // expected-warning{{TRUE}} - clang_analyzer_eval(n - m < UINT_MAX); // expected-warning{{FALSE}} - } + assert(m - n == 1); + clang_analyzer_eval(n - m == UINT_MAX); // expected-warning{{TRUE}} + clang_analyzer_eval(n - m < UINT_MAX); // expected-warning{{FALSE}} } // The next code is a repro for the bug PR41588 void negated_unsigned_range(unsigned x, unsigned y) { - clang_analyzer_eval(x - y != 0); // expected-warning{{FALSE}} expected-warning{{TRUE}} - clang_analyzer_eval(y - x != 0); // expected-warning{{FALSE}} expected-warning{{TRUE}} + clang_analyzer_eval(x - y != 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(y - x != 0); // expected-warning{{UNKNOWN}} // expected no assertion on the next line - 
clang_analyzer_eval(x - y != 0); // expected-warning{{FALSE}} expected-warning{{TRUE}} + clang_analyzer_eval(x - y != 0); // expected-warning{{UNKNOWN}} } diff --git a/clang/test/Analysis/unary-sym-expr-no-crash.c b/clang/test/Analysis/unary-sym-expr-no-crash.c new file mode 100644 index 00000000000000..5a4e0f943b9c86 --- /dev/null +++ b/clang/test/Analysis/unary-sym-expr-no-crash.c @@ -0,0 +1,23 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=false \ +// RUN: -analyzer-config support-symbolic-integer-casts=false \ +// RUN: -verify + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=false \ +// RUN: -analyzer-config support-symbolic-integer-casts=true \ +// RUN: -verify + +// expected-no-diagnostics + +void clang_analyzer_eval(int); +void clang_analyzer_dump(int); + +void crash(int b, long c) { + b = c; + if (b > 0) + if(-b) // should not crash here + ; +} diff --git a/clang/test/Analysis/unary-sym-expr.c b/clang/test/Analysis/unary-sym-expr.c index 14b3219de3723f..7c4774f3cca82f 100644 --- a/clang/test/Analysis/unary-sym-expr.c +++ b/clang/test/Analysis/unary-sym-expr.c @@ -36,3 +36,12 @@ void test_svalbuilder_simplification(int x, int y) { // FIXME Commutativity is not supported yet. clang_analyzer_eval(-(y + x) == -3); // expected-warning{{UNKNOWN}} } + +int test_fp(int flag) { + int value; + if (flag == 0) + value = 1; + if (-flag == 0) + return value; // no-warning + return 42; +} From cd5783d3e82b98bfa140853fee95170852fd3c74 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Fri, 13 May 2022 15:52:48 +0200 Subject: [PATCH 576/908] [analyzer][solver] Handle UnarySymExpr in SMTConv Dependent patch adds UnarySymExpr, now I'd like to handle that for SMT conversions like refutation. 
Differential Revision: https://reviews.llvm.org/D125547 --- .../Core/PathSensitive/SMTConv.h | 22 +++++++++++++ .../Analysis/unary-sym-expr-z3-refutation.c | 33 +++++++++++++++++++ clang/test/Analysis/z3-crosscheck.c | 14 ++++++++ 3 files changed, 69 insertions(+) create mode 100644 clang/test/Analysis/unary-sym-expr-z3-refutation.c diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h index 2d0f169260a457..bc38a54533be75 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h @@ -446,6 +446,28 @@ class SMTConv { return getCastExpr(Solver, Ctx, Exp, FromTy, Sym->getType()); } + if (const UnarySymExpr *USE = dyn_cast(Sym)) { + if (RetTy) + *RetTy = Sym->getType(); + + QualType OperandTy; + llvm::SMTExprRef OperandExp = + getSymExpr(Solver, Ctx, USE->getOperand(), &OperandTy, hasComparison); + llvm::SMTExprRef UnaryExp = + fromUnOp(Solver, USE->getOpcode(), OperandExp); + + // Currently, without the `support-symbolic-integer-casts=true` option, + // we do not emit `SymbolCast`s for implicit casts. + // One such implicit cast is missing if the operand of the unary operator + // has a different type than the unary itself. 
+ if (Ctx.getTypeSize(OperandTy) != Ctx.getTypeSize(Sym->getType())) { + if (hasComparison) + *hasComparison = false; + return getCastExpr(Solver, Ctx, UnaryExp, OperandTy, Sym->getType()); + } + return UnaryExp; + } + if (const BinarySymExpr *BSE = dyn_cast(Sym)) { llvm::SMTExprRef Exp = getSymBinExpr(Solver, Ctx, BSE, hasComparison, RetTy); diff --git a/clang/test/Analysis/unary-sym-expr-z3-refutation.c b/clang/test/Analysis/unary-sym-expr-z3-refutation.c new file mode 100644 index 00000000000000..33585236232adc --- /dev/null +++ b/clang/test/Analysis/unary-sym-expr-z3-refutation.c @@ -0,0 +1,33 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=true \ +// RUN: -verify + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=true \ +// RUN: -analyzer-config support-symbolic-integer-casts=true \ +// RUN: -verify + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=true \ +// RUN: -analyzer-config crosscheck-with-z3=true \ +// RUN: -verify + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=true \ +// RUN: -analyzer-config crosscheck-with-z3=true \ +// RUN: -analyzer-config support-symbolic-integer-casts=true \ +// RUN: -verify + +// REQUIRES: z3 + +void k(long L) { + int g = L; + int h = g + 1; + int j; + j += -h < 0; // should not crash + // expected-warning@-1{{garbage}} +} diff --git a/clang/test/Analysis/z3-crosscheck.c b/clang/test/Analysis/z3-crosscheck.c index 67d410434fbb03..7711597eb9ec8b 100644 --- a/clang/test/Analysis/z3-crosscheck.c +++ b/clang/test/Analysis/z3-crosscheck.c @@ -14,6 +14,20 @@ int foo(int x) return 0; } +int unary(int x, long l) +{ + int *z = 0; + int y = l; + if ((x & 1) && ((x & 1) ^ 1)) + if (-y) +#ifdef NO_CROSSCHECK + return 
*z; // expected-warning {{Dereference of null pointer (loaded from variable 'z')}} +#else + return *z; // no-warning +#endif + return 0; +} + void g(int d); void f(int *a, int *b) { From 681c50c62e9338afdf58ebfd663f8e3ff43439fb Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 26 May 2022 07:52:19 -0400 Subject: [PATCH 577/908] Improve the strict prototype diagnostic behavior Post-commit feedback on https://reviews.llvm.org/D122895 pointed out that the diagnostic wording for some code was using "declaration" in a confusing way, such as: int foo(); // warning: a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x int foo(int arg) { // warning: a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x return 5; } And that we had other minor issues with the diagnostics being somewhat confusing. This patch addresses the confusion by reworking the implementation to be a bit more simple and a bit less chatty. Specifically, it changes the warning and note diagnostics to be able to specify "declaration" or "definition" as appropriate, and it changes the function merging logic so that the function without a prototype is always what gets warned on, and the function with a prototype is sometimes what gets noted. Additionally, when diagnosing a K&R C definition that is preceded by a function without a prototype, we don't note the prior declaration, we warn on it because it will also be changing behavior in C2x. 
Differential Revision: https://reviews.llvm.org/D125814 --- .../clang/Basic/DiagnosticSemaKinds.td | 10 +- clang/lib/Sema/SemaDecl.cpp | 109 ++++++------------ clang/test/C/drs/dr0xx.c | 6 +- clang/test/CodeGen/2009-06-01-addrofknr.c | 2 +- clang/test/Parser/declarators.c | 10 +- clang/test/Sema/arg-duplicate.c | 2 +- clang/test/Sema/knr-def-call.c | 10 +- clang/test/Sema/knr-variadic-def.c | 4 +- .../test/Sema/warn-deprecated-non-prototype.c | 38 +++--- clang/test/Sema/warn-strict-prototypes.c | 10 +- 10 files changed, 78 insertions(+), 123 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3168c20969e1b1..0f8ed0a6e8c313 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5589,10 +5589,12 @@ def warn_strict_prototypes : Extension< "a %select{function|block}0 declaration without a prototype is deprecated " "%select{in all versions of C|}0">, InGroup; def warn_non_prototype_changes_behavior : Warning< - "a function declaration without a prototype is deprecated in all versions of " - "C and is not supported in C2x">, InGroup; -def note_func_decl_changes_behavior : Note< - "a function declaration without a prototype is not supported in C2x">; + "a function %select{declaration|definition}0 without a prototype is " + "deprecated in all versions of C %select{and is not supported in C2x|and is " + "treated as a zero-parameter prototype in C2x, conflicting with a " + "%select{previous|subsequent}2 %select{declaration|definition}3}1">, + InGroup; +def note_conflicting_prototype : Note<"conflicting prototype is here">; def warn_missing_variable_declarations : Warning< "no previous extern declaration for non-static variable %0">, InGroup>, DefaultIgnore; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index c5d4075a49e2f7..d68d1f3ff070d2 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ 
b/clang/lib/Sema/SemaDecl.cpp @@ -3940,59 +3940,29 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S, } if (WithProto->getNumParams() != 0) { - // The function definition has parameters, so this will change - // behavior in C2x. - // - // If we already warned about about the function without a prototype - // being deprecated, add a note that it also changes behavior. If we - // didn't warn about it being deprecated (because the diagnostic is - // not enabled), warn now that it is deprecated and changes behavior. - bool AddNote = false; - if (Diags.isIgnored(diag::warn_strict_prototypes, - WithoutProto->getLocation())) { - if (WithoutProto->getBuiltinID() == 0 && - !WithoutProto->isImplicit() && - SourceMgr.isBeforeInTranslationUnit(WithoutProto->getLocation(), - WithProto->getLocation())) { - PartialDiagnostic PD = - PDiag(diag::warn_non_prototype_changes_behavior); - if (TypeSourceInfo *TSI = WithoutProto->getTypeSourceInfo()) { - if (auto FTL = TSI->getTypeLoc().getAs()) - PD << FixItHint::CreateInsertion(FTL.getRParenLoc(), "void"); - } - Diag(WithoutProto->getLocation(), PD); - } - } else { - AddNote = true; - } - - // Because the function with a prototype has parameters but a previous - // declaration had none, the function with the prototype will also - // change behavior in C2x. - if (WithProto->getBuiltinID() == 0 && !WithProto->isImplicit()) { - if (SourceMgr.isBeforeInTranslationUnit( - WithProto->getLocation(), WithoutProto->getLocation())) { - // If the function with the prototype comes before the function - // without the prototype, we only want to diagnose the one without - // the prototype. - Diag(WithoutProto->getLocation(), - diag::warn_non_prototype_changes_behavior); - } else { - // Otherwise, diagnose the one with the prototype, and potentially - // attach a note to the one without a prototype if needed. 
- Diag(WithProto->getLocation(), - diag::warn_non_prototype_changes_behavior); - if (AddNote && WithoutProto->getBuiltinID() == 0) - Diag(WithoutProto->getLocation(), - diag::note_func_decl_changes_behavior); - } - } else if (AddNote && WithoutProto->getBuiltinID() == 0 && - !WithoutProto->isImplicit()) { - // If we were supposed to add a note but the function with a - // prototype is a builtin or was implicitly declared, which means we - // have nothing to attach the note to, so we issue a warning instead. + if (WithoutProto->getBuiltinID() == 0 && !WithoutProto->isImplicit()) { + // The one without the prototype will be changing behavior in C2x, so + // warn about that one so long as it's a user-visible declaration. + bool IsWithoutProtoADef = false, IsWithProtoADef = false; + if (WithoutProto == New) + IsWithoutProtoADef = NewDeclIsDefn; + else + IsWithProtoADef = NewDeclIsDefn; Diag(WithoutProto->getLocation(), - diag::warn_non_prototype_changes_behavior); + diag::warn_non_prototype_changes_behavior) + << IsWithoutProtoADef << (WithoutProto->getNumParams() ? 0 : 1) + << (WithoutProto == Old) << IsWithProtoADef; + + // The reason the one without the prototype will be changing behavior + // is because of the one with the prototype, so note that so long as + // it's a user-visible declaration. There is one exception to this: + // when the new declaration is a definition without a prototype, the + // old declaration with a prototype is not the cause of the issue, + // and that does not need to be noted because the one with a + // prototype will not change behavior in C2x. + if (WithProto->getBuiltinID() == 0 && !WithProto->isImplicit() && + !IsWithoutProtoADef) + Diag(WithProto->getLocation(), diag::note_conflicting_prototype); } } } @@ -15053,29 +15023,22 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, // deprecated, add a note that it also changes behavior. 
If we didn't // warn about it being deprecated (because the diagnostic is not // enabled), warn now that it is deprecated and changes behavior. - bool AddNote = false; - if (PossiblePrototype) { - if (Diags.isIgnored(diag::warn_strict_prototypes, - PossiblePrototype->getLocation())) { - - PartialDiagnostic PD = - PDiag(diag::warn_non_prototype_changes_behavior); - if (TypeSourceInfo *TSI = PossiblePrototype->getTypeSourceInfo()) { - if (auto FTL = TSI->getTypeLoc().getAs()) - PD << FixItHint::CreateInsertion(FTL.getRParenLoc(), "void"); - } - Diag(PossiblePrototype->getLocation(), PD); - } else { - AddNote = true; - } - } - // Because this function definition has no prototype and it has - // parameters, it will definitely change behavior in C2x. - Diag(FD->getLocation(), diag::warn_non_prototype_changes_behavior); - if (AddNote) + // This K&R C function definition definitely changes behavior in C2x, + // so diagnose it. + Diag(FD->getLocation(), diag::warn_non_prototype_changes_behavior) + << /*definition*/ 1 << /* not supported in C2x */ 0; + + // If we have a possible prototype for the function which is a user- + // visible declaration, we already tested that it has no prototype. + // This will change behavior in C2x. This gets a warning rather than a + // note because it's the same behavior-changing problem as with the + // definition. + if (PossiblePrototype) Diag(PossiblePrototype->getLocation(), - diag::note_func_decl_changes_behavior); + diag::warn_non_prototype_changes_behavior) + << /*declaration*/ 0 << /* conflicting */ 1 << /*subsequent*/ 1 + << /*definition*/ 1; } // Warn on CPUDispatch with an actual body. 
diff --git a/clang/test/C/drs/dr0xx.c b/clang/test/C/drs/dr0xx.c index 22b38de1a88763..5cc0823eeb0b7b 100644 --- a/clang/test/C/drs/dr0xx.c +++ b/clang/test/C/drs/dr0xx.c @@ -231,13 +231,13 @@ int dr032 = (1, 2); /* expected-warning {{left operand of comma operator has no /* WG14 DR035: partial * Questions about definition of functions without a prototype */ -void dr035_1(a, b) /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} */ +void dr035_1(a, b) /* expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} */ int a(enum b {x, y}); /* expected-warning {{declaration of 'enum b' will not be visible outside of this function}} */ int b; { int test = x; /* expected-error {{use of undeclared identifier 'x'}} */ } -void dr035_2(c) /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} */ +void dr035_2(c) /* expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} */ enum m{q, r} c; { /* expected-warning {{declaration of 'enum m' will not be visible outside of this function}} */ /* FIXME: This should be accepted because the scope of m, q, and r ends at * the closing brace of the function per C89 6.1.2.1. @@ -391,7 +391,7 @@ void dr068(void) { * a prototype causes implicit conversions rather than relying on default * argument promotion and warm thoughts. 
*/ -void dr070_1(c) /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} */ +void dr070_1(c) /* expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} */ int c; { } diff --git a/clang/test/CodeGen/2009-06-01-addrofknr.c b/clang/test/CodeGen/2009-06-01-addrofknr.c index 1c8c9430a9f087..905c696721ca72 100644 --- a/clang/test/CodeGen/2009-06-01-addrofknr.c +++ b/clang/test/CodeGen/2009-06-01-addrofknr.c @@ -5,7 +5,7 @@ struct funcptr { int (*func)(); }; -static int func(f) // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +static int func(f) // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} void *f; { return 0; diff --git a/clang/test/Parser/declarators.c b/clang/test/Parser/declarators.c index c309cada073a98..c294bd085366e9 100644 --- a/clang/test/Parser/declarators.c +++ b/clang/test/Parser/declarators.c @@ -27,12 +27,12 @@ void test2(int *P, int A) { } typedef int atype; -void test3(x, /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} */ +void test3(x, /* expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} */ atype /* expected-error {{unexpected type name 'atype': expected identifier}} */ ) int x, atype; {} void test4(x, x) int x; {} // expected-error {{redefinition of parameter 'x'}} \ - // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} + // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} // PR3031 @@ -103,11 +103,11 @@ void *test14b = (void*)test14a; // 
Make sure test14a didn't get skipped. long struct X { int x; } test15(void); // expected-error {{'long struct' is invalid}} void test16(i) int i j; { } // expected-error {{expected ';' at end of declaration}} \ - // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} + // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} void test17(i, j) int i, j k; { } // expected-error {{expected ';' at end of declaration}} \ - // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} + // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} void knrNoSemi(i) int i { } // expected-error {{expected ';' at end of declaration}} \ - // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} + // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} // PR12595 void test18(void) { diff --git a/clang/test/Sema/arg-duplicate.c b/clang/test/Sema/arg-duplicate.c index 70aab5f95b12fd..d0034cd2c777e9 100644 --- a/clang/test/Sema/arg-duplicate.c +++ b/clang/test/Sema/arg-duplicate.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -std=c99 -int f3(y, x, // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +int f3(y, x, // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} x) // expected-error {{redefinition of parameter}} int y, x, // expected-note {{previous declaration is here}} diff --git a/clang/test/Sema/knr-def-call.c b/clang/test/Sema/knr-def-call.c index 36aeab358e37b5..172cb07c93c637 100644 
--- a/clang/test/Sema/knr-def-call.c +++ b/clang/test/Sema/knr-def-call.c @@ -1,19 +1,19 @@ // RUN: %clang_cc1 -triple i386-pc-unknown -Wconversion -Wliteral-conversion -fsyntax-only -verify %s // C DR #316, PR 3626. -void f0(a, b, c, d) int a,b,c,d; {} // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void f0(a, b, c, d) int a,b,c,d; {} // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} void t0(void) { f0(1); // expected-warning{{too few arguments}} } -void f1(a, b) int a, b; {} // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void f1(a, b) int a, b; {} // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} void t1(void) { f1(1, 2, 3); // expected-warning{{too many arguments}} } void f2(float); // expected-note{{previous declaration is here}} void f2(x) float x; { } // expected-warning{{promoted type 'double' of K&R function parameter is not compatible with the parameter type 'float' declared in a previous prototype}} \ - expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} + expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} typedef void (*f3)(void); f3 t3(int b) { return b? 
f0 : f1; } // okay @@ -33,7 +33,7 @@ char *rindex(s, c) // PR8314 void proto(int); -void proto(x) // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void proto(x) // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} int x; { } @@ -45,6 +45,6 @@ void use_proto() { // PR31020 void func(short d) __attribute__((cdecl)); // expected-note{{previous declaration is here}} -void __attribute__((cdecl)) func(d) // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void __attribute__((cdecl)) func(d) // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} short d; // expected-warning{{promoted type 'int' of K&R function parameter is not compatible with the parameter type 'short' declared in a previous prototype}} {} diff --git a/clang/test/Sema/knr-variadic-def.c b/clang/test/Sema/knr-variadic-def.c index 6daaa0a97a7ebb..18ca866e03bdec 100644 --- a/clang/test/Sema/knr-variadic-def.c +++ b/clang/test/Sema/knr-variadic-def.c @@ -5,7 +5,7 @@ char *foo = "test"; int test(char*,...); -int test(fmt) // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +int test(fmt) // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} char*fmt; { va_list ap; @@ -21,7 +21,7 @@ int test(fmt) // expected-warning {{a function declaration without a prototype i void exit(); // expected-warning {{a function declaration without a prototype is deprecated in all versions of C}} -int main(argc,argv) // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +int main(argc,argv) // 
expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} int argc;char**argv; { exit(test("",foo)); diff --git a/clang/test/Sema/warn-deprecated-non-prototype.c b/clang/test/Sema/warn-deprecated-non-prototype.c index 78d4b0d418fbe4..351745e49069fc 100644 --- a/clang/test/Sema/warn-deprecated-non-prototype.c +++ b/clang/test/Sema/warn-deprecated-non-prototype.c @@ -23,15 +23,14 @@ void whatever(void) { void again() {} // strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} // On by default warnings -void func(); // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} \ - strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} \ - strict-note {{a function declaration without a prototype is not supported in C2x}} -void func(a, b) int a, b; {} // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void func(); // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is treated as a zero-parameter prototype in C2x, conflicting with a subsequent definition}} \ + strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} +void func(a, b) int a, b; {} // both-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} -void one_more(a, b) int a, b; {} // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void one_more(a, b) int a, b; {} // both-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} void sheesh(int a); -void sheesh(a) int a; {} // both-warning {{a function declaration without a prototype is 
deprecated in all versions of C and is not supported in C2x}} +void sheesh(a) int a; {} // both-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} void another(); // strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} @@ -39,35 +38,26 @@ int main(void) { another(1, 2); // both-warning {{passing arguments to 'another' without a prototype is deprecated in all versions of C and is not supported in C2x}} } -void order1(); // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} \ - strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} \ - strict-note {{a function declaration without a prototype is not supported in C2x}} -void order1(int i); // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void order1(); // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is treated as a zero-parameter prototype in C2x, conflicting with a subsequent declaration}} \ + strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} +void order1(int i); // both-note {{conflicting prototype is here}} -void order2(int i); -void order2(); // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} \ +void order2(int i); // both-note {{conflicting prototype is here}} +void order2(); // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is treated as a zero-parameter prototype in C2x, conflicting with a previous declaration}} \ strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} -void order3(); // expected-warning {{a function declaration without a prototype is 
deprecated in all versions of C and is not supported in C2x}} \ - strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} \ - strict-note {{a function declaration without a prototype is not supported in C2x}} -void order3(int i) {} // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void order3(); // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is treated as a zero-parameter prototype in C2x, conflicting with a subsequent definition}} \ + strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} +void order3(int i) {} // both-note {{conflicting prototype is here}} // Just because the prototype is variadic doesn't mean we shouldn't warn on the // K&R C function definition; this still changes behavior in C2x. void test(char*,...); -void test(fmt) // both-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void test(fmt) // both-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} char*fmt; { } -// FIXME: we get two diagnostics here when running in pedantic mode. The first -// comes when forming the function type for the definition, and the second -// comes from merging the function declarations together. The second is the -// point at which we know the behavior has changed (because we can see the -// previous declaration at that point), but we've already issued the type -// error by that point. It's not ideal to be this chatty, but this situation -// should be pretty rare. 
void blapp(int); // both-note {{previous declaration is here}} void blapp() { } // both-error {{conflicting types for 'blapp'}} \ // strict-warning {{a function declaration without a prototype is deprecated in all versions of C}} diff --git a/clang/test/Sema/warn-strict-prototypes.c b/clang/test/Sema/warn-strict-prototypes.c index c16073d0820eb2..21e6f8ed7868a7 100644 --- a/clang/test/Sema/warn-strict-prototypes.c +++ b/clang/test/Sema/warn-strict-prototypes.c @@ -48,7 +48,7 @@ void bar3(void) { } // K&R function definition not preceded by full prototype -int foo9(a, b) // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +int foo9(a, b) // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} int a, b; { return a + b; @@ -56,17 +56,17 @@ int foo9(a, b) // expected-warning {{a function declaration without a prototype // Function declaration with no types void foo10(); // expected-warning {{a function declaration without a prototype is deprecated in all versions of C}} \ - expected-note {{a function declaration without a prototype is not supported in C2x}} + expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is treated as a zero-parameter prototype in C2x, conflicting with a subsequent definition}} // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"void" // K&R function definition with incomplete param list declared -void foo10(p, p2) void *p; {} // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} \ +void foo10(p, p2) void *p; {} // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} \ expected-warning {{parameter 'p2' was not declared, defaults to 'int'; ISO C99 and later do not support implicit int}} void 
foo11(int p, int p2); -void foo11(p, p2) int p; int p2; {} // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void foo11(p, p2) int p; int p2; {} // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} // PR31020 -void __attribute__((cdecl)) foo12(d) // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void __attribute__((cdecl)) foo12(d) // expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} short d; {} From 605651135b4c37e423825a9201b06a16ff6160a1 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 26 May 2022 08:46:11 -0400 Subject: [PATCH 578/908] Fix failing test case with strict prototype changes Amends 681c50c62e9338afdf58ebfd663f8e3ff43439fb and hopefully fixes: https://lab.llvm.org/buildbot/#/builders/109/builds/39347 https://lab.llvm.org/buildbot/#/builders/188/builds/14634 and others --- clang/test/SemaObjC/nonnull.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/SemaObjC/nonnull.m b/clang/test/SemaObjC/nonnull.m index 3b087b096d8f9e..218aad85e673b6 100644 --- a/clang/test/SemaObjC/nonnull.m +++ b/clang/test/SemaObjC/nonnull.m @@ -22,7 +22,7 @@ extern void func3 (void (^block1)(), int, void (^block2)(), int) extern void func4 (void (^block1)(), void (^block2)()) __attribute__((nonnull(1))) __attribute__((nonnull(2))); -void func6(); // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void func6(); // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is treated as a zero-parameter prototype in C2x, conflicting with a subsequent definition}} void func7(); void @@ -60,7 +60,7 @@ extern void 
func4 (void (^block1)(), void (^block2)()) __attribute__((nonnull(1) __attribute__((nonnull)) void _dispatch_queue_push_list(dispatch_object_t _head); // no warning -void func6(dispatch_object_t _head) { // expected-warning {{a function declaration without a prototype is deprecated in all versions of C and is not supported in C2x}} +void func6(dispatch_object_t _head) { // expected-note {{conflicting prototype is here}} _dispatch_queue_push_list(0); // expected-warning {{null passed to a callee that requires a non-null argument}} _dispatch_queue_push_list(_head._do); // no warning } From d4d28f2ace764a0420a33462628b43a1c71fc3dc Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Sat, 21 May 2022 00:05:51 +0200 Subject: [PATCH 579/908] [clang-format] Fix QualifierAlignment with global namespace qualified types. Fixes https://github.com/llvm/llvm-project/issues/55610. Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D126096 --- clang/lib/Format/QualifierAlignmentFixer.cpp | 62 +++++++++++++------ clang/unittests/Format/QualifierFixerTest.cpp | 8 +++ 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp index dd12298a57fff0..61f17cae383eec 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -216,6 +216,29 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( if (LeftRightQualifierAlignmentFixer::isPossibleMacro(Tok->Next)) return Tok; + auto AnalyzeTemplate = + [&](const FormatToken *Tok, + const FormatToken *StartTemplate) -> const FormatToken * { + // Read from the TemplateOpener to TemplateCloser. + FormatToken *EndTemplate = StartTemplate->MatchingParen; + if (EndTemplate) { + // Move to the end of any template class members e.g. + // `Foo::iterator`. 
+ if (EndTemplate->startsSequence(TT_TemplateCloser, tok::coloncolon, + tok::identifier)) { + EndTemplate = EndTemplate->Next->Next; + } + } + if (EndTemplate && EndTemplate->Next && + !EndTemplate->Next->isOneOf(tok::equal, tok::l_paren)) { + insertQualifierAfter(SourceMgr, Fixes, EndTemplate, Qualifier); + // Remove the qualifier. + removeToken(SourceMgr, Fixes, Tok); + return Tok; + } + return nullptr; + }; + FormatToken *Qual = Tok->Next; FormatToken *LastQual = Qual; while (Qual && isQualifierOrType(Qual, ConfiguredQualifierTokens)) { @@ -233,27 +256,24 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( return Tok; } else if (Tok->startsSequence(QualifierType, tok::identifier, TT_TemplateOpener)) { - // Read from the TemplateOpener to - // TemplateCloser as in const ArrayRef a; const ArrayRef &a; - FormatToken *EndTemplate = Tok->Next->Next->MatchingParen; - if (EndTemplate) { - // Move to the end of any template class members e.g. - // `Foo::iterator`. - if (EndTemplate->startsSequence(TT_TemplateCloser, tok::coloncolon, - tok::identifier)) { - EndTemplate = EndTemplate->Next->Next; - } - } - if (EndTemplate && EndTemplate->Next && - !EndTemplate->Next->isOneOf(tok::equal, tok::l_paren)) { - insertQualifierAfter(SourceMgr, Fixes, EndTemplate, Qualifier); - // Remove the qualifier. 
- removeToken(SourceMgr, Fixes, Tok); - return Tok; - } - } else if (Tok->startsSequence(QualifierType, tok::identifier)) { + // `const ArrayRef a;` + // `const ArrayRef &a;` + const FormatToken *NewTok = AnalyzeTemplate(Tok, Tok->Next->Next); + if (NewTok) + return NewTok; + } else if (Tok->startsSequence(QualifierType, tok::coloncolon, + tok::identifier, TT_TemplateOpener)) { + // `const ::ArrayRef a;` + // `const ::ArrayRef &a;` + const FormatToken *NewTok = AnalyzeTemplate(Tok, Tok->Next->Next->Next); + if (NewTok) + return NewTok; + } else if (Tok->startsSequence(QualifierType, tok::identifier) || + Tok->startsSequence(QualifierType, tok::coloncolon, + tok::identifier)) { FormatToken *Next = Tok->Next; // The case `const Foo` -> `Foo const` + // The case `const ::Foo` -> `::Foo const` // The case `const Foo *` -> `Foo const *` // The case `const Foo &` -> `Foo const &` // The case `const Foo &&` -> `Foo const &&` @@ -331,7 +351,9 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeLeft( Tok->Next->Next && Tok->Next->Next->is(QualifierType)) { rotateTokens(SourceMgr, Fixes, Tok->Next, Tok->Next->Next, /*Left=*/true); } - if (Tok->startsSequence(tok::identifier) && Tok->Next) { + if ((Tok->startsSequence(tok::coloncolon, tok::identifier) || + Tok->is(tok::identifier)) && + Tok->Next) { if (Tok->Previous && Tok->Previous->isOneOf(tok::star, tok::ampamp, tok::amp)) { return Tok; diff --git a/clang/unittests/Format/QualifierFixerTest.cpp b/clang/unittests/Format/QualifierFixerTest.cpp index fc7e932af4402d..b01aff53150da1 100755 --- a/clang/unittests/Format/QualifierFixerTest.cpp +++ b/clang/unittests/Format/QualifierFixerTest.cpp @@ -318,6 +318,8 @@ TEST_F(QualifierFixerTest, RightQualifier) { verifyFormat("Foo const &a", "const Foo &a", Style); verifyFormat("Foo::iterator const &a", "const Foo::iterator &a", Style); + verifyFormat("::Foo::iterator const &a", "const ::Foo::iterator &a", + Style); verifyFormat("Foo(int a, " "unsigned b, // c-style args\n" 
@@ -355,6 +357,8 @@ TEST_F(QualifierFixerTest, RightQualifier) { verifyFormat("void fn(Foo const &i);", "void fn(const Foo &i);", Style); verifyFormat("void fns(ns::S const &s);", "void fns(const ns::S &s);", Style); + verifyFormat("void fns(::ns::S const &s);", "void fns(const ::ns::S &s);", + Style); verifyFormat("void fn(ns::Foo const &i);", "void fn(const ns::Foo &i);", Style); verifyFormat("void fns(ns::ns2::S const &s);", @@ -445,6 +449,8 @@ TEST_F(QualifierFixerTest, LeftQualifier) { verifyFormat("const Foo &a", "Foo const &a", Style); verifyFormat("const Foo::iterator &a", "Foo::iterator const &a", Style); + verifyFormat("const ::Foo::iterator &a", "::Foo::iterator const &a", + Style); verifyFormat("const int a;", "int const a;", Style); verifyFormat("const int *a;", "int const *a;", Style); @@ -508,6 +514,8 @@ TEST_F(QualifierFixerTest, LeftQualifier) { verifyFormat("void fn(const Foo &i);", "void fn(Foo const &i);", Style); verifyFormat("void fns(const ns::S &s);", "void fns(ns::S const &s);", Style); + verifyFormat("void fns(const ::ns::S &s);", "void fns(::ns::S const &s);", + Style); verifyFormat("void fn(const ns::Foo &i);", "void fn(ns::Foo const &i);", Style); verifyFormat("void fns(const ns::ns2::S &s);", From f366acdbf694f93e0d3fbaeec5a7756ea2032df2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 May 2022 14:05:00 +0100 Subject: [PATCH 580/908] [DAG] Generalize (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2)) constant folding Remove local (uniform) constant folding and rely on getNode() to perform it Minor cleanup step toward adding non-uniform shift amount support --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2c1f7d990aadd5..dc2ce7f65141da 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ 
-9286,8 +9286,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits; if (LargeShift->getAPIntValue() == TruncBits) { SDLoc DL(N); - SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL, - getShiftAmountTy(LargeVT)); + EVT LargeShiftVT = getShiftAmountTy(LargeVT); + SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT); + Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt, + DAG.getConstant(TruncBits, DL, LargeShiftVT)); SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); From 851bfc07c86e7f84ee38d918cb4b6b49a47434f1 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 9 May 2022 11:54:10 -0400 Subject: [PATCH 581/908] [libc++abi] Use from-scratch testing configs for libc++abi by default Like we have been doing for libc++ for a while now, start using from-scratch testing configurations for libc++abi. As a fly-by fix, remove the LIBCXXABI_NO_TIMER macro, which was defined but never used. 
Differential Revision: https://reviews.llvm.org/D125242 --- libcxxabi/CMakeLists.txt | 19 ++++++++++++- .../apple-libc++abi-backdeployment.cfg.in | 2 +- .../configs/apple-libc++abi-shared.cfg.in | 2 +- libcxxabi/test/configs/cmake-bridge.cfg.in | 7 ++++- .../test/configs/ibm-libc++abi-shared.cfg.in | 4 +-- .../test/configs/llvm-libc++abi-merged.cfg.in | 2 +- .../test/configs/llvm-libc++abi-mingw.cfg.in | 25 +++++++++++++++++ .../llvm-libc++abi-shared-clangcl.cfg.in | 25 +++++++++++++++++ .../configs/llvm-libc++abi-shared-gcc.cfg.in | 26 ++++++++++++++++++ .../test/configs/llvm-libc++abi-shared.cfg.in | 27 +++++++++++++++++++ .../llvm-libc++abi-static-clangcl.cfg.in | 25 +++++++++++++++++ .../test/configs/llvm-libc++abi-static.cfg.in | 27 +++++++++++++++++++ .../test/cxa_thread_atexit_test.pass.cpp | 2 +- libcxxabi/test/forced_unwind3.pass.cpp | 2 +- libcxxabi/test/guard_test_basic.pass.cpp | 11 ++++++-- libcxxabi/test/guard_threaded_test.pass.cpp | 2 +- libcxxabi/test/libcxxabi/test/config.py | 2 +- .../test/test_exception_storage.pass.cpp | 12 +++++---- libcxxabi/test/test_fallback_malloc.pass.cpp | 9 +++++++ libcxxabi/test/test_guard.pass.cpp | 19 +++++++------ .../thread_local_destruction_order.pass.cpp | 2 +- 21 files changed, 223 insertions(+), 29 deletions(-) create mode 100644 libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in create mode 100644 libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in create mode 100644 libcxxabi/test/configs/llvm-libc++abi-shared-gcc.cfg.in create mode 100644 libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in create mode 100644 libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in create mode 100644 libcxxabi/test/configs/llvm-libc++abi-static.cfg.in diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 76d54f8d32a7b2..bc7354e9d38064 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -143,7 +143,24 @@ endif() option(LIBCXXABI_HERMETIC_STATIC_LIBRARY "Do not export any symbols 
from the static library." ${LIBCXXABI_HERMETIC_STATIC_LIBRARY_DEFAULT}) -set(LIBCXXABI_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/lit.site.cfg.in" CACHE STRING +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(LIBCXXABI_DEFAULT_TEST_CONFIG "llvm-libc++abi-shared-gcc.cfg.in") +elseif(MINGW) + set(LIBCXXABI_DEFAULT_TEST_CONFIG "llvm-libc++abi-mingw.cfg.in") +elseif(WIN32) # clang-cl + if (LIBCXXABI_ENABLE_SHARED) + set(LIBCXXABI_DEFAULT_TEST_CONFIG "llvm-libc++abi-shared-clangcl.cfg.in") + else() + set(LIBCXXABI_DEFAULT_TEST_CONFIG "llvm-libc++abi-static-clangcl.cfg.in") + endif() +else() + if (LIBCXXABI_ENABLE_SHARED) + set(LIBCXXABI_DEFAULT_TEST_CONFIG "llvm-libc++abi-shared.cfg.in") + else() + set(LIBCXXABI_DEFAULT_TEST_CONFIG "llvm-libc++abi-static.cfg.in") + endif() +endif() +set(LIBCXXABI_TEST_CONFIG "${LIBCXXABI_DEFAULT_TEST_CONFIG}" CACHE STRING "The path to the Lit testing configuration to use when running the tests. If a relative path is provided, it is assumed to be relative to '/libcxxabi/test/configs'.") if (NOT IS_ABSOLUTE "${LIBCXXABI_TEST_CONFIG}") diff --git a/libcxxabi/test/configs/apple-libc++abi-backdeployment.cfg.in b/libcxxabi/test/configs/apple-libc++abi-backdeployment.cfg.in index 510ab8a4b62391..e94f15cf225782 100644 --- a/libcxxabi/test/configs/apple-libc++abi-backdeployment.cfg.in +++ b/libcxxabi/test/configs/apple-libc++abi-backdeployment.cfg.in @@ -45,7 +45,7 @@ config.substitutions.append(('%{flags}', '-isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '' )) config.substitutions.append(('%{compile_flags}', - '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} -DLIBCXXABI_NO_TIMER -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS ' + + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS ' + '-I %{libcxx}/test/support -I %{libcxx}/src' )) config.substitutions.append(('%{link_flags}', diff --git 
a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in index 3a744dbd1070e6..20298bc8c1ec9b 100644 --- a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in +++ b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in @@ -6,7 +6,7 @@ config.substitutions.append(('%{flags}', '-isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '' )) config.substitutions.append(('%{compile_flags}', - '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} -DLIBCXXABI_NO_TIMER -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS ' + + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS ' + '-I %{libcxx}/test/support -I %{libcxx}/src' )) config.substitutions.append(('%{link_flags}', diff --git a/libcxxabi/test/configs/cmake-bridge.cfg.in b/libcxxabi/test/configs/cmake-bridge.cfg.in index 4c37681319927c..d61303aa5173cc 100644 --- a/libcxxabi/test/configs/cmake-bridge.cfg.in +++ b/libcxxabi/test/configs/cmake-bridge.cfg.in @@ -30,6 +30,11 @@ config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) config.substitutions.append(('%{libcxx}', '@LIBCXXABI_LIBCXX_PATH@')) config.substitutions.append(('%{include}', '@LIBCXXABI_SOURCE_DIR@/include')) config.substitutions.append(('%{cxx-include}', '@LIBCXXABI_HEADER_DIR@/include/c++/v1')) -config.substitutions.append(('%{cxx-target-include}', '@LIBCXXABI_HEADER_DIR@/%{triple}/include/c++/v1')) +config.substitutions.append(('%{cxx-target-include}', '@LIBCXXABI_HEADER_DIR@/include/%{triple}/c++/v1')) config.substitutions.append(('%{lib}', '@LIBCXXABI_LIBRARY_DIR@')) config.substitutions.append(('%{executor}', '@LIBCXXABI_EXECUTOR@')) + +if @LIBCXXABI_USE_LLVM_UNWINDER@: + config.substitutions.append(('%{maybe-include-libunwind}', '-I "@LIBCXXABI_LIBUNWIND_INCLUDES_INTERNAL@"')) +else: + config.substitutions.append(('%{maybe-include-libunwind}', '')) diff 
--git a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in index 874e7354af4a15..402841fe8fc867 100644 --- a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in +++ b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in @@ -4,8 +4,8 @@ lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}','')) config.substitutions.append(('%{compile_flags}', - '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} ' + - '-D__LIBC_NO_CPP_MATH_OVERLOADS__ -DLIBCXXABI_NO_TIMER ' + + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} ' + + '-D__LIBC_NO_CPP_MATH_OVERLOADS__ ' + '-D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS ' + '-I %{libcxx}/test/support -I %{libcxx}/src' )) diff --git a/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in index c17cb0801dfa75..e61c6afe2901d7 100644 --- a/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in +++ b/libcxxabi/test/configs/llvm-libc++abi-merged.cfg.in @@ -6,7 +6,7 @@ config.substitutions.append(('%{flags}', '-isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '' )) config.substitutions.append(('%{compile_flags}', - '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} -DLIBCXXABI_NO_TIMER -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS ' + + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS ' + '-I %{libcxx}/test/support -I %{libcxx}/src' )) config.substitutions.append(('%{link_flags}', diff --git a/libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in new file mode 100644 index 00000000000000..b0e07d5f2a4a1e --- /dev/null +++ b/libcxxabi/test/configs/llvm-libc++abi-mingw.cfg.in @@ -0,0 +1,25 @@ +# This testing configuration 
handles running the test suite against LLVM's libc++abi +# using either a DLL or a static library, with MinGW/Clang on Windows. + +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib++ -L %{lib} -lc++ -lc++abi' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +import os, site +site.addsitedir(os.path.join('@LIBCXXABI_LIBCXX_PATH@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in new file mode 100644 index 00000000000000..58d1e87be49e82 --- /dev/null +++ b/libcxxabi/test/configs/llvm-libc++abi-shared-clangcl.cfg.in @@ -0,0 +1,25 @@ +# This testing configuration handles running the test suite against LLVM's libc++abi +# using a DLL, with Clang-cl on Windows. 
+ +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '--driver-mode=g++')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_STDIO_ISO_WIDE_SPECIFIERS -DNOMINMAX' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib -L %{lib} -lc++ -lc++abi -lmsvcrt -lmsvcprt -loldnames' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +import os, site +site.addsitedir(os.path.join('@LIBCXXABI_LIBCXX_PATH@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxxabi/test/configs/llvm-libc++abi-shared-gcc.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-shared-gcc.cfg.in new file mode 100644 index 00000000000000..325fcd5e9e9e6d --- /dev/null +++ b/libcxxabi/test/configs/llvm-libc++abi-shared-gcc.cfg.in @@ -0,0 +1,26 @@ +# This testing configuration handles running the test suite against LLVM's libc++abi +# using a shared library, with GCC. This is done differently from Clang because +# GCC does not support the -nostdlib++ command-line flag. 
+ +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS' +)) +config.substitutions.append(('%{link_flags}', + '-L %{lib} -Wl,-rpath,%{lib} -nodefaultlibs -lc++ -lc++abi -lm -lgcc_s -lgcc -lpthread -lc -lgcc_s -lgcc -latomic' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T -- ' +)) + +import os, site +site.addsitedir(os.path.join('@LIBCXXABI_LIBCXX_PATH@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in new file mode 100644 index 00000000000000..f742574e6fbb7f --- /dev/null +++ b/libcxxabi/test/configs/llvm-libc++abi-shared.cfg.in @@ -0,0 +1,27 @@ +# This testing configuration handles running the test suite against LLVM's libc++abi +# using a shared library. 
+ +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', + '-isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '' +)) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib++ -L %{lib} -Wl,-rpath,%{lib} -lc++ -lc++abi -pthread' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T -- ' +)) + +import os, site +site.addsitedir(os.path.join('@LIBCXXABI_LIBCXX_PATH@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in new file mode 100644 index 00000000000000..8820c5b7d7511f --- /dev/null +++ b/libcxxabi/test/configs/llvm-libc++abi-static-clangcl.cfg.in @@ -0,0 +1,25 @@ +# This testing configuration handles running the test suite against LLVM's libc++abi +# using a static library, with Clang-cl on Windows. 
+ +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '--driver-mode=g++')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_STDIO_ISO_WIDE_SPECIFIERS -DNOMINMAX' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib -L %{lib} -llibc++ -llibc++abi -lmsvcrt -lmsvcprt -loldnames' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +import os, site +site.addsitedir(os.path.join('@LIBCXXABI_LIBCXX_PATH@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxxabi/test/configs/llvm-libc++abi-static.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-static.cfg.in new file mode 100644 index 00000000000000..c3e7c0ac80ba66 --- /dev/null +++ b/libcxxabi/test/configs/llvm-libc++abi-static.cfg.in @@ -0,0 +1,27 @@ +# This testing configuration handles running the test suite against LLVM's libc++abi +# using a static library. 
+ +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', + '-isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '' +)) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib++ -L %{lib} -lc++ -lc++abi -pthread' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T -- ' +)) + +import os, site +site.addsitedir(os.path.join('@LIBCXXABI_LIBCXX_PATH@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxxabi/test/cxa_thread_atexit_test.pass.cpp b/libcxxabi/test/cxa_thread_atexit_test.pass.cpp index 0f9324bfc7bb3c..16186e361df4a5 100644 --- a/libcxxabi/test/cxa_thread_atexit_test.pass.cpp +++ b/libcxxabi/test/cxa_thread_atexit_test.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcxxabi-no-threads +// UNSUPPORTED: libcpp-has-no-threads // REQUIRES: linux #include diff --git a/libcxxabi/test/forced_unwind3.pass.cpp b/libcxxabi/test/forced_unwind3.pass.cpp index 3bcc7978aeab82..3468813a7dfd94 100644 --- a/libcxxabi/test/forced_unwind3.pass.cpp +++ b/libcxxabi/test/forced_unwind3.pass.cpp @@ -10,7 +10,7 @@ // what pthread_cancel does. 
// UNSUPPORTED: c++03 -// UNSUPPORTED: libcxxabi-no-threads +// UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: no-exceptions #include diff --git a/libcxxabi/test/guard_test_basic.pass.cpp b/libcxxabi/test/guard_test_basic.pass.cpp index 2ad8e1122247da..6b15dd042a38db 100644 --- a/libcxxabi/test/guard_test_basic.pass.cpp +++ b/libcxxabi/test/guard_test_basic.pass.cpp @@ -8,6 +8,13 @@ // // UNSUPPORTED: c++03 +// Necessary because we include a private header of libc++abi, which +// only understands _LIBCXXABI_HAS_NO_THREADS. +#include "test_macros.h" +#ifdef TEST_HAS_NO_THREADS +# define _LIBCXXABI_HAS_NO_THREADS +#endif + #define TESTING_CXA_GUARD #include "../src/cxa_guard_impl.h" #include @@ -117,7 +124,7 @@ uint32_t MockGetThreadID() { return 0; } int main(int, char**) { { -#if defined(_LIBCXXABI_HAS_NO_THREADS) +#if defined(TEST_HAS_NO_THREADS) static_assert(CurrentImplementation == Implementation::NoThreads, ""); static_assert(std::is_same::value, ""); #else @@ -129,7 +136,7 @@ int main(int, char**) { #endif } { -#if (defined(__APPLE__) || defined(__linux__)) && !defined(_LIBCXXABI_HAS_NO_THREADS) +#if (defined(__APPLE__) || defined(__linux__)) && !defined(TEST_HAS_NO_THREADS) assert(PlatformThreadID); #endif if (PlatformThreadID != nullptr) { diff --git a/libcxxabi/test/guard_threaded_test.pass.cpp b/libcxxabi/test/guard_threaded_test.pass.cpp index ce589fe680b44c..fc94dd970a1189 100644 --- a/libcxxabi/test/guard_threaded_test.pass.cpp +++ b/libcxxabi/test/guard_threaded_test.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcxxabi-no-threads +// UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: no-exceptions #define TESTING_CXA_GUARD diff --git a/libcxxabi/test/libcxxabi/test/config.py b/libcxxabi/test/libcxxabi/test/config.py index 5855240a4b24c1..be93f153c75377 100644 --- a/libcxxabi/test/libcxxabi/test/config.py +++ 
b/libcxxabi/test/libcxxabi/test/config.py @@ -50,7 +50,7 @@ def configure_compile_flags(self): self.cxx.compile_flags += ['-funwind-tables'] if not self.get_lit_bool('enable_threads', True): self.cxx.compile_flags += ['-D_LIBCXXABI_HAS_NO_THREADS'] - self.config.available_features.add('libcxxabi-no-threads') + self.config.available_features.add('libcpp-has-no-threads') super(Configuration, self).configure_compile_flags() def configure_compile_flags_header_includes(self): diff --git a/libcxxabi/test/test_exception_storage.pass.cpp b/libcxxabi/test/test_exception_storage.pass.cpp index 934cc4b6a7cba4..f1cfe6e934f61f 100644 --- a/libcxxabi/test/test_exception_storage.pass.cpp +++ b/libcxxabi/test/test_exception_storage.pass.cpp @@ -14,6 +14,8 @@ #include "../src/cxa_exception.h" +#include "test_macros.h" + typedef __cxxabiv1::__cxa_eh_globals globals_t ; void *thread_code (void *parm) { @@ -29,20 +31,20 @@ void *thread_code (void *parm) { std::printf("Got different globals!\n"); *result = (size_t) glob1; -#ifndef _LIBCXXABI_HAS_NO_THREADS +#ifndef TEST_HAS_NO_THREADS sleep ( 1 ); #endif return parm; } -#ifndef _LIBCXXABI_HAS_NO_THREADS +#ifndef TEST_HAS_NO_THREADS #define NUMTHREADS 10 size_t thread_globals [ NUMTHREADS ] = { 0 }; std::__libcpp_thread_t threads [ NUMTHREADS ]; #endif int main() { -#ifndef _LIBCXXABI_HAS_NO_THREADS +#ifndef TEST_HAS_NO_THREADS // Make the threads, let them run, and wait for them to finish for ( int i = 0; i < NUMTHREADS; ++i ) std::__libcpp_thread_create ( threads + i, thread_code, (void *) (thread_globals + i)); @@ -65,10 +67,10 @@ int main() { } } return retVal; -#else // _LIBCXXABI_HAS_NO_THREADS +#else // TEST_HAS_NO_THREADS size_t thread_globals; thread_code(&thread_globals); // Check that __cxa_get_globals() is not NULL. return (thread_globals == 0) ? 
1 : 0; -#endif // !_LIBCXXABI_HAS_NO_THREADS +#endif // !TEST_HAS_NO_THREADS } diff --git a/libcxxabi/test/test_fallback_malloc.pass.cpp b/libcxxabi/test/test_fallback_malloc.pass.cpp index cc59129bf6608d..55713d2520de2e 100644 --- a/libcxxabi/test/test_fallback_malloc.pass.cpp +++ b/libcxxabi/test/test_fallback_malloc.pass.cpp @@ -11,6 +11,15 @@ #include <__threading_support> +// UNSUPPORTED: modules-build && libcpp-has-no-threads + +// Necessary because we include a private source file of libc++abi, which +// only understands _LIBCXXABI_HAS_NO_THREADS. +#include "test_macros.h" +#ifdef TEST_HAS_NO_THREADS +# define _LIBCXXABI_HAS_NO_THREADS +#endif + typedef std::deque container; // #define DEBUG_FALLBACK_MALLOC diff --git a/libcxxabi/test/test_guard.pass.cpp b/libcxxabi/test/test_guard.pass.cpp index 01e5347e9470c4..b02c71e0bd7a7e 100644 --- a/libcxxabi/test/test_guard.pass.cpp +++ b/libcxxabi/test/test_guard.pass.cpp @@ -6,17 +6,16 @@ // //===----------------------------------------------------------------------===// -#include "cxxabi.h" - #include - -#ifndef _LIBCXXABI_HAS_NO_THREADS -#include -#include "make_test_thread.h" -#endif +#include #include "test_macros.h" +#ifndef TEST_HAS_NO_THREADS +# include +# include "make_test_thread.h" +#endif + // Ensure that we initialize each variable once and only once. namespace test1 { static int run_count = 0; @@ -83,7 +82,7 @@ namespace test3 { } } -#ifndef _LIBCXXABI_HAS_NO_THREADS +#ifndef TEST_HAS_NO_THREADS // A simple thread test of two threads racing to initialize a variable. This // isn't guaranteed to catch any particular threading problems. 
namespace test4 { @@ -136,14 +135,14 @@ namespace test5 { assert(run_count == 1); } } -#endif /* _LIBCXXABI_HAS_NO_THREADS */ +#endif /* TEST_HAS_NO_THREADS */ int main(int, char**) { test1::test(); test2::test(); test3::test(); -#ifndef _LIBCXXABI_HAS_NO_THREADS +#ifndef TEST_HAS_NO_THREADS test4::test(); test5::test(); #endif diff --git a/libcxxabi/test/thread_local_destruction_order.pass.cpp b/libcxxabi/test/thread_local_destruction_order.pass.cpp index c1e58f90c51109..765cd7cc8a23aa 100644 --- a/libcxxabi/test/thread_local_destruction_order.pass.cpp +++ b/libcxxabi/test/thread_local_destruction_order.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcxxabi-no-threads +// UNSUPPORTED: libcpp-has-no-threads #include #include From ea6171c108c47c1ee486388adfa106e13e280e33 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 25 May 2022 14:22:56 -0400 Subject: [PATCH 582/908] [InstCombine] add tests for icmp with udiv operand; NFC This covers a generalization of one of the transforms suggested in #55695. 
--- .../InstCombine/icmp-div-constant.ll | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index aab916c4c7a069..ff587dc347a84f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -196,3 +196,81 @@ exit: ret i32 %add } +define i1 @udiv_eq_umax(i8 %x, i8 %y) { +; CHECK-LABEL: @udiv_eq_umax( +; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], -1 +; CHECK-NEXT: ret i1 [[R]] +; + %d = udiv i8 %x, %y + %r = icmp eq i8 %d, 255 + ret i1 %r +} + +define <2 x i1> @udiv_ne_umax(<2 x i5> %x, <2 x i5> %y) { +; CHECK-LABEL: @udiv_ne_umax( +; CHECK-NEXT: [[D:%.*]] = udiv <2 x i5> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[D]], +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %d = udiv <2 x i5> %x, %y + %r = icmp ne <2 x i5> %d, + ret <2 x i1> %r +} + +define i1 @udiv_eq_big(i8 %x, i8 %y) { +; CHECK-LABEL: @udiv_eq_big( +; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], -128 +; CHECK-NEXT: ret i1 [[R]] +; + %d = udiv i8 %x, %y + %r = icmp eq i8 %d, 128 + ret i1 %r +} + +define i1 @udiv_ne_big(i8 %x, i8 %y) { +; CHECK-LABEL: @udiv_ne_big( +; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[D]], -128 +; CHECK-NEXT: ret i1 [[R]] +; + %d = udiv i8 %x, %y + %r = icmp ne i8 %d, 128 + ret i1 %r +} + +define i1 @udiv_eq_not_big(i8 %x, i8 %y) { +; CHECK-LABEL: @udiv_eq_not_big( +; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], 127 +; CHECK-NEXT: ret i1 [[R]] +; + %d = udiv i8 %x, %y + %r = icmp eq i8 %d, 127 + ret i1 %r +} + +define i1 @udiv_slt_umax(i8 %x, i8 %y) { +; CHECK-LABEL: @udiv_slt_umax( +; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = 
icmp slt i8 [[D]], -1 +; CHECK-NEXT: ret i1 [[R]] +; + %d = udiv i8 %x, %y + %r = icmp slt i8 %d, 255 + ret i1 %r +} + +define i1 @udiv_eq_umax_use(i32 %x, i32 %y) { +; CHECK-LABEL: @udiv_eq_umax_use( +; CHECK-NEXT: [[D:%.*]] = udiv i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i32 [[D]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[D]], -1 +; CHECK-NEXT: ret i1 [[R]] +; + %d = udiv i32 %x, %y + call void @use(i32 %d) + %r = icmp eq i32 %d, -1 + ret i1 %r +} From 3952c905ef08580de1ddc5d776177497407a6093 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 26 May 2022 08:23:36 -0400 Subject: [PATCH 583/908] [InstCombine] fold icmp equality with udiv and large constant With large compare constant: (X u/ Y) == C --> (X == C) && (Y == 1) (X u/ Y) != C --> (X != C) || (Y != 1) https://alive2.llvm.org/ce/z/EhKwh6 There are various potential missing icmp (div) transforms shown here: https://github.com/llvm/llvm-project/issues/55695 This is a generalization for part of the udiv + equality. I didn't check in detail, but some of those may only make sense as codegen transforms. This results in one extra instruction in IR, but it is better for analysis, and looks much better in codegen on all targets that I tried. 
Differential Revision: https://reviews.llvm.org/D126410 --- .../InstCombine/InstCombineCompares.cpp | 27 ++++++++++++++----- .../InstCombine/icmp-div-constant.ll | 26 ++++++++++++------ 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index f88585929248a5..9f5f65cf3bb2d8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2383,26 +2383,41 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp, Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, const APInt &C) { + ICmpInst::Predicate Pred = Cmp.getPredicate(); + Value *X = UDiv->getOperand(0); + Value *Y = UDiv->getOperand(1); + Type *Ty = UDiv->getType(); + + // If the compare constant is bigger than UMAX/2 (negative), there's only one + // pair of values that satisfies an equality check, so eliminate the division: + // (X u/ Y) == C --> (X == C) && (Y == 1) + // (X u/ Y) != C --> (X != C) || (Y != 1) + if (Cmp.isEquality() && UDiv->hasOneUse() && C.isSignBitSet()) { + Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C)); + Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1)); + auto Logic = Pred == ICmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or; + return BinaryOperator::Create(Logic, XBig, YOne); + } + const APInt *C2; - if (!match(UDiv->getOperand(0), m_APInt(C2))) + if (!match(X, m_APInt(C2))) return nullptr; assert(*C2 != 0 && "udiv 0, X should have been simplified already."); // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1)) - Value *Y = UDiv->getOperand(1); - if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) { + if (Pred == ICmpInst::ICMP_UGT) { assert(!C.isMaxValue() && "icmp ugt X, UINT_MAX should have been simplified already."); return new ICmpInst(ICmpInst::ICMP_ULE, Y, - ConstantInt::get(Y->getType(), C2->udiv(C + 1))); + ConstantInt::get(Ty, C2->udiv(C + 1))); } // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C) - if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) { + if (Pred == ICmpInst::ICMP_ULT) { assert(C != 0 && "icmp ult X, 0 should have been simplified already."); return new ICmpInst(ICmpInst::ICMP_UGT, Y, - ConstantInt::get(Y->getType(), C2->udiv(C))); + ConstantInt::get(Ty, C2->udiv(C))); } return nullptr; diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index ff587dc347a84f..e9b26e6b4bca62 100644 --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -198,8 +198,9 @@ exit: define i1 @udiv_eq_umax(i8 %x, i8 %y) { ; CHECK-LABEL: @udiv_eq_umax( -; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[Y:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i1 [[R]] ; %d = udiv i8 %x, %y @@ -209,8 +210,9 @@ define i1 @udiv_eq_umax(i8 %x, i8 %y) { define <2 x i1> @udiv_ne_umax(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @udiv_ne_umax( -; CHECK-NEXT: [[D:%.*]] = udiv <2 x i5> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[D]], +; 
CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %d = udiv <2 x i5> %x, %y @@ -220,8 +222,9 @@ define <2 x i1> @udiv_ne_umax(<2 x i5> %x, <2 x i5> %y) { define i1 @udiv_eq_big(i8 %x, i8 %y) { ; CHECK-LABEL: @udiv_eq_big( -; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], -128 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[Y:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i1 [[R]] ; %d = udiv i8 %x, %y @@ -231,8 +234,9 @@ define i1 @udiv_eq_big(i8 %x, i8 %y) { define i1 @udiv_ne_big(i8 %x, i8 %y) { ; CHECK-LABEL: @udiv_ne_big( -; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[D]], -128 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[Y:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i1 [[R]] ; %d = udiv i8 %x, %y @@ -240,6 +244,8 @@ define i1 @udiv_ne_big(i8 %x, i8 %y) { ret i1 %r } +; negative test - must have negative compare constant + define i1 @udiv_eq_not_big(i8 %x, i8 %y) { ; CHECK-LABEL: @udiv_eq_not_big( ; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] @@ -251,6 +257,8 @@ define i1 @udiv_eq_not_big(i8 %x, i8 %y) { ret i1 %r } +; negative test - must be equality predicate + define i1 @udiv_slt_umax(i8 %x, i8 %y) { ; CHECK-LABEL: @udiv_slt_umax( ; CHECK-NEXT: [[D:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] @@ -262,6 +270,8 @@ define i1 @udiv_slt_umax(i8 %x, i8 %y) { ret i1 %r } +; negative test - extra use + define i1 @udiv_eq_umax_use(i32 %x, i32 %y) { ; CHECK-LABEL: @udiv_eq_umax_use( ; CHECK-NEXT: [[D:%.*]] = udiv i32 [[X:%.*]], [[Y:%.*]] From 1bae02b77335eb1a01d9a0bb36c2b2a29dfdd5d9 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 18 May 
2022 09:19:15 -0400 Subject: [PATCH 584/908] [Cuda] Use fallback method to mangle externalized decls if no CUID given CUDA requires that static variables be visible to the host when offloading. However, the standard semantics of a static variable dictate that it should not be visible outside of the current file. In order to access it from the host we need to perform "externalization" on the static variable on the device. This requires generating a semi-unique name that can be affixed to the variable as to not cause linker errors. This is currently done using the CUID functionality, an MD5 hash value set up by the clang driver. This allows us to achieve a mostly unique ID that is unique even between multiple compilations of the same file. However, this is not always available. Instead, this patch uses the unique ID from the file to generate a unique symbol name. This will create a unique name that is consistent between the host and device side compilations without requiring the CUID to be entered by the driver. The one downside to this is that we are no longer stable under multiple compilations of the same file. However, this is a very niche use-case and is not supported by Nvidia's CUDA compiler so it is likely to be good enough. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D125904 --- clang/lib/CodeGen/CGCUDANV.cpp | 3 +- clang/lib/CodeGen/CodeGenModule.cpp | 37 ++++++++++++++++--- clang/lib/CodeGen/CodeGenModule.h | 5 ++- clang/test/CodeGenCUDA/device-fun-linkage.cu | 4 +- .../test/CodeGenCUDA/static-device-var-rdc.cu | 22 +++++++---- 5 files changed, 53 insertions(+), 18 deletions(-) diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 10f8bd222b7e79..6f2679cb15e4c6 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -285,8 +285,7 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { // Make unique name for device side static file-scope variable for HIP. 
if (CGM.getContext().shouldExternalize(ND) && - CGM.getLangOpts().GPURelocatableDeviceCode && - !CGM.getLangOpts().CUID.empty()) { + CGM.getLangOpts().GPURelocatableDeviceCode) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << DeviceSideName; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 703cf4edf5f560..a035e5ddd9e6db 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1416,8 +1416,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, // Make unique name for device side static file-scope variable for HIP. if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && - CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty()) + CGM.getLangOpts().CUDAIsDevice) CGM.printPostfixForExternalizedDecl(Out, ND); + return std::string(Out.str()); } @@ -6825,12 +6826,38 @@ bool CodeGenModule::stopAutoInit() { void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const { - StringRef Tag; // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers // postfix beginning with '.' since the symbol name can be demangled. if (LangOpts.HIP) - Tag = (isa(D) ? ".static." : ".intern."); + OS << (isa(D) ? ".static." : ".intern."); else - Tag = (isa(D) ? "__static__" : "__intern__"); - OS << Tag << getContext().getCUIDHash(); + OS << (isa(D) ? "__static__" : "__intern__"); + + // If the CUID is not specified we try to generate a unique postfix. + if (getLangOpts().CUID.empty()) { + SourceManager &SM = getContext().getSourceManager(); + PresumedLoc PLoc = SM.getPresumedLoc(D->getLocation()); + assert(PLoc.isValid() && "Source location is expected to be valid."); + + // Get the hash of the user defined macros. 
+ llvm::MD5 Hash; + llvm::MD5::MD5Result Result; + for (const auto &Arg : PreprocessorOpts.Macros) + Hash.update(Arg.first); + Hash.final(Result); + + // Get the UniqueID for the file containing the decl. + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { + PLoc = SM.getPresumedLoc(D->getLocation(), /*UseLineDirectives=*/false); + assert(PLoc.isValid() && "Source location is expected to be valid."); + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + SM.getDiagnostics().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + } + OS << llvm::format("%x", ID.getFile()) << llvm::format("%x", ID.getDevice()) + << "_" << llvm::utohexstr(Result.low(), /*LowerCase=*/true, /*Width=*/8); + } else { + OS << getContext().getCUIDHash(); + } } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 8393d43682ea52..a5ec4c8f988d6d 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1467,7 +1467,10 @@ class CodeGenModule : public CodeGenTypeCache { bool stopAutoInit(); /// Print the postfix for externalized static variable or kernels for single - /// source offloading languages CUDA and HIP. + /// source offloading languages CUDA and HIP. The unique postfix is created + /// using either the CUID argument, or the file's UniqueID and active macros. + /// The fallback method without a CUID requires that the offloading toolchain + /// does not define separate macros via the -cc1 options. 
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const; diff --git a/clang/test/CodeGenCUDA/device-fun-linkage.cu b/clang/test/CodeGenCUDA/device-fun-linkage.cu index d8ad6d438be9c9..54899e0e9c0f16 100644 --- a/clang/test/CodeGenCUDA/device-fun-linkage.cu +++ b/clang/test/CodeGenCUDA/device-fun-linkage.cu @@ -23,10 +23,10 @@ template __global__ void kernel(); // Ensure that unused static device function is eliminated static __device__ void static_func() {} // NORDC-NEG-NOT: define{{.*}} void @_ZL13static_funcv() -// RDC-NEG-NOT: define{{.*}} void @_ZL13static_funcv() +// RDC-NEG-NOT: define{{.*}} void @_ZL13static_funcv[[FILEID:.*]]() // Ensure that kernel function has external or weak_odr // linkage regardless static specifier static __global__ void static_kernel() {} // NORDC: define void @_ZL13static_kernelv() -// RDC: define weak_odr void @_ZL13static_kernelv() +// RDC: define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]() diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu index 81a2bd0fd455ca..a9cca4e9212aba 100644 --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -2,12 +2,12 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=DEV,INT-DEV %s +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o %t.nocuid.dev -x hip %s +// RUN: cat %t.nocuid.dev | FileCheck -check-prefixes=DEV,INT-DEV %s // RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-gnu-linux \ -// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=HOST,INT-HOST %s +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o %t.nocuid.host -x hip %s +// RUN: cat %t.nocuid.host | FileCheck -check-prefixes=HOST,INT-HOST %s // RUN: %clang_cc1 -no-opaque-pointers -triple 
amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ // RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev @@ -21,6 +21,7 @@ // variable names. // RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s +// RUN: cat %t.nocuid.dev %t.nocuid.host | FileCheck -check-prefix=POSTFIX-ID %s // Negative tests. @@ -48,6 +49,9 @@ #include "Inputs/cuda.h" +// Make sure we can still mangle with a line directive. +#line 0 "-" + // Test function scope static device variable, which should not be externalized. // DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 @@ -56,8 +60,8 @@ // HOST-DAG: @_ZL1y = internal global i32 undef // Test normal static device variables -// INT-DEV-DAG: @_ZL1x = addrspace(1) externally_initialized global i32 0 -// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" +// INT-DEV-DAG: @_ZL1x[[FILEID:.*]] = addrspace(1) externally_initialized global i32 0 +// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x[[FILEID:.*]]\00" // Test externalized static device variables // EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 @@ -66,6 +70,8 @@ // POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 // POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00" +// POSTFIX-ID: @_ZL1x.static.[[FILEID:.*]] = addrspace(1) externally_initialized global i32 0 +// POSTFIX-ID: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[FILEID]]\00" static __device__ int x; @@ -75,8 +81,8 @@ static __device__ int x; static __device__ int x2; // Test normal static device variables -// INT-DEV-DAG: @_ZL1y = addrspace(4) externally_initialized global i32 0 -// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" +// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized global i32 0 +// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y[[FILEID:.*]]\00" // Test externalized static device variables // EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) 
externally_initialized global i32 0 From 4c549a0b593e787ac2431ecbcf50b5a5de795f0c Mon Sep 17 00:00:00 2001 From: Emil Kieri Date: Sun, 22 May 2022 22:28:04 +0200 Subject: [PATCH 585/908] [flang][NFC] Make semantics test dosemantics03.f90 warning-correct This is a preparation for D125804, which makes test_errors.py test warnings the same way it already tests errors, i.e., assert that the emitted and expected errors are identical. The following changes are made to the test: - Add the WARNING directive where warnings are expected. - Remove -Werror in the RUN line. It does not serve much purpose here: with -Werror flang makes compilation fail in the presence of warnings, but warnings are still printed as warnings and not as errors. And I anyway find it better to test the warnings as warnings instead of promoting them and test both warnings and errors as errors. - Update the header comment describing the test case, mostly in response to the removal of -Werror. - Remove the reference to 'issue 458', referring to https://github.com/flang-compiler/f18/issues/458, from the header. I think the relevant reference here is to C1120 of the standard, and references to bug trackers from other projects (from before upstreaming) can be confusing. Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D126176 --- flang/test/Semantics/dosemantics03.f90 | 37 +++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/flang/test/Semantics/dosemantics03.f90 b/flang/test/Semantics/dosemantics03.f90 index 59c8ddaadeff5f..0d2664062cbb07 100644 --- a/flang/test/Semantics/dosemantics03.f90 +++ b/flang/test/Semantics/dosemantics03.f90 @@ -1,15 +1,14 @@ -! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic -Werror - -! Issue 458 -- semantic checks for a normal DO loop. The DO variable -! and the initial, final, and step expressions must be INTEGER if the -! options for standard conformance and turning warnings into errors -! 
are both in effect. This test turns on the options for standards -! conformance and turning warnings into errors. This produces error -! messages for the cases where REAL and DOUBLE PRECISION variables -! and expressions are used in the DO controls. - +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic +! ! C1120 -- DO variable (and associated expressions) must be INTEGER. ! This is extended by allowing REAL and DOUBLE PRECISION +! +! The standard requires the DO variable and the initial, final, and step +! expressions to be INTEGER. As an extension, we do however allow them to be +! REAL or DOUBLE PRECISION. This test turns on the option for standard +! conformance checking to test that we get portability warnings for these +! cases. We also check that other types, such as CHARACTER and LOGICAL, yield +! errors when used in the DO controls. MODULE share INTEGER :: intvarshare @@ -51,11 +50,13 @@ END FUNCTION ifunc END DO ! REAL DO variable +!WARNING: DO controls should be INTEGER DO rvar = 1, 10, 3 PRINT *, "rvar is: ", rvar END DO ! DOUBLE PRECISISON DO variable +!WARNING: DO controls should be INTEGER DO dvar = 1, 10, 3 PRINT *, "dvar is: ", dvar END DO @@ -68,12 +69,14 @@ END FUNCTION ifunc ! Pointer to REAL DO variable ALLOCATE(prvar) +!WARNING: DO controls should be INTEGER DO prvar = 1, 10, 3 PRINT *, "prvar is: ", prvar END DO ! Pointer to DOUBLE PRECISION DO variable ALLOCATE(pdvar) +!WARNING: DO controls should be INTEGER DO pdvar = 1, 10, 3 PRINT *, "pdvar is: ", pdvar END DO @@ -145,22 +148,26 @@ END FUNCTION ifunc END DO ! Shared association REAL DO variable +!WARNING: DO controls should be INTEGER DO realvarshare = 1, 10, 3 PRINT *, "ivar is: ", ivar END DO ! Shared association DOUBLE PRECISION DO variable +!WARNING: DO controls should be INTEGER DO dpvarshare = 1, 10, 3 PRINT *, "ivar is: ", ivar END DO ! Initial expressions ! 
REAL initial expression +!WARNING: DO controls should be INTEGER DO ivar = rvar, 10, 3 PRINT *, "ivar is: ", ivar END DO ! DOUBLE PRECISION initial expression +!WARNING: DO controls should be INTEGER DO ivar = dvar, 10, 3 PRINT *, "ivar is: ", ivar END DO @@ -171,11 +178,13 @@ END FUNCTION ifunc END DO ! Pointer to REAL initial expression +!WARNING: DO controls should be INTEGER DO ivar = prvar, 10, 3 PRINT *, "ivar is: ", ivar END DO ! Pointer to DOUBLE PRECISION initial expression +!WARNING: DO controls should be INTEGER DO ivar = pdvar, 10, 3 PRINT *, "ivar is: ", ivar END DO @@ -212,11 +221,13 @@ END FUNCTION ifunc ! Final expression ! REAL final expression +!WARNING: DO controls should be INTEGER DO ivar = 1, rvar, 3 PRINT *, "ivar is: ", ivar END DO ! DOUBLE PRECISION final expression +!WARNING: DO controls should be INTEGER DO ivar = 1, dvar, 3 PRINT *, "ivar is: ", ivar END DO @@ -227,11 +238,13 @@ END FUNCTION ifunc END DO ! Pointer to REAL final expression +!WARNING: DO controls should be INTEGER DO ivar = 1, prvar, 3 PRINT *, "ivar is: ", ivar END DO ! Pointer to DOUBLE PRECISION final expression +!WARNING: DO controls should be INTEGER DO ivar = pdvar, 10, 3 PRINT *, "ivar is: ", ivar END DO @@ -250,11 +263,13 @@ END FUNCTION ifunc ! Step expression ! REAL step expression +!WARNING: DO controls should be INTEGER DO ivar = 1, 10, rvar PRINT *, "ivar is: ", ivar END DO ! DOUBLE PRECISION step expression +!WARNING: DO controls should be INTEGER DO ivar = 1, 10, dvar PRINT *, "ivar is: ", ivar END DO @@ -265,11 +280,13 @@ END FUNCTION ifunc END DO ! Pointer to REAL step expression +!WARNING: DO controls should be INTEGER DO ivar = 1, 10, prvar PRINT *, "ivar is: ", ivar END DO ! 
Pointer to DOUBLE PRECISION step expression +!WARNING: DO controls should be INTEGER DO ivar = 1, 10, pdvar PRINT *, "ivar is: ", ivar END DO From 9139d484d46a0b63275e00b988895bfb419bbe71 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 May 2022 07:15:49 -0700 Subject: [PATCH 586/908] [SLP]Fix crash on reordering of ScatterVectorize nodes. ScatterVectorize nodes should be handled the same way as gathers in reorderBottomToTop function, since we can simply reorder the loads in this node. Because of that, we need to include such nodes in the list of gathered nodes to fix the compiler crash. Differential Revision: https://reviews.llvm.org/D126378 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 5 ++ .../X86/scatter-vectorize-reorder.ll | 74 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8cedb63eead04b..5efb7148cc03d6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3810,6 +3810,11 @@ bool BoUpSLP::canReorderOperands( // Add the node to the list of the ordered nodes with the identity // order. Edges.emplace_back(I, TE); + // Add ScatterVectorize nodes to the list of operands, where just + // reordering of the scalars is required. Similar to the gathers, so + // simply add to the list of gathered ops. 
+ if (TE->State != TreeEntry::Vectorize) + GatherOps.push_back(TE); continue; } ArrayRef VL = UserTE->getOperand(I); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll new file mode 100644 index 00000000000000..e269117102fab7 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s + +define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX10_I_I86:%.*]] = getelementptr inbounds float, ptr undef, i64 2 +; CHECK-NEXT: [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [4 x float], ptr undef, i64 2 +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr undef, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> , <2 x float> [[TMP0]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[SHUFFLE]], zeroinitializer +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP10:%.*]] = 
phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer +; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[ARRAYIDX21_I]], align 16 +; CHECK-NEXT: ret void +; +entry: + %arrayidx10.i.i86 = getelementptr inbounds float, ptr undef, i64 2 + %arrayidx6.i66.i = getelementptr inbounds float, ptr undef, i64 1 + %arrayidx21.i = getelementptr inbounds [4 x float], ptr undef, i64 2 + %arrayidx6.i109.i = getelementptr inbounds [4 x float], ptr undef, i64 2, i64 1 + br label %bb1 + +bb1: + %0 = load float, ptr undef, align 4 + %sub.i71.i = fsub float 0.000000e+00, %0 + %1 = load float, ptr %arrayidx6.i66.i, align 4 + %sub5.i74.i = fsub float 0.000000e+00, %1 + %2 = load float, ptr %arrayidx10.i.i86, align 4 + %3 = call float @llvm.fmuladd.f32(float %1, float %2, float 0.000000e+00) + %4 = load float, ptr undef, align 4 + %5 = call float @llvm.fmuladd.f32(float 0.000000e+00, float %4, float %2) + br i1 false, label %bb2, label %bb3 + +bb2: + %mul.i95 = fmul float %3, 0.000000e+00 + %mul3.i96 = fmul float %5, 0.000000e+00 + br label %bb3 + +bb3: + %vddir.sroa.8.0.i = phi float [ %mul3.i96, %bb2 ], [ 0.000000e+00, %bb1 ] + %vddir.sroa.0.0.i = phi float [ %mul.i95, %bb2 ], [ 0.000000e+00, %bb1 ] + %add.i.i = fadd float %sub.i71.i, %vddir.sroa.0.0.i + %add5.i.i = fadd float %sub5.i74.i, %vddir.sroa.8.0.i + %add.i105.i = fadd float %add.i.i, 0.000000e+00 + %add5.i108.i = fadd float %add5.i.i, 0.000000e+00 + %sub.i114.i = fsub float %add.i105.i, 0.000000e+00 + %sub4.i.i = fsub float %add5.i108.i, 0.000000e+00 + %sub.i118.i = fsub float %sub.i114.i, 0.000000e+00 + store float %sub.i118.i, ptr %arrayidx21.i, align 16 + %sub4.i121.i = fsub float %sub4.i.i, 0.000000e+00 + store float 
%sub4.i121.i, ptr %arrayidx6.i109.i, align 4 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) + From f8c8bda965dd0f622de1ad3863b728661af6eb72 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 May 2022 09:35:01 -0400 Subject: [PATCH 587/908] [libc++] Remove temporary workaround for existing CMake caches If you are broken by this change, you should remove your CMake cache and re-run the CMake generation step. --- libcxx/CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index a182a76dda782c..c36b8e61b30d19 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -221,12 +221,6 @@ endif() set(LIBCXX_SUPPORTED_ABI_LIBRARIES none libcxxabi system-libcxxabi libcxxrt libstdc++ libsupc++ vcruntime) set(LIBCXX_CXX_ABI "${LIBCXX_DEFAULT_ABI_LIBRARY}" CACHE STRING "Specify C++ ABI library to use. Supported values are ${LIBCXX_SUPPORTED_ABI_LIBRARIES}.") - -# Temporary to still accept existing CMake caches that contain "default" as the value -if (LIBCXX_CXX_ABI STREQUAL "default") - set(LIBCXX_CXX_ABI "${LIBCXX_DEFAULT_ABI_LIBRARY}" CACHE STRING "" FORCE) -endif() - if (NOT "${LIBCXX_CXX_ABI}" IN_LIST LIBCXX_SUPPORTED_ABI_LIBRARIES) message(FATAL_ERROR "Unsupported C++ ABI library: '${LIBCXX_CXX_ABI}'. Supported values are ${LIBCXX_SUPPORTED_ABI_LIBRARIES}.") endif() From 6f434776da0ba2c1dd666470e0ddef7954a21c76 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 26 May 2022 22:37:14 +0900 Subject: [PATCH 588/908] [bazel] Introduce "VE" CodeGen in LLVM. 
--- utils/bazel/configure.bzl | 1 + utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/utils/bazel/configure.bzl b/utils/bazel/configure.bzl index 4c5ab8bd097249..6fca2060491fc7 100644 --- a/utils/bazel/configure.bzl +++ b/utils/bazel/configure.bzl @@ -26,6 +26,7 @@ DEFAULT_TARGETS = [ "RISCV", "Sparc", "SystemZ", + "VE", "WebAssembly", "X86", "XCore", diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index da8def70baa09b..a6f25064715d5c 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -1723,6 +1723,21 @@ llvm_target_lib_list = [lib for lib in [ ("-gen-searchable-tables", "lib/Target/RISCV/RISCVGenSearchableTables.inc"), ], }, + { + "name": "VE", + "short_name": "VE", + "tbl_outs": [ + ("-gen-asm-matcher", "lib/Target/VE/VEGenAsmMatcher.inc"), + ("-gen-asm-writer", "lib/Target/VE/VEGenAsmWriter.inc"), + ("-gen-callingconv", "lib/Target/VE/VEGenCallingConv.inc"), + ("-gen-dag-isel", "lib/Target/VE/VEGenDAGISel.inc"), + ("-gen-disassembler", "lib/Target/VE/VEGenDisassemblerTables.inc"), + ("-gen-emitter", "lib/Target/VE/VEGenMCCodeEmitter.inc"), + ("-gen-instr-info", "lib/Target/VE/VEGenInstrInfo.inc"), + ("-gen-register-info", "lib/Target/VE/VEGenRegisterInfo.inc"), + ("-gen-subtarget", "lib/Target/VE/VEGenSubtargetInfo.inc"), + ], + }, { "name": "WebAssembly", "short_name": "WebAssembly", From 6b8c6f15fdd8c147dfb7b0beb6f3d6eb98bf5772 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 25 May 2022 10:11:39 -0700 Subject: [PATCH 589/908] [clang][PR55406] CFG for coroutine CoreturnStmt needs to keep the operand value distinct from its use in any return_value call, so that instantiation may rebuild the latter. But it also needs to keep the operand value separate in the case of calling return_void. 
Code generation checks the operand value form to determine whether it is a distinct entity to the promise call. This adds the same logic to CFG generation. Reviewed By: bruno Differential Revision: https://reviews.llvm.org/D126399 --- clang/lib/Analysis/CFG.cpp | 15 +++++++- clang/test/SemaCXX/thread-safety-coro.cpp | 47 +++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 clang/test/SemaCXX/thread-safety-coro.cpp diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index fd8d1b196f1145..1417b95aad9314 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -3137,8 +3137,19 @@ CFGBlock *CFGBuilder::VisitReturnStmt(Stmt *S) { return Visit(O, AddStmtChoice::AlwaysAdd, /*ExternallyDestructed=*/true); return Block; } - // co_return - return VisitChildren(S); + + CoreturnStmt *CRS = cast(S); + auto *B = Block; + if (CFGBlock *R = Visit(CRS->getPromiseCall())) + B = R; + + if (Expr *RV = CRS->getOperand()) + if (RV->getType()->isVoidType() && !isa(RV)) + // A non-initlist void expression. 
+ if (CFGBlock *R = Visit(RV)) + B = R; + + return B; } CFGBlock *CFGBuilder::VisitSEHExceptStmt(SEHExceptStmt *ES) { diff --git a/clang/test/SemaCXX/thread-safety-coro.cpp b/clang/test/SemaCXX/thread-safety-coro.cpp new file mode 100644 index 00000000000000..a500ad5435e4ac --- /dev/null +++ b/clang/test/SemaCXX/thread-safety-coro.cpp @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety -std=c++17 -fcoroutines-ts %s + +// expected-no-diagnostics + +namespace std { +template +struct coroutine_traits { + using promise_type = typename _Result::promise_type; +}; + +template +struct coroutine_handle; + +template <> +struct coroutine_handle { + static coroutine_handle from_address(void *__a) noexcept; + void resume() const noexcept; + void destroy() const noexcept; +}; + +template +struct coroutine_handle : coroutine_handle<> {}; + +struct suspend_always { + bool await_ready() const noexcept; + void await_suspend(coroutine_handle<>) const noexcept; + void await_resume() const noexcept; +}; +} // namespace std + +class Task { +public: + struct promise_type { + public: + std::suspend_always initial_suspend() noexcept; + std::suspend_always final_suspend() noexcept; + + Task get_return_object() noexcept; + void unhandled_exception() noexcept; + void return_value(int value) noexcept; + }; +}; + +Task Foo() noexcept { + // ICE'd + co_return({ int frame = 0; 0; }); +} From 8f70d16c9ab2d7c060f5c92a31a0fc4a82349897 Mon Sep 17 00:00:00 2001 From: Tyler Chatow Date: Thu, 26 May 2022 15:16:02 +0200 Subject: [PATCH 590/908] [clang-format] Handle attributes in enum declaration. 
Fixes https://github.com/llvm/llvm-project/issues/55457 Ensures that attributes in the enum declaration are interpreted correctly, for instance: ``` enum class [[nodiscard]] E { a, b }; ``` Reviewed By: MyDeveloperDay, curdeius Differential Revision: https://reviews.llvm.org/D125848 --- clang/lib/Format/UnwrappedLineParser.cpp | 9 +++++- clang/unittests/Format/FormatTest.cpp | 36 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index a20562dd77a4cd..0611e9eace476f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -3419,11 +3419,18 @@ bool UnwrappedLineParser::parseEnum() { while (FormatTok->Tok.getIdentifierInfo() || FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, - tok::greater, tok::comma, tok::question)) { + tok::greater, tok::comma, tok::question, + tok::l_square, tok::r_square)) { nextToken(); // We can have macros or attributes in between 'enum' and the enum name. if (FormatTok->is(tok::l_paren)) parseParens(); + if (FormatTok->is(TT_AttributeSquare)) { + parseSquare(); + // Consume the closing TT_AttributeSquare. 
+ if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) + nextToken(); + } if (FormatTok->is(tok::identifier)) { nextToken(); // If there are two identifiers in a row, this is likely an elaborate diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 3621ba66781872..44ef882fe7db4c 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -3573,6 +3573,7 @@ TEST_F(FormatTest, FormatsEnum) { verifyFormat("enum X E {} d;"); verifyFormat("enum __attribute__((...)) E {} d;"); verifyFormat("enum __declspec__((...)) E {} d;"); + verifyFormat("enum [[nodiscard]] E {} d;"); verifyFormat("enum {\n" " Bar = Foo::value\n" "};", @@ -3619,6 +3620,17 @@ TEST_F(FormatTest, FormatsEnum) { "};", EightIndent); + verifyFormat("enum [[nodiscard]] E {\n" + " ONE,\n" + " TWO,\n" + "};"); + verifyFormat("enum [[nodiscard]] E {\n" + " // Comment 1\n" + " ONE,\n" + " // Comment 2\n" + " TWO,\n" + "};"); + // Not enums. verifyFormat("enum X f() {\n" " a();\n" @@ -3666,7 +3678,19 @@ TEST_F(FormatTest, FormatsEnumStruct) { verifyFormat("enum struct X E {} d;"); verifyFormat("enum struct __attribute__((...)) E {} d;"); verifyFormat("enum struct __declspec__((...)) E {} d;"); + verifyFormat("enum struct [[nodiscard]] E {} d;"); verifyFormat("enum struct X f() {\n a();\n return 42;\n}"); + + verifyFormat("enum struct [[nodiscard]] E {\n" + " ONE,\n" + " TWO,\n" + "};"); + verifyFormat("enum struct [[nodiscard]] E {\n" + " // Comment 1\n" + " ONE,\n" + " // Comment 2\n" + " TWO,\n" + "};"); } TEST_F(FormatTest, FormatsEnumClass) { @@ -3683,7 +3707,19 @@ TEST_F(FormatTest, FormatsEnumClass) { verifyFormat("enum class X E {} d;"); verifyFormat("enum class __attribute__((...)) E {} d;"); verifyFormat("enum class __declspec__((...)) E {} d;"); + verifyFormat("enum class [[nodiscard]] E {} d;"); verifyFormat("enum class X f() {\n a();\n return 42;\n}"); + + verifyFormat("enum class [[nodiscard]] E {\n" + " ONE,\n" + " 
TWO,\n" + "};"); + verifyFormat("enum class [[nodiscard]] E {\n" + " // Comment 1\n" + " ONE,\n" + " // Comment 2\n" + " TWO,\n" + "};"); } TEST_F(FormatTest, FormatsEnumTypes) { From 634c8ef69a836f3436d027b03965965bad6f3ff0 Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Wed, 25 May 2022 12:39:23 -0700 Subject: [PATCH 591/908] [PS5] Allow dllimport/dllexport same as PS4 --- clang/include/clang/Basic/TargetInfo.h | 4 ++-- clang/lib/Sema/SemaTemplate.cpp | 4 ++-- clang/test/CodeGen/ps4-dllimport-dllexport.c | 8 ++------ clang/test/CodeGenCXX/dllexport-vtable-thunks.cpp | 1 + clang/test/CodeGenCXX/ps4-dllstorage-vtable-rtti.cpp | 1 + ...windows-implicit-dllexport-template-specialization.cpp | 1 + clang/test/CodeGenCXX/windows-itanium-dllexport.cpp | 1 + clang/test/Sema/dllimport.c | 1 + clang/test/SemaCXX/dllexport.cpp | 1 + clang/test/SemaCXX/dllimport.cpp | 1 + llvm/include/llvm/ADT/Triple.h | 2 +- 11 files changed, 14 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 470d153d845de6..e4b5f0b751c488 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1195,12 +1195,12 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Microsoft C++ code using dllimport/export attributes? virtual bool shouldDLLImportComdatSymbols() const { return getTriple().isWindowsMSVCEnvironment() || - getTriple().isWindowsItaniumEnvironment() || getTriple().isPS4(); + getTriple().isWindowsItaniumEnvironment() || getTriple().isPS(); } // Does this target have PS4 specific dllimport/export handling? virtual bool hasPS4DLLImportExport() const { - return getTriple().isPS4() || + return getTriple().isPS() || // Windows Itanium support allows for testing the SCEI flavour of // dllimport/export handling on a Windows system. 
(getTriple().isWindowsItaniumEnvironment() && diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 4a42969c34d3fc..b11b97e12403c5 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -9760,7 +9760,7 @@ DeclResult Sema::ActOnExplicitInstantiation( if (!getDLLAttr(Def) && getDLLAttr(Specialization) && (Context.getTargetInfo().shouldDLLImportComdatSymbols() && - !Context.getTargetInfo().getTriple().isPS4())) { + !Context.getTargetInfo().getTriple().isPS())) { // An explicit instantiation definition can add a dll attribute to a // template with a previous instantiation declaration. MinGW doesn't // allow this. @@ -9778,7 +9778,7 @@ DeclResult Sema::ActOnExplicitInstantiation( !PreviouslyDLLExported && Specialization->hasAttr(); if (Old_TSK == TSK_ImplicitInstantiation && NewlyDLLExported && (Context.getTargetInfo().shouldDLLImportComdatSymbols() && - !Context.getTargetInfo().getTriple().isPS4())) { + !Context.getTargetInfo().getTriple().isPS())) { // An explicit instantiation definition can add a dll attribute to a // template with a previous implicit instantiation. MinGW doesn't allow // this. 
We limit clang to only adding dllexport, to avoid potentially diff --git a/clang/test/CodeGen/ps4-dllimport-dllexport.c b/clang/test/CodeGen/ps4-dllimport-dllexport.c index a945000e7e3232..efcdc4d7a5c482 100644 --- a/clang/test/CodeGen/ps4-dllimport-dllexport.c +++ b/clang/test/CodeGen/ps4-dllimport-dllexport.c @@ -1,9 +1,5 @@ -// RUN: %clang_cc1 \ -// RUN: -triple x86_64-scei-ps4 \ -// RUN: -fdeclspec \ -// RUN: -Werror \ -// RUN: -emit-llvm %s -o - | \ -// RUN: FileCheck %s +// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fdeclspec -Werror -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fdeclspec -Werror -emit-llvm %s -o - | FileCheck %s __declspec(dllexport) int export_int; diff --git a/clang/test/CodeGenCXX/dllexport-vtable-thunks.cpp b/clang/test/CodeGenCXX/dllexport-vtable-thunks.cpp index f0327567d5d080..432ef7835ddc5b 100644 --- a/clang/test/CodeGenCXX/dllexport-vtable-thunks.cpp +++ b/clang/test/CodeGenCXX/dllexport-vtable-thunks.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -triple x86_64-windows-gnu -fdeclspec -emit-llvm -o - %s | FileCheck %s -DDSO_ATTRS="dso_local dllexport" // RUN: %clang_cc1 -triple x86_64-windows-itanium -fdeclspec -emit-llvm -o - %s | FileCheck %s -DDSO_ATTRS="dso_local dllexport" // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fdeclspec -emit-llvm -o - %s | FileCheck %s -DDSO_ATTRS=dllexport +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fdeclspec -emit-llvm -o - %s | FileCheck %s -DDSO_ATTRS=dllexport struct __declspec(dllexport) A { virtual void m(); diff --git a/clang/test/CodeGenCXX/ps4-dllstorage-vtable-rtti.cpp b/clang/test/CodeGenCXX/ps4-dllstorage-vtable-rtti.cpp index 87d0686d34980f..77f0be81a69352 100644 --- a/clang/test/CodeGenCXX/ps4-dllstorage-vtable-rtti.cpp +++ b/clang/test/CodeGenCXX/ps4-dllstorage-vtable-rtti.cpp @@ -39,6 +39,7 @@ // RUN: %clang_cc1 -no-opaque-pointers -I%S -fdeclspec -triple x86_64-unknown-windows-itanium -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s 
-check-prefix=WI // RUN: %clang_cc1 -no-opaque-pointers -I%S -fdeclspec -triple x86_64-scei-windows-itanium -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefixes=PS4,SCEI_WI // RUN: %clang_cc1 -no-opaque-pointers -I%S -fdeclspec -triple x86_64-scei-ps4 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefixes=PS4,SCEI_PS4 +// RUN: %clang_cc1 -no-opaque-pointers -I%S -fdeclspec -triple x86_64-sie-ps5 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefixes=PS4,SCEI_PS4 #include diff --git a/clang/test/CodeGenCXX/windows-implicit-dllexport-template-specialization.cpp b/clang/test/CodeGenCXX/windows-implicit-dllexport-template-specialization.cpp index 726be735502292..3a5693275824fd 100644 --- a/clang/test/CodeGenCXX/windows-implicit-dllexport-template-specialization.cpp +++ b/clang/test/CodeGenCXX/windows-implicit-dllexport-template-specialization.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -std=c++11 -triple i686-windows -fdeclspec -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-MS // RUN: %clang_cc1 -std=c++11 -triple i686-windows-itanium -fdeclspec -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-IA // RUN: %clang_cc1 -std=c++11 -triple x86_64-scei-ps4 -fdeclspec -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-PS4 +// RUN: %clang_cc1 -std=c++11 -triple x86_64-sie-ps5 -fdeclspec -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-PS4 template struct s {}; diff --git a/clang/test/CodeGenCXX/windows-itanium-dllexport.cpp b/clang/test/CodeGenCXX/windows-itanium-dllexport.cpp index 6b30369e6835cc..c09fa30d761af4 100644 --- a/clang/test/CodeGenCXX/windows-itanium-dllexport.cpp +++ b/clang/test/CodeGenCXX/windows-itanium-dllexport.cpp @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -emit-llvm -triple i686-windows-itanium -fdeclspec %s -o - | FileCheck %s --check-prefixes=CHECK,WI // RUN: %clang_cc1 -emit-llvm -triple x86_64-scei-ps4 -fdeclspec %s -o - | FileCheck %s 
--check-prefixes=CHECK,PS4 +// RUN: %clang_cc1 -emit-llvm -triple x86_64-sie-ps5 -fdeclspec %s -o - | FileCheck %s --check-prefixes=CHECK,PS4 #define JOIN2(x, y) x##y #define JOIN(x, y) JOIN2(x, y) diff --git a/clang/test/Sema/dllimport.c b/clang/test/Sema/dllimport.c index e5e378379c6140..26ee0a7e80ed63 100644 --- a/clang/test/Sema/dllimport.c +++ b/clang/test/Sema/dllimport.c @@ -7,6 +7,7 @@ // RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c11 -DWI %s // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fms-extensions -verify -std=c11 -DWI %s // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fms-extensions -verify -std=c99 -DWI %s +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fms-extensions -verify -std=c11 -DWI %s // Invalid usage. __declspec(dllimport) typedef int typedef1; diff --git a/clang/test/SemaCXX/dllexport.cpp b/clang/test/SemaCXX/dllexport.cpp index b4af68086b940b..4aa1427563a2e1 100644 --- a/clang/test/SemaCXX/dllexport.cpp +++ b/clang/test/SemaCXX/dllexport.cpp @@ -5,6 +5,7 @@ // RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fdeclspec -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s // Helper structs to make templates more expressive. 
struct ImplicitInst_Exported {}; diff --git a/clang/test/SemaCXX/dllimport.cpp b/clang/test/SemaCXX/dllimport.cpp index 642cdd07a66420..c5291d925523d9 100644 --- a/clang/test/SemaCXX/dllimport.cpp +++ b/clang/test/SemaCXX/dllimport.cpp @@ -7,6 +7,7 @@ // RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++17 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++17 -Wunsupported-dll-base-class-template -DWI %s +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fdeclspec -verify -std=c++17 -Wunsupported-dll-base-class-template -DWI %s // Helper structs to make templates more expressive. struct ImplicitInst_Imported {}; diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index b1deb3e7b9a7a5..e54663c6eff062 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -928,7 +928,7 @@ class Triple { } /// Tests if the environment supports dllimport/export annotations. - bool hasDLLImportExport() const { return isOSWindows() || isPS4(); } + bool hasDLLImportExport() const { return isOSWindows() || isPS(); } /// @} /// @name Mutators From 610eb39c685ce75333e2fceb2fefc37bec2201b0 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Thu, 26 May 2022 08:35:59 -0500 Subject: [PATCH 592/908] [PowerPC][Future] Add an ISA Future to go with mcpu=future. On PowerPC we have ISA3.0 for Power 9, ISA3.1 for Power 10. This patch adds an ISA for mcpu=future. The idea is to have a placeholder ISA for work that is experimental and may not be supported by existing ISAs. 
Reviewed By: lei Differential Revision: https://reviews.llvm.org/D126075 --- llvm/lib/Target/PowerPC/PPC.td | 6 +++++- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 1 + llvm/lib/Target/PowerPC/PPCScheduleP10.td | 2 +- llvm/lib/Target/PowerPC/PPCScheduleP9.td | 2 +- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 + llvm/lib/Target/PowerPC/PPCSubtarget.h | 2 ++ 6 files changed, 11 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 15c149df5abebf..310bf8125f1c37 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -263,6 +263,10 @@ def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1", "true", "Enable instructions in ISA 3.1.", [FeatureISA3_0]>; +def FeatureISAFuture : SubtargetFeature<"isa-future-instructions", + "IsISAFuture", "true", + "Enable instructions for Future ISA.", + [FeatureISA3_1]>; def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true", "Enable POWER9 Altivec instructions", [FeatureISA3_0, FeatureP8Altivec]>; @@ -430,7 +434,7 @@ def ProcessorFeatures { // Future // For future CPU we assume that all of the existing features from Power10 // still exist with the exception of those we know are Power10 specific. 
- list FutureAdditionalFeatures = []; + list FutureAdditionalFeatures = [FeatureISAFuture]; list FutureSpecificFeatures = []; list FutureInheritableFeatures = !listconcat(P10InheritableFeatures, FutureAdditionalFeatures); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 399c9567cd2d21..212eb4e8f9d1d3 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -704,6 +704,7 @@ def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">, AssemblerPredicate<(any_of (not AIXOS), FeatureModernAIXAs)>; def IsAIX : Predicate<"Subtarget->isAIXABI()">; def NotAIX : Predicate<"!Subtarget->isAIXABI()">; +def IsISAFuture : Predicate<"Subtarget->isISAFuture()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td index bf56491f373a46..f89ef735a36711 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP10.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td @@ -36,7 +36,7 @@ def P10Model : SchedMachineModel { let CompleteModel = 1; // Do not support SPE (Signal Procesing Engine) on Power 10. - let UnsupportedFeatures = [HasSPE, IsE500, IsBookE]; + let UnsupportedFeatures = [HasSPE, IsE500, IsBookE, IsISAFuture]; } let SchedModel = P10Model in { diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index 1dd55037453aba..d3501117171592 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -42,7 +42,7 @@ def P9Model : SchedMachineModel { // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions // introduced in ISA 3.1. 
let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA, - PCRelativeMemops, IsISA3_1]; + PCRelativeMemops, IsISA3_1, IsISAFuture]; } let SchedModel = P9Model in { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 2f453b698ae181..98424234a592e1 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -141,6 +141,7 @@ void PPCSubtarget::initializeEnvironment() { IsISA2_07 = false; IsISA3_0 = false; IsISA3_1 = false; + IsISAFuture = false; UseLongCalls = false; SecurePlt = false; VectorsUseTwoUnits = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 102b0ae4952946..3281816eab4af2 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -160,6 +160,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool IsISA2_07; bool IsISA3_0; bool IsISA3_1; + bool IsISAFuture; bool UseLongCalls; bool SecurePlt; bool VectorsUseTwoUnits; @@ -336,6 +337,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool isISA2_07() const { return IsISA2_07; } bool isISA3_0() const { return IsISA3_0; } bool isISA3_1() const { return IsISA3_1; } + bool isISAFuture() const { return IsISAFuture; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } bool hasStoreFusion() const { return HasStoreFusion; } From 14258d6fb5f311033def5f11432958315fa63683 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 26 May 2022 15:23:49 +0100 Subject: [PATCH 593/908] [SLP] Move canVectorizeLoads implementation to simplify the diff in D105986. NFC. 
--- .../Transforms/Vectorize/SLPVectorizer.cpp | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5efb7148cc03d6..f4e94050fc5f40 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3473,6 +3473,72 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { return None; } +namespace { +/// Tracks the state we can represent the loads in the given sequence. +enum class LoadsState { Gather, Vectorize, ScatterVectorize }; +} // anonymous namespace + +/// Checks if the given array of loads can be represented as a vectorized, +/// scatter or just simple gather. +static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, + const TargetTransformInfo &TTI, + const DataLayout &DL, ScalarEvolution &SE, + SmallVectorImpl &Order, + SmallVectorImpl &PointerOps) { + // Check that a vectorized load would load the same memory as a scalar + // load. For example, we don't want to vectorize loads that are smaller + // than 8-bit. Even though we have a packed struct {} LLVM + // treats loading/storing it as an i8 struct. If we vectorize loads/stores + // from such a struct, we read/write packed bits disagreeing with the + // unvectorized version. + Type *ScalarTy = VL0->getType(); + + if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy)) + return LoadsState::Gather; + + // Make sure all loads in the bundle are simple - we can't vectorize + // atomic or volatile loads. + PointerOps.clear(); + PointerOps.resize(VL.size()); + auto *POIter = PointerOps.begin(); + for (Value *V : VL) { + auto *L = cast(V); + if (!L->isSimple()) + return LoadsState::Gather; + *POIter = L->getPointerOperand(); + ++POIter; + } + + Order.clear(); + // Check the order of pointer operands. 
+ if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) { + Value *Ptr0; + Value *PtrN; + if (Order.empty()) { + Ptr0 = PointerOps.front(); + PtrN = PointerOps.back(); + } else { + Ptr0 = PointerOps[Order.front()]; + PtrN = PointerOps[Order.back()]; + } + Optional Diff = + getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE); + // Check that the sorted loads are consecutive. + if (static_cast(*Diff) == VL.size() - 1) + return LoadsState::Vectorize; + Align CommonAlignment = cast(VL0)->getAlign(); + for (Value *V : VL) + CommonAlignment = + commonAlignment(CommonAlignment, cast(V)->getAlign()); + auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); + if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) && + !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) + return LoadsState::ScatterVectorize; + } + + return LoadsState::Gather; +} + bool clusterSortPtrAccesses(ArrayRef VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl &SortedIndices) { @@ -4306,72 +4372,6 @@ void BoUpSLP::buildTree(ArrayRef Roots) { buildTree_rec(Roots, 0, EdgeInfo()); } -namespace { -/// Tracks the state we can represent the loads in the given sequence. -enum class LoadsState { Gather, Vectorize, ScatterVectorize }; -} // anonymous namespace - -/// Checks if the given array of loads can be represented as a vectorized, -/// scatter or just simple gather. -static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, - const TargetTransformInfo &TTI, - const DataLayout &DL, ScalarEvolution &SE, - SmallVectorImpl &Order, - SmallVectorImpl &PointerOps) { - // Check that a vectorized load would load the same memory as a scalar - // load. For example, we don't want to vectorize loads that are smaller - // than 8-bit. Even though we have a packed struct {} LLVM - // treats loading/storing it as an i8 struct. If we vectorize loads/stores - // from such a struct, we read/write packed bits disagreeing with the - // unvectorized version. 
- Type *ScalarTy = VL0->getType(); - - if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy)) - return LoadsState::Gather; - - // Make sure all loads in the bundle are simple - we can't vectorize - // atomic or volatile loads. - PointerOps.clear(); - PointerOps.resize(VL.size()); - auto *POIter = PointerOps.begin(); - for (Value *V : VL) { - auto *L = cast(V); - if (!L->isSimple()) - return LoadsState::Gather; - *POIter = L->getPointerOperand(); - ++POIter; - } - - Order.clear(); - // Check the order of pointer operands. - if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) { - Value *Ptr0; - Value *PtrN; - if (Order.empty()) { - Ptr0 = PointerOps.front(); - PtrN = PointerOps.back(); - } else { - Ptr0 = PointerOps[Order.front()]; - PtrN = PointerOps[Order.back()]; - } - Optional Diff = - getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE); - // Check that the sorted loads are consecutive. - if (static_cast(*Diff) == VL.size() - 1) - return LoadsState::Vectorize; - Align CommonAlignment = cast(VL0)->getAlign(); - for (Value *V : VL) - CommonAlignment = - commonAlignment(CommonAlignment, cast(V)->getAlign()); - auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); - if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) && - !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) - return LoadsState::ScatterVectorize; - } - - return LoadsState::Gather; -} - /// \return true if the specified list of values has only one instruction that /// requires scheduling, false otherwise. 
#ifndef NDEBUG From 0e3dc1a52ffe8a2c7464d6acaeaec23eca21f4f4 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 26 May 2022 16:42:46 +0200 Subject: [PATCH 594/908] [libc++] Implement ranges::{all, any, none}_of Reviewed By: ldionne, var-const, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D123016 --- libcxx/docs/Status/RangesAlgorithms.csv | 6 +- libcxx/include/CMakeLists.txt | 3 + libcxx/include/__algorithm/ranges_all_of.h | 68 ++++++++ libcxx/include/__algorithm/ranges_any_of.h | 68 ++++++++ libcxx/include/__algorithm/ranges_none_of.h | 68 ++++++++ libcxx/include/algorithm | 27 +++ libcxx/include/module.modulemap | 3 + ...obust_against_copying_comparators.pass.cpp | 12 +- ...obust_against_copying_projections.pass.cpp | 12 +- libcxx/test/libcxx/private_headers.verify.cpp | 3 + .../alg.all_of/ranges.all_of.pass.cpp | 159 ++++++++++++++++++ .../alg.any_of/ranges.any_of.pass.cpp | 159 ++++++++++++++++++ .../alg.none_of/ranges.none_of.pass.cpp | 159 ++++++++++++++++++ .../niebloid.compile.pass.cpp | 6 +- 14 files changed, 735 insertions(+), 18 deletions(-) create mode 100644 libcxx/include/__algorithm/ranges_all_of.h create mode 100644 libcxx/include/__algorithm/ranges_any_of.h create mode 100644 libcxx/include/__algorithm/ranges_none_of.h create mode 100644 libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/ranges.all_of.pass.cpp create mode 100644 libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/ranges.any_of.pass.cpp create mode 100644 libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/ranges.none_of.pass.cpp diff --git a/libcxx/docs/Status/RangesAlgorithms.csv b/libcxx/docs/Status/RangesAlgorithms.csv index c757abd2201d20..283470af0f11c4 100644 --- a/libcxx/docs/Status/RangesAlgorithms.csv +++ b/libcxx/docs/Status/RangesAlgorithms.csv @@ -1,7 +1,7 @@ Category,Algorithm,Assignee,CL,Complete -Search,any_of,Christopher Di Bella,`D105793 `_ -Search,all_of,Christopher Di Bella,`D105793 `_ 
-Search,none_of,Christopher Di Bella,`D105793 `_ +Search,any_of,Nikolas Klauser,`D123016 `_,✅ +Search,all_of,Nikolas Klauser,`D123016 `_,✅ +Search,none_of,Nikolas Klauser,`D123016 `_,✅ Search,find,Nikolas Klauser,`D121248 `_,✅ Search,find_if,Nikolas Klauser,`D121248 `_,✅ Search,find_if_not,Nikolas Klauser,`D121248 `_,✅ diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 65261da1258654..80261bf2a4537c 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -66,6 +66,8 @@ set(files __algorithm/pop_heap.h __algorithm/prev_permutation.h __algorithm/push_heap.h + __algorithm/ranges_all_of.h + __algorithm/ranges_any_of.h __algorithm/ranges_copy.h __algorithm/ranges_copy_backward.h __algorithm/ranges_copy_if.h @@ -88,6 +90,7 @@ set(files __algorithm/ranges_minmax.h __algorithm/ranges_minmax_element.h __algorithm/ranges_mismatch.h + __algorithm/ranges_none_of.h __algorithm/ranges_reverse.h __algorithm/ranges_swap_ranges.h __algorithm/ranges_transform.h diff --git a/libcxx/include/__algorithm/ranges_all_of.h b/libcxx/include/__algorithm/ranges_all_of.h new file mode 100644 index 00000000000000..a146865652a7b3 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_all_of.h @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_ALL_OF_H +#define _LIBCPP___ALGORITHM_RANGES_ALL_OF_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __all_of { +struct __fn { + + template + _LIBCPP_HIDE_FROM_ABI constexpr static + bool __all_of_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { + for (; __first != __last; ++__first) { + if (!std::invoke(__pred, std::invoke(__proj, *__first))) + return false; + } + return true; + } + + template _Sent, class _Proj = identity, + indirect_unary_predicate> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { + return __all_of_impl(std::move(__first), std::move(__last), __pred, __proj); + } + + template , _Proj>> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { + return __all_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); + } +}; +} // namespace __all_of + +inline namespace __cpo { + inline constexpr auto all_of = __all_of::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_ALL_OF_H diff --git a/libcxx/include/__algorithm/ranges_any_of.h b/libcxx/include/__algorithm/ranges_any_of.h new file mode 100644 index 
00000000000000..11c52cbe21eace --- /dev/null +++ b/libcxx/include/__algorithm/ranges_any_of.h @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_ANY_OF_H +#define _LIBCPP___ALGORITHM_RANGES_ANY_OF_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __any_of { +struct __fn { + + template + _LIBCPP_HIDE_FROM_ABI constexpr static + bool __any_of_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { + for (; __first != __last; ++__first) { + if (std::invoke(__pred, std::invoke(__proj, *__first))) + return true; + } + return false; + } + + template _Sent, class _Proj = identity, + indirect_unary_predicate> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { + return __any_of_impl(std::move(__first), std::move(__last), __pred, __proj); + } + + template , _Proj>> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { + return __any_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); + } +}; +} // namespace __any_of + +inline namespace __cpo { + inline constexpr auto any_of = __any_of::__fn{}; +} // 
namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_ANY_OF_H diff --git a/libcxx/include/__algorithm/ranges_none_of.h b/libcxx/include/__algorithm/ranges_none_of.h new file mode 100644 index 00000000000000..706ff5cd741dce --- /dev/null +++ b/libcxx/include/__algorithm/ranges_none_of.h @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_NONE_OF_H +#define _LIBCPP___ALGORITHM_RANGES_NONE_OF_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __none_of { +struct __fn { + + template + _LIBCPP_HIDE_FROM_ABI constexpr static + bool __none_of_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { + for (; __first != __last; ++__first) { + if (std::invoke(__pred, std::invoke(__proj, *__first))) + return false; + } + return true; + } + + template _Sent, class _Proj = identity, + indirect_unary_predicate> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { + return __none_of_impl(std::move(__first), std::move(__last), __pred, __proj); + } + + template , 
_Proj>> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { + return __none_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); + } +}; +} // namespace __none_of + +inline namespace __cpo { + inline constexpr auto none_of = __none_of::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_NONE_OF_H diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index aa2191464a8301..30f647c416f88b 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -305,6 +305,30 @@ namespace ranges { constexpr bool ranges::equal(R1&& r1, R2&& r2, Pred pred = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + template S, class Proj = identity, + indirect_unary_predicate> Pred> + constexpr bool ranges::all_of(I first, S last, Pred pred, Proj proj = {}); // since C++20 + + template, Proj>> Pred> + constexpr bool ranges::all_of(R&& r, Pred pred, Proj proj = {}); // since C++20 + + template S, class Proj = identity, + indirect_unary_predicate> Pred> + constexpr bool ranges::any_of(I first, S last, Pred pred, Proj proj = {}); // since C++20 + + template, Proj>> Pred> + constexpr bool ranges::any_of(R&& r, Pred pred, Proj proj = {}); // since C++20 + + template S, class Proj = identity, + indirect_unary_predicate> Pred> + constexpr bool ranges::none_of(I first, S last, Pred pred, Proj proj = {}); // since C++20 + + template, Proj>> Pred> + constexpr bool ranges::none_of(R&& r, Pred pred, Proj proj = {}); // since C++20 + } constexpr bool // constexpr in C++20 @@ -1021,6 +1045,8 @@ template #include <__algorithm/pop_heap.h> #include <__algorithm/prev_permutation.h> #include <__algorithm/push_heap.h> +#include <__algorithm/ranges_all_of.h> +#include <__algorithm/ranges_any_of.h> #include <__algorithm/ranges_copy.h> #include 
<__algorithm/ranges_copy_backward.h> #include <__algorithm/ranges_copy_if.h> @@ -1043,6 +1069,7 @@ template #include <__algorithm/ranges_minmax.h> #include <__algorithm/ranges_minmax_element.h> #include <__algorithm/ranges_mismatch.h> +#include <__algorithm/ranges_none_of.h> #include <__algorithm/ranges_reverse.h> #include <__algorithm/ranges_swap_ranges.h> #include <__algorithm/ranges_transform.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index cf394de39a2d05..0e43ca4e44b1d2 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -298,6 +298,8 @@ module std [system] { module pop_heap { private header "__algorithm/pop_heap.h" } module prev_permutation { private header "__algorithm/prev_permutation.h" } module push_heap { private header "__algorithm/push_heap.h" } + module ranges_all_of { private header "__algorithm/ranges_all_of.h" } + module ranges_any_of { private header "__algorithm/ranges_any_of.h" } module ranges_copy { private header "__algorithm/ranges_copy.h" } module ranges_copy_backward { private header "__algorithm/ranges_copy_backward.h" } module ranges_copy_if { private header "__algorithm/ranges_copy_if.h" } @@ -320,6 +322,7 @@ module std [system] { module ranges_minmax { private header "__algorithm/ranges_minmax.h" } module ranges_minmax_element { private header "__algorithm/ranges_minmax_element.h" } module ranges_mismatch { private header "__algorithm/ranges_mismatch.h" } + module ranges_none_of { private header "__algorithm/ranges_none_of.h" } module ranges_reverse { private header "__algorithm/ranges_reverse.h" } module ranges_swap_ranges { private header "__algorithm/ranges_swap_ranges.h" } module ranges_transform { private header "__algorithm/ranges_transform.h" } diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp index d5b7dd11f710e1..95f1ed7248df0e 
100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp @@ -92,10 +92,10 @@ constexpr bool all_the_algorithms() int copies = 0; //(void)std::ranges::adjacent_find(first, last, Equal(&copies)); assert(copies == 0); //(void)std::ranges::adjacent_find(a, Equal(&copies)); assert(copies == 0); - //(void)std::ranges::all_of(first, last, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::all_of(a, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::any_of(first, last, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::any_of(a, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::all_of(first, last, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::all_of(a, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::any_of(first, last, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::any_of(a, UnaryTrue(&copies)); assert(copies == 0); //(void)std::ranges::binary_search(first, last, value, Less(&copies)); assert(copies == 0); //(void)std::ranges::binary_search(a, value, Less(&copies)); assert(copies == 0); //(void)std::ranges::clamp(value, value, value, Less(&copies)); assert(copies == 0); @@ -167,8 +167,8 @@ constexpr bool all_the_algorithms() (void)std::ranges::mismatch(a, b, Equal(&copies)); assert(copies == 0); //(void)std::ranges::next_permutation(first, last, Less(&copies)); assert(copies == 0); //(void)std::ranges::next_permutation(a, Less(&copies)); assert(copies == 0); - //(void)std::ranges::none_of(first, last, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::none_of(a, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::none_of(first, last, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::none_of(a, UnaryTrue(&copies)); assert(copies == 0); //(void)std::ranges::nth_element(first, mid, last, Less(&copies)); 
assert(copies == 0); //(void)std::ranges::nth_element(a, mid, Less(&copies)); assert(copies == 0); //(void)std::ranges::partial_sort(first, mid, last, Less(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp index 8d6753d5620824..9efb19d6e3ee02 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp @@ -74,10 +74,10 @@ constexpr bool all_the_algorithms() int copies = 0; //(void)std::ranges::adjacent_find(first, last, Equal(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::adjacent_find(a, Equal(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::all_of(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::all_of(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::any_of(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::any_of(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::all_of(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::all_of(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::any_of(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::any_of(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::binary_search(first, last, value, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::binary_search(a, value, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::clamp(T(), T(), T(), Less(), Proj(&copies)); assert(copies == 0); @@ -150,8 +150,8 @@ constexpr bool all_the_algorithms() (void)std::ranges::mismatch(a, b, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); 
//(void)std::ranges::next_permutation(first, last, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::next_permutation(a, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::none_of(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::none_of(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::none_of(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::none_of(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::nth_element(first, mid, last, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::nth_element(a, mid, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::partial_sort(first, mid, last, Less(), Proj(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index b83308c7b8ead7..d7889f7bb2b8c6 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -103,6 +103,8 @@ END-SCRIPT #include <__algorithm/pop_heap.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pop_heap.h'}} #include <__algorithm/prev_permutation.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/prev_permutation.h'}} #include <__algorithm/push_heap.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/push_heap.h'}} +#include <__algorithm/ranges_all_of.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_all_of.h'}} +#include <__algorithm/ranges_any_of.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_any_of.h'}} #include <__algorithm/ranges_copy.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_copy.h'}} #include <__algorithm/ranges_copy_backward.h> // 
expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_copy_backward.h'}} #include <__algorithm/ranges_copy_if.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_copy_if.h'}} @@ -125,6 +127,7 @@ END-SCRIPT #include <__algorithm/ranges_minmax.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_minmax.h'}} #include <__algorithm/ranges_minmax_element.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_minmax_element.h'}} #include <__algorithm/ranges_mismatch.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_mismatch.h'}} +#include <__algorithm/ranges_none_of.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_none_of.h'}} #include <__algorithm/ranges_reverse.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_reverse.h'}} #include <__algorithm/ranges_swap_ranges.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_swap_ranges.h'}} #include <__algorithm/ranges_transform.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_transform.h'}} diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/ranges.all_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/ranges.all_of.pass.cpp new file mode 100644 index 00000000000000..343d60ae9ef6de --- /dev/null +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/ranges.all_of.pass.cpp @@ -0,0 +1,159 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// template S, class Proj = identity, +// indirect_unary_predicate> Pred> +// constexpr bool ranges::all_of(I first, S last, Pred pred, Proj proj = {}); +// template, Proj>> Pred> +// constexpr bool ranges::all_of(R&& r, Pred pred, Proj proj = {}); + +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +struct UnaryFunctor { + bool operator()(auto&&); +}; + +template > +concept HasAllOfIt = requires(It first, Sent last) { std::ranges::all_of(first, last, UnaryFunctor{}); }; + +static_assert(HasAllOfIt); +static_assert(!HasAllOfIt); +static_assert(!HasAllOfIt); +static_assert(!HasAllOfIt); +static_assert(!HasAllOfIt); +static_assert(!HasAllOfIt); + +template +concept HasAllOfItFunc = requires(int* ptr) { std::ranges::all_of(ptr, ptr, Func{}); }; + +static_assert(HasAllOfItFunc); +static_assert(!HasAllOfItFunc); +static_assert(!HasAllOfItFunc); + +template +concept HasAllOfR = requires(Range range) { std::ranges::all_of(range, UnaryFunctor{}); }; + +static_assert(HasAllOfR>); +static_assert(!HasAllOfR); +static_assert(!HasAllOfR); +static_assert(!HasAllOfR); +static_assert(!HasAllOfR); +static_assert(!HasAllOfR); + +template +concept HasAllOfRFunc = requires(std::array range) { std::ranges::all_of(range, Func{}); }; + +static_assert(HasAllOfRFunc); +static_assert(!HasAllOfRFunc); +static_assert(!HasAllOfRFunc); + +template +constexpr void test_iterators() { + { // simple test + { + int a[] = {1, 2, 3, 4}; + std::same_as decltype(auto) ret = std::ranges::all_of(It(a), Sent(It(a + 4)), [](int) { return true; }); + assert(ret); + } + { + int a[] = {1, 2, 3, 4}; + auto range = std::ranges::subrange(It(a), Sent(It(a + 4))); + std::same_as decltype(auto) ret = 
std::ranges::all_of(range, [](int) { return true; }); + assert(ret); + } + } + + { // check that an empty range works + std::array a; + assert(std::ranges::all_of(It(a.data()), Sent(It(a.data())), [](int) { return false; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data()))); + assert(std::ranges::all_of(range, [](int) { return false; })); + } + + { // check that the complexity requirements are met + { + int predicateCount = 0; + int projectionCount = 0; + auto pred = [&](int) { ++predicateCount; return true; }; + auto proj = [&](int i) { ++projectionCount; return i; }; + std::array a = {9, 7, 5, 3}; + assert(std::ranges::all_of(It(a.begin()), Sent(It(a.end())), pred, proj)); + assert(predicateCount == 4); + assert(projectionCount == 4); + } + { + int predicateCount = 0; + int projectionCount = 0; + auto pred = [&](int) { ++predicateCount; return true; }; + auto proj = [&](int i) { ++projectionCount; return i; }; + std::array a = {9, 7, 5, 3}; + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(std::ranges::all_of(range, pred, proj)); + assert(predicateCount == 4); + assert(projectionCount == 4); + } + } + + { // check that false is returned if no element satisfies the condition + std::array a = {1, 2, 3, 4}; + assert(!std::ranges::all_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i > 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(!std::ranges::all_of(range, [](int i) { return i > 5; })); + } + + { // check that true is returned if all elements satisfy the condition + std::array a = {1, 2, 3, 4}; + assert(std::ranges::all_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i < 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(std::ranges::all_of(range, [](int i) { return i < 5; })); + } + + { // check that false is returned if ony one elements satisfies the condition + 
std::array a = {1, 2, 3, 4, 6}; + assert(!std::ranges::all_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i > 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(!std::ranges::all_of(range, [](int i) { return i > 5; })); + } +} + +constexpr bool test() { + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + + { // check that std::invoke is used + struct S { int check; int other; }; + S a[] = {{1, 2}, {1, 7}, {1, 3}}; + assert(std::ranges::all_of(a, a + 3, [](int i) { return i == 1; }, &S::check)); + assert(std::ranges::all_of(a, [](int i) { return i == 1; }, &S::check)); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/ranges.any_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/ranges.any_of.pass.cpp new file mode 100644 index 00000000000000..28045e6b286070 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/ranges.any_of.pass.cpp @@ -0,0 +1,159 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// template S, class Proj = identity, +// indirect_unary_predicate> Pred> +// constexpr bool ranges::any_of(I first, S last, Pred pred, Proj proj = {}); +// template, Proj>> Pred> +// constexpr bool ranges::any_of(R&& r, Pred pred, Proj proj = {}); + +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +struct UnaryFunctor { + bool operator()(auto&&); +}; + +template > +concept HasAnyOfIt = requires(It first, Sent last) { std::ranges::any_of(first, last, UnaryFunctor{}); }; + +static_assert(HasAnyOfIt); +static_assert(!HasAnyOfIt); +static_assert(!HasAnyOfIt); +static_assert(!HasAnyOfIt); +static_assert(!HasAnyOfIt); +static_assert(!HasAnyOfIt); + +template +concept HasAnyOfItFunc = requires(int* ptr) { std::ranges::any_of(ptr, ptr, Func{}); }; + +static_assert(HasAnyOfItFunc); +static_assert(!HasAnyOfItFunc); +static_assert(!HasAnyOfItFunc); + +template +concept HasAnyOfR = requires(Range range) { std::ranges::any_of(range, UnaryFunctor{}); }; + +static_assert(HasAnyOfR>); +static_assert(!HasAnyOfR); +static_assert(!HasAnyOfR); +static_assert(!HasAnyOfR); +static_assert(!HasAnyOfR); +static_assert(!HasAnyOfR); + +template +concept HasAnyOfRFunc = requires(std::array range) { std::ranges::any_of(range, Func{}); }; + +static_assert(HasAnyOfRFunc); +static_assert(!HasAnyOfRFunc); +static_assert(!HasAnyOfRFunc); + +template +constexpr void test_iterators() { + { // simple test + { + int a[] = {1, 2, 3, 4}; + std::same_as decltype(auto) ret = std::ranges::any_of(It(a), Sent(It(a + 4)), [](int) { return true; }); + assert(ret); + } + { + int a[] = {1, 2, 3, 4}; + auto range = std::ranges::subrange(It(a), Sent(It(a + 4))); + std::same_as decltype(auto) ret = 
std::ranges::any_of(range, [](int) { return true; }); + assert(ret); + } + } + + { // check that an empty range works + std::array a; + assert(!std::ranges::any_of(It(a.data()), Sent(It(a.data())), [](int) { return false; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data()))); + assert(!std::ranges::any_of(range, [](int) { return false; })); + } + + { // check that the complexity requirements are met + { + int predicateCount = 0; + int projectionCount = 0; + auto pred = [&](int) { ++predicateCount; return false; }; + auto proj = [&](int i) { ++projectionCount; return i; }; + std::array a = {9, 7, 5, 3}; + assert(!std::ranges::any_of(It(a.begin()), Sent(It(a.end())), pred, proj)); + assert(predicateCount == 4); + assert(projectionCount == 4); + } + { + int predicateCount = 0; + int projectionCount = 0; + auto pred = [&](int) { ++predicateCount; return false; }; + auto proj = [&](int i) { ++projectionCount; return i; }; + std::array a = {9, 7, 5, 3}; + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(!std::ranges::any_of(range, pred, proj)); + assert(predicateCount == 4); + assert(projectionCount == 4); + } + } + + { // check that false is returned if no element satisfies the condition + std::array a = {1, 2, 3, 4}; + assert(!std::ranges::any_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i > 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(!std::ranges::any_of(range, [](int i) { return i > 5; })); + } + + { // check that true is returned if all elements satisfy the condition + std::array a = {1, 2, 3, 4}; + assert(std::ranges::any_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i < 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(std::ranges::any_of(range, [](int i) { return i < 5; })); + } + + { // check that true is returned if ony one elements satisfies the 
condition + std::array a = {1, 2, 3, 4, 6}; + assert(std::ranges::any_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i > 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(std::ranges::any_of(range, [](int i) { return i > 5; })); + } +} + +constexpr bool test() { + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + + { // check that std::invoke is used + struct S { int check; int other; }; + S a[] = {{1, 2}, {1, 7}, {1, 3}}; + assert(std::ranges::any_of(a, a + 3, [](int i) { return i == 1; }, &S::check)); + assert(std::ranges::any_of(a, [](int i) { return i == 1; }, &S::check)); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/ranges.none_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/ranges.none_of.pass.cpp new file mode 100644 index 00000000000000..1975dfff6b3b26 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/ranges.none_of.pass.cpp @@ -0,0 +1,159 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// template S, class Proj = identity, +// indirect_unary_predicate> Pred> +// constexpr bool ranges::none_of(I first, S last, Pred pred, Proj proj = {}); +// template, Proj>> Pred> +// constexpr bool ranges::none_of(R&& r, Pred pred, Proj proj = {}); + +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +struct UnaryFunctor { + bool operator()(auto&&); +}; + +template > +concept HasNoneOfIt = requires(It first, Sent last) { std::ranges::none_of(first, last, UnaryFunctor{}); }; + +static_assert(HasNoneOfIt); +static_assert(!HasNoneOfIt); +static_assert(!HasNoneOfIt); +static_assert(!HasNoneOfIt); +static_assert(!HasNoneOfIt); +static_assert(!HasNoneOfIt); + +template +concept HasNoneOfItFunc = requires(int* ptr) { std::ranges::none_of(ptr, ptr, Func{}); }; + +static_assert(HasNoneOfItFunc); +static_assert(!HasNoneOfItFunc); +static_assert(!HasNoneOfItFunc); + +template +concept HasNoneOfR = requires(Range range) { std::ranges::none_of(range, UnaryFunctor{}); }; + +static_assert(HasNoneOfR>); +static_assert(!HasNoneOfR); +static_assert(!HasNoneOfR); +static_assert(!HasNoneOfR); +static_assert(!HasNoneOfR); +static_assert(!HasNoneOfR); + +template +concept HasNoneOfRFunc = requires(std::array range) { std::ranges::none_of(range, Func{}); }; + +static_assert(HasNoneOfRFunc); +static_assert(!HasNoneOfRFunc); +static_assert(!HasNoneOfRFunc); + +template +constexpr void test_iterators() { + { // simple test + { + int a[] = {1, 2, 3, 4}; + std::same_as decltype(auto) ret = std::ranges::none_of(It(a), Sent(It(a + 4)), [](int) { return true; }); + assert(!ret); + } + { + int a[] = {1, 2, 3, 4}; + auto range = std::ranges::subrange(It(a), Sent(It(a + 4))); + std::same_as 
decltype(auto) ret = std::ranges::none_of(range, [](int) { return true; }); + assert(!ret); + } + } + + { // check that an empty range works + std::array a; + assert(std::ranges::none_of(It(a.data()), Sent(It(a.data())), [](int) { return false; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data()))); + assert(std::ranges::none_of(range, [](int) { return false; })); + } + + { // check that the complexity requirements are met + { + int predicateCount = 0; + int projectionCount = 0; + auto pred = [&](int) { ++predicateCount; return false; }; + auto proj = [&](int i) { ++projectionCount; return i; }; + std::array a = {9, 7, 5, 3}; + assert(std::ranges::none_of(It(a.begin()), Sent(It(a.end())), pred, proj)); + assert(predicateCount == 4); + assert(projectionCount == 4); + } + { + int predicateCount = 0; + int projectionCount = 0; + auto pred = [&](int) { ++predicateCount; return false; }; + auto proj = [&](int i) { ++projectionCount; return i; }; + std::array a = {9, 7, 5, 3}; + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(std::ranges::none_of(range, pred, proj)); + assert(predicateCount == 4); + assert(projectionCount == 4); + } + } + + { // check that true is returned if no element satisfies the condition + std::array a = {1, 2, 3, 4}; + assert(std::ranges::none_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i > 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(std::ranges::none_of(range, [](int i) { return i > 5; })); + } + + { // check that false is returned if all elements satisfy the condition + std::array a = {1, 2, 3, 4}; + assert(!std::ranges::none_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i < 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(!std::ranges::none_of(range, [](int i) { return i < 5; })); + } + + { // check that false is returned if only one 
element satisfies the condition + std::array a = {1, 2, 3, 4, 6}; + assert(!std::ranges::none_of(It(a.data()), Sent(It(a.data() + a.size())), [](int i) { return i > 5; })); + auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size()))); + assert(!std::ranges::none_of(range, [](int i) { return i > 5; })); + } +} + +constexpr bool test() { + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + + { // check that std::invoke is used + struct S { int check; int other; }; + S a[] = {{1, 2}, {1, 7}, {1, 3}}; + assert(std::ranges::none_of(a, a + 3, [](int i) { return i != 1; }, &S::check)); + assert(std::ranges::none_of(a, [](int i) { return i != 1; }, &S::check)); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp index 03c0563597b985..47707c091c475c 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp @@ -61,8 +61,8 @@ auto odd = [](int x) { return x % 2 != 0; }; // [algorithm.syn] //static_assert(test(std::ranges::adjacent_find, a)); -//static_assert(test(std::ranges::all_of, a, odd)); -//static_assert(test(std::ranges::any_of, a, odd)); +static_assert(test(std::ranges::all_of, a, odd)); +static_assert(test(std::ranges::any_of, a, odd)); //static_assert(test(std::ranges::binary_search, a, 42)); //static_assert(test(std::ranges::clamp, 42, 42, 42)); //static_assert(test(std::ranges::copy, a, a)); @@ -107,7 +107,7 @@ static_assert(test(std::ranges::mismatch, a, a)); //static_assert(test(std::ranges::move, a, a)); 
//static_assert(test(std::ranges::move_backward, a, a)); //static_assert(test(std::ranges::next_permutation, a)); -//static_assert(test(std::ranges::none_of, a, odd)); +static_assert(test(std::ranges::none_of, a, odd)); //static_assert(test(std::ranges::nth_element, a, a+5)); //static_assert(test(std::ranges::partial_sort, a, a+5)); //static_assert(test(std::ranges::partial_sort_copy, a, a)); From ec0ef6809af592cc17c2d67a5e8fae1d4fe65147 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 May 2022 14:50:15 +0000 Subject: [PATCH 595/908] [gn build] Port 0e3dc1a52ffe --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index f3e3e89bdd5c08..b6eba6a91626a3 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -122,6 +122,8 @@ if (current_toolchain == default_toolchain) { "__algorithm/pop_heap.h", "__algorithm/prev_permutation.h", "__algorithm/push_heap.h", + "__algorithm/ranges_all_of.h", + "__algorithm/ranges_any_of.h", "__algorithm/ranges_copy.h", "__algorithm/ranges_copy_backward.h", "__algorithm/ranges_copy_if.h", @@ -144,6 +146,7 @@ if (current_toolchain == default_toolchain) { "__algorithm/ranges_minmax.h", "__algorithm/ranges_minmax_element.h", "__algorithm/ranges_mismatch.h", + "__algorithm/ranges_none_of.h", "__algorithm/ranges_reverse.h", "__algorithm/ranges_swap_ranges.h", "__algorithm/ranges_transform.h", From 3087afb421bf4ca4450d8981a1410e1a09f3794a Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Thu, 26 May 2022 15:47:56 +0100 Subject: [PATCH 596/908] [OpenCL][Doc] Misc improvements related to SPIR-V support. 
--- clang/docs/OpenCLSupport.rst | 33 +++++++++++++++++++++++++-------- clang/docs/UsersManual.rst | 20 +++++++++++++------- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/clang/docs/OpenCLSupport.rst b/clang/docs/OpenCLSupport.rst index 6b05b5df1d0b41..bcd55f6cc84134 100644 --- a/clang/docs/OpenCLSupport.rst +++ b/clang/docs/OpenCLSupport.rst @@ -25,9 +25,19 @@ Clang also supports :ref:`the C++ for OpenCL kernel language ` available. -For general issues and bugs with OpenCL in clang refer to `the GitHub issue -list -`__. + + +Missing features or with limited support +======================================== + +- For general issues and bugs with OpenCL in clang refer to `the GitHub issue + list + `__. + +- Command-line flag :ref:`-cl-ext ` (used to override extensions/ + features supported by a target) is missing support of some functionality i.e. that is + implemented fully through libraries (see :ref:`library-based features and + extensions `). Internals Manual ================ @@ -213,18 +223,22 @@ indicating the presence of the extension should be added to clang. The default flow for adding a new extension into the frontend is to modify `OpenCLExtensions.def -`_ +`__, +containing the list of all extensions and optional features supported by +the frontend. This will add the macro automatically and also add a field in the target options ``clang::TargetOptions::OpenCLFeaturesMap`` to control the exposure of the new extension during the compilation. -Note that by default targets like `SPIR` or `X86` expose all the OpenCL +Note that by default targets like `SPIR-V`, `SPIR` or `X86` expose all the OpenCL extensions. For all other targets the configuration has to be made explicitly. Note that the target extension support performed by clang can be overridden with :ref:`-cl-ext ` command-line flags. +.. 
_opencl_ext_libs: + **Library functionality** If an extension adds functionality that does not modify standard language @@ -239,7 +253,9 @@ for more details refer to :ref:`the section on the OpenCL Header `. The macros indicating the presence of such extensions can be added in the standard header files conditioned on target specific predefined macros or/and language version -predefined macros. +predefined macros (see `feature/extension preprocessor macros defined in +opencl-c-base.h +`__). **Pragmas** @@ -336,8 +352,9 @@ user should specify both (extension and feature) in command-line flag: .. code-block:: console - $ clang -cc1 -cl-std=CL3.0 -cl-ext=+cl_khr_fp64,+__opencl_c_fp64 ... - $ clang -cc1 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,-__opencl_c_fp64 ... + $ clang -cl-std=CL3.0 -cl-ext=+cl_khr_fp64,+__opencl_c_fp64 ... + $ clang -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,-__opencl_c_fp64 ... + OpenCL C 3.0 Implementation Status diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 45de85d342c800..b8c468333a67bc 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3065,7 +3065,7 @@ Compiling to bitcode can be done as follows: This will produce a file `test.bc` that can be used in vendor toolchains to perform machine code generation. -Note that if compiled to bitcode for generic targets such as SPIR, +Note that if compiled to bitcode for generic targets such as SPIR/SPIR-V, portable IR is produced that can be used with various vendor tools as well as open source tools such as `SPIRV-LLVM Translator `_ @@ -3073,15 +3073,18 @@ to produce SPIR-V binary. More details are provided in `the offline compilation from OpenCL kernel sources into SPIR-V using open source tools `_. +From clang 14 onwards SPIR-V can be generated directly as detailed in +:ref:`the SPIR-V support section `. Clang currently supports OpenCL C language standards up to v2.0. Clang mainly supports full profile. 
There is only very limited support of the embedded profile. -Starting from clang 9 a C++ mode is available for OpenCL (see +From clang 9 a C++ mode is available for OpenCL (see :ref:`C++ for OpenCL `). OpenCL v3.0 support is complete but it remains in experimental state, see more -details about the experimental features in :doc:`OpenCLSupport` page. +details about the experimental features and limitations in :doc:`OpenCLSupport` +page. OpenCL Specific Options ----------------------- @@ -3137,7 +3140,7 @@ Example disabling double support for the 64-bit SPIR-V target: .. code-block:: console - $ clang -target spirv64 -cl-ext=-cl_khr_fp64 test.cl + $ clang -c -target spirv64 -cl-ext=-cl_khr_fp64 test.cl Enabling all extensions except double support in R600 AMD GPU can be done using: @@ -3214,8 +3217,6 @@ Generic Targets however :option:`-cl-ext` flag can be used to toggle individual extensions and features. - - .. _opencl_header: OpenCL Header @@ -3427,6 +3428,7 @@ Example of use: .. code-block:: console clang -cl-std=clc++1.0 test.clcpp + clang -cl-std=clc++ -c -target spirv64 test.cl By default, files with ``.clcpp`` extension are compiled with the C++ for @@ -3640,7 +3642,7 @@ To generate SPIR-V binaries, Clang uses the external ``llvm-spirv`` tool from th Prior to the generation of SPIR-V binary with Clang, ``llvm-spirv`` should be built or installed. Please refer to `the following instructions `_ -for more details. Clang will expects the ``llvm-spirv`` executable to +for more details. Clang will expect the ``llvm-spirv`` executable to be present in the ``PATH`` environment variable. Clang uses ``llvm-spirv`` with `the widely adopted assembly syntax package `_. @@ -3692,6 +3694,10 @@ installation instructions $ clang -target spirv64 test1.cl test2.cl +More information about the SPIR-V target settings and supported versions of SPIR-V +format can be found in `the SPIR-V target guide +`__. + .. 
_clang-cl: clang-cl From aee6b8efd09c014348c96b81d64961f8e7a4ab6a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 25 May 2022 11:20:02 -0700 Subject: [PATCH 597/908] [ADT] Explicitly delete copy/move constructors and operator= in IntervalMap The default implementations will perform a shallow copy instead of a deep copy, causing some internal data structures to be shared between different objects. Disable these operations so they don't get accidentally used. Differential Revision: https://reviews.llvm.org/D126401 --- llvm/include/llvm/ADT/IntervalMap.h | 11 +++++++++++ llvm/unittests/ADT/IntervalMapTest.cpp | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h index 3c65a108dcd86b..57f02df252c07a 100644 --- a/llvm/include/llvm/ADT/IntervalMap.h +++ b/llvm/include/llvm/ADT/IntervalMap.h @@ -1042,6 +1042,17 @@ class IntervalMap { new(&rootLeaf()) RootLeaf(); } + // The default copy/move constructors and assignment operators would perform + // a shallow copy, leading to an incorrect internal state. To prevent + // accidental use, explicitly delete these operators. + // If necessary, implement them to perform a deep copy. + IntervalMap(const IntervalMap &Other) = delete; + IntervalMap(IntervalMap &&Other) = delete; + // Note: these are already implicitly deleted, because RootLeaf (union + // member) has a non-trivial assignment operator (because of std::pair). 
+ IntervalMap &operator=(const IntervalMap &Other) = delete; + IntervalMap &operator=(IntervalMap &&Other) = delete; + ~IntervalMap() { clear(); rootLeaf().~RootLeaf(); diff --git a/llvm/unittests/ADT/IntervalMapTest.cpp b/llvm/unittests/ADT/IntervalMapTest.cpp index 65a9064cbc3bc8..07cda05f4f59ca 100644 --- a/llvm/unittests/ADT/IntervalMapTest.cpp +++ b/llvm/unittests/ADT/IntervalMapTest.cpp @@ -8,6 +8,7 @@ #include "llvm/ADT/IntervalMap.h" #include "gtest/gtest.h" +#include using namespace llvm; @@ -17,6 +18,15 @@ typedef IntervalMap UUMap; typedef IntervalMap> UUHalfOpenMap; +static_assert(!std::is_copy_constructible::value, + "IntervalMap copy constructor should be deleted"); +static_assert(!std::is_move_constructible::value, + "IntervalMap move constructor should be deleted"); +static_assert(!std::is_copy_assignable::value, + "IntervalMap copy assignment should be deleted"); +static_assert(!std::is_move_assignable::value, + "IntervalMap move assignment should be deleted"); + // Empty map tests TEST(IntervalMapTest, EmptyMap) { UUMap::Allocator allocator; From 9c66ed9b73170519a87310444d4624b519c2983b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 May 2022 16:08:31 +0100 Subject: [PATCH 598/908] [SCEV] Add test with loop guarded by assume with an AND condition. Show a missed case where the AND currently blocks applying the information from the assume. 
--- .../max-backedge-taken-count-guard-info.ll | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index bf72608d1a73ab..d8e95705c72d74 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -819,6 +819,43 @@ exit: ret void } +define void @test_guard_assume_and(i32* nocapture readonly %data, i64 %count) { +; CHECK-LABEL: 'test_guard_assume_and' +; CHECK-NEXT: Classifying expressions for: @test_guard_assume_and +; CHECK-NEXT: %cmp.and = and i1 %cmp.ult, %cmp.ne +; CHECK-NEXT: --> (%cmp.ult umin %cmp.ne) U: full-set S: full-set +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-1) S: [0,-1) Exits: (-1 + %count) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_assume_and +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: max backedge-taken count is -2 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %count) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.ult = icmp ult i64 %count, 5 + %cmp.ne = icmp ne i64 %count, 0 + %cmp.and = and i1 %cmp.ult, %cmp.ne + call void @llvm.assume(i1 %cmp.and) + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %idx = getelementptr inbounds i32, i32* %data, i64 %iv + store i32 
1, i32* %idx, align 4 + %iv.next = add nuw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %count + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + ; Function Attrs: nounwind willreturn declare void @llvm.assume(i1 noundef) From 8b0d7634743965948234b666c77393d4dd8535d7 Mon Sep 17 00:00:00 2001 From: Alex Zhikhartsev Date: Wed, 25 May 2022 19:10:57 -0400 Subject: [PATCH 599/908] [DFAJumpThreading] Relax analysis to handle unpredictable initial values Responding to a feature request from the Rust community: https://github.com/rust-lang/rust/issues/80630 void foo(X) { for (...) switch (X) case A X = B case B X = C } Even though the initial switch value is non-constant, the switch statement can still be threaded: the initial value will hit the switch statement but the rest of the state changes will proceed by jumping unconditionally. The early predictability check is relaxed to allow unpredictable values anywhere, but later, after the paths through the switch statement have been enumerated, no non-constant state values are allowed along the paths. Any state value not along a path will be an initial switch value, which can be safely ignored. 
Differential Revision: https://reviews.llvm.org/D124394 --- .../Transforms/Scalar/DFAJumpThreading.cpp | 130 +++++++++--------- .../dfa-jump-threading-analysis.ll | 65 +++++++++ .../Transforms/DFAJumpThreading/negative.ll | 49 +++++++ 3 files changed, 181 insertions(+), 63 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index a715df51b41618..d9254f12e41d2d 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -103,6 +103,11 @@ static cl::opt MaxPathLength( cl::desc("Max number of blocks searched to find a threading path"), cl::Hidden, cl::init(20)); +static cl::opt MaxNumPaths( + "dfa-max-num-paths", + cl::desc("Max number of paths enumerated around a switch"), + cl::Hidden, cl::init(200)); + static cl::opt CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), @@ -415,7 +420,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { struct MainSwitch { MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { - if (isPredictable(SI)) { + if (isCandidate(SI)) { Instr = SI; } else { ORE->emit([&]() { @@ -433,83 +438,60 @@ struct MainSwitch { } private: - /// Do a use-def chain traversal. Make sure the value of the switch variable - /// is always a known constant. This means that all conditional jumps based on - /// switch variable can be converted to unconditional jumps. - bool isPredictable(const SwitchInst *SI) { - std::deque Q; + /// Do a use-def chain traversal starting from the switch condition to see if + /// \p SI is a potential candidate. + /// + /// Also, collect select instructions to unfold. + bool isCandidate(const SwitchInst *SI) { + std::deque Q; SmallSet SeenValues; SelectInsts.clear(); - Value *FirstDef = SI->getOperand(0); - auto *Inst = dyn_cast(FirstDef); - - // If this is a function argument or another non-instruction, then give up. 
- // We are interested in loop local variables. - if (!Inst) + Value *SICond = SI->getCondition(); + LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); + if (!isa(SICond)) return false; - // Require the first definition to be a PHINode - if (!isa(Inst)) - return false; - - LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n"); - - Q.push_back(Inst); - SeenValues.insert(FirstDef); + addToQueue(SICond, Q, SeenValues); while (!Q.empty()) { - Instruction *Current = Q.front(); + Value *Current = Q.front(); Q.pop_front(); if (auto *Phi = dyn_cast(Current)) { for (Value *Incoming : Phi->incoming_values()) { - if (!isPredictableValue(Incoming, SeenValues)) - return false; - addInstToQueue(Incoming, Q, SeenValues); + addToQueue(Incoming, Q, SeenValues); } - LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); } else if (SelectInst *SelI = dyn_cast(Current)) { if (!isValidSelectInst(SelI)) return false; - if (!isPredictableValue(SelI->getTrueValue(), SeenValues) || - !isPredictableValue(SelI->getFalseValue(), SeenValues)) { - return false; - } - addInstToQueue(SelI->getTrueValue(), Q, SeenValues); - addInstToQueue(SelI->getFalseValue(), Q, SeenValues); - LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n"); + addToQueue(SelI->getTrueValue(), Q, SeenValues); + addToQueue(SelI->getFalseValue(), Q, SeenValues); + LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); if (auto *SelIUse = dyn_cast(SelI->user_back())) SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); + } else if (isa(Current)) { + LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); + continue; } else { - // If it is neither a phi nor a select, then we give up. - return false; + LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); + // Allow unpredictable values. 
The hope is that those will be the + // initial switch values that can be ignored (they will hit the + // unthreaded switch) but this assumption will get checked later after + // paths have been enumerated (in function getStateDefMap). + continue; } } return true; } - bool isPredictableValue(Value *InpVal, SmallSet &SeenValues) { - if (SeenValues.contains(InpVal)) - return true; - - if (isa(InpVal)) - return true; - - // If this is a function argument or another non-instruction, then give up. - if (!isa(InpVal)) - return false; - - return true; - } - - void addInstToQueue(Value *Val, std::deque &Q, - SmallSet &SeenValues) { + void addToQueue(Value *Val, std::deque &Q, + SmallSet &SeenValues) { if (SeenValues.contains(Val)) return; - if (Instruction *I = dyn_cast(Val)) - Q.push_back(I); + Q.push_back(Val); SeenValues.insert(Val); } @@ -563,7 +545,16 @@ struct AllSwitchPaths { void run() { VisitedBlocks Visited; PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); - StateDefMap StateDef = getStateDefMap(); + StateDefMap StateDef = getStateDefMap(LoopPaths); + + if (StateDef.empty()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", + Switch) + << "Switch instruction is not predictable."; + }); + return; + } for (PathType Path : LoopPaths) { ThreadingPath TPath; @@ -638,6 +629,9 @@ struct AllSwitchPaths { PathType NewPath(Path); NewPath.push_front(BB); Res.push_back(NewPath); + if (Res.size() >= MaxNumPaths) { + return Res; + } } } // This block could now be visited again from a different predecessor. Note @@ -648,14 +642,22 @@ struct AllSwitchPaths { } /// Walk the use-def chain and collect all the state-defining instructions. - StateDefMap getStateDefMap() const { + /// + /// Return an empty map if unpredictable values encountered inside the basic + /// blocks of \p LoopPaths. 
+ StateDefMap getStateDefMap(const PathsType &LoopPaths) const { StateDefMap Res; + // Basic blocks belonging to any of the loops around the switch statement. + SmallPtrSet LoopBBs; + for (const PathType &Path : LoopPaths) { + for (BasicBlock *BB : Path) + LoopBBs.insert(BB); + } + Value *FirstDef = Switch->getOperand(0); - assert(isa(FirstDef) && "After select unfolding, all state " - "definitions are expected to be phi " - "nodes."); + assert(isa(FirstDef) && "The first definition must be a phi."); SmallVector Stack; Stack.push_back(dyn_cast(FirstDef)); @@ -667,15 +669,17 @@ struct AllSwitchPaths { Res[CurPhi->getParent()] = CurPhi; SeenValues.insert(CurPhi); - for (Value *Incoming : CurPhi->incoming_values()) { + for (BasicBlock *IncomingBB : CurPhi->blocks()) { + Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB); + bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0; if (Incoming == FirstDef || isa(Incoming) || - SeenValues.contains(Incoming)) { + SeenValues.contains(Incoming) || IsOutsideLoops) { continue; } - assert(isa(Incoming) && "After select unfolding, all state " - "definitions are expected to be phi " - "nodes."); + // Any unpredictable value inside the loops means we must bail out. 
+ if (!isa(Incoming)) + return StateDefMap(); Stack.push_back(cast(Incoming)); } @@ -1279,7 +1283,7 @@ bool DFAJumpThreading::run(Function &F) { continue; LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() - << " is predictable\n"); + << " is a candidate\n"); MainSwitch Switch(SI, ORE); if (!Switch.getInstr()) diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index ccc9b38759c414..537fc9ee58cac7 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -184,3 +184,68 @@ bb49: ; preds = %bb43, %bb43 bb66: ; preds = %bb59 ret i32 0 } + +; Value %init is not predictable but it's okay since it is the value initial to the switch. +define i32 @initial.value.positive1(i32 %init) { +; CHECK: < loop.3 case2 > [ 3, loop.3 ] +; CHECK-NEXT: < loop.3 case2 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ] +; CHECK-NEXT: < loop.3 case2 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 4, loop.1.backedge ] +; CHECK-NEXT: < loop.3 case3 loop.2.backedge loop.2 > [ 0, loop.2.backedge ] +; CHECK-NEXT: < loop.3 case3 case4 loop.2.backedge loop.2 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ] +; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 2, loop.1.backedge ] +; CHECK-NEXT: < loop.3 case4 loop.2.backedge loop.2 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < loop.3 case4 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ] +; CHECK-NEXT: < loop.3 case4 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 2, loop.1.backedge ] +entry: + %cmp = icmp eq i32 %init, 0 + br label %loop.1 + +loop.1: + %state.1 = phi i32 [ %init, %entry ], [ %state.1.be2, %loop.1.backedge ] + br label %loop.2 + +loop.2: + %state.2 = phi i32 [ %state.1, %loop.1 ], [ %state.2.be, %loop.2.backedge ] + br label %loop.3 
+ +loop.3: + %state = phi i32 [ %state.2, %loop.2 ], [ 3, %case2 ] + switch i32 %state, label %infloop.i [ + i32 2, label %case2 + i32 3, label %case3 + i32 4, label %case4 + i32 0, label %case0 + i32 1, label %case1 + ] + +case2: + br i1 %cmp, label %loop.3, label %loop.1.backedge + +case3: + br i1 %cmp, label %loop.2.backedge, label %case4 + +case4: + br i1 %cmp, label %loop.2.backedge, label %loop.1.backedge + +loop.1.backedge: + %state.1.be = phi i32 [ 2, %case4 ], [ 4, %case2 ] + %state.1.be2 = select i1 %cmp, i32 1, i32 %state.1.be + br label %loop.1 + +loop.2.backedge: + %state.2.be = phi i32 [ 3, %case4 ], [ 0, %case3 ] + br label %loop.2 + +case0: + br label %exit + +case1: + br label %exit + +infloop.i: + br label %infloop.i + +exit: + ret i32 0 +} diff --git a/llvm/test/Transforms/DFAJumpThreading/negative.ll b/llvm/test/Transforms/DFAJumpThreading/negative.ll index 009ffa3c557db2..bd521d7ee7d22f 100644 --- a/llvm/test/Transforms/DFAJumpThreading/negative.ll +++ b/llvm/test/Transforms/DFAJumpThreading/negative.ll @@ -214,3 +214,52 @@ for.inc: for.end: ret i32 0 } + +declare i32 @arbitrary_function() + +; Don't confuse %state.2 for the initial switch value. 
+define i32 @negative6(i32 %init) { +; REMARK: SwitchNotPredictable +; REMARK-NEXT: negative6 +entry: + %cmp = icmp eq i32 %init, 0 + br label %loop.2 + +loop.2: + %state.2 = call i32 @arbitrary_function() + br label %loop.3 + +loop.3: + %state = phi i32 [ %state.2, %loop.2 ], [ 3, %case2 ] + switch i32 %state, label %infloop.i [ + i32 2, label %case2 + i32 3, label %case3 + i32 4, label %case4 + i32 0, label %case0 + i32 1, label %case1 + ] + +case2: + br label %loop.3 + +case3: + br i1 %cmp, label %loop.2.backedge, label %case4 + +case4: + br label %loop.2.backedge + +loop.2.backedge: + br label %loop.2 + +case0: + br label %exit + +case1: + br label %exit + +infloop.i: + br label %infloop.i + +exit: + ret i32 0 +} From 120d52b0ef8b0d41d5ce5898422d2d2f42047d0b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 24 May 2022 06:53:19 -0700 Subject: [PATCH 600/908] [SLP]Fix PR55653: emit undefs where required, not poison. Need to handle a corner case correctly, if all elements are Undefs/Poisons, need to emit actual values, not just poisons. 
Differential Revision: https://reviews.llvm.org/D126298 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++++ .../SLPVectorizer/AArch64/reorder-fmuladd-crash.ll | 4 ++-- .../test/Transforms/SLPVectorizer/SystemZ/pr34619.ll | 2 +- .../SLPVectorizer/X86/horizontal-minmax.ll | 12 ++++++------ .../Transforms/SLPVectorizer/X86/insert-shuffle.ll | 6 +++--- .../SLPVectorizer/X86/jumbled-load-used-in-phi.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/partail.ll | 4 ++-- .../SLPVectorizer/X86/shrink_after_reorder.ll | 6 +++--- .../SLPVectorizer/X86/shrink_after_reorder2.ll | 2 +- .../X86/vectorize-reorder-alt-shuffle.ll | 4 ++-- .../SLPVectorizer/slp-umax-rdx-matcher-crash.ll | 4 ++-- 11 files changed, 27 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f4e94050fc5f40..791e0193951dd5 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7543,6 +7543,10 @@ Value *BoUpSLP::createBuildVector(ArrayRef VL) { ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), UndefMaskElem); } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { + if (UniqueValues.empty()) { + assert(all_of(VL, UndefValue::classof) && "Expected list of undefs."); + NumValues = VF; + } ReuseShuffleIndicies.clear(); UniqueValues.clear(); UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll index 70bbe65fa2cd50..18365e015e3c36 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll @@ -21,14 +21,14 @@ define i32 @foo() { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* 
[[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> zeroinitializer, <4 x double> [[TMP2]]) ; CHECK-NEXT: br label [[SW_EPILOG:%.*]] ; CHECK: sw.bb195: ; CHECK-NEXT: br label [[SW_EPILOG]] ; CHECK: do.body: ; CHECK-NEXT: unreachable ; CHECK: sw.epilog: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ undef, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ] ; CHECK-NEXT: ret i32 undef ; CHECK: if.end.1: ; CHECK-NEXT: br label [[FOR_COND15_1:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll index 89d66812782178..51f7c0c52ac9b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -16,7 +16,7 @@ define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> poison, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 801b4863e7c19e..262359c0eea86d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1430,8 +1430,8 @@ define void @PR49730() { ; SSE-NEXT: ret void ; ; AVX-LABEL: @PR49730( -; AVX-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; AVX-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]] +; AVX-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; AVX-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]] ; AVX-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]]) ; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[T12]], i32 undef) @@ -1440,8 +1440,8 @@ define void @PR49730() { ; AVX-NEXT: ret void ; ; AVX2-LABEL: @PR49730( -; AVX2-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; AVX2-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]] +; AVX2-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; AVX2-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]] ; AVX2-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef ; AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]]) ; AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[T12]], i32 undef) @@ -1450,8 +1450,8 @@ define void @PR49730() { ; AVX2-NEXT: ret void ; ; THRESH-LABEL: @PR49730( -; THRESH-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; THRESH-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]] +; THRESH-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; THRESH-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]] ; THRESH-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef ; THRESH-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]]) ; THRESH-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[T12]], i32 undef) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll index b64c0e4e35a235..b3879bda223561 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -16,9 +16,9 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1 ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison -; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison +; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], undef +; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], undef ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll index 8de5c70e5f0589..f387f9c1ff8a59 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll @@ -49,7 +49,7 @@ define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias noca ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP14]], [[FOR_INC]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ 
[[TMP14]], [[FOR_INC]] ] ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll index ce195e8d75242b..c4787a6cb8ebca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -18,8 +18,8 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 { ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], poison -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> poison +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll index e64969b09d4de0..c31947a9d7b1cd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -12,9 +12,9 @@ define void @wombat(i32* %ptr, i32* %ptr1) { ; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> -; 
CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], poison -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> poison, <4 x i32> [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> poison, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP27]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll index caf0a3d8357487..533b0dad45a5b1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll @@ -26,7 +26,7 @@ define void @foo(%class.e* %this, %struct.a* %p, i32 %add7) { ; CHECK: sw.epilog: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP5]], [[SW_BB]] ] ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> poison, [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> undef, [[SHUFFLE]] ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[B]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll index 120eec38c6db36..290c0d215cc56a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll @@ 
-12,9 +12,9 @@ define void @foo(i8* %c, float* %d) { ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> poison, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <4 x i32> [[TMP6]] to <4 x float> -; CHECK-NEXT: [[TMP8:%.*]] = fdiv <4 x float> [[TMP7]], poison +; CHECK-NEXT: [[TMP8:%.*]] = fdiv <4 x float> [[TMP7]], undef ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ADD_PTR53]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP9]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll index b2ae15cf6166ff..13c8109b016ed2 100644 --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -43,8 +43,8 @@ declare i32 @llvm.umin.i32(i32, i32) define void @test2() { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> poison, [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77) ; CHECK-NEXT: [[E:%.*]] = icmp ugt i32 [[TMP3]], 1 From 7b809c30b9261b167cf55ce289eccf88c6000c65 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 May 2022 06:27:30 -0700 Subject: [PATCH 601/908] [SLP]Improve compile time, NFC. Patch improves compile time. 
For function calls, which cannot be vectorized, create a unique group for each such a call instead of subgroup. It prevents them from being grouped by a subgroups and attempts for their vectorization. Also, looks through casts operand to try to check their groups/subgroups. Reduces number of vectorization attempts. No changes in the statistics for SPEC2017/2006/llvm-test-suite. Differential Revision: https://reviews.llvm.org/D126476 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 47 +++++++++++++------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 791e0193951dd5..db8f97271db632 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4430,6 +4430,14 @@ static std::pair generateKeySubkey( hash_value(isa(I) ? I->getType() : cast(I)->getOperand(0)->getType())); + // For casts, look through the only operand to improve compile time. 
+ if (isa(I)) { + std::pair OpVals = + generateKeySubkey(I->getOperand(0), TLI, LoadsSubkeyGenerator, + /*=AllowAlternate*/ true); + Key = hash_combine(OpVals.first, Key); + SubKey = hash_combine(OpVals.first, SubKey); + } } else if (auto *CI = dyn_cast(I)) { CmpInst::Predicate Pred = CI->getPredicate(); if (CI->isCommutative()) @@ -4440,13 +4448,15 @@ static std::pair generateKeySubkey( hash_value(CI->getOperand(0)->getType())); } else if (auto *Call = dyn_cast(I)) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, TLI); - if (isTriviallyVectorizable(ID)) + if (isTriviallyVectorizable(ID)) { SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(ID)); - else if (!VFDatabase(*Call).getMappings(*Call).empty()) + } else if (!VFDatabase(*Call).getMappings(*Call).empty()) { SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Call->getCalledFunction())); - else + } else { + Key = hash_combine(hash_value(Call), Key); SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Call)); + } for (const CallBase::BundleOpInfo &Op : Call->bundle_op_infos()) SubKey = hash_combine(hash_value(Op.Begin), hash_value(Op.End), hash_value(Op.Tag), SubKey); @@ -10640,12 +10650,16 @@ class HorizontalReduction { std::tie(Key, Idx) = generateKeySubkey( V, &TLI, [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) { - for (const auto &LoadData : PossibleReducedVals[Key]) { - auto *RLI = cast(LoadData.second.front().first); - if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(), - LI->getType(), LI->getPointerOperand(), - DL, SE, /*StrictCheck=*/true)) - return hash_value(RLI->getPointerOperand()); + auto It = PossibleReducedVals.find(Key); + if (It != PossibleReducedVals.end()) { + for (const auto &LoadData : It->second) { + auto *RLI = cast(LoadData.second.front().first); + if (getPointersDiff(RLI->getType(), + RLI->getPointerOperand(), LI->getType(), + LI->getPointerOperand(), DL, SE, + /*StrictCheck=*/true)) + return 
hash_value(RLI->getPointerOperand()); + } } return hash_value(LI->getPointerOperand()); }, @@ -10661,12 +10675,15 @@ class HorizontalReduction { std::tie(Key, Idx) = generateKeySubkey( TreeN, &TLI, [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) { - for (const auto &LoadData : PossibleReducedVals[Key]) { - auto *RLI = cast(LoadData.second.front().first); - if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(), - LI->getType(), LI->getPointerOperand(), DL, - SE, /*StrictCheck=*/true)) - return hash_value(RLI->getPointerOperand()); + auto It = PossibleReducedVals.find(Key); + if (It != PossibleReducedVals.end()) { + for (const auto &LoadData : It->second) { + auto *RLI = cast(LoadData.second.front().first); + if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(), + LI->getType(), LI->getPointerOperand(), + DL, SE, /*StrictCheck=*/true)) + return hash_value(RLI->getPointerOperand()); + } } return hash_value(LI->getPointerOperand()); }, From afe49934a68afdf44dcd48489d9f5e820bc5424d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 26 May 2022 07:56:38 -0700 Subject: [PATCH 602/908] [RISCV] Allow compatible VTYPE in AVL Reg Forward cases During insertion of VSETVLI, we have two related bits of code which decide whether we can reuse a previous vsetvli result. As was pointed out in the original review, these cases can allow any prior state for which we know that VL is the same for any value of AVL. This was originally separated out of a desire for separate tests and review. As it turns out, finding a test case for this has been quite challenging. Most of the cases I tried, we manage to already get through other chains of logic. We do have one correct test change, but that only exercises one of the two changes. 
Differential Revision: https://reviews.llvm.org/D126400 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 11 +++++++---- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll | 1 - 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 0ee92d3bb20b82..afd80754fa0ca7 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -202,6 +202,9 @@ class VSETVLIInfo { } // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX. + // Note that having the same VLMAX ensures that both share the same + // function from AVL to VL; that is, they must produce the same VL value + // for any given AVL value. bool hasSameVLMAX(const VSETVLIInfo &Other) const { assert(isValid() && Other.isValid() && "Can't compare invalid VSETVLIInfos"); @@ -717,7 +720,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require, return false; // We didn't find a compatible value. If our AVL is a virtual register, - // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need + // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need // and the last VL/VTYPE we observed is the same, we don't need a // VSETVLI here. 
if (!CurInfo.isUnknown() && Require.hasAVLReg() && @@ -726,7 +729,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require, if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { if (isVectorConfigInstr(*DefMI)) { VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo)) + if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo)) return false; } } @@ -1237,7 +1240,7 @@ void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { } } - // If AVL is defined by a vsetvli with the same vtype, we can + // If AVL is defined by a vsetvli with the same VLMAX, we can // replace the AVL operand with the AVL of the defining vsetvli. // We avoid general register AVLs to avoid extending live ranges // without being sure we can kill the original source reg entirely. @@ -1246,7 +1249,7 @@ void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { if (isVectorConfigInstr(*DefMI)) { VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (DefInfo.hasSameVTYPE(Require) && + if (DefInfo.hasSameVLMAX(Require) && (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) { MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (DefInfo.hasAVLImm()) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 9b92926b25f3c3..b554b11a6ab88d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -816,7 +816,6 @@ define @cross_block_mutate( %a, %mask) { From c8eb83f2d007dd3279a9ff459f3464a3cd9568b6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 26 May 2022 17:59:27 +0200 Subject: [PATCH 603/908] [ControlHeightReduction] Use logical and Use logical instead of bitwise and to combine conditions, to avoid propagating poison from a later condition if an earlier one is already 
false. This avoids introducing branch on poison. Differential Revision: https://reviews.llvm.org/D125898 --- .../Instrumentation/ControlHeightReduction.cpp | 3 ++- llvm/test/Transforms/PGOProfile/chr.ll | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 2d1b76293272b9..9611342d1c7ceb 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1981,7 +1981,8 @@ void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, !isGuaranteedNotToBeUndefOrPoison(Cond)) Cond = IRB.CreateFreeze(Cond); - MergedCondition = IRB.CreateAnd(MergedCondition, Cond); + // Use logical and to avoid propagating poison from later conditions. + MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); } void CHR::transformScopes(SmallVectorImpl &CHRScopes) { diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll index 18fd7c0182a589..e412ec5f8805f2 100644 --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -553,7 +553,7 @@ define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[DOTFR1]], 11 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 11 -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i1 [[TMP2]], i1 false ; CHECK-NEXT: br i1 [[TMP5]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: bb0: ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SUM0]], 85 @@ -655,7 +655,7 @@ define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 { ; CHECK-NEXT: [[V10:%.*]] = icmp ne i32 [[V9]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0_FR]], 10 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 10 -; 
CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[V10]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i1 [[V10]], i1 false ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: bb0: ; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 @@ -1123,7 +1123,7 @@ define void @test_chr_11(i32* %i, i32 %x) !prof !14 { ; CHECK-NEXT: [[MUL16:%.*]] = fmul double [[DIV]], [[CONV]] ; CHECK-NEXT: [[CONV717:%.*]] = fptosi double [[MUL16]] to i32 ; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i32 [[CONV717]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[CMP18]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i1 [[CMP18]], i1 false ; CHECK-NEXT: br i1 [[TMP3]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: bb0: ; CHECK-NEXT: call void @foo() @@ -1919,8 +1919,8 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 { ; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i64 [[J:%.*]], [[K:%.*]] ; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i64 [[J]], [[I:%.*]] ; CHECK-NEXT: [[CMP_I:%.*]] = icmp ne i64 [[I]], 86 -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CMP0]], [[CMP3]] -; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[CMP_I]] +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP0]], i1 [[CMP3]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i1 [[CMP_I]], i1 false ; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: bb1: ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[I]], 2 From 3d2b5b7b87857b0cc4c322cfc145c46c42fe2bbf Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Mon, 23 May 2022 22:11:34 -0700 Subject: [PATCH 604/908] [libunwind] Factor out sigreturn check condition. NFC Create a macro for this instead of duplicating the architecture checks everywhere. (It's a little redundant to use it when we're checking for a specific architecture, but I'm also applying it there for consistency.) 
Reviewed By: rprichard, MaskRay, #libunwind Differential Revision: https://reviews.llvm.org/D126342 --- libunwind/src/UnwindCursor.hpp | 35 +++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 7e56eff471596f..01f0c404a2ac45 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -30,6 +30,11 @@ #include #endif +#if defined(_LIBUNWIND_TARGET_LINUX) && \ + (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_S390X)) +#define _LIBUNWIND_CHECK_LINUX_SIGRETURN 1 +#endif + #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) // Provide a definition for the DISPATCHER_CONTEXT struct for old (Win7 and // earlier) SDKs. @@ -953,7 +958,7 @@ class UnwindCursor : public AbstractUnwindCursor{ } #endif -#if defined(_LIBUNWIND_TARGET_LINUX) && (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_S390X)) +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) bool setInfoForSigReturn() { R dummy; return setInfoForSigReturn(dummy); @@ -962,14 +967,14 @@ class UnwindCursor : public AbstractUnwindCursor{ R dummy; return stepThroughSigReturn(dummy); } - #if defined(_LIBUNWIND_TARGET_AARCH64) +#if defined(_LIBUNWIND_TARGET_AARCH64) bool setInfoForSigReturn(Registers_arm64 &); int stepThroughSigReturn(Registers_arm64 &); - #endif - #if defined(_LIBUNWIND_TARGET_S390X) +#endif +#if defined(_LIBUNWIND_TARGET_S390X) bool setInfoForSigReturn(Registers_s390x &); int stepThroughSigReturn(Registers_s390x &); - #endif +#endif template bool setInfoForSigReturn(Registers &) { return false; } @@ -1264,7 +1269,7 @@ class UnwindCursor : public AbstractUnwindCursor{ unw_proc_info_t _info; bool _unwindInfoMissing; bool _isSignalFrame; -#if defined(_LIBUNWIND_TARGET_LINUX) && (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_S390X)) +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) bool _isSigReturn = false; #endif }; @@ -2471,7 +2476,7 @@ int 
UnwindCursor::stepWithTBTable(pint_t pc, tbtable *TBTable, template void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { -#if defined(_LIBUNWIND_TARGET_LINUX) && (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_S390X)) +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) _isSigReturn = false; #endif @@ -2586,7 +2591,7 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { } #endif // #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) -#if defined(_LIBUNWIND_TARGET_LINUX) && (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_S390X)) +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) if (setInfoForSigReturn()) return; #endif @@ -2595,7 +2600,8 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { _unwindInfoMissing = true; } -#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) && \ + defined(_LIBUNWIND_TARGET_AARCH64) template bool UnwindCursor::setInfoForSigReturn(Registers_arm64 &) { // Look for the sigreturn trampoline. The trampoline's body is two @@ -2657,9 +2663,11 @@ int UnwindCursor::stepThroughSigReturn(Registers_arm64 &) { _isSignalFrame = true; return UNW_STEP_SUCCESS; } -#endif // defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) +#endif // defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) && + // defined(_LIBUNWIND_TARGET_AARCH64) -#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_S390X) +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) && \ + defined(_LIBUNWIND_TARGET_S390X) template bool UnwindCursor::setInfoForSigReturn(Registers_s390x &) { // Look for the sigreturn trampoline. 
The trampoline's body is a @@ -2755,7 +2763,8 @@ int UnwindCursor::stepThroughSigReturn(Registers_s390x &) { return UNW_STEP_SUCCESS; } -#endif // defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_S390X) +#endif // defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) && + // defined(_LIBUNWIND_TARGET_S390X) template int UnwindCursor::step() { @@ -2765,7 +2774,7 @@ int UnwindCursor::step() { // Use unwinding info to modify register set as if function returned. int result; -#if defined(_LIBUNWIND_TARGET_LINUX) && (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_S390X)) +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) if (_isSigReturn) { result = this->stepThroughSigReturn(); } else From 0be0a53df65cb402359c257922d80ab93d86fb40 Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Wed, 25 May 2022 21:39:12 -0700 Subject: [PATCH 605/908] [libunwind] Use process_vm_readv to avoid potential segfaults We've observed segfaults in libunwind when attempting to check for the Linux aarch64 sigreturn frame, presumably because of bad unwind info leading to an incorrect PC that we attempt to read from. Use process_vm_readv to read the memory safely instead. The s390x code path should likely follow suit, but I don't have the hardware to be able to test that, so I didn't modify it here either. 
Reviewed By: MaskRay, rprichard, #libunwind Differential Revision: https://reviews.llvm.org/D126343 --- libunwind/src/UnwindCursor.hpp | 33 +++++++++---- libunwind/test/bad_unwind_info.pass.cpp | 66 +++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 9 deletions(-) create mode 100644 libunwind/test/bad_unwind_info.pass.cpp diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 01f0c404a2ac45..6e8fc45ebfa8f3 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -32,6 +32,9 @@ #if defined(_LIBUNWIND_TARGET_LINUX) && \ (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_S390X)) +#include +#include +#include #define _LIBUNWIND_CHECK_LINUX_SIGRETURN 1 #endif @@ -2620,16 +2623,28 @@ bool UnwindCursor::setInfoForSigReturn(Registers_arm64 &) { // // [1] https://github.com/torvalds/linux/blob/master/arch/arm64/kernel/vdso/sigreturn.S const pint_t pc = static_cast(this->getReg(UNW_REG_IP)); + // The PC might contain an invalid address if the unwind info is bad, so + // directly accessing it could cause a segfault. Use process_vm_readv to read + // the memory safely instead. process_vm_readv was added in Linux 3.2, and + // AArch64 supported was added in Linux 3.7, so the syscall is guaranteed to + // be present. Unfortunately, there are Linux AArch64 environments where the + // libc wrapper for the syscall might not be present (e.g. Android 5), so call + // the syscall directly instead. 
+ uint32_t instructions[2]; + struct iovec local_iov = {&instructions, sizeof instructions}; + struct iovec remote_iov = {reinterpret_cast(pc), sizeof instructions}; + long bytesRead = + syscall(SYS_process_vm_readv, getpid(), &local_iov, 1, &remote_iov, 1, 0); // Look for instructions: mov x8, #0x8b; svc #0x0 - if (_addressSpace.get32(pc) == 0xd2801168 && - _addressSpace.get32(pc + 4) == 0xd4000001) { - _info = {}; - _info.start_ip = pc; - _info.end_ip = pc + 4; - _isSigReturn = true; - return true; - } - return false; + if (bytesRead != sizeof instructions || instructions[0] != 0xd2801168 || + instructions[1] != 0xd4000001) + return false; + + _info = {}; + _info.start_ip = pc; + _info.end_ip = pc + 4; + _isSigReturn = true; + return true; } template diff --git a/libunwind/test/bad_unwind_info.pass.cpp b/libunwind/test/bad_unwind_info.pass.cpp new file mode 100644 index 00000000000000..97427ea9227372 --- /dev/null +++ b/libunwind/test/bad_unwind_info.pass.cpp @@ -0,0 +1,66 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Ensure that libunwind doesn't crash on invalid info; the Linux aarch64 +// sigreturn frame check would previously attempt to access invalid memory in +// this scenario. +// REQUIRES: linux && (target={{aarch64-.+}} || target={{x86_64-.+}}) + +// GCC doesn't support __attribute__((naked)) on AArch64. +// UNSUPPORTED: gcc + +// Inline assembly is incompatible with MSAN. 
+// UNSUPPORTED: msan + +#undef NDEBUG +#include +#include +#include + +__attribute__((naked)) void bad_unwind_info() { +#if defined(__aarch64__) + __asm__("// not using 0 because unwinder was already resilient to that\n" + "mov x8, #4\n" + "stp x30, x8, [sp, #-16]!\n" + ".cfi_def_cfa_offset 16\n" + "// purposely use incorrect offset for x30\n" + ".cfi_offset x30, -8\n" + "bl stepper\n" + "ldr x30, [sp], #16\n" + ".cfi_def_cfa_offset 0\n" + ".cfi_restore x30\n" + "ret\n"); +#elif defined(__x86_64__) + __asm__("pushq %rbx\n" + ".cfi_def_cfa_offset 16\n" + "movq 8(%rsp), %rbx\n" + "# purposely corrupt return value on stack\n" + "movq $4, 8(%rsp)\n" + "callq stepper\n" + "movq %rbx, 8(%rsp)\n" + "popq %rbx\n" + ".cfi_def_cfa_offset 8\n" + "ret\n"); +#else +#error This test is only supported on aarch64 or x86-64 +#endif +} + +extern "C" void stepper() { + unw_cursor_t cursor; + unw_context_t uc; + unw_getcontext(&uc); + unw_init_local(&cursor, &uc); + // stepping to bad_unwind_info should succeed + assert(unw_step(&cursor) > 0); + // stepping past bad_unwind_info should fail but not crash + assert(unw_step(&cursor) <= 0); +} + +int main() { bad_unwind_info(); } From a831ce528fc0fd16734d644e62ae956834276c1b Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Thu, 26 May 2022 09:34:18 -0700 Subject: [PATCH 606/908] Revert "[runtimes] Detect changes to Tests.cmake" This reverts commit ec10ac750a8ad96983d85263323635f3dabe92fd. See https://discourse.llvm.org/t/cmake-regeneration-is-broken/62788. This change caused Ninja's CMake regeneration to depend on the build, which prevented CMake regeneration from functioning properly and caused spurious build failures on incremental builds when a CMake change occurred. 
--- .../modules/LLVMExternalProjectUtils.cmake | 5 +---- llvm/runtimes/CMakeLists.txt | 22 ++++++++++--------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake index cf0f18476b60c6..f99a50df22801b 100644 --- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake @@ -56,14 +56,12 @@ endfunction() # Use provided strip tool instead of the default one. # TARGET_TRIPLE triple # Optional target triple to pass to the compiler -# BYPRODUCTS files... -# Files generated as a byproduct of this build. # ) function(llvm_ExternalProject_Add name source_dir) cmake_parse_arguments(ARG "USE_TOOLCHAIN;EXCLUDE_FROM_ALL;NO_INSTALL;ALWAYS_CLEAN" "SOURCE_DIR" - "CMAKE_ARGS;TOOLCHAIN_TOOLS;RUNTIME_LIBRARIES;DEPENDS;EXTRA_TARGETS;PASSTHROUGH_PREFIXES;STRIP_TOOL;TARGET_TRIPLE;BYPRODUCTS" + "CMAKE_ARGS;TOOLCHAIN_TOOLS;RUNTIME_LIBRARIES;DEPENDS;EXTRA_TARGETS;PASSTHROUGH_PREFIXES;STRIP_TOOL;TARGET_TRIPLE" ${ARGN}) canonicalize_tool_name(${name} nameCanon) @@ -324,7 +322,6 @@ function(llvm_ExternalProject_Add name source_dir) INSTALL_COMMAND "" STEP_TARGETS configure build BUILD_ALWAYS 1 - BUILD_BYPRODUCTS ${ARG_BYPRODUCTS} USES_TERMINAL_CONFIGURE 1 USES_TERMINAL_BUILD 1 USES_TERMINAL_INSTALL 1 diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 836e9149a71fe4..34fcccab100c5f 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -219,11 +219,13 @@ function(runtime_default_target) endforeach() if(LLVM_INCLUDE_TESTS) - include(${LLVM_BINARY_DIR}/runtimes/Tests.cmake OPTIONAL) + include(${LLVM_BINARY_DIR}/runtimes/Tests.cmake OPTIONAL RESULT_VARIABLE have_tests) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/Tests.cmake) - set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES ${SUB_LIT_TESTSUITES}) - set_property(GLOBAL APPEND 
PROPERTY LLVM_ALL_LIT_PARAMS ${SUB_LIT_PARAMS}) - set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_EXTRA_ARGS ${SUB_LIT_EXTRA_ARGS}) + if(have_tests) + set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES ${SUB_LIT_TESTSUITES}) + set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_PARAMS ${SUB_LIT_PARAMS}) + set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_EXTRA_ARGS ${SUB_LIT_EXTRA_ARGS}) + endif() list(APPEND test_targets runtimes-test-depends check-runtimes) endif() @@ -258,7 +260,6 @@ function(runtime_default_target) ${SUB_INSTALL_TARGETS} USE_TOOLCHAIN TARGET_TRIPLE ${LLVM_TARGET_TRIPLE} - BYPRODUCTS ${LLVM_BINARY_DIR}/runtimes/Tests.cmake ${EXTRA_ARGS}) endfunction() @@ -300,11 +301,13 @@ function(runtime_register_target name target) endforeach() if(LLVM_INCLUDE_TESTS) - include(${LLVM_BINARY_DIR}/runtimes/${name}/Tests.cmake OPTIONAL) + include(${LLVM_BINARY_DIR}/runtimes/${name}/Tests.cmake OPTIONAL RESULT_VARIABLE have_tests) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/${name}/Tests.cmake) - set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES ${SUB_LIT_TESTSUITES}) - set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_PARAMS ${SUB_LIT_PARAMS}) - set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_EXTRA_ARGS ${SUB_LIT_EXTRA_ARGS}) + if(have_tests) + set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES ${SUB_LIT_TESTSUITES}) + set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_PARAMS ${SUB_LIT_PARAMS}) + set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_EXTRA_ARGS ${SUB_LIT_EXTRA_ARGS}) + endif() set(runtimes-test-depends-${name} runtimes-test-depends) set(check-runtimes-${name} check-runtimes) list(APPEND ${name}_test_targets runtimes-test-depends-${name} check-runtimes-${name}) @@ -364,7 +367,6 @@ function(runtime_register_target name target) ${${name}_test_targets} USE_TOOLCHAIN TARGET_TRIPLE ${target} - BYPRODUCTS ${LLVM_BINARY_DIR}/runtimes/${name}/Tests.cmake ${EXTRA_ARGS}) endfunction() From 
d58cc0839ee5935e649dbbc18c898a8e4bcd6724 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 26 May 2022 08:55:47 -0700 Subject: [PATCH 607/908] [RISCV] reorganize getFrameIndexReference to reduce code duplication [nfc] This change reorganizes the majority of frame index resolution into a two-step process. Step 1 - Select which base register we're going to use. Step 2 - Compute the offset from that base register. The key point is that this allows us to share the step 2 logic for the SP case. This reduces the code duplication, and (I think) makes the code much easier to follow. I also went ahead and added assertions into phase 2 to catch errors where we select an illegal base pointer. In general, we can't index from a base register to a stack location if that requires crossing a variable and unknown region. In practice, we have two such cases: dynamic stack realign and var sized objects. Note that crossing the scalable region is fine since while variable, it's a known variability which can be expressed in the offset. 
Differential Revision: https://reviews.llvm.org/D126403 --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 174 +++++++++---------- 1 file changed, 87 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 5062d992c6cc41..8bc5f54d91481f 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -687,7 +687,10 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, Offset += StackOffset::getFixed(FirstSPAdjustAmount); else Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF)); - } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { + return Offset; + } + + if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { // If the stack was realigned, the frame pointer is set in order to allow // SP to be restored, so we need another base register to record the stack // after realignment. @@ -727,97 +730,94 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, assert(!MFI.hasVarSizedObjects()); FrameReg = RISCV::X2; } - // The total amount of padding surrounding RVV objects is described by - // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to the required alignment. - if (MFI.getStackID(FI) == TargetStackID::Default) { - Offset += StackOffset::getFixed(MFI.getStackSize()); - assert(FI >= 0 && "Unhandled negative frame index"); - } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - // Ensure the base of the RVV stack is correctly aligned: add on the - // alignment padding. 
- int ScalarLocalVarSize = MFI.getStackSize() - - RVFI->getCalleeSavedStackSize() + - RVFI->getRVVPadding(); - Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); - } } else { FrameReg = RI->getFrameRegister(MF); - if (hasFP(MF)) { - Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); - if (FI >= 0) - Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize()); - // When using FP to access scalable vector objects, we need to minus - // the frame size. - // - // |--------------------------| -- <-- FP - // | callee-allocated save | | - // | area for register varargs| | - // |--------------------------| | - // | callee-saved registers | | - // |--------------------------| | MFI.getStackSize() - // | scalar local variables | | - // |--------------------------| -- (Offset of RVV objects is from here.) - // | RVV objects | - // |--------------------------| - // | VarSize objects | - // |--------------------------| <-- SP - if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - // We don't expect any extra RVV alignment padding, as the stack size - // and RVV object sections should be correct aligned in their own - // right. - assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) && - "Inconsistent stack layout"); - Offset -= StackOffset::getFixed(MFI.getStackSize()); - } - } else { - // When using SP to access frame objects, we need to add RVV stack size. 
- // - // |--------------------------| -- <-- FP - // | callee-allocated save | | <----| - // | area for register varargs| | | - // |--------------------------| | | - // | callee-saved registers | | | - // |--------------------------| -- | - // | RVV alignment padding | | | - // | (not counted in | | | - // | MFI.getStackSize() but | | | - // | counted in | | | - // | RVFI.getRVVStackSize()) | | | - // |--------------------------| -- | - // | RVV objects | | |-- MFI.getStackSize() - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | padding before RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | - // |--------------------------| -- | - // | scalar local variables | | <----' - // |--------------------------| -- <-- SP - // - // The total amount of padding surrounding RVV objects is described by - // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to the required alignment. - if (MFI.getStackID(FI) == TargetStackID::Default) { - if (MFI.isFixedObjectIndex(FI)) { - Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) + - RVFI->getLibCallStackSize(), - RVFI->getRVVStackSize()); - } else { - Offset += StackOffset::getFixed(MFI.getStackSize()); - } - } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - // Ensure the base of the RVV stack is correctly aligned: add on the - // alignment padding. - int ScalarLocalVarSize = - MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - - RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding(); - Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); - } + } + + if (FrameReg == getFPReg(STI)) { + Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); + if (FI >= 0) + Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize()); + // When using FP to access scalable vector objects, we need to minus + // the frame size. 
+ // + // |--------------------------| -- <-- FP + // | callee-allocated save | | + // | area for register varargs| | + // |--------------------------| | + // | callee-saved registers | | + // |--------------------------| | MFI.getStackSize() + // | scalar local variables | | + // |--------------------------| -- (Offset of RVV objects is from here.) + // | RVV objects | + // |--------------------------| + // | VarSize objects | + // |--------------------------| <-- SP + if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + assert(!RI->hasStackRealignment(MF) && + "Can't index across variable sized realign"); + // We don't expect any extra RVV alignment padding, as the stack size + // and RVV object sections should be correct aligned in their own + // right. + assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) && + "Inconsistent stack layout"); + Offset -= StackOffset::getFixed(MFI.getStackSize()); } + return Offset; } + // This case handles indexing off both SP and BP. + // If indexing off SP, there must not be any var sized objects + assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects()); + + // When using SP to access frame objects, we need to add RVV stack size. 
+ // + // |--------------------------| -- <-- FP + // | callee-allocated save | | <----| + // | area for register varargs| | | + // |--------------------------| | | + // | callee-saved registers | | | + // |--------------------------| -- | + // | RVV alignment padding | | | + // | (not counted in | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | + // |--------------------------| -- | + // | RVV objects | | |-- MFI.getStackSize() + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | padding before RVV | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | scalar local variables | | <----' + // |--------------------------| -- <-- BP (if var sized objects present) + // | VarSize objects | | + // |--------------------------| -- <-- SP + // + // The total amount of padding surrounding RVV objects is described by + // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV + // objects to the required alignment. + if (MFI.getStackID(FI) == TargetStackID::Default) { + if (MFI.isFixedObjectIndex(FI)) { + assert(!RI->hasStackRealignment(MF) && + "Can't index across variable sized realign"); + Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) + + RVFI->getLibCallStackSize(), + RVFI->getRVVStackSize()); + } else { + Offset += StackOffset::getFixed(MFI.getStackSize()); + } + } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + // Ensure the base of the RVV stack is correctly aligned: add on the + // alignment padding. 
+ int ScalarLocalVarSize = + MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - + RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding(); + Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); + } return Offset; } From 939a43461ba3cd540a049a62a27dab2732bedd64 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 26 May 2022 09:50:36 -0700 Subject: [PATCH 608/908] Revert "Replace the custom linked list in LeaderTableEntry with TinyPtrVector." This reverts commit 1e9114984490b83d4665f12a11f84c83f50ca8f0. Pending further discussion. --- llvm/include/llvm/Transforms/Scalar/GVN.h | 56 ++++++++++++++--------- llvm/lib/Transforms/Scalar/GVN.cpp | 48 +++++++++++-------- 2 files changed, 64 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index 5dc5c4b3feb89a..16ab1a49016299 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -19,7 +19,6 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" @@ -233,10 +232,12 @@ class GVNPass : public PassInfoMixin { /// A mapping from value numbers to lists of Value*'s that /// have that value number. Use findLeader to query it. struct LeaderTableEntry { - TinyPtrVector Val; - TinyPtrVector BB; + Value *Val; + const BasicBlock *BB; + LeaderTableEntry *Next; }; DenseMap LeaderTable; + BumpPtrAllocator TableAllocator; // Block-local map of equivalent values to their leader, does not // propagate to any successors. Entries added mid-block are applied @@ -265,31 +266,44 @@ class GVNPass : public PassInfoMixin { /// Push a new Value to the LeaderTable onto the list for its value number. 
void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; - if (Curr.Val.size() == 0) { - Curr.Val.push_back(V); - Curr.BB.push_back(BB); - } else { - Curr.Val.insert(Curr.Val.begin()+1, V); - Curr.BB.insert(Curr.BB.begin()+1, BB); + if (!Curr.Val) { + Curr.Val = V; + Curr.BB = BB; + return; } + + LeaderTableEntry *Node = TableAllocator.Allocate(); + Node->Val = V; + Node->BB = BB; + Node->Next = Curr.Next; + Curr.Next = Node; } /// Scan the list of values corresponding to a given /// value number, and remove the given instruction if encountered. void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { - LeaderTableEntry &entry = LeaderTable[N]; - assert(entry.BB.size() == entry.Val.size()); - auto VI = entry.Val.begin(); - auto VE = entry.Val.end(); - auto BI = entry.BB.begin(); - while (VI != VE) { - if (*VI == I && *BI == BB) { - VI = entry.Val.erase(VI); - BI = entry.BB.erase(BI); - VE = entry.Val.end(); + LeaderTableEntry *Prev = nullptr; + LeaderTableEntry *Curr = &LeaderTable[N]; + + while (Curr && (Curr->Val != I || Curr->BB != BB)) { + Prev = Curr; + Curr = Curr->Next; + } + + if (!Curr) + return; + + if (Prev) { + Prev->Next = Curr->Next; + } else { + if (!Curr->Next) { + Curr->Val = nullptr; + Curr->BB = nullptr; } else { - ++VI; - ++BI; + LeaderTableEntry *Next = Curr->Next; + Curr->Val = Next->Val; + Curr->BB = Next->BB; + Curr->Next = Next->Next; } } } diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 184374d754637f..34c1483a37e04d 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2105,9 +2105,10 @@ GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) { /// defined in \p BB. 
bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, GVNPass &Gvn) { - const LeaderTableEntry &Entry = Gvn.LeaderTable[Num]; - return all_of(Entry.BB, - [BB](const BasicBlock *EntryBB) { return EntryBB == BB; }); + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals && Vals->BB == BB) + Vals = Vals->Next; + return !Vals; } /// Wrap phiTranslateImpl to provide caching functionality. @@ -2129,11 +2130,12 @@ bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *PhiBlock, GVNPass &Gvn) { CallInst *Call = nullptr; - const LeaderTableEntry &Entry = Gvn.LeaderTable[Num]; - for (Value *Val : Entry.Val) { - Call = dyn_cast(Val); + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals) { + Call = dyn_cast(Vals->Val); if (Call && Call->getParent() == PhiBlock) break; + Vals = Vals->Next; } if (AA->doesNotAccessMemory(Call)) @@ -2226,18 +2228,23 @@ void GVNPass::ValueTable::eraseTranslateCacheEntry( // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. 
Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) { - const LeaderTableEntry &Entry = LeaderTable[num]; - if (Entry.Val.empty()) - return nullptr; + LeaderTableEntry Vals = LeaderTable[num]; + if (!Vals.Val) return nullptr; Value *Val = nullptr; - for (size_t i = 0, e = Entry.Val.size(); i != e; ++i) { - if (DT->dominates(Entry.BB[i], BB)) { - if (isa(Entry.Val[i])) - return Entry.Val[i]; - if (!Val) - Val = Entry.Val[i]; + if (DT->dominates(Vals.BB, BB)) { + Val = Vals.Val; + if (isa(Val)) return Val; + } + + LeaderTableEntry* Next = Vals.Next; + while (Next) { + if (DT->dominates(Next->BB, BB)) { + if (isa(Next->Val)) return Next->Val; + if (!Val) Val = Next->Val; } + + Next = Next->Next; } return Val; @@ -3026,6 +3033,7 @@ void GVNPass::cleanupGlobalSets() { VN.clear(); LeaderTable.clear(); BlockRPONumber.clear(); + TableAllocator.Reset(); ICF->clear(); InvalidBlockRPONumbers = true; } @@ -3038,10 +3046,12 @@ void GVNPass::verifyRemoved(const Instruction *Inst) const { // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. for (const auto &I : LeaderTable) { - const LeaderTableEntry &Entry = I.second; - for (Value *Val : Entry.Val) { - (void)Val; - assert(Val != Inst && "Inst still in value numbering scope!"); + const LeaderTableEntry *Node = &I.second; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); + + while (Node->Next) { + Node = Node->Next; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); } } } From 0a5cfbf7b2e82e7980b66428e88b4e28e814d7bb Mon Sep 17 00:00:00 2001 From: Mike Rice Date: Wed, 25 May 2022 15:02:26 -0700 Subject: [PATCH 609/908] [OpenMP] Use the align clause value from 'omp allocate' for globals Refactor the code that handles the align clause of 'omp allocate' so it can be used with globals as well as local variables. 
Differential Revision: https://reviews.llvm.org/D126426 --- clang/lib/CodeGen/CGDecl.cpp | 19 ++++++ clang/lib/CodeGen/CGOpenMPRuntime.cpp | 27 +++----- clang/lib/CodeGen/CodeGenModule.cpp | 7 ++- clang/lib/CodeGen/CodeGenModule.h | 3 + .../OpenMP/align_clause_global_codegen.cpp | 61 +++++++++++++++++++ 5 files changed, 97 insertions(+), 20 deletions(-) create mode 100644 clang/test/OpenMP/align_clause_global_codegen.cpp diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 0f16c7f50a0037..a16551e83f93b6 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -2694,3 +2694,22 @@ void CodeGenModule::EmitOMPAllocateDecl(const OMPAllocateDecl *D) { DummyGV->eraseFromParent(); } } + +llvm::Optional +CodeGenModule::getOMPAllocateAlignment(const VarDecl *VD) { + if (const auto *AA = VD->getAttr()) { + if (Expr *Alignment = AA->getAlignment()) { + unsigned UserAlign = + Alignment->EvaluateKnownConstInt(getContext()).getExtValue(); + CharUnits NaturalAlign = + getNaturalTypeAlignment(VD->getType().getNonReferenceType()); + + // OpenMP5.1 pg 185 lines 7-10 + // Each item in the align modifier list must be aligned to the maximum + // of the specified alignment and the type's natural alignment. + return CharUnits::fromQuantity( + std::max(UserAlign, NaturalAlign.getQuantity())); + } + } + return llvm::None; +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 89e26b10dfae5e..605d4fe76bc966 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -12185,25 +12185,15 @@ static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, return AllocVal; } -/// Given the allocate directive list item type and align clause value, -/// return appropriate alignment. 
-static llvm::Value *getAlignmentValue(CodeGenFunction &CGF, QualType ListItemTy, - const Expr *Alignment) { - if (!Alignment) - return nullptr; +/// Return the alignment from an allocate directive if present. +static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { + llvm::Optional AllocateAlignment = CGM.getOMPAllocateAlignment(VD); - unsigned UserAlign = - Alignment->EvaluateKnownConstInt(CGF.getContext()).getExtValue(); - CharUnits NaturalAlign = CGF.CGM.getNaturalTypeAlignment(ListItemTy); + if (!AllocateAlignment) + return nullptr; - // OpenMP5.1 pg 185 lines 7-10 - // Each item in the align modifier list must be aligned to the maximum - // of the specified alignment and the type's natural alignment. - // - // If no alignment specified then use the natural alignment. - return llvm::ConstantInt::get( - CGF.CGM.SizeTy, - std::max(UserAlign, NaturalAlign.getQuantity())); + return llvm::ConstantInt::get(CGM.SizeTy, + AllocateAlignment.getValue().getQuantity()); } Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, @@ -12244,8 +12234,7 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const auto *AA = CVD->getAttr(); const Expr *Allocator = AA->getAllocator(); llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); - llvm::Value *Alignment = getAlignmentValue( - CGF, VD->getType().getNonReferenceType(), AA->getAlignment()); + llvm::Value *Alignment = getAlignmentValue(CGM, CVD); SmallVector Args; Args.push_back(ThreadID); if (Alignment) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a035e5ddd9e6db..b4ae266f898819 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4738,7 +4738,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, GV->setConstant(true); } - GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); + CharUnits AlignVal = getContext().getDeclAlign(D); + // Check for alignment 
specified in an 'omp allocate' directive. + if (llvm::Optional AlignValFromAllocate = + getOMPAllocateAlignment(D)) + AlignVal = AlignValFromAllocate.getValue(); + GV->setAlignment(AlignVal.getAsAlign()); // On Darwin, unlike other Itanium C++ ABI platforms, the thread-wrapper // function is only defined alongside the variable, not also alongside diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index a5ec4c8f988d6d..0ac476dd6dbccb 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1365,6 +1365,9 @@ class CodeGenModule : public CodeGenTypeCache { /// \param D The allocate declaration void EmitOMPAllocateDecl(const OMPAllocateDecl *D); + /// Return the alignment specified in an allocate directive, if present. + llvm::Optional getOMPAllocateAlignment(const VarDecl *VD); + /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. 
diff --git a/clang/test/OpenMP/align_clause_global_codegen.cpp b/clang/test/OpenMP/align_clause_global_codegen.cpp new file mode 100644 index 00000000000000..3be8e87cf87aae --- /dev/null +++ b/clang/test/OpenMP/align_clause_global_codegen.cpp @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -emit-llvm -o - -fopenmp \ +// RUN: -triple i386-unknown-unknown -fopenmp-version=51 %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-32 + +// RUN: %clang_cc1 -emit-llvm -o - -fopenmp \ +// RUN: -triple x86_64-unknown-linux-gnu -fopenmp-version=51 %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-64 + +typedef enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +} omp_allocator_handle_t; + +// +// Should allow larger alignment. +// + +// CHECK: @foo_global1 = global float 0.000000e+00, align 16 +float foo_global1; +#pragma omp allocate(foo_global1) align(16) + +// CHECK: @foo_global2 = global float 0.000000e+00, align 16 +float foo_global2; +#pragma omp allocate(foo_global2) allocator(omp_default_mem_alloc) align(16) + +// CHECK: @foo_global3 = global float 0.000000e+00, align 16 +float foo_global3; +#pragma omp allocate(foo_global3) allocator(omp_large_cap_mem_alloc) align(16) + +// CHECK: @foop_global1 = global ptr null, align 16 +int *foop_global1; +#pragma omp allocate(foop_global1) align(16) + +// +// Should use natural alignment when alignment specified is too small. 
+// + +// CHECK: @foo_global4 = global float 0.000000e+00, align 4 +float foo_global4; +#pragma omp allocate(foo_global4) align(2) + +// CHECK: @foo_global5 = global float 0.000000e+00, align 4 +float foo_global5; +#pragma omp allocate(foo_global5) allocator(omp_default_mem_alloc) align(2) + +// CHECK: @foo_global6 = global float 0.000000e+00, align 4 +float foo_global6; +#pragma omp allocate(foo_global6) allocator(omp_large_cap_mem_alloc) align(2) + +// CHECK-32: @foop_global2 = global ptr null, align 4 +// CHECK-64: @foop_global2 = global ptr null, align 8 +int *foop_global2; +#pragma omp allocate(foop_global2) align(2) From 917dc0749b77467188872c4a0f217cedd874de36 Mon Sep 17 00:00:00 2001 From: David Penry Date: Mon, 28 Mar 2022 12:18:37 -0700 Subject: [PATCH 610/908] [ARM] Recognize t2LoopEnd for software pipelining - Add t2LoopEnd to TargetInstrInfo::analyzeBranch and related functions. As there are many side effects of analyzing a branch, only do so if software pipelining is enabled to maintain previous behavior when pipelining is not desired. - Make sure that t2LoopEndDec is immediately followed by a t2B when it is synthesized from a t2LoopEnd. This is done because the t2LoopEnd might have acquired a fall-through path, but IfConversion assumes that fall-through are only possible on analyzable branches. 
Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D126322 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 98 +++++++-- .../ARM/MVETPAndVPTOptimisationsPass.cpp | 11 ++ llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir | 186 ++++++++++++++++++ 3 files changed, 282 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9a7960268a7589..7fa253a200cb70 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -343,6 +343,13 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, } // Branch analysis. +// Cond vector output format: +// 0 elements indicates an unconditional branch +// 2 elements indicates a conditional branch; the elements are +// the condition to check and the CPSR. +// 3 elements indicates a hardware loop end; the elements +// are the opcode, the operand value to test, and a dummy +// operand used to pad out to 3 operands. bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -394,6 +401,17 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } else if (I->isReturn()) { // Returns can't be analyzed, but we should run cleanup. CantAnalyze = true; + } else if (I->getOpcode() == ARM::t2LoopEnd && + MBB.getParent() + ->getSubtarget() + .enableMachinePipeliner()) { + if (!Cond.empty()) + return true; + FBB = TBB; + TBB = I->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); + Cond.push_back(I->getOperand(0)); + Cond.push_back(MachineOperand::CreateImm(0)); } else { // We encountered other unrecognized terminator. Bail out immediately. 
return true; @@ -457,7 +475,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, return 0; if (!isUncondBranchOpcode(I->getOpcode()) && - !isCondBranchOpcode(I->getOpcode())) + !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) return 0; // Remove the branch. @@ -467,7 +485,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, if (I == MBB.begin()) return 1; --I; - if (!isCondBranchOpcode(I->getOpcode())) + if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) return 1; // Remove the branch. @@ -491,8 +509,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 2 || Cond.size() == 0) && - "ARM branch conditions have two components!"); + assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) && + "ARM branch conditions have two or three components!"); // For conditional branches, we use addOperand to preserve CPSR flags. @@ -502,19 +520,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); else BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); - } else + } else if (Cond.size() == 2) { BuildMI(&MBB, DL, get(BccOpc)) .addMBB(TBB) .addImm(Cond[0].getImm()) .add(Cond[1]); + } else + BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB); return 1; } // Two-way conditional branch. 
- BuildMI(&MBB, DL, get(BccOpc)) - .addMBB(TBB) - .addImm(Cond[0].getImm()) - .add(Cond[1]); + if (Cond.size() == 2) + BuildMI(&MBB, DL, get(BccOpc)) + .addMBB(TBB) + .addImm(Cond[0].getImm()) + .add(Cond[1]); + else if (Cond.size() == 3) + BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB); if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); else @@ -524,9 +547,12 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, bool ARMBaseInstrInfo:: reverseBranchCondition(SmallVectorImpl &Cond) const { - ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); - Cond[0].setImm(ARMCC::getOppositeCondition(CC)); - return false; + if (Cond.size() == 2) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); + Cond[0].setImm(ARMCC::getOppositeCondition(CC)); + return false; + } + return true; } bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { @@ -6730,9 +6756,11 @@ class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { // Meanings of the various stuff with loop types: // t2Bcc: - // Loop = null -- there is no setup. // EndLoop = branch at end of original BB that will become a kernel // LoopCount = CC setter live into branch + // t2LoopEnd: + // EndLoop = branch at end of original BB + // LoopCount = t2LoopDec public: ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount) : EndLoop(EndLoop), LoopCount(LoopCount), @@ -6755,6 +6783,23 @@ class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { TII->reverseBranchCondition(Cond); } return {}; + } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) { + // General case just lets the unrolled t2LoopDec do the subtraction and + // therefore just needs to check if zero has been reached. + MachineInstr *LoopDec = nullptr; + for (auto &I : MBB.instrs()) + if (I.getOpcode() == ARM::t2LoopDec) + LoopDec = &I; + assert(LoopDec && "Unable to find copied LoopDec"); + // Check if we're done with the loop. 
+ BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri)) + .addReg(LoopDec->getOperand(0).getReg()) + .addImm(0) + .addImm(ARMCC::AL) + .addReg(ARM::NoRegister); + Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ)); + Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false)); + return {}; } else llvm_unreachable("Unknown EndLoop"); } @@ -6793,5 +6838,32 @@ ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { // that pipeline will work } + // Recognize: + // preheader: + // %1 = t2DoopLoopStart %0 + // loop: + // %2 = phi %1, , %..., %loop + // %3 = t2LoopDec %2, + // t2LoopEnd %3, %loop + + if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) { + for (auto &L : LoopBB->instrs()) + if (L.isCall()) + return nullptr; + else if (isVCTP(&L)) + return nullptr; + Register LoopDecResult = I->getOperand(0).getReg(); + MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); + MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult); + if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) + return nullptr; + MachineInstr *LoopStart = nullptr; + for (auto &J : Preheader->instrs()) + if (J.getOpcode() == ARM::t2DoLoopStart) + LoopStart = &J; + if (!LoopStart) + return nullptr; + return std::make_unique(&*I, LoopDec); + } return nullptr; } diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp index 7e31ea77f4f530..92c324d58c254a 100644 --- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp +++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp @@ -404,6 +404,17 @@ bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { LoopPhi->getOperand(3).setReg(DecReg); } + SmallVector Cond; // For analyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. 
+ if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) { + // If the LoopEnd falls through, need to insert a t2B to the fall-through + // block so that the non-analyzable t2LoopEndDec doesn't fall through. + MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator(); + BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B)) + .addMBB(&*MBBI) + .add(predOps(ARMCC::AL)); + } + // Replace the loop dec and loop end as a single instruction. MachineInstrBuilder MI = BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir new file mode 100644 index 00000000000000..871e412fc991ed --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir @@ -0,0 +1,186 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# NOTE: cortex-m7 is used to provide scheduling information despite not having LE instructions +# RUN: llc -mtriple=thumbv8.1-m.main-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK + +--- | + define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 { + entry: + %cmp8 = icmp sgt i32 %sz, 0 + br i1 %cmp8, label %for.body.preheader, label %for.end + + for.body.preheader: ; preds = %entry + %scevgep = getelementptr float, float* %b, i32 -1 + %scevgep4 = getelementptr float, float* %a, i32 -1 + br label %for.body + + for.body: ; preds = %for.body.preheader, %for.body + %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ] + %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ] + %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1 + %0 = load float, float* 
%scevgep7, align 4 + %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1 + %1 = load float, float* %scevgep3, align 4 + %mul = fmul fast float %1, %0 + %add = fadd fast float %mul, %sum.010 + %lsr.iv.next = add i32 %lsr.iv, -1 + %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1 + %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + + for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + ret float %sum.0.lcssa + } + + !0 = distinct !{!0, !1, !2, !3} + !1 = !{!"llvm.loop.mustprogress"} + !2 = !{!"llvm.loop.unroll.disable"} + !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3} + +... +--- +name: dot +alignment: 2 +tracksRegLiveness: true +constants: + - id: 0 + value: 'float 0.000000e+00' + alignment: 4 + isTargetSpecific: false +body: | + ; CHECK-LABEL: name: dot + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000) + ; CHECK-NEXT: liveins: $r0, $r1, $r2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprlr = COPY $r2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnopc = COPY $r1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnopc = COPY $r0 + ; CHECK-NEXT: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: t2Bcc %bb.2, 10 /* CC::ge */, $cpsr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) + ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.for.body.preheader: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnopc = COPY [[t2SUBri]] + ; CHECK-NEXT: 
[[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) + ; CHECK-NEXT: [[t2DoLoopStart:%[0-9]+]]:gprlr = t2DoLoopStart [[COPY]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gprnopc = COPY [[t2SUBri1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.for.body: + ; CHECK-NEXT: successors: %bb.6(0x80000000), %bb.7(0x00000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY4]], 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) + ; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) + ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprlr = COPY [[t2DoLoopStart]] + ; CHECK-NEXT: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[COPY5]], 1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY [[t2LoopDec]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr = COPY [[t2ADDri1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2ADDri]] + ; CHECK-NEXT: t2CMPri [[t2LoopDec]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: t2Bcc %bb.7, 0 /* CC::eq */, $cpsr + ; CHECK-NEXT: t2B %bb.6, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.for.body: + ; CHECK-NEXT: successors: %bb.7(0x04000000), %bb.6(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.5, %43, %bb.6 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.5, %44, %bb.6 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr = PHI [[COPY6]], %bb.5, %47, %bb.6 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, %46, %bb.6 + ; CHECK-NEXT: 
[[PHI4:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, %45, %bb.6 + ; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4) + ; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gprlr = COPY [[PHI2]] + ; CHECK-NEXT: [[t2LoopDec1:%[0-9]+]]:gprlr = t2LoopDec [[COPY9]], 1 + ; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri3]] + ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2LoopDec1]] + ; CHECK-NEXT: t2LoopEnd [[t2LoopDec1]], %bb.6, implicit-def $cpsr + ; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[VMULS1]], %bb.6 + ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.for.end: + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS1]], %bb.7 + ; CHECK-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI7]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r0 = COPY [[VMOVRS]] + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + bb.0.entry: + 
successors: %bb.1(0x50000000), %bb.4(0x30000000) + liveins: $r0, $r1, $r2 + + %13:gprlr = COPY $r2 + %12:gprnopc = COPY $r1 + %11:gprnopc = COPY $r0 + t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.1, 10 /* CC::ge */, $cpsr + + bb.4: + successors: %bb.3(0x80000000) + + %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) + t2B %bb.3, 14 /* CC::al */, $noreg + + bb.1.for.body.preheader: + successors: %bb.2(0x80000000) + + %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg + %0:gpr = COPY %16 + %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg + %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) + %44:gprlr = t2DoLoopStart %13 + %1:gpr = COPY %17 + + bb.2.for.body: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %2:gprnopc = PHI %1, %bb.1, %9, %bb.2 + %3:gprnopc = PHI %0, %bb.1, %8, %bb.2 + %4:gpr = PHI %44, %bb.1, %7, %bb.2 + %5:spr = PHI %15, %bb.1, %6, %bb.2 + %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg + %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) + %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg + %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) + %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg + %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg + %42:gprlr = COPY %4 + %23:gprlr = t2LoopDec %42:gprlr, 1 + %7:gpr = COPY %23 + %8:gpr = COPY %20 + %9:gpr = COPY %18 + t2LoopEnd %23:gprlr, %bb.2, implicit-def dead $cpsr + t2B %bb.3, 14 /* CC::al */, $noreg + + bb.3.for.end: + %10:spr = PHI %14, %bb.4, %6, %bb.2 + %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg + $r0 = COPY %24 + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + +... 
From 6af5f5697c8560e09c305d5cfd74a7bda0d4d311 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 May 2022 18:16:37 +0100 Subject: [PATCH 611/908] [SCEV] Collect conditions from assumes same way as for branches. Also collect conditions from assume up-front in applyLoopGuards. This allows re-using the logic to handle logical ANDs as assume conditions. It should pave the road for a fix for #55645. --- llvm/lib/Analysis/ScalarEvolution.cpp | 28 +++++++++---------- .../max-backedge-taken-count-guard-info.ll | 8 +++--- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c8193732ea16d5..59e1f5a0857cc4 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -14518,12 +14518,23 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { ExprsToRewrite.push_back(LHS); } }; - // First, collect conditions from dominating branches. Starting at the loop + + SmallVector> Terms; + // First, collect information from assumptions dominating the loop. + for (auto &AssumeVH : AC.assumptions()) { + if (!AssumeVH) + continue; + auto *AssumeI = cast(AssumeVH); + if (!DT.dominates(AssumeI, L->getHeader())) + continue; + Terms.emplace_back(AssumeI->getOperand(0), true); + } + + // Second, collect conditions from dominating branches. Starting at the loop // predecessor, climb up the predecessor chain, as long as there are // predecessors that can be found that have unique successors leading to the // original header. // TODO: share this logic with isLoopEntryGuardedByCond. - SmallVector> Terms; for (std::pair Pair( L->getLoopPredecessor(), L->getHeader()); Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { @@ -14570,19 +14581,6 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { } } - // Also collect information from assumptions dominating the loop. 
- for (auto &AssumeVH : AC.assumptions()) { - if (!AssumeVH) - continue; - auto *AssumeI = cast(AssumeVH); - auto *Cmp = dyn_cast(AssumeI->getOperand(0)); - if (!Cmp || !DT.dominates(AssumeI, L->getHeader())) - continue; - const auto *LHS = getSCEV(Cmp->getOperand(0)); - const auto *RHS = getSCEV(Cmp->getOperand(1)); - CollectCondition(Cmp->getPredicate(), LHS, RHS, RewriteMap); - } - if (RewriteMap.empty()) return Expr; diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index d8e95705c72d74..7ee5a0a622b1cd 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -825,14 +825,14 @@ define void @test_guard_assume_and(i32* nocapture readonly %data, i64 %count) { ; CHECK-NEXT: %cmp.and = and i1 %cmp.ult, %cmp.ne ; CHECK-NEXT: --> (%cmp.ult umin %cmp.ne) U: full-set S: full-set ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-1) S: [0,-1) Exits: (-1 + %count) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4) S: [0,4) Exits: (-1 + %count) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv -; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,5) S: [1,5) Exits: %count LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_assume_and ; CHECK-NEXT: Loop 
%loop: backedge-taken count is (-1 + %count) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -2 +; CHECK-NEXT: Loop %loop: max backedge-taken count is 3 ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %count) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 From f15c60218d5c88c9f32942bce119e7ac25cb6d73 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 22 Apr 2022 12:49:15 -0700 Subject: [PATCH 612/908] [UpdateTestChecks] Auto-generate stub bodies for unused prefixes This is scoped to autogenerated tests. The goal is to support having each RUN line specify a list of check-prefixes where one can specify potentially redundant prefixes. For example, for X86, if one specified prefixes for both AVX1 and AVX2, and the codegen happened to match today, one of the prefixes would be used and the other one not. If the unused prefix were dropped, and later, codegen differences were introduced, one would have to go figure out where to add what prefix (paraphrasing https://lists.llvm.org/pipermail/llvm-dev/2021-February/148326.html) To avoid getting errors due to unused prefixes, whole directories can be opted out (as discussed on that thread), but that means that tests that aren't autogenerated in such directories could have undetected unused prefix bugs. This patch proposes an alternative that both avoids the above, dir-level optout, and supports the main autogen scenario discussed first. The autogen tool appends at the end of the test file the list of unused prefixes, together with a note explaining that is the case. Each prefix is set up to always pass. This way, unexpected unused prefixes are easily discoverable, and expected cases "just work". 
Differential Revision: https://reviews.llvm.org/D124306 --- .../amdgpu_no_merge_comments.ll.expected | 2 + .../common-label-different-bodies-1-next.ll | 30 ++++++++++ .../Inputs/common-label-different-bodies-1.ll | 1 - .../Inputs/common-label-different-bodies-2.ll | 1 - .../Inputs/common-label-different-bodies-3.ll | 1 - .../redundant-and-unmatching-prefixes.ll | 18 ++++++ .../common-label-different-bodies.test | 22 ++++++- .../prefix-never-matches.test | 4 +- .../redundant-and-unmatched-prefixes.test | 18 ++++++ llvm/utils/UpdateTestChecks/asm.py | 6 +- llvm/utils/UpdateTestChecks/common.py | 58 ++++++++++++++----- llvm/utils/UpdateTestChecks/isel.py | 6 +- llvm/utils/update_cc_test_checks.py | 20 +++---- llvm/utils/update_llc_test_checks.py | 30 ++++++---- 14 files changed, 169 insertions(+), 48 deletions(-) create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1-next.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/redundant-and-unmatching-prefixes.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/redundant-and-unmatched-prefixes.test diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_no_merge_comments.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_no_merge_comments.ll.expected index a240941bdf2559..6cc7cb84dfc19d 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_no_merge_comments.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_no_merge_comments.ll.expected @@ -28,3 +28,5 @@ define hidden i32 @main(i32 %a) { %sub = sub i32 %mul, %add ret i32 %sub } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GCN: {{.*}} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1-next.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1-next.ll new file mode 100644 index 00000000000000..253716d74f1ff0 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1-next.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=A,B +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --allow-unused-prefixes=true --check-prefixes=C,A,UNUSED +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --allow-unused-prefixes=true --check-prefixe=A + +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +define <2 x i64> @fold_v2i64() { +; B-LABEL: fold_v2i64: +; B: # %bb.0: # %entry +; B-NEXT: movaps {{.*#+}} xmm0 = [0,4278190080,4294967295,4294967295] +; B-NEXT: retl +; +; C-LABEL: fold_v2i64: +; C: # %bb.0: # %entry +; C-NEXT: movaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615] +; C-NEXT: retq +entry: + %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> ) + ret <2 x i64> %r +} + +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) + +define <4 x i32> @test2(<4 x i32> %v) { + %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v) + ret <4 x i32> %r +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; A: {{.*}} +; UNUSED: {{.*}} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1.ll index c5f2bc9ba5bc7f..a93753420c0620 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-1.ll @@ -2,7 +2,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --allow-unused-prefixes=true --check-prefixes=C,A,UNUSED declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) -; A: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) define <2 x i64> @fold_v2i64() { entry: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-2.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-2.ll index bc1990462d37d3..b6250a202b1441 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-2.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-2.ll @@ -2,7 +2,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=C,A declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) -; A: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) define <2 x i64> @fold_v2i64() { entry: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-3.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-3.ll index b478b0ad721552..3ab54c2a9f1a77 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-3.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/common-label-different-bodies-3.ll @@ -2,7 +2,6 @@ ; RUN: 
llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=A,C declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) -; A: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) define <2 x i64> @fold_v2i64() { entry: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/redundant-and-unmatching-prefixes.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/redundant-and-unmatching-prefixes.ll new file mode 100644 index 00000000000000..9209a3d80ceb17 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/redundant-and-unmatching-prefixes.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=A,B,REDUNDANT +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=C,A,UNUSED + +; prefix 'A' has conflicting outputs, while the REDUNDANT and UNUSED ones are +; unused +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +define <2 x i64> @function_1() { +entry: + %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> ) + ret <2 x i64> %r +} + +define <2 x i64> @function_2() { +entry: + %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> ) + ret <2 x i64> %r +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/common-label-different-bodies.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/common-label-different-bodies.test index 409114db02b3b4..0454e292ef0f17 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/common-label-different-bodies.test +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/common-label-different-bodies.test @@ -4,11 +4,29 @@ # RUN: cp -f %S/Inputs/common-label-different-bodies-2.ll %t-2.ll # RUN: cp -f %S/Inputs/common-label-different-bodies-3.ll %t-3.ll # RUN: %update_llc_test_checks %t-1.ll + +# re-running update_llc_test_checks leaves the file as-is. 
+# RUN: cp -f %t-1.ll %t-1-copy.ll +# RUN: %update_llc_test_checks %t-1.ll +# RUN: diff %t-1.ll %t-1-copy.ll # RUN: %update_llc_test_checks %t-2.ll # RUN: %update_llc_test_checks %t-3.ll -# RUN: FileCheck --input-file=%t-1.ll %s +# RUN: FileCheck --input-file=%t-1.ll %s --check-prefixes=CHECK,CHECK-UNUSED # RUN: FileCheck --input-file=%t-2.ll %s # RUN: FileCheck --input-file=%t-3.ll %s # CHECK: B-LABEL: fold_v2i64 -# CHECK-NOT: A-LABEL: fold_v2i64 \ No newline at end of file +# CHECK-NOT: A-LABEL: fold_v2i64 +# CHECK: NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +# CHECK-NEXT: A: +# CHECK-UNUSED-NEXT: UNUSED: + +# adding a test removes that label from "unused" prefixes list +# the input file is like "1" after the tool was run, and then we added a new test +# RUN: cp -f %S/Inputs/common-label-different-bodies-1-next.ll %t-1-next.ll +# RUN: %update_llc_test_checks %t-1-next.ll +# RUN: FileCheck --input-file=%t-1-next.ll %s --check-prefixes=AFTER-CHANGE + +# AFTER-CHANGE: NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +# AFTER-CHANGE-NEXT: UNUSED: +# AFTER-CHANGE-NOT: A: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/prefix-never-matches.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/prefix-never-matches.test index c8b15cd39ff36c..2e75148addd844 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/prefix-never-matches.test +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/prefix-never-matches.test @@ -1,8 +1,8 @@ # REQUIRES: x86-registered-target # RUN: cp -f %S/Inputs/prefix-never-matches.ll %t.ll -# RUN: %update_llc_test_checks %t.ll 2>&1 | FileCheck %s +# RUN: %update_llc_test_checks --no-generate-body-for-unused-prefixes %t.ll 2>&1 | FileCheck %s # RUN: FileCheck --input-file=%t.ll %s --check-prefix=OUTPUT # CHECK: WARNING: Prefix A had conflicting output -# OUTPUT-NOT: A: \ No newline at end of file +# OUTPUT-NOT: A: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/redundant-and-unmatched-prefixes.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/redundant-and-unmatched-prefixes.test new file mode 100644 index 00000000000000..35e12dd57801ad --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/redundant-and-unmatched-prefixes.test @@ -0,0 +1,18 @@ +# REQUIRES: x86-registered-target + +# RUN: cp -f %S/Inputs/redundant-and-unmatching-prefixes.ll %t-1.ll +# RUN: %update_llc_test_checks %t-1.ll +# RUN: FileCheck --input-file=%t-1.ll %s + +# CHECK: B-LABEL: function_1: +# CHECK-NOT: A-LABEL: function_1 +# CHECK-NOT: REDUNDANT-LABEL: function_1: + +# CHECK: B-LABEL: function_2: +# CHECK-NOT: A-LABEL: function_2: +# CHECK-NOT: UNUSED-LABEL: function_2: + +# CHECK: NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +# CHECK-NEXT: A: +# CHECK-NEXT: REDUNDANT: +# CHECK-NEXT: UNUSED: diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index a00a817d3a848d..b26c9a791d50e3 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -484,6 +484,6 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, global_vars_seen_dict, is_filtered): # Label format is based on ASM string. check_label_format = '{} %s-LABEL: %s%s%s'.format(comment_marker) - common.add_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, check_label_format, True, False, - global_vars_seen_dict, is_filtered=is_filtered) + return common.add_checks(output_lines, comment_marker, prefix_list, func_dict, + func_name, check_label_format, True, False, + global_vars_seen_dict, is_filtered=is_filtered) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 90dd326fb053aa..7ca546c3d251e6 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -10,6 +10,8 @@ import sys import shlex +from typing import List + ##### Common utilities for update_*test_checks.py @@ -129,6 +131,11 @@ def __call__(self, parser, namespace, values, option_string=None): help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)') parser.add_argument('--global-hex-value-regex', nargs='+', default=[], help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives') + parser.add_argument('--generate-body-for-unused-prefixes', + action=argparse.BooleanOptionalAction, + dest='gen_unused_prefix_body', + default=True, + help='Generate a function body that always matches for unused prefixes. 
This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.') args = parser.parse_args() global _verbose, _global_value_regex, _global_hex_value_regex _verbose = args.verbose @@ -167,6 +174,7 @@ def __init__(self, test, parser, script_name, input_lines, args, argv, self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT self.test_autogenerated_note = self.autogenerated_note_prefix + script_name self.test_autogenerated_note += get_autogennote_suffix(parser, self.args) + self.test_unused_note = self.comment_prefix + self.comment_prefix + ' ' + UNUSED_NOTE def ro_iterlines(self): for line_num, input_line in enumerate(self.input_lines): @@ -188,6 +196,22 @@ def iterlines(self, output_lines): continue yield line_info + def get_checks_for_unused_prefixes(self, run_list, used_prefixes: List[str]) -> List[str]: + unused_prefixes = set( + [prefix for sublist in run_list for prefix in sublist[0]]).difference(set(used_prefixes)) + + ret = [] + if not unused_prefixes: + return ret + ret.append(self.test_unused_note) + for unused in sorted(unused_prefixes): + ret.append('{comment} {prefix}: {match_everything}'.format( + comment=self.comment_prefix, + prefix=unused, + match_everything=r"""{{.*}}""" + )) + return ret + def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None): for pattern in test_patterns: # On Windows we must expand the patterns ourselves. 
@@ -212,7 +236,12 @@ def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_ assert UTC_ADVERT not in first_line warn("Skipping test which isn't autogenerated: " + test) continue - yield TestInfo(test, parser, script_name, input_lines, args, argv, + final_input_lines = [] + for l in input_lines: + if UNUSED_NOTE in l: + break + final_input_lines.append(l) + yield TestInfo(test, parser, script_name, final_input_lines, args, argv, comment_prefix, argparse_callback) @@ -293,6 +322,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): UTC_ARGS_KEY = 'UTC_ARGS:' UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + '\s*(?P.*)\s*$') UTC_ADVERT = 'NOTE: Assertions have been autogenerated by ' +UNUSED_NOTE = 'NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:' OPT_FUNCTION_RE = re.compile( r'^(\s*;\s*Function\sAttrs:\s(?P[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P[\w.$-]+?)\s*' @@ -471,7 +501,7 @@ def __init__(self, run_list, flags, scrubber_args, path): self._global_var_dict.update({prefix:dict()}) def finish_and_get_func_dict(self): - for prefix in self._get_failed_prefixes(): + for prefix in self.get_failed_prefixes(): warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix,self._path,)) return self._func_dict @@ -577,11 +607,10 @@ def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes, is_ scrubbed_body, scrubbed_extra, args_and_sig, attrs, func_name_separator) self._func_order[prefix].append(func) - def _get_failed_prefixes(self): + def get_failed_prefixes(self): # This returns the list of those prefixes that failed to match any function, # because there were conflicting bodies produced by different RUN lines, in - # all instances of the prefix. Effectively, this prefix is unused and should - # be removed. + # all instances of the prefix. 
for prefix in self._func_dict: if (self._func_dict[prefix] and (not [fct for fct in self._func_dict[prefix] @@ -974,6 +1003,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, if key not in global_vars_seen_before: global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] break + return printed_prefixes def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, preserve_names, function_sig, @@ -981,16 +1011,16 @@ def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, # Label format is based on IR string. function_def_regex = 'define {{[^@]+}}' if function_sig else '' check_label_format = '{} %s-LABEL: {}@%s%s%s'.format(comment_marker, function_def_regex) - add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, - check_label_format, False, preserve_names, global_vars_seen_dict, - is_filtered) + return add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, + check_label_format, False, preserve_names, global_vars_seen_dict, + is_filtered) def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered): check_label_format = '{} %s-LABEL: \'%s%s%s\''.format(comment_marker) global_vars_seen_dict = {} - add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, - check_label_format, False, True, global_vars_seen_dict, - is_filtered) + return add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, + check_label_format, False, True, global_vars_seen_dict, + is_filtered) def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values): @@ -1189,6 +1219,7 @@ def dump_input_lines(output_lines, test_info, prefix_set, comment_string): def add_checks_at_end(output_lines, prefix_list, func_order, comment_string, check_generator): added = set() + generated_prefixes = [] for prefix in prefix_list: 
prefixes = prefix[0] tool_args = prefix[1] @@ -1212,6 +1243,7 @@ def add_checks_at_end(output_lines, prefix_list, func_order, # single prefix before moving on to the next prefix. So checks # are ordered by prefix instead of by function as in "normal" # mode. - check_generator(output_lines, + generated_prefixes.extend(check_generator(output_lines, [([prefix], tool_args)], - func) + func)) + return generated_prefixes diff --git a/llvm/utils/UpdateTestChecks/isel.py b/llvm/utils/UpdateTestChecks/isel.py index 796e620e6679be..2978e5133ecbc6 100644 --- a/llvm/utils/UpdateTestChecks/isel.py +++ b/llvm/utils/UpdateTestChecks/isel.py @@ -52,6 +52,6 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, global_vars_seen_dict, is_filtered): # Label format is based on iSel string. check_label_format = '{} %s-LABEL: %s%s%s'.format(comment_marker) - common.add_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, check_label_format, True, False, - global_vars_seen_dict, is_filtered = is_filtered) + return common.add_checks(output_lines, comment_marker, prefix_list, func_dict, + func_name, check_label_format, True, False, + global_vars_seen_dict, is_filtered=is_filtered) diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py index 97ef7e39093c90..0ca98a47c9fcb7 100755 --- a/llvm/utils/update_cc_test_checks.py +++ b/llvm/utils/update_cc_test_checks.py @@ -340,17 +340,17 @@ def main(): # Now generate all the checks. 
def check_generator(my_output_lines, prefixes, func): if '-emit-llvm' in clang_args: - common.add_ir_checks(my_output_lines, '//', - prefixes, - func_dict, func, False, - ti.args.function_signature, - global_vars_seen_dict, - is_filtered=builder.is_filtered()) + return common.add_ir_checks(my_output_lines, '//', + prefixes, + func_dict, func, False, + ti.args.function_signature, + global_vars_seen_dict, + is_filtered=builder.is_filtered()) else: - asm.add_checks(my_output_lines, '//', - prefixes, - func_dict, func, global_vars_seen_dict, - is_filtered=builder.is_filtered()) + return asm.add_checks(my_output_lines, '//', + prefixes, + func_dict, func, global_vars_seen_dict, + is_filtered=builder.is_filtered()) if ti.args.check_globals: common.add_global_checks(builder.global_var_dict(), '//', run_list, diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py index c9b39276c93e86..66eb5d4e4f5e37 100755 --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -153,6 +153,7 @@ def main(): '--include-generated-funcs', True) + generated_prefixes = [] if include_generated_funcs: # Generate the appropriate checks for each function. We need to emit # these in the order according to the generated output so that CHECK-LABEL @@ -164,14 +165,15 @@ def main(): common.dump_input_lines(output_lines, ti, prefix_set, ';') # Now generate all the checks. 
- common.add_checks_at_end(output_lines, run_list, builder.func_order(), - check_indent + ';', - lambda my_output_lines, prefixes, func: - output_type.add_checks(my_output_lines, - check_indent + ';', - prefixes, func_dict, func, - global_vars_seen_dict, - is_filtered=builder.is_filtered())) + generated_prefixes = common.add_checks_at_end( + output_lines, run_list, builder.func_order(), + check_indent + ';', + lambda my_output_lines, prefixes, func: + output_type.add_checks(my_output_lines, + check_indent + ';', + prefixes, func_dict, func, + global_vars_seen_dict, + is_filtered=builder.is_filtered())) else: for input_info in ti.iterlines(output_lines): input_line = input_info.line @@ -186,9 +188,10 @@ def main(): continue # Print out the various check lines here. - output_type.add_checks(output_lines, check_indent + ';', run_list, - func_dict, func_name, global_vars_seen_dict, - is_filtered=builder.is_filtered()) + generated_prefixes.extend( + output_type.add_checks(output_lines, check_indent + ';', run_list, + func_dict, func_name, global_vars_seen_dict, + is_filtered=builder.is_filtered())) is_in_function_start = False if is_in_function: @@ -213,8 +216,11 @@ def main(): continue is_in_function = is_in_function_start = True + if ti.args.gen_unused_prefix_body: + output_lines.extend(ti.get_checks_for_unused_prefixes( + run_list, generated_prefixes)) + common.debug('Writing %d lines to %s...' % (len(output_lines), ti.path)) - with open(ti.path, 'wb') as f: f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) From ce54b22657f01d1c40de4941ceb6e7119848aecf Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 24 May 2022 16:56:22 -0700 Subject: [PATCH 613/908] [Clang][CoverageMapping] Fix switch counter codegen compile time explosion C++ generated code with huge amount of switch cases chokes badly while emitting coverage mapping, in our specific testcase (~72k cases), it won't stop after hours. 
After this change, the frontend job now finishes in 4.5s and shrinks down `@__covrec_` by 288k when compared to disabling simplification altogether. There's probably no good way to create a testcase for this, but it's easy to reproduce, just add thousands of cases in the below switch, and build with `-fprofile-instr-generate -fcoverage-mapping`. ``` enum type : int { FEATURE_INVALID = 0, FEATURE_A = 1, ... }; const char *to_string(type e) { switch (e) { case type::FEATURE_INVALID: return "FEATURE_INVALID"; case type::FEATURE_A: return "FEATURE_A";} ... } ``` Differential Revision: https://reviews.llvm.org/D126345 --- clang/lib/CodeGen/CoverageMappingGen.cpp | 20 ++++++++++++------- .../ProfileData/Coverage/CoverageMapping.h | 4 ++-- .../ProfileData/Coverage/CoverageMapping.cpp | 12 ++++++----- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 9b81c8a670f59d..8952125eeefcb6 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -550,17 +550,18 @@ struct CounterCoverageMappingBuilder Counter GapRegionCounter; /// Return a counter for the subtraction of \c RHS from \c LHS - Counter subtractCounters(Counter LHS, Counter RHS) { - return Builder.subtract(LHS, RHS); + Counter subtractCounters(Counter LHS, Counter RHS, bool Simplify = true) { + return Builder.subtract(LHS, RHS, Simplify); } /// Return a counter for the sum of \c LHS and \c RHS. 
- Counter addCounters(Counter LHS, Counter RHS) { - return Builder.add(LHS, RHS); + Counter addCounters(Counter LHS, Counter RHS, bool Simplify = true) { + return Builder.add(LHS, RHS, Simplify); } - Counter addCounters(Counter C1, Counter C2, Counter C3) { - return addCounters(addCounters(C1, C2), C3); + Counter addCounters(Counter C1, Counter C2, Counter C3, + bool Simplify = true) { + return addCounters(addCounters(C1, C2, Simplify), C3, Simplify); } /// Return the region counter for the given statement. @@ -1317,11 +1318,16 @@ struct CounterCoverageMappingBuilder const SwitchCase *Case = S->getSwitchCaseList(); for (; Case; Case = Case->getNextSwitchCase()) { HasDefaultCase = HasDefaultCase || isa(Case); - CaseCountSum = addCounters(CaseCountSum, getRegionCounter(Case)); + CaseCountSum = + addCounters(CaseCountSum, getRegionCounter(Case), /*Simplify=*/false); createSwitchCaseRegion( Case, getRegionCounter(Case), subtractCounters(ParentCount, getRegionCounter(Case))); } + // Simplify is skipped while building the counters above: it can get really + // slow on top of switches with thousands of cases. Instead, trigger + // simplification by adding zero to the last counter. + CaseCountSum = addCounters(CaseCountSum, Counter::getZero()); // If no explicit default case exists, create a branch region to represent // the hidden branch, which will be added later by the CodeGen. This region diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index e1f45019b1a929..e35751512245d7 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -195,11 +195,11 @@ class CounterExpressionBuilder { ArrayRef getExpressions() const { return Expressions; } /// Return a counter that represents the expression that adds LHS and RHS. 
- Counter add(Counter LHS, Counter RHS); + Counter add(Counter LHS, Counter RHS, bool Simplify = true); /// Return a counter that represents the expression that subtracts RHS from /// LHS. - Counter subtract(Counter LHS, Counter RHS); + Counter subtract(Counter LHS, Counter RHS, bool Simplify = true); }; using LineColPair = std::pair; diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 94c2bee3590cb5..f9e58fd6afa506 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -123,13 +123,15 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { return C; } -Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) { - return simplify(get(CounterExpression(CounterExpression::Add, LHS, RHS))); +Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS, bool Simplify) { + auto Cnt = get(CounterExpression(CounterExpression::Add, LHS, RHS)); + return Simplify ? simplify(Cnt) : Cnt; } -Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) { - return simplify( - get(CounterExpression(CounterExpression::Subtract, LHS, RHS))); +Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS, + bool Simplify) { + auto Cnt = get(CounterExpression(CounterExpression::Subtract, LHS, RHS)); + return Simplify ? simplify(Cnt) : Cnt; } void CounterMappingContext::dump(const Counter &C, raw_ostream &OS) const { From 8a467284d5608522fdf80cb78056d42d28f5ebb5 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 26 May 2022 00:30:12 -0700 Subject: [PATCH 614/908] Defer source path remap tilde expansion until source file use When reading source path remappings out of a dSYM, lldb currently does tilde expansion -- expanding the tilde-username and checking that the destination pathname exists, for each dSYM with the path remappings. 
This cost happens during lldb's initial process launch / load, an especially perf-sensitive time. Inside Apple, we have dSYMs with source path remappings pointing to NFS directories where these extra stats for every dSYM can be very expensive if the network is slow. This patch instead keeps the source path mapping in the original tilde-username terms and does the tilde expansion when we need to read a specific source file from one of the modules. We'll be stat'ing all of those inodes to load the source file anyway, so the fact that we do the tilde expansion on every source file we load, it doesn't cost us significantly. Differential Revision: https://reviews.llvm.org/D126435 rdar://77091379 (cherry picked from commit c274b6e5830ea88d3f55d6dc1d2b99e38cf6595e) --- lldb/source/Core/SourceManager.cpp | 17 ++++++++++++++--- .../SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp | 12 ------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index 8599c0a6a2284b..d4e1791aa3e46a 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -49,6 +49,13 @@ using namespace lldb_private; static inline bool is_newline_char(char ch) { return ch == '\n' || ch == '\r'; } +static void resolve_tilde(FileSpec &file_spec) { + if (!FileSystem::Instance().Exists(file_spec) && + file_spec.GetDirectory().GetCString()[0] == '~') { + FileSystem::Instance().Resolve(file_spec); + } +} + // SourceManager constructor SourceManager::SourceManager(const TargetSP &target_sp) : m_last_line(0), m_last_count(0), m_default_set(false), @@ -66,10 +73,13 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { if (!file_spec) return nullptr; + FileSpec resolved_fspec = file_spec; + resolve_tilde(resolved_fspec); + DebuggerSP debugger_sp(m_debugger_wp.lock()); FileSP file_sp; if (debugger_sp && debugger_sp->GetUseSourceCache()) - file_sp = 
debugger_sp->GetSourceFileCache().FindSourceFile(file_spec); + file_sp = debugger_sp->GetSourceFileCache().FindSourceFile(resolved_fspec); TargetSP target_sp(m_target_wp.lock()); @@ -87,9 +97,9 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { // If file_sp is no good or it points to a non-existent file, reset it. if (!file_sp || !FileSystem::Instance().Exists(file_sp->GetFileSpec())) { if (target_sp) - file_sp = std::make_shared(file_spec, target_sp.get()); + file_sp = std::make_shared(resolved_fspec, target_sp.get()); else - file_sp = std::make_shared(file_spec, debugger_sp); + file_sp = std::make_shared(resolved_fspec, debugger_sp); if (debugger_sp && debugger_sp->GetUseSourceCache()) debugger_sp->GetSourceFileCache().AddSourceFile(file_sp); @@ -441,6 +451,7 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec, } } } + resolve_tilde(m_file_spec); // Try remapping if m_file_spec does not correspond to an existing file. if (!FileSystem::Instance().Exists(m_file_spec)) { // Check target specific source remappings (i.e., the diff --git a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp index b99ade50d857bd..ea1aa7e0587104 100644 --- a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp +++ b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp @@ -237,13 +237,6 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, !original_DBGSourcePath_value.empty()) { DBGSourcePath = original_DBGSourcePath_value; } - if (DBGSourcePath[0] == '~') { - FileSpec resolved_source_path( - DBGSourcePath.c_str()); - FileSystem::Instance().Resolve( - resolved_source_path); - DBGSourcePath = resolved_source_path.GetPath(); - } module_sp->GetSourceMappingList().Append( key.GetStringRef(), DBGSourcePath, true); // With version 2 of DBGSourcePathRemapping, we @@ -275,11 +268,6 @@ SymbolVendorMacOSX::CreateInstance(const 
lldb::ModuleSP &module_sp, DBGBuildSourcePath); plist.GetValueAsString("DBGSourcePath", DBGSourcePath); if (!DBGBuildSourcePath.empty() && !DBGSourcePath.empty()) { - if (DBGSourcePath[0] == '~') { - FileSpec resolved_source_path(DBGSourcePath.c_str()); - FileSystem::Instance().Resolve(resolved_source_path); - DBGSourcePath = resolved_source_path.GetPath(); - } module_sp->GetSourceMappingList().Append( DBGBuildSourcePath, DBGSourcePath, true); } From bd67468645c08a96aec7839c30385956edd88021 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 26 May 2022 11:11:43 -0700 Subject: [PATCH 615/908] Check that a FileSpec has a Directory component before using A follow on to my patch for https://reviews.llvm.org/D126435 hit by an x86_64 linux bot; I assumed that a FileSpec had a directory component and checked if the first character was a '~'. This was not a valid assumption. --- lldb/source/Core/SourceManager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index d4e1791aa3e46a..0e2157f377e0a6 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -51,6 +51,7 @@ static inline bool is_newline_char(char ch) { return ch == '\n' || ch == '\r'; } static void resolve_tilde(FileSpec &file_spec) { if (!FileSystem::Instance().Exists(file_spec) && + file_spec.GetDirectory() && file_spec.GetDirectory().GetCString()[0] == '~') { FileSystem::Instance().Resolve(file_spec); } From ad73ce318ef901545fa82d57a609c71af787e1e1 Mon Sep 17 00:00:00 2001 From: Zongwei Lan Date: Thu, 26 May 2022 11:22:41 -0700 Subject: [PATCH 616/908] [Target] use getSubtarget<> instead of static_cast<>(getSubtarget()) Differential Revision: https://reviews.llvm.org/D125391 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 3 ++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 2 +- .../Target/AArch64/AArch64FalkorHWPFFix.cpp | 2 +- llvm/lib/Target/AArch64/AArch64FastISel.cpp | 3 +- 
.../Target/AArch64/AArch64ISelLowering.cpp | 28 +++++++------------ .../AArch64/AArch64LoadStoreOptimizer.cpp | 2 +- .../GISel/AArch64InstructionSelector.cpp | 3 +- llvm/lib/Target/ARM/ARMBlockPlacement.cpp | 2 +- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 2 +- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2 +- llvm/lib/Target/ARM/ARMFastISel.cpp | 5 ++-- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 5 ++-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 8 ++---- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 4 +-- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 2 +- .../ARM/MVETPAndVPTOptimisationsPass.cpp | 3 +- llvm/lib/Target/ARM/MVEVPTBlockPass.cpp | 3 +- llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 3 +- llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 2 +- .../Target/CSKY/CSKYConstantIslandPass.cpp | 2 +- .../Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 2 +- .../Target/Hexagon/HexagonISelLowering.cpp | 7 ++--- llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 2 +- .../lib/Target/M68k/M68kCollapseMOVEMPass.cpp | 2 +- llvm/lib/Target/M68k/M68kExpandPseudo.cpp | 2 +- llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp | 2 +- .../Target/Mips/MicroMipsSizeReduction.cpp | 2 +- llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 2 +- llvm/lib/Target/Mips/MipsBranchExpansion.cpp | 2 +- llvm/lib/Target/Mips/MipsCallLowering.cpp | 3 +- .../Target/Mips/MipsConstantIslandPass.cpp | 2 +- llvm/lib/Target/Mips/MipsExpandPseudo.cpp | 2 +- llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp | 2 +- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 3 +- llvm/lib/Target/Mips/MipsMachineFunction.cpp | 2 +- llvm/lib/Target/Mips/MipsOptimizePICCall.cpp | 2 +- .../Target/Mips/MipsPreLegalizerCombiner.cpp | 3 +- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 5 ++-- llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 2 +- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 9 ++---- llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 3 +- 
llvm/lib/Target/X86/X86CallingConv.cpp | 2 +- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++-- 46 files changed, 68 insertions(+), 93 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 6d5f6c8b50c3e0..82eda0a9177830 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -452,6 +452,9 @@ class SelectionDAG { const DataLayout &getDataLayout() const { return MF->getDataLayout(); } const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } + template const STC &getSubtarget() const { + return MF->getSubtarget(); + } const TargetLowering &getTargetLoweringInfo() const { return *TLI; } const TargetLibraryInfo &getLibInfo() const { return *LibInfo; } const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index eadc792be3d08e..58e3cfd1d00a1d 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -132,7 +132,7 @@ class AArch64AsmPrinter : public AsmPrinter { bool runOnMachineFunction(MachineFunction &MF) override { AArch64FI = MF.getInfo(); - STI = static_cast(&MF.getSubtarget()); + STI = &MF.getSubtarget(); SetupMachineFunction(MF); diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 793663ef97d7e4..6de37412546695 100644 --- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -813,7 +813,7 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { } bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { - auto &ST = static_cast(Fn.getSubtarget()); + auto &ST = Fn.getSubtarget(); if (ST.getProcFamily() != 
AArch64Subtarget::Falkor) return false; diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index dc5e6807945d72..49fffa01a974d6 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -283,8 +283,7 @@ class AArch64FastISel final : public FastISel { explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { - Subtarget = - &static_cast(FuncInfo.MF->getSubtarget()); + Subtarget = &FuncInfo.MF->getSubtarget(); Context = &FuncInfo.Fn->getContext(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 3aaadc767dbdc8..e31a58da083137 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2566,8 +2566,7 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, EVT VT = LHS.getValueType(); assert(VT != MVT::f128); - const bool FullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); if (VT == MVT::f16 && !FullFP16) { LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other}, @@ -2585,8 +2584,7 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG) { EVT VT = LHS.getValueType(); - const bool FullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); if (VT.isFloatingPoint()) { assert(VT != MVT::f128); @@ -2694,8 +2692,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG) { unsigned Opcode = 0; - const bool FullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const 
bool FullFP16 = DAG.getSubtarget().hasFullFP16(); if (LHS.getValueType().isFloatingPoint()) { assert(LHS.getValueType() != MVT::f128); @@ -11865,8 +11862,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType()); } - const bool FullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); // Make v4f16 (only) fcmp operations utilise vector instructions // v8f16 support will be a litle more complicated @@ -12000,7 +11996,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const { - auto &Subtarget = static_cast(DAG.getSubtarget()); + auto &Subtarget = DAG.getSubtarget(); if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) return SDValue(); @@ -12017,7 +12013,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op, SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const { - auto &Subtarget = static_cast(DAG.getSubtarget()); + auto &Subtarget = DAG.getSubtarget(); if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) return SDValue(); @@ -14797,8 +14793,7 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, ConstantSDNode *ConstantN1 = dyn_cast(N1); EVT VT = N->getValueType(0); - const bool FullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); bool IsStrict = N0->isStrictFPOpcode(); // Rewrite for pairwise fadd pattern @@ -16009,8 +16004,7 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) { // If we're compiling for a specific vector-length, we can check if the // pattern's VL equals that of the scalable vector at runtime. 
if (N.getOpcode() == AArch64ISD::PTRUE) { - const auto &Subtarget = - static_cast(DAG.getSubtarget()); + const auto &Subtarget = DAG.getSubtarget(); unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits(); unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits(); if (MaxSVESize && MinSVESize == MaxSVESize) { @@ -17231,8 +17225,7 @@ static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N, Stride > std::numeric_limits::max()) return Changed; - const auto &Subtarget = - static_cast(DAG.getSubtarget()); + const auto &Subtarget = DAG.getSubtarget(); unsigned MaxVScale = Subtarget.getMaxSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock; int64_t LastElementOffset = @@ -20219,8 +20212,7 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use // AArch64SVEPredPattern::all, which can enable the use of unpredicated // variants of instructions when available. - const auto &Subtarget = - static_cast(DAG.getSubtarget()); + const auto &Subtarget = DAG.getSubtarget(); unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits(); unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits(); if (MaxSVESize && MinSVESize == MaxSVESize && diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 02795f8310fc22..502b6321d5bb0f 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -2308,7 +2308,7 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; - Subtarget = &static_cast(Fn.getSubtarget()); + Subtarget = &Fn.getSubtarget(); TII = static_cast(Subtarget->getInstrInfo()); TRI = Subtarget->getRegisterInfo(); AA = &getAnalysis().getAAResults(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 156c3a8c06e91b..539a1b85a4e375 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2335,8 +2335,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const AArch64Subtarget *Subtarget = - &static_cast(MF.getSubtarget()); + const AArch64Subtarget *Subtarget = &MF.getSubtarget(); if (Subtarget->requiresStrictAlign()) { // We don't support this feature yet. LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp index e05d88218ba90e..b2d291bbe7ffde 100644 --- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -213,7 +213,7 @@ bool ARMBlockPlacement::processPostOrderLoops(MachineLoop *ML) { bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - const ARMSubtarget &ST = static_cast(MF.getSubtarget()); + const ARMSubtarget &ST = MF.getSubtarget(); if (!ST.hasLOB()) return false; LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n"); diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index a2a4f1f3bdfdeb..d77c3afd05e574 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -396,7 +396,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlign().value() << " bytes *****\n"); - STI = &static_cast(MF->getSubtarget()); + STI = &MF->getSubtarget(); TII = STI->getInstrInfo(); isPositionIndependentOrROPI = STI->getTargetLowering()->isPositionIndependent() || 
STI->isROPI(); diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2f083561bbd405..61fe27095e4f60 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -3132,7 +3132,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); AFI = MF.getInfo(); diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 5d94b99d4c5d95..1f9a4ad687f146 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -122,8 +122,7 @@ class ARMFastISel final : public FastISel { explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo), - Subtarget( - &static_cast(funcInfo.MF->getSubtarget())), + Subtarget(&funcInfo.MF->getSubtarget()), M(const_cast(*funcInfo.Fn->getParent())), TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) { @@ -156,7 +155,7 @@ class ARMFastISel final : public FastISel { const LoadInst *LI) override; bool fastLowerArguments() override; - #include "ARMGenFastISel.inc" +#include "ARMGenFastISel.inc" // Instruction selection routines. 
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index b2765b2370ff74..f3d95f057e2752 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -392,8 +392,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction) { - const ARMSubtarget &AST = - static_cast(MF.getSubtarget()); + const ARMSubtarget &AST = MF.getSubtarget(); const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); const unsigned AlignMask = Alignment.value() - 1U; const unsigned NrBitsToZero = Log2(Alignment); @@ -1768,7 +1767,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { return; // We are planning to use NEON instructions vst1 / vld1. - if (!static_cast(MF.getSubtarget()).hasNEON()) + if (!MF.getSubtarget().hasNEON()) return; // Don't bother if the default stack alignment is sufficiently high. 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index d3e1b92c3b957b..eb1a0c55ca9b7c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5807,8 +5807,7 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.UnrollVectorOp(Op.getNode()); } - const bool HasFullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool HasFullFP16 = DAG.getSubtarget().hasFullFP16(); EVT NewTy; const EVT OpTy = Op.getOperand(0).getValueType(); @@ -5918,8 +5917,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { Op.getOperand(0).getValueType() == MVT::v8i16) && "Invalid type for custom lowering!"); - const bool HasFullFP16 = - static_cast(DAG.getSubtarget()).hasFullFP16(); + const bool HasFullFP16 = DAG.getSubtarget().hasFullFP16(); EVT DestVecType; if (VT == MVT::v4f32) @@ -9882,7 +9880,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, if (N->getOpcode() != ISD::SDIV) return SDValue(); - const auto &ST = static_cast(DAG.getSubtarget()); + const auto &ST = DAG.getSubtarget(); const bool MinSize = ST.hasMinSize(); const bool HasDivide = ST.isThumb() ? 
ST.hasDivideInThumbMode() : ST.hasDivideInARMMode(); diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index fff286b25413bd..fb9d6863aca747 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2110,7 +2110,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return false; MF = &Fn; - STI = &static_cast(Fn.getSubtarget()); + STI = &Fn.getSubtarget(); TL = STI->getTargetLowering(); AFI = Fn.getInfo(); TII = STI->getInstrInfo(); @@ -2201,7 +2201,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return false; TD = &Fn.getDataLayout(); - STI = &static_cast(Fn.getSubtarget()); + STI = &Fn.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 83ef6554ceb2d8..aa739db44da2ca 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1299,7 +1299,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) { } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { - const ARMSubtarget &ST = static_cast(mf.getSubtarget()); + const ARMSubtarget &ST = mf.getSubtarget(); if (!ST.hasLOB()) return false; diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp index 92c324d58c254a..6bad9d61238e76 100644 --- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp +++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp @@ -1052,8 +1052,7 @@ bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { } bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget(); if (!STI.isThumb2() || !STI.hasLOB()) 
return false; diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index c7f451cba14ffb..d6d43b9143d612 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -312,8 +312,7 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { } bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget(); if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) return false; diff --git a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index c979a26303abbe..155555152ced13 100644 --- a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -284,8 +284,7 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) { } bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget(); if (!STI.isThumb2()) return false; AFI = Fn.getInfo(); diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 1cc5422523f172..cdbcc9527e1568 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -1130,7 +1130,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { if (PredicateFtor && !PredicateFtor(MF.getFunction())) return false; - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); if (STI->isThumb1Only() || STI->prefers32BitThumb()) return false; diff --git a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp index 0aad16c15353d4..5d7241258543b0 100644 --- a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp +++ b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp @@ -287,7 +287,7 @@ LLVM_DUMP_METHOD void CSKYConstantIslands::dumpBBs() { 
bool CSKYConstantIslands::runOnMachineFunction(MachineFunction &Mf) { MF = &Mf; MCP = Mf.getConstantPool(); - STI = &static_cast(Mf.getSubtarget()); + STI = &Mf.getSubtarget(); LLVM_DEBUG(dbgs() << "***** CSKYConstantIslands: " << MCP->getConstants().size() << " CP entries, aligned to " diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 0a6dd727eb82d3..0848d30e7403e9 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -801,7 +801,7 @@ static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) { return static_cast(G.getTargetLoweringInfo()); } static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) { - return static_cast(G.getSubtarget()); + return G.getSubtarget(); } namespace llvm { diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 0cbda5786897e4..11dd8e2c2b674a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1396,10 +1396,9 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag); InFlag = Chain.getValue(1); - unsigned Flags = - static_cast(DAG.getSubtarget()).useLongCalls() - ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended - : HexagonII::MO_GDPLT; + unsigned Flags = DAG.getSubtarget().useLongCalls() + ? 
HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended + : HexagonII::MO_GDPLT; return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT, Hexagon::R0, Flags); diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 6769548d6cbc28..6fb1313667a956 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -37,7 +37,7 @@ def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; def HwLen2: SDNodeXForm(CurDAG->getSubtarget()); + const auto &ST = CurDAG->getSubtarget(); return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32); }]>; diff --git a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp index 7f0c0dd92dbb7b..cbd69f24666eed 100644 --- a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp +++ b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp @@ -231,7 +231,7 @@ class M68kCollapseMOVEM : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MFI = MF.getInfo(); diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp index acfa30f28c2bb0..51a148f5aa04ad 100644 --- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -302,7 +302,7 @@ bool M68kExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } bool M68kExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MFI = MF.getInfo(); diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp index 9ef97b96ea9a93..b75fbc3c7f2202 100644 --- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp +++ 
b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp @@ -312,7 +312,7 @@ class M68kDAGToDAGISel : public SelectionDAGISel { } // namespace bool M68kDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp index 55d3c59cbf0348..b0de8dacf6913a 100644 --- a/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp +++ b/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -774,7 +774,7 @@ bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI, bool MicroMipsSizeReduce::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); // TODO: Add support for the subtarget microMIPS32R6. if (!Subtarget->inMicroMipsMode() || !Subtarget->hasMips32r2() || diff --git a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 50147c019bfd51..ce04124a7b00bc 100644 --- a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -35,7 +35,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); if (!Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); diff --git a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp index 797547f9986177..a4fa0792a998d3 100644 --- a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp +++ b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp @@ -879,7 +879,7 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); IsPIC = TM.isPositionIndependent(); ABI = static_cast(TM).getABI(); - STI = 
&static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = static_cast(STI->getInstrInfo()); if (IsPIC && ABI.IsO32() && diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp index 409c24d86e650a..3c1c2bcd7a1b9f 100644 --- a/llvm/lib/Target/Mips/MipsCallLowering.cpp +++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp @@ -541,8 +541,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, } MIRBuilder.insertInstr(MIB); if (MIB->getOpcode() == Mips::JALRPseudo) { - const MipsSubtarget &STI = - static_cast(MIRBuilder.getMF().getSubtarget()); + const MipsSubtarget &STI = MIRBuilder.getMF().getSubtarget(); MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), *STI.getRegBankInfo()); } diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index 73691e0db3d41c..0341af0caac46e 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -436,7 +436,7 @@ bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) { // FIXME: MF = &mf; MCP = mf.getConstantPool(); - STI = &static_cast(mf.getSubtarget()); + STI = &mf.getSubtarget(); LLVM_DEBUG(dbgs() << "constant island machine function " << "\n"); if (!STI->inMips16Mode() || !MipsSubtarget::useConstantIslands()) { diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp index 31180d5a23ef05..d242083f958b86 100644 --- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp @@ -892,7 +892,7 @@ bool MipsExpandPseudo::expandMBB(MachineBasicBlock &MBB) { } bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); bool Modified = false; diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 
d88696525e9e51..c4bb3d90b4d547 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -54,7 +54,7 @@ void MipsDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { } bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); bool Ret = SelectionDAGISel::runOnMachineFunction(MF); processFunctionAfterISel(MF); diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 98f98b5463c44c..35b0fe218d8f41 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -503,8 +503,7 @@ static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; - const MipsSubtarget &ST = - static_cast(MI.getMF()->getSubtarget()); + const MipsSubtarget &ST = MI.getMF()->getSubtarget(); const MipsInstrInfo &TII = *ST.getInstrInfo(); const MipsRegisterInfo &TRI = *ST.getRegisterInfo(); const RegisterBankInfo &RBI = *ST.getRegBankInfo(); diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/llvm/lib/Target/Mips/MipsMachineFunction.cpp index 411a26e42713e1..ba1716a7c0b3ef 100644 --- a/llvm/lib/Target/Mips/MipsMachineFunction.cpp +++ b/llvm/lib/Target/Mips/MipsMachineFunction.cpp @@ -29,7 +29,7 @@ bool MipsFunctionInfo::globalBaseRegSet() const { } static const TargetRegisterClass &getGlobalBaseRegClass(MachineFunction &MF) { - auto &STI = static_cast(MF.getSubtarget()); + auto &STI = MF.getSubtarget(); auto &TM = static_cast(MF.getTarget()); if (STI.inMips16Mode()) diff --git a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp index 1b5c46c1edba0b..204c42ae5e5f50 100644 --- a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp +++ 
b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -194,7 +194,7 @@ void MBBInfo::postVisit() { // OptimizePICCall methods. bool OptimizePICCall::runOnMachineFunction(MachineFunction &F) { - if (static_cast(F.getSubtarget()).inMips16Mode()) + if (F.getSubtarget().inMips16Mode()) return false; // Do a pre-order traversal of the dominator tree. diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp index e2074e24c44d8a..cb6d53ec0a1243 100644 --- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp @@ -51,8 +51,7 @@ bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, // Don't attempt to combine non power of 2 loads or unaligned loads when // subtarget doesn't support them. auto MMO = *MI.memoperands_begin(); - const MipsSubtarget &STI = - static_cast(MI.getMF()->getSubtarget()); + const MipsSubtarget &STI = MI.getMF()->getSubtarget(); if (!isPowerOf2_64(MMO->getSize())) return false; bool isUnaligned = MMO->getAlign() < MMO->getSize(); diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index 2cb59e6960313d..2544d9d9b76d9e 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -153,8 +153,7 @@ static bool isGprbTwoInstrUnalignedLoadOrStore(const MachineInstr *MI) { if (MI->getOpcode() == TargetOpcode::G_LOAD || MI->getOpcode() == TargetOpcode::G_STORE) { auto MMO = *MI->memoperands_begin(); - const MipsSubtarget &STI = - static_cast(MI->getMF()->getSubtarget()); + const MipsSubtarget &STI = MI->getMF()->getSubtarget(); if (MMO->getSize() == 4 && (!STI.systemSupportsUnalignedAccess() && MMO->getAlign() < MMO->getSize())) return true; @@ -398,7 +397,7 @@ void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction( static const MipsRegisterBankInfo::ValueMapping * getMSAMapping(const MachineFunction &MF) { - 
assert(static_cast(MF.getSubtarget()).hasMSA() && + assert(MF.getSubtarget().hasMSA() && "MSA mapping not available on target without MSA."); return &Mips::ValueMappings[Mips::MSAIdx]; } diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 7ee2ddf3605ff1..7729d9cf92dae6 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -97,7 +97,7 @@ class ExpandPseudo { ExpandPseudo::ExpandPseudo(MachineFunction &MF_) : MF(MF_), MRI(MF.getRegInfo()), - Subtarget(static_cast(MF.getSubtarget())), + Subtarget(MF.getSubtarget()), TII(*static_cast(Subtarget.getInstrInfo())), RegInfo(*Subtarget.getRegisterInfo()) {} diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 9a3a851b4a8b5a..1124111c1a6edb 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -38,7 +38,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); if (Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index dd4290a605a9cc..dfa84bc735e2c9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -42,7 +42,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, } bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &static_cast(MF.getSubtarget()); + Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f9da62a27af585..6a31bd763ccd9d 100644 --- 
a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1883,8 +1883,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - const PPCSubtarget& Subtarget = - static_cast(DAG.getSubtarget()); + const PPCSubtarget &Subtarget = DAG.getSubtarget(); if (!Subtarget.hasP8Vector()) return false; @@ -6922,8 +6921,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( if (useSoftFloat()) report_fatal_error("Soft float support is unimplemented on AIX."); - const PPCSubtarget &Subtarget = - static_cast(DAG.getSubtarget()); + const PPCSubtarget &Subtarget = DAG.getSubtarget(); const bool IsPPC64 = Subtarget.isPPC64(); const unsigned PtrByteSize = IsPPC64 ? 8 : 4; @@ -7228,8 +7226,7 @@ SDValue PPCTargetLowering::LowerCall_AIX( if (CFlags.IsPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); - const PPCSubtarget& Subtarget = - static_cast(DAG.getSubtarget()); + const PPCSubtarget &Subtarget = DAG.getSubtarget(); MachineFunction &MF = DAG.getMachineFunction(); SmallVector ArgLocs; diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index e34f522ab5505f..123cff7f0f1591 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -1052,8 +1052,7 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { } void SystemZAsmPrinter::emitFunctionEntryLabel() { - const SystemZSubtarget &Subtarget = - static_cast(MF->getSubtarget()); + const SystemZSubtarget &Subtarget = MF->getSubtarget(); if (Subtarget.getTargetTriple().isOSzOS()) { MCContext &OutContext = OutStreamer->getContext(); diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp index c80a5d5bb332b0..ded93fdc011cfa 
100644 --- a/llvm/lib/Target/X86/X86CallingConv.cpp +++ b/llvm/lib/Target/X86/X86CallingConv.cpp @@ -299,7 +299,7 @@ static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State) { const MachineFunction &MF = State.getMachineFunction(); size_t ArgCount = State.getMachineFunction().getFunction().arg_size(); - bool Is64Bit = static_cast(MF.getSubtarget()).is64Bit(); + bool Is64Bit = MF.getSubtarget().is64Bit(); unsigned SlotSize = Is64Bit ? 8 : 4; unsigned Offset; if (ArgCount == 1 && ValNo == 0) { diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index d98c2a4112422b..aebeec5a6d274a 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -730,7 +730,7 @@ bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) { } bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - STI = &static_cast(MF.getSubtarget()); + STI = &MF.getSubtarget(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); X86FI = MF.getInfo(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 92f4e012ef8ffa..6b38e14704c74d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26078,8 +26078,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after // prologue to RBP in the parent function. 
- const X86Subtarget &Subtarget = - static_cast(DAG.getSubtarget()); + const X86Subtarget &Subtarget = DAG.getSubtarget(); if (Subtarget.is64Bit()) return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset); @@ -39122,8 +39121,7 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, SDValue N0 = V.getOperand(0); SDValue N1 = V.getOperand(1); unsigned Imm = V.getConstantOperandVal(2); - const X86Subtarget &Subtarget = - static_cast(DAG.getSubtarget()); + const X86Subtarget &Subtarget = DAG.getSubtarget(); if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) || X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget)) return SDValue(); From ea6d0c4b2a1ec9c12aa5513ddf574f3146cf9025 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 26 May 2022 11:24:13 -0700 Subject: [PATCH 617/908] One further tweak for realpathing filepath to match dyld I missed one place I need to realpath the build artifact path, to make it match the path we get back from dyld. --- lldb/test/API/commands/platform/sdk/TestPlatformSDK.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py b/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py index 9178e3cdac4f73..06c780dc6a5e3f 100644 --- a/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py +++ b/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py @@ -85,7 +85,7 @@ def cleanup(): lldbutil.wait_for_file_on_target(self, token) # Move the binary into the 'SDK'. 
- rel_exe_path = os.path.relpath(exe, '/') + rel_exe_path = os.path.relpath(os.path.realpath(exe), '/') exe_sdk_path = os.path.join(symbols_dir, rel_exe_path) lldbutil.mkdir_p(os.path.dirname(exe_sdk_path)) shutil.move(exe, exe_sdk_path) From 7c13ae6490b102a7d14d75c8f502f3e003118f49 Mon Sep 17 00:00:00 2001 From: Adrian Tong Date: Wed, 4 May 2022 23:00:42 +0000 Subject: [PATCH 618/908] Give option to use isCopyInstr to determine which MI is treated as Copy instruction in MCP. This is then used in AArch64 to remove copy instructions after taildup ran in machine block placement Differential Revision: https://reviews.llvm.org/D125335 --- llvm/include/llvm/CodeGen/Passes.h | 2 + llvm/lib/CodeGen/MachineCopyPropagation.cpp | 256 ++++++++++++------ .../Target/AArch64/AArch64TargetMachine.cpp | 10 + llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + llvm/test/CodeGen/AArch64/copyprop.ll | 40 +++ llvm/test/CodeGen/AArch64/copyprop.mir | 13 +- 6 files changed, 240 insertions(+), 82 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/copyprop.ll diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 7eb82cd1609de6..bfd774f578ac88 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -331,6 +331,8 @@ namespace llvm { /// machine instructions. extern char &MachineCopyPropagationID; + MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr); + /// PeepholeOptimizer - This pass performs peephole optimizations - /// like extension and comparison eliminations. 
extern char &PeepholeOptimizerID; diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 0535332a833ec5..a8909c20d7a203 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -61,6 +61,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" @@ -82,8 +83,24 @@ STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated"); DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", "Controls which register COPYs are forwarded"); +static cl::opt MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false), + cl::Hidden); + namespace { +static Optional isCopyInstr(const MachineInstr &MI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { + if (UseCopyInstr) + return TII.isCopyInstr(MI); + + if (MI.isCopy()) + return Optional( + DestSourcePair{MI.getOperand(0), MI.getOperand(1)}); + + return None; +} + class CopyTracker { struct CopyInfo { MachineInstr *MI; @@ -109,7 +126,8 @@ class CopyTracker { } /// Remove register from copy maps. - void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) { + void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { // Since Reg might be a subreg of some registers, only invalidate Reg is not // enough. We have to find the COPY defines Reg or registers defined by Reg // and invalidate all of them. 
@@ -119,8 +137,13 @@ class CopyTracker { auto I = Copies.find(*RUI); if (I != Copies.end()) { if (MachineInstr *MI = I->second.MI) { - RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg()); - RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg()); + Optional CopyOperands = + isCopyInstr(*MI, TII, UseCopyInstr); + assert(CopyOperands && "Expect copy"); + + RegsToInvalidate.insert( + CopyOperands->Destination->getReg().asMCReg()); + RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg()); } RegsToInvalidate.insert(I->second.DefRegs.begin(), I->second.DefRegs.end()); @@ -132,7 +155,8 @@ class CopyTracker { } /// Clobber a single register, removing it from the tracker's copy maps. - void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) { + void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { auto I = Copies.find(*RUI); if (I != Copies.end()) { @@ -141,8 +165,12 @@ class CopyTracker { markRegsUnavailable(I->second.DefRegs, TRI); // When we clobber the destination of a copy, we need to clobber the // whole register it defined. - if (MachineInstr *MI = I->second.MI) - markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI); + if (MachineInstr *MI = I->second.MI) { + Optional CopyOperands = + isCopyInstr(*MI, TII, UseCopyInstr); + markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()}, + TRI); + } // Now we can erase the copy. Copies.erase(I); } @@ -150,11 +178,13 @@ class CopyTracker { } /// Add this copy's registers into the tracker's copy maps. 
- void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) { - assert(MI->isCopy() && "Tracking non-copy?"); + void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { + Optional CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr); + assert(CopyOperands && "Tracking non-copy?"); - MCRegister Def = MI->getOperand(0).getReg().asMCReg(); - MCRegister Src = MI->getOperand(1).getReg().asMCReg(); + MCRegister Src = CopyOperands->Source->getReg().asMCReg(); + MCRegister Def = CopyOperands->Destination->getReg().asMCReg(); // Remember Def is defined by the copy. for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI) @@ -197,15 +227,22 @@ class CopyTracker { } MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { MCRegUnitIterator RUI(Reg, &TRI); MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); - if (!AvailCopy || - !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg)) + + if (!AvailCopy) + return nullptr; + + Optional CopyOperands = + isCopyInstr(*AvailCopy, TII, UseCopyInstr); + Register AvailSrc = CopyOperands->Source->getReg(); + Register AvailDef = CopyOperands->Destination->getReg(); + if (!TRI.isSubRegisterEq(AvailSrc, Reg)) return nullptr; - Register AvailSrc = AvailCopy->getOperand(1).getReg(); - Register AvailDef = AvailCopy->getOperand(0).getReg(); for (const MachineInstr &MI : make_range(AvailCopy->getReverseIterator(), I.getReverseIterator())) for (const MachineOperand &MO : MI.operands()) @@ -218,20 +255,26 @@ class CopyTracker { } MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII, bool UseCopyInstr) { // We check the first RegUnit here, since we'll only be interested in the // copy if it copies the entire register anyway. 
MCRegUnitIterator RUI(Reg, &TRI); MachineInstr *AvailCopy = findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true); - if (!AvailCopy || - !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg)) + + if (!AvailCopy) + return nullptr; + + Optional CopyOperands = + isCopyInstr(*AvailCopy, TII, UseCopyInstr); + Register AvailSrc = CopyOperands->Source->getReg(); + Register AvailDef = CopyOperands->Destination->getReg(); + if (!TRI.isSubRegisterEq(AvailDef, Reg)) return nullptr; // Check that the available copy isn't clobbered by any regmasks between // itself and the destination. - Register AvailSrc = AvailCopy->getOperand(1).getReg(); - Register AvailDef = AvailCopy->getOperand(0).getReg(); for (const MachineInstr &MI : make_range(AvailCopy->getIterator(), DestCopy.getIterator())) for (const MachineOperand &MO : MI.operands()) @@ -252,10 +295,14 @@ class MachineCopyPropagation : public MachineFunctionPass { const TargetInstrInfo *TII; const MachineRegisterInfo *MRI; + // Return true if this is a copy instruction and false otherwise. 
+ bool UseCopyInstr; + public: static char ID; // Pass identification, replacement for typeid - MachineCopyPropagation() : MachineFunctionPass(ID) { + MachineCopyPropagation(bool CopyInstr = false) + : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) { initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); } @@ -333,9 +380,13 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, /// isNopCopy("ecx = COPY eax", AX, CX) == true /// isNopCopy("ecx = COPY eax", AH, CL) == false static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src, - MCRegister Def, const TargetRegisterInfo *TRI) { - MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg(); - MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg(); + MCRegister Def, const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, bool UseCopyInstr) { + + Optional CopyOperands = + isCopyInstr(PreviousCopy, *TII, UseCopyInstr); + MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg(); + MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg(); if (Src == PreviousSrc && Def == PreviousDef) return true; if (!TRI->isSubRegister(PreviousSrc, Src)) @@ -355,22 +406,26 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, return false; // Search for an existing copy. - MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI); + MachineInstr *PrevCopy = + Tracker.findAvailCopy(Copy, Def, *TRI, *TII, UseCopyInstr); if (!PrevCopy) return false; + auto PrevCopyOperands = isCopyInstr(*PrevCopy, *TII, UseCopyInstr); // Check that the existing copy uses the correct sub registers. 
- if (PrevCopy->getOperand(0).isDead()) + if (PrevCopyOperands->Destination->isDead()) return false; - if (!isNopCopy(*PrevCopy, Src, Def, TRI)) + if (!isNopCopy(*PrevCopy, Src, Def, TRI, TII, UseCopyInstr)) return false; LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. - assert(Copy.isCopy()); - Register CopyDef = Copy.getOperand(0).getReg(); + Optional CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + assert(CopyOperands); + + Register CopyDef = CopyOperands->Destination->getReg(); assert(CopyDef == Src || CopyDef == Def); for (MachineInstr &MI : make_range(PrevCopy->getIterator(), Copy.getIterator())) @@ -384,7 +439,9 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - Register Def = Copy.getOperand(0).getReg(); + + Optional CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + Register Def = CopyOperands->Destination->getReg(); if (const TargetRegisterClass *URC = UseI.getRegClassConstraint(UseIdx, TII, TRI)) @@ -402,7 +459,8 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - Register CopySrcReg = Copy.getOperand(1).getReg(); + Optional CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + Register CopySrcReg = CopyOperands->Source->getReg(); // If the new register meets the opcode register constraints, then allow // forwarding. 
@@ -410,7 +468,8 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, UseI.getRegClassConstraint(UseIdx, TII, TRI)) return URC->contains(CopySrcReg); - if (!UseI.isCopy()) + auto UseICopyOperands = isCopyInstr(UseI, *TII, UseCopyInstr); + if (!UseICopyOperands) return false; /// COPYs don't have register class constraints, so if the user instruction @@ -433,7 +492,7 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, // Allow forwarding if src and dst belong to any common class, so long as they // don't belong to any (possibly smaller) common class that requires copies to // go via a different class. - Register UseDstReg = UseI.getOperand(0).getReg(); + Register UseDstReg = UseICopyOperands->Destination->getReg(); bool Found = false; bool IsCrossClass = false; for (const TargetRegisterClass *RC : TRI->regclasses()) { @@ -451,7 +510,7 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, return true; // The forwarded copy would be cross-class. Only do this if the original copy // was also cross-class. 
- Register CopyDstReg = Copy.getOperand(0).getReg(); + Register CopyDstReg = CopyOperands->Destination->getReg(); for (const TargetRegisterClass *RC : TRI->regclasses()) { if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) && TRI->getCrossCopyRegClass(RC) != RC) @@ -523,13 +582,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!MOUse.isRenamable()) continue; - MachineInstr *Copy = - Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI); + MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), + *TRI, *TII, UseCopyInstr); if (!Copy) continue; - Register CopyDstReg = Copy->getOperand(0).getReg(); - const MachineOperand &CopySrc = Copy->getOperand(1); + Optional CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register CopyDstReg = CopyOperands->Destination->getReg(); + const MachineOperand &CopySrc = *CopyOperands->Source; Register CopySrcReg = CopySrc.getReg(); // FIXME: Don't handle partial uses of wider COPYs yet. @@ -553,7 +614,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { // Check that the instruction is not a copy that partially overwrites the // original copy source that we are about to use. The tracker mechanism // cannot cope with that. - if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) && + if (isCopyInstr(MI, *TII, UseCopyInstr) && + MI.modifiesRegister(CopySrcReg, TRI) && !MI.definesRegister(CopySrcReg)) { LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI); continue; @@ -592,14 +654,20 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { // Analyze copies (which don't overlap themselves). 
- if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(), - MI.getOperand(1).getReg())) { - assert(MI.getOperand(0).getReg().isPhysical() && - MI.getOperand(1).getReg().isPhysical() && + Optional CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + if (CopyOperands) { + + Register RegSrc = CopyOperands->Source->getReg(); + Register RegDef = CopyOperands->Destination->getReg(); + + if (TRI->regsOverlap(RegDef, RegSrc)) + continue; + + assert(RegDef.isPhysical() && RegSrc.isPhysical() && "MachineCopyPropagation should be run after register allocation!"); - MCRegister Def = MI.getOperand(0).getReg().asMCReg(); - MCRegister Src = MI.getOperand(1).getReg().asMCReg(); + MCRegister Def = RegDef.asMCReg(); + MCRegister Src = RegSrc.asMCReg(); // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. @@ -622,7 +690,8 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { forwardUses(MI); // Src may have been changed by forwardUses() - Src = MI.getOperand(1).getReg().asMCReg(); + CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + Src = CopyOperands->Source->getReg().asMCReg(); // If Src is defined by a previous copy, the previous copy cannot be // eliminated. @@ -649,17 +718,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // %xmm2 = copy %xmm0 // ... // %xmm2 = copy %xmm9 - Tracker.clobberRegister(Def, *TRI); + Tracker.clobberRegister(Def, *TRI, *TII, UseCopyInstr); for (const MachineOperand &MO : MI.implicit_operands()) { if (!MO.isReg() || !MO.isDef()) continue; MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) continue; - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } - Tracker.trackCopy(&MI, *TRI); + Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr); continue; } @@ -673,7 +742,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // later. 
if (MO.isTied()) ReadRegister(Reg, MI, RegularUse); - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } forwardUses(MI); @@ -709,7 +778,9 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg(); + Optional CopyOperands = + isCopyInstr(*MaybeDead, *TII, UseCopyInstr); + MCRegister Reg = CopyOperands->Destination->getReg().asMCReg(); assert(!MRI->isReserved(Reg)); if (!RegMask->clobbersPhysReg(Reg)) { @@ -722,7 +793,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Make sure we invalidate any entries in the copy maps before erasing // the instruction. - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); // erase() will return the next valid iterator pointing to the next // element after the erased one. @@ -735,7 +806,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Any previous copy definition or reading the Defs is no longer available. for (MCRegister Reg : Defs) - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } // If MBB doesn't have successors, delete the copies whose defs are not used. @@ -745,12 +816,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); - assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); + + Optional CopyOperands = + isCopyInstr(*MaybeDead, *TII, UseCopyInstr); + assert(CopyOperands); + + Register SrcReg = CopyOperands->Source->getReg(); + Register DestReg = CopyOperands->Destination->getReg(); + assert(!MRI->isReserved(DestReg)); // Update matching debug values, if any. 
- assert(MaybeDead->isCopy()); - Register SrcReg = MaybeDead->getOperand(1).getReg(); - Register DestReg = MaybeDead->getOperand(0).getReg(); SmallVector MaybeDeadDbgUsers( CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end()); MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(), @@ -768,10 +843,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { } static bool isBackwardPropagatableCopy(MachineInstr &MI, - const MachineRegisterInfo &MRI) { - assert(MI.isCopy() && "MI is expected to be a COPY"); - Register Def = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + const MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { + Optional CopyOperands = isCopyInstr(MI, TII, UseCopyInstr); + assert(CopyOperands && "MI is expected to be a COPY"); + + Register Def = CopyOperands->Destination->getReg(); + Register Src = CopyOperands->Source->getReg(); if (!Def || !Src) return false; @@ -779,7 +858,7 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI, if (MRI.isReserved(Def) || MRI.isReserved(Src)) return false; - return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill(); + return CopyOperands->Source->isRenamable() && CopyOperands->Source->isKill(); } void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { @@ -804,13 +883,15 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { if (!MODef.isRenamable()) continue; - MachineInstr *Copy = - Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI); + MachineInstr *Copy = Tracker.findAvailBackwardCopy( + MI, MODef.getReg().asMCReg(), *TRI, *TII, UseCopyInstr); if (!Copy) continue; - Register Def = Copy->getOperand(0).getReg(); - Register Src = Copy->getOperand(1).getReg(); + Optional CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register Def = CopyOperands->Destination->getReg(); + Register Src = CopyOperands->Source->getReg(); if (MODef.getReg() != Src) continue; @@ -829,7 
+910,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { << MI << " from " << *Copy); MODef.setReg(Def); - MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + MODef.setIsRenamable(CopyOperands->Destination->isRenamable()); LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); MaybeDeadCopies.insert(Copy); @@ -845,19 +926,23 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) { // Ignore non-trivial COPYs. - if (MI.isCopy() && MI.getNumOperands() == 2 && - !TRI->regsOverlap(MI.getOperand(0).getReg(), - MI.getOperand(1).getReg())) { + Optional CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + if (CopyOperands) { + Register DefReg = CopyOperands->Destination->getReg(); + Register SrcReg = CopyOperands->Source->getReg(); - MCRegister Def = MI.getOperand(0).getReg().asMCReg(); - MCRegister Src = MI.getOperand(1).getReg().asMCReg(); + if (TRI->regsOverlap(DefReg, SrcReg)) + continue; + + MCRegister Def = DefReg.asMCReg(); + MCRegister Src = SrcReg.asMCReg(); // Unlike forward cp, we don't invoke propagateDefs here, // just let forward cp do COPY-to-COPY propagation. 
- if (isBackwardPropagatableCopy(MI, *MRI)) { - Tracker.invalidateRegister(Src, *TRI); - Tracker.invalidateRegister(Def, *TRI); - Tracker.trackCopy(&MI, *TRI); + if (isBackwardPropagatableCopy(MI, *MRI, *TII, UseCopyInstr)) { + Tracker.invalidateRegister(Src, *TRI, *TII, UseCopyInstr); + Tracker.invalidateRegister(Def, *TRI, *TII, UseCopyInstr); + Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr); continue; } } @@ -868,7 +953,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) continue; - Tracker.invalidateRegister(Reg, *TRI); + Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr); } propagateDefs(MI); @@ -880,7 +965,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( continue; if (MO.isDef()) - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII, + UseCopyInstr); if (MO.readsReg()) { if (MO.isDebug()) { @@ -894,7 +980,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( } } } else { - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII, + UseCopyInstr); } } } @@ -902,8 +989,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( for (auto *Copy : MaybeDeadCopies) { - Register Src = Copy->getOperand(1).getReg(); - Register Def = Copy->getOperand(0).getReg(); + Optional CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register Src = CopyOperands->Source->getReg(); + Register Def = CopyOperands->Destination->getReg(); SmallVector MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(), CopyDbgUsers[Copy].end()); @@ -934,3 +1023,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +MachineFunctionPass * +llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) { + return new MachineCopyPropagation(UseCopyInstr); +} diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp 
b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index bcfc3508055472..3c205fd5c319b9 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -62,6 +63,11 @@ static cl::opt cl::desc("Enable the conditional branch tuning pass"), cl::init(true), cl::Hidden); +static cl::opt EnableAArch64CopyPropagation( + "aarch64-enable-copy-propagation", + cl::desc("Enable the copy propagation with AArch64 copy instr"), + cl::init(true), cl::Hidden); + static cl::opt EnableMCR("aarch64-enable-mcr", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); @@ -772,6 +778,10 @@ void AArch64PassConfig::addPreEmitPass() { if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt) addPass(createAArch64LoadStoreOptimizationPass()); + if (TM->getOptLevel() >= CodeGenOpt::Aggressive && + EnableAArch64CopyPropagation) + addPass(createMachineCopyPropagationPass(true)); + addPass(createAArch64A53Fix835769()); if (EnableBranchTargets) diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 2f205f9ab37356..708c8bef290a1b 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -200,6 +200,7 @@ ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: AArch64 load / store optimization pass +; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Workaround A53 erratum 835769 pass ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass diff --git a/llvm/test/CodeGen/AArch64/copyprop.ll b/llvm/test/CodeGen/AArch64/copyprop.ll new file mode 100644 index 
00000000000000..a605f23baba210 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/copyprop.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O3 -mtriple=aarch64-- | FileCheck %s + +define void @copyprop_after_mbp(i32 %v, i32* %a, i32* %b, i32* %c, i32* %d) { +; CHECK-LABEL: copyprop_after_mbp: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, #10 +; CHECK-NEXT: b.ne .LBB0_2 +; CHECK-NEXT: // %bb.1: // %bb.0 +; CHECK-NEXT: mov w9, #15 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w9, [x2] +; CHECK-NEXT: mov w9, #12 +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: str w9, [x4] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %bb.1 +; CHECK-NEXT: mov w9, #25 +; CHECK-NEXT: str w9, [x3] +; CHECK-NEXT: mov w9, #12 +; CHECK-NEXT: str wzr, [x1] +; CHECK-NEXT: str w9, [x4] +; CHECK-NEXT: ret + %1 = icmp eq i32 %v, 10 + br i1 %1, label %bb.0, label %bb.1 + +bb.0: + store i32 15, i32* %b, align 4 + br label %bb.2 + +bb.1: + store i32 25, i32* %c, align 4 + br label %bb.2 + +bb.2: + %2 = phi i32 [ 1, %bb.0 ], [ 0, %bb.1 ] + store i32 %2, i32* %a, align 4 + store i32 12, i32* %d, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/copyprop.mir b/llvm/test/CodeGen/AArch64/copyprop.mir index e23002c569070c..2454509d808625 100644 --- a/llvm/test/CodeGen/AArch64/copyprop.mir +++ b/llvm/test/CodeGen/AArch64/copyprop.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-linux-gnu -run-pass machine-cp -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass machine-cp -mcp-use-is-copy-instr -o - %s | FileCheck %s # Tests for MachineCopyPropagation copy forwarding. --- # Simple forwarding. @@ -102,3 +102,14 @@ body: | $x0 = ADDXri $x0, 1, 0 $x2 = SUBXri renamable $x1, 1, 0 ... 
+--- +# CHECK-LABEL: name: test1_orr_as_copy +# CHECK: STRBBui $wzr, killed renamable $x8, 36 +name: test1_orr_as_copy +tracksRegLiveness: true +body: | + bb.0: + liveins: $x8 + $w9 = ORRWrs $wzr, $wzr, 0 + STRBBui killed renamable $w9, killed renamable $x8, 36 +... From 6273b5cbcdd346a833120c55061ab56f61827068 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 26 May 2022 14:43:28 -0400 Subject: [PATCH 619/908] Roll back use of #warning for header deprecations e5ccd668019888de2704ae670da88a7be8cf7e0f and 5029dce492b3cf3ac191eda0b5bf268c3acac2e0 added deprecation warnings to the <stdbool.h> and <stdnoreturn.h> headers, respectively, because the headers are deprecated in C2x. However, there are system headers that include these headers unconditionally, and #warning diagnostics within system headers are shown to users instead of suppressed, which means these deprecation warnings are being triggered in circumstances that users have no control over except to disable all the warnings through the _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS macro or other means. This removes the problematic #warning uses until we find a more palatable solution. --- clang/lib/Headers/stdbool.h | 6 +++--- clang/lib/Headers/stdnoreturn.h | 4 +++- clang/test/Headers/stdbool.c | 9 +-------- clang/test/Sema/c2x-noreturn.c | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/clang/lib/Headers/stdbool.h b/clang/lib/Headers/stdbool.h index ecf560218c3b5d..f0e588532e166f 100644 --- a/clang/lib/Headers/stdbool.h +++ b/clang/lib/Headers/stdbool.h @@ -13,9 +13,9 @@ #define __bool_true_false_are_defined 1 #if __STDC_VERSION__ > 201710L -#if !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) -#warning "the header is deprecated in C2x" -#endif /* !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) */ +/* FIXME: We should be issuing a deprecation warning here, but cannot yet due + * to system headers which include this header file unconditionally. 
+ */ #elif !defined(__cplusplus) #define bool _Bool #define true 1 diff --git a/clang/lib/Headers/stdnoreturn.h b/clang/lib/Headers/stdnoreturn.h index 944e6904c7df61..7d19fa7b2f2b9e 100644 --- a/clang/lib/Headers/stdnoreturn.h +++ b/clang/lib/Headers/stdnoreturn.h @@ -21,7 +21,9 @@ followed by code that writes [[noreturn]]. The issue with such code is not with the attribute, or the use of 'noreturn', but the inclusion of the header. */ -#warning "the '' header is deprecated in C2x; either use the '_Noreturn' keyword or the '[[noreturn]]' attribute" +/* FIXME: We should be issuing a deprecation warning here, but cannot yet due + * to system headers which include this header file unconditionally. + */ #endif #endif /* __STDNORETURN_H */ diff --git a/clang/test/Headers/stdbool.c b/clang/test/Headers/stdbool.c index 1ac94dc67a28fc..a5189633c35962 100644 --- a/clang/test/Headers/stdbool.c +++ b/clang/test/Headers/stdbool.c @@ -1,6 +1,5 @@ // RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=c11 -E -dM %s 2>&1 | FileCheck --check-prefix=CHECK-C11 %s // RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=c2x -E -dM %s 2>&1 | FileCheck --check-prefix=CHECK-C2X %s -// RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=c2x -E -dM -D_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS %s 2>&1 | FileCheck --check-prefix=CHECK-C2X-CRT %s #include @@ -8,12 +7,6 @@ // CHECK-C11: #define false 0 // CHECK-C11: #define true 1 -// CHECK-C2X: warning "the header is deprecated // CHECK-C2X-NOT: #define bool // CHECK-C2X-NOT: #define true -// CHECK-C2X-NOT: #define falsea - -// CHECK-C2X-CRT-NOT: warning "the header is deprecated -// CHECK-C2X-CRT-NOT: #define bool -// CHECK-C2X-CRT-NOT: #define true -// CHECK-C2X-CRT-NOT: #define false +// CHECK-C2X-NOT: #define false diff --git a/clang/test/Sema/c2x-noreturn.c b/clang/test/Sema/c2x-noreturn.c index 6c119736f6454e..cc276877c28603 100644 --- a/clang/test/Sema/c2x-noreturn.c +++ b/clang/test/Sema/c2x-noreturn.c @@ -36,7 +36,7 @@ _Noreturn void func1(void); // ok, using the 
function specifier [[_Noreturn]] void func3(void); // all-warning {{the '[[_Noreturn]]' attribute spelling is deprecated in C2x; use '[[noreturn]]' instead}} // Test the behavior of including -#include // c2x-warning@stdnoreturn.h:* {{the '' header is deprecated in C2x; either use the '_Noreturn' keyword or the '[[noreturn]]' attribute}} +#include [[noreturn]] void func6(void); From 6ddc4cd1c23fb2b418425543ea8c1554d4113f70 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 26 May 2022 11:48:29 -0700 Subject: [PATCH 620/908] Fix break introduced by D124306 argparse.BooleanOptionalAction is not supported until python 3.9. --- llvm/utils/UpdateTestChecks/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 7ca546c3d251e6..4ee6e0927a1d26 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -131,8 +131,10 @@ def __call__(self, parser, namespace, values, option_string=None): help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)') parser.add_argument('--global-hex-value-regex', nargs='+', default=[], help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives') - parser.add_argument('--generate-body-for-unused-prefixes', - action=argparse.BooleanOptionalAction, + # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point, + # we need to rename the flag to just -generate-body-for-unused-prefixes. + parser.add_argument('--no-generate-body-for-unused-prefixes', + action='store_false', dest='gen_unused_prefix_body', default=True, help='Generate a function body that always matches for unused prefixes. 
This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.') From 3bba72e653267ad45162757a222e23356a16fa01 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 May 2022 14:58:24 -0400 Subject: [PATCH 621/908] [libc++] Time tests during CI --- libcxx/utils/ci/run-buildbot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 0732ffc2e6f50a..e664ac8173bcf4 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -86,7 +86,7 @@ function generate-cmake-base() { -DLIBCXX_ENABLE_WERROR=YES \ -DLIBCXXABI_ENABLE_WERROR=YES \ -DLIBUNWIND_ENABLE_WERROR=YES \ - -DLLVM_LIT_ARGS="-sv --show-unsupported --xunit-xml-output test-results.xml --timeout=1200" \ + -DLLVM_LIT_ARGS="-sv --show-unsupported --xunit-xml-output test-results.xml --timeout=1200 --time-tests" \ "${@}" } From 48ca3a5ebb156ccb776eea399138b7cda4d13395 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Thu, 26 May 2022 14:59:16 -0400 Subject: [PATCH 622/908] [OpenMP] Extend omp teams to permit nested omp atomic OpenMP 5.2, sec. 10.2 "teams Construct", p. 232, L9-12 restricts what regions can be strictly nested within a `teams` construct. This patch relaxes Clang's enforcement of this restriction in the case of nested `atomic` constructs unless `-fno-openmp-extensions` is specified. 
Cases like the following then seem to work fine with no additional implementation changes: ``` #pragma omp target teams map(tofrom:x) #pragma omp atomic update x++; ``` Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D126323 --- clang/lib/Sema/SemaOpenMP.cpp | 6 ++- clang/test/OpenMP/nesting_of_regions.cpp | 21 ++++---- .../test/offloading/target-teams-atomic.c | 50 +++++++++++++++++++ openmp/runtime/test/teams/teams-atomic.c | 49 ++++++++++++++++++ 4 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 openmp/libomptarget/test/offloading/target-teams-atomic.c create mode 100644 openmp/runtime/test/teams/teams-atomic.c diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 09c187f24a802d..753decfb167cdd 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4975,9 +4975,13 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack, // omp_get_num_teams() regions, and omp_get_team_num() regions are the // only OpenMP regions that may be strictly nested inside the teams // region. + // + // As an extension, we permit atomic within teams as well. 
NestingProhibited = !isOpenMPParallelDirective(CurrentRegion) && !isOpenMPDistributeDirective(CurrentRegion) && - CurrentRegion != OMPD_loop; + CurrentRegion != OMPD_loop && + !(SemaRef.getLangOpts().OpenMPExtensions && + CurrentRegion == OMPD_atomic); Recommend = ShouldBeInParallelRegion; } if (!NestingProhibited && CurrentRegion == OMPD_loop) { diff --git a/clang/test/OpenMP/nesting_of_regions.cpp b/clang/test/OpenMP/nesting_of_regions.cpp index 7e23e005481976..3581b05e3a043e 100644 --- a/clang/test/OpenMP/nesting_of_regions.cpp +++ b/clang/test/OpenMP/nesting_of_regions.cpp @@ -1,10 +1,11 @@ -// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45,omp45warn %s -// RUN: %clang_cc1 -fsyntax-only -fopenmp -verify=expected,omp50 %s -// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45 -Wno-openmp %s -// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45 -Wno-source-uses-openmp %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -fno-openmp-extensions -verify=expected,omp45,omp45warn,omp %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fno-openmp-extensions -verify=expected,omp50,omp %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-extensions -verify=expected,omp50 %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45,omp -fno-openmp-extensions -Wno-openmp %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45,omp -fno-openmp-extensions -Wno-source-uses-openmp %s -// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=45 -verify=expected,omp45,omp45warn %s -// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify=expected,omp50 %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=45 -fno-openmp-extensions -verify=expected,omp45,omp45warn,omp %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify=expected,omp50,omp -fno-openmp-extensions %s // SIMD-ONLY0-NOT: 
{{__kmpc|__tgt}} void bar(); @@ -5264,7 +5265,7 @@ void foo() { #pragma omp target #pragma omp teams { -#pragma omp atomic // expected-error {{region cannot be closely nested inside 'teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} +#pragma omp atomic // omp-error {{region cannot be closely nested inside 'teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} ++a; } #pragma omp target @@ -8422,7 +8423,7 @@ void foo() { } #pragma omp target teams { -#pragma omp atomic // expected-error {{region cannot be closely nested inside 'target teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} +#pragma omp atomic // omp-error {{region cannot be closely nested inside 'target teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} ++a; } #pragma omp target teams @@ -14096,7 +14097,7 @@ void foo() { #pragma omp target #pragma omp teams { -#pragma omp atomic // expected-error {{region cannot be closely nested inside 'teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} +#pragma omp atomic // omp-error {{region cannot be closely nested inside 'teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} ++a; } #pragma omp target @@ -17299,7 +17300,7 @@ void foo() { } #pragma omp target teams { -#pragma omp atomic // expected-error {{region cannot be closely nested inside 'target teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} +#pragma omp atomic // omp-error {{region cannot be closely nested inside 'target teams' region; perhaps you forget to enclose 'omp atomic' directive into a parallel region?}} ++a; } #pragma omp target teams diff --git a/openmp/libomptarget/test/offloading/target-teams-atomic.c b/openmp/libomptarget/test/offloading/target-teams-atomic.c new file mode 100644 index 
00000000000000..5d482f1cc1be7b --- /dev/null +++ b/openmp/libomptarget/test/offloading/target-teams-atomic.c @@ -0,0 +1,50 @@ +// Check that omp atomic is permitted and behaves when strictly nested within +// omp target teams. This is an extension to OpenMP 5.2 and is enabled by +// default. + +// RUN: %libomptarget-compile-run-and-check-generic + +#include +#include +#include +#include + +// High parallelism increases our chances of detecting a lack of atomicity. +#define NUM_TEAMS_TRY 256 + +int main() { + // CHECK: update: num_teams=[[#NUM_TEAMS:]]{{$}} + // CHECK-NEXT: update: x=[[#NUM_TEAMS]]{{$}} + int x = 0; + int numTeams; + #pragma omp target teams num_teams(NUM_TEAMS_TRY) map(tofrom:x, numTeams) + { + #pragma omp atomic update + ++x; + if (omp_get_team_num() == 0) + numTeams = omp_get_num_teams(); + } + printf("update: num_teams=%d\n", numTeams); + printf("update: x=%d\n", x); + + // CHECK-NEXT: capture: x=[[#NUM_TEAMS]]{{$}} + // CHECK-NEXT: capture: xCapturedCount=[[#NUM_TEAMS]]{{$}} + bool xCaptured[numTeams]; + memset(xCaptured, 0, sizeof xCaptured); + x = 0; + #pragma omp target teams num_teams(NUM_TEAMS_TRY) map(tofrom:x, numTeams) + { + int v; + #pragma omp atomic capture + v = x++; + xCaptured[v] = true; + } + printf("capture: x=%d\n", x); + int xCapturedCount = 0; + for (int i = 0; i < numTeams; ++i) { + if (xCaptured[i]) + ++xCapturedCount; + } + printf("capture: xCapturedCount=%d\n", xCapturedCount); + return 0; +} diff --git a/openmp/runtime/test/teams/teams-atomic.c b/openmp/runtime/test/teams/teams-atomic.c new file mode 100644 index 00000000000000..fc9692316cefa3 --- /dev/null +++ b/openmp/runtime/test/teams/teams-atomic.c @@ -0,0 +1,49 @@ +// Check that omp atomic is permitted and behaves when strictly nested within +// omp teams. This is an extension to OpenMP 5.2 and is enabled by default. 
+ +// RUN: %libomp-compile-and-run | FileCheck %s + +#include <omp.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> + +// High parallelism increases our chances of detecting a lack of atomicity. +#define NUM_TEAMS_TRY 256 + +int main() { + // CHECK: update: num_teams=[[#NUM_TEAMS:]]{{$}} + // CHECK-NEXT: update: x=[[#NUM_TEAMS]]{{$}} + int x = 0; + int numTeams; + #pragma omp teams num_teams(NUM_TEAMS_TRY) + { + #pragma omp atomic update + ++x; + if (omp_get_team_num() == 0) + numTeams = omp_get_num_teams(); + } + printf("update: num_teams=%d\n", numTeams); + printf("update: x=%d\n", x); + + // CHECK-NEXT: capture: x=[[#NUM_TEAMS]]{{$}} + // CHECK-NEXT: capture: xCapturedCount=[[#NUM_TEAMS]]{{$}} + bool xCaptured[numTeams]; + memset(xCaptured, 0, sizeof xCaptured); + x = 0; + #pragma omp teams num_teams(NUM_TEAMS_TRY) + { + int v; + #pragma omp atomic capture + v = x++; + xCaptured[v] = true; + } + printf("capture: x=%d\n", x); + int xCapturedCount = 0; + for (int i = 0; i < numTeams; ++i) { + if (xCaptured[i]) + ++xCapturedCount; + } + printf("capture: xCapturedCount=%d\n", xCapturedCount); + return 0; +} From 84b9985f5f63509c3f99e2ebaa488f0a0ef33d49 Mon Sep 17 00:00:00 2001 From: Will Hawkins Date: Thu, 26 May 2022 12:14:22 -0700 Subject: [PATCH 623/908] [lldb] Fix broken bad-address-breakpoint test After changing the "fallback" behavior when a user sets a breakpoint without specifying a module the bad-address-breakpoint test case failed incorrectly. This patch updates that test case in order to more thoroughly discover an illegal address and use that as the means for testing whether a breakpoint set at an illegal address fails to resolve.
Differential revision: https://reviews.llvm.org/D126109 --- .../TestBadAddressBreakpoints.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/lldb/test/API/functionalities/breakpoint/address_breakpoints/TestBadAddressBreakpoints.py b/lldb/test/API/functionalities/breakpoint/address_breakpoints/TestBadAddressBreakpoints.py index bd3da0f2b427c0..cd33ba86eb3693 100644 --- a/lldb/test/API/functionalities/breakpoint/address_breakpoints/TestBadAddressBreakpoints.py +++ b/lldb/test/API/functionalities/breakpoint/address_breakpoints/TestBadAddressBreakpoints.py @@ -27,14 +27,26 @@ def address_breakpoints(self): "Set a breakpoint here", lldb.SBFileSpec("main.c")) - # Now see if we can read from 0. If I can't do that, I don't - # have a good way to know what an illegal address is... - error = lldb.SBError() - ptr = process.ReadPointerFromMemory(0x0, error) - if not error.Success(): - bkpt = target.BreakpointCreateByAddress(0x0) + # illegal_address will hold (optionally) an address that, if + # used as a breakpoint, will generate an unresolved breakpoint. + illegal_address = None + + # Walk through all the memory regions in the process and + # find an address that is invalid. + regions = process.GetMemoryRegions() + for region_idx in range(regions.GetSize()): + region = lldb.SBMemoryRegionInfo() + regions.GetMemoryRegionAtIndex(region_idx, region) + if illegal_address == None or \ + region.GetRegionEnd() > illegal_address: + illegal_address = region.GetRegionEnd() + + if illegal_address is not None: + # Now, set a breakpoint at the address we know is illegal. + bkpt = target.BreakpointCreateByAddress(illegal_address) + # Verify that breakpoint is not resolved. 
for bp_loc in bkpt: self.assertEquals(bp_loc.IsResolved(), False) else: From 75acec818e3a419836cf4827eb64faee719becb8 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Thu, 26 May 2022 12:16:24 -0700 Subject: [PATCH 624/908] [lldb] Fix enums-layout test on Windows The test was broken by: https://reviews.llvm.org/D125604 --- .../Shell/SymbolFile/PDB/enums-layout.test | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/lldb/test/Shell/SymbolFile/PDB/enums-layout.test b/lldb/test/Shell/SymbolFile/PDB/enums-layout.test index 2bfb9eb0b7f282..ad7d1549e3620b 100644 --- a/lldb/test/Shell/SymbolFile/PDB/enums-layout.test +++ b/lldb/test/Shell/SymbolFile/PDB/enums-layout.test @@ -1,45 +1,45 @@ REQUIRES: system-windows, msvc RUN: %build --compiler=msvc --arch=32 --nodefaultlib --output=%T/SimpleTypesTest.cpp.enums.exe %S/Inputs/SimpleTypesTest.cpp RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM %s -RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM_CONST %s -RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM_EMPTY %s -RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM_UCHAR %s -RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM_CLASS %s -RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM_STRUCT %s +RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CONST-ENUM %s +RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=EMPTY-ENUM %s +RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=UCHAR-ENUM %s +RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CLASS-ENUM %s +RUN: lldb-test symbols %T/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=STRUCT-ENUM %s ; FIXME: PDB does not have 
information about scoped enumeration (Enum class) so the ; compiler type used is the same as the one for unscoped enumeration. ENUM: Type{{.*}} , name = "Enum", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:19, compiler_type = {{.*}} enum Enum { -ENUM_NEXT: RED, -ENUM_NEXT: GREEN, -ENUM_NEXT: BLUE -ENUM_NEXT:} +ENUM-NEXT: RED, +ENUM-NEXT: GREEN, +ENUM-NEXT: BLUE +ENUM-NEXT:} -ENUM_CONST: Type{{.*}} , name = "EnumConst", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:22, compiler_type = {{.*}} enum EnumConst { -ENUM_CONST-NEXT: LOW, -ENUM_CONST-NEXT: NORMAL, -ENUM_CONST-NEXT: HIGH -ENUM_CONST-NEXT:} +CONST-ENUM: Type{{.*}} , name = "EnumConst", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:22, compiler_type = {{.*}} enum EnumConst { +CONST-ENUM-NEXT: LOW, +CONST-ENUM-NEXT: NORMAL, +CONST-ENUM-NEXT: HIGH +CONST-ENUM-NEXT:} -ENUM_EMPTY: Type{{.*}} , name = "EnumEmpty", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:25, compiler_type = {{.*}} enum EnumEmpty { -ENUM_EMPTY-NEXT:} +EMPTY-ENUM: Type{{.*}} , name = "EnumEmpty", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:25, compiler_type = {{.*}} enum EnumEmpty { +EMPTY-ENUM-NEXT:} -ENUM_UCHAR: Type{{.*}} , name = "EnumUChar", size = 1, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:28, compiler_type = {{.*}} enum EnumUChar { -ENUM_UCHAR-NEXT: ON, -ENUM_UCHAR-NEXT: OFF, -ENUM_UCHAR-NEXT: AUTO -ENUM_UCHAR-NEXT:} +UCHAR-ENUM: Type{{.*}} , name = "EnumUChar", size = 1, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:28, compiler_type = {{.*}} enum EnumUChar { +UCHAR-ENUM-NEXT: ON, +UCHAR-ENUM-NEXT: OFF, +UCHAR-ENUM-NEXT: AUTO +UCHAR-ENUM-NEXT:} ; Note that `enum EnumClass` is tested instead of `enum class EnumClass` -ENUM_CLASS: Type{{.*}} , name = "EnumClass", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:32, compiler_type = {{.*}} enum EnumClass { -ENUM_CLASS-NEXT: YES, -ENUM_CLASS-NEXT: NO, -ENUM_CLASS-NEXT: DEFAULT -ENUM_CLASS-NEXT:} +CLASS-ENUM: 
Type{{.*}} , name = "EnumClass", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:32, compiler_type = {{.*}} enum EnumClass { +CLASS-ENUM-NEXT: YES, +CLASS-ENUM-NEXT: NO, +CLASS-ENUM-NEXT: DEFAULT +CLASS-ENUM-NEXT:} -ENUM_STRUCT: Type{{.*}} , name = "EnumStruct", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:35, compiler_type = {{.*}} enum EnumStruct { -ENUM_STRUCT-NEXT: red, -ENUM_STRUCT-NEXT: blue, -ENUM_STRUCT-NEXT: black -ENUM_STRUCT-NEXT:} +STRUCT-ENUM: Type{{.*}} , name = "EnumStruct", size = 4, decl = {{[Ss]}}imple{{[Tt]}}ypes{{[Tt]}}est.cpp:35, compiler_type = {{.*}} enum EnumStruct { +STRUCT-ENUM-NEXT: red, +STRUCT-ENUM-NEXT: blue, +STRUCT-ENUM-NEXT: black +STRUCT-ENUM-NEXT:} From 92bbcfaa97408991813fda5c56e864c629f67a01 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 May 2022 10:19:25 -0400 Subject: [PATCH 625/908] [libunwind] Tidy-up the testing configuration for libunwind Start testing Apple backdeployment with older libunwinds, and stop explicitly specifying the libunwind testing config, since it is already selected correctly by default. 
Differential Revision: https://reviews.llvm.org/D126470 --- libcxx/utils/ci/run-buildbot | 118 +++++++----------- .../apple-libunwind-backdeployment.cfg.in | 64 ++++++++++ libunwind/test/libunwind_01.pass.cpp | 4 +- libunwind/test/signal_frame.pass.cpp | 4 +- 4 files changed, 112 insertions(+), 78 deletions(-) create mode 100644 libunwind/test/configs/apple-libunwind-backdeployment.cfg.in diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index e664ac8173bcf4..23708e680893fa 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -193,78 +193,67 @@ check-generated-output) ;; generic-cxx03) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx03.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx03.cmake" check-runtimes check-abi-list ;; generic-cxx11) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx11.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx11.cmake" check-runtimes check-abi-list ;; generic-cxx14) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx14.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx14.cmake" check-runtimes check-abi-list ;; generic-cxx17) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx17.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx17.cmake" check-runtimes check-abi-list ;; generic-cxx20) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx20.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx20.cmake" check-runtimes check-abi-list ;; generic-cxx2b) 
clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx2b.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx2b.cmake" check-runtimes check-abi-list ;; generic-assertions) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-assertions.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-assertions.cmake" check-runtimes check-abi-list ;; generic-debug-iterators) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-debug-iterators.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-debug-iterators.cmake" check-runtimes check-abi-list ;; generic-noexceptions) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-noexceptions.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-noexceptions.cmake" check-runtimes check-abi-list ;; generic-modules) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-modules.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-modules.cmake" check-runtimes check-abi-list ;; generic-static) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-static.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-static.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-static.cmake" check-runtimes ;; generic-merged) @@ -279,7 +268,7 @@ generic-clang-13) export CC=clang-13 export CXX=clang++-13 clean - generate-cmake -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake check-runtimes check-abi-list ;; @@ -287,7 +276,7 @@ generic-clang-14) export CC=clang-14 export CXX=clang++-14 clean - generate-cmake 
-DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake check-runtimes check-abi-list ;; @@ -295,8 +284,7 @@ generic-gcc) export CC=gcc-11 export CXX=g++-11 clean - generate-cmake -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" \ - -DLIBCXX_ENABLE_WERROR=NO \ + generate-cmake -DLIBCXX_ENABLE_WERROR=NO \ -DLIBCXXABI_ENABLE_WERROR=NO \ -DLIBUNWIND_ENABLE_WERROR=NO check-runtimes @@ -306,7 +294,6 @@ generic-gcc-cxx11) export CXX=g++-11 clean generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-cxx11.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" \ -DLIBCXX_ENABLE_WERROR=NO \ -DLIBCXXABI_ENABLE_WERROR=NO \ -DLIBUNWIND_ENABLE_WERROR=NO @@ -314,88 +301,74 @@ generic-gcc-cxx11) ;; generic-asan) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-asan.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-asan.cmake" check-runtimes ;; generic-msan) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-msan.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-msan.cmake" check-runtimes ;; generic-tsan) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-tsan.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-tsan.cmake" check-runtimes ;; generic-ubsan) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-ubsan.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-ubsan.cmake" check-runtimes ;; generic-with_llvm_unwinder) clean - generate-cmake -DLIBCXXABI_USE_LLVM_UNWINDER=ON \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -DLIBCXXABI_USE_LLVM_UNWINDER=ON check-runtimes ;; generic-no-threads) clean - 
generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-threads.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-threads.cmake" check-runtimes ;; generic-no-debug) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-debug.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-debug.cmake" check-runtimes check-abi-list ;; generic-no-filesystem) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-filesystem.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-filesystem.cmake" check-runtimes ;; generic-no-random_device) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-random_device.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-random_device.cmake" check-runtimes ;; generic-no-localization) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-localization.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-localization.cmake" check-runtimes ;; generic-no-unicode) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-unicode.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-unicode.cmake" check-runtimes ;; generic-no-wide-characters) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-wide-characters.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-wide-characters.cmake" check-runtimes ;; generic-no-experimental) clean - generate-cmake -C 
"${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-experimental.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-no-experimental.cmake" check-runtimes check-abi-list ;; generic-abi-unstable) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-abi-unstable.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-abi-unstable.cmake" check-runtimes ;; apple-system) @@ -427,10 +400,13 @@ apple-system-backdeployment-*) DEPLOYMENT_TARGET="${BUILDER#apple-system-backdeployment-}" - # TODO: On Apple platforms, we never produce libc++abi.1.dylib, always libc++abi.dylib. - # Fix that in the build so that the tests stop searching for @rpath/libc++abi.1.dylib. + # TODO: On Apple platforms, we never produce libc++abi.1.dylib or libunwind.1.dylib, + # only libc++abi.dylib and libunwind.dylib. Fix that in the build so that the + # tests stop searching for @rpath/libc++abi.1.dylib and @rpath/libunwind.1.dylib. cp "${OSX_ROOTS}/macOS/libc++abi/${DEPLOYMENT_TARGET}/libc++abi.dylib" \ "${OSX_ROOTS}/macOS/libc++abi/${DEPLOYMENT_TARGET}/libc++abi.1.dylib" + cp "${OSX_ROOTS}/macOS/libunwind/${DEPLOYMENT_TARGET}/libunwind.dylib" \ + "${OSX_ROOTS}/macOS/libunwind/${DEPLOYMENT_TARGET}/libunwind.1.dylib" arch="$(uname -m)" PARAMS="target_triple=${arch}-apple-macosx${DEPLOYMENT_TARGET}" @@ -439,18 +415,19 @@ apple-system-backdeployment-*) PARAMS+=";unwind_runtime_root=${OSX_ROOTS}/macOS/libunwind/${DEPLOYMENT_TARGET}" PARAMS+=";use_system_cxx_lib=True" - # TODO: We should run the libunwind tests using the back-deployment dylibs too. 
generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Apple.cmake" \ -DLIBCXX_TEST_CONFIG="apple-libc++-backdeployment.cfg.in" \ -DLIBCXXABI_TEST_CONFIG="apple-libc++abi-backdeployment.cfg.in" \ + -DLIBUNWIND_TEST_CONFIG="apple-libunwind-backdeployment.cfg.in" \ -DLIBCXX_TEST_PARAMS="${PARAMS}" \ - -DLIBCXXABI_TEST_PARAMS="${PARAMS}" + -DLIBCXXABI_TEST_PARAMS="${PARAMS}" \ + -DLIBUNWIND_TEST_PARAMS="${PARAMS}" check-runtimes ;; benchmarks) clean - generate-cmake -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake check-cxx-benchmarks ;; documentation) @@ -475,8 +452,7 @@ bootstrapping-build) -DLLVM_RUNTIME_TARGETS="$(c++ --print-target-triple)" \ -DLLVM_TARGETS_TO_BUILD="host" \ -DRUNTIMES_BUILD_ALLOW_DARWIN=ON \ - -DLLVM_ENABLE_ASSERTIONS=ON \ - -DRUNTIMES_LIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + -DLLVM_ENABLE_ASSERTIONS=ON echo "+++ Running the libc++ and libc++abi tests" ${NINJA} -C "${BUILD_DIR}" check-runtimes @@ -506,42 +482,36 @@ legacy-project-build) ;; aarch64) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/AArch64.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/AArch64.cmake" check-runtimes ;; aarch64-noexceptions) clean generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/AArch64.cmake" \ -DLIBCXX_ENABLE_EXCEPTIONS=OFF \ - -DLIBCXXABI_ENABLE_EXCEPTIONS=OFF \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + -DLIBCXXABI_ENABLE_EXCEPTIONS=OFF check-runtimes ;; # Aka Armv8 32 bit armv8) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Armv8Arm.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Armv8Arm.cmake" check-runtimes ;; armv8-noexceptions) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Armv8Thumb-noexceptions.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C 
"${MONOREPO_ROOT}/libcxx/cmake/caches/Armv8Thumb-noexceptions.cmake" check-runtimes ;; # Armv7 32 bit. One building Arm only one Thumb only code. armv7) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Armv7Arm.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Armv7Arm.cmake" check-runtimes ;; armv7-noexceptions) clean - generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Armv7Thumb-noexceptions.cmake" \ - -DLIBUNWIND_TEST_CONFIG="llvm-libunwind-shared.cfg.in" + generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Armv7Thumb-noexceptions.cmake" check-runtimes ;; clang-cl-dll) diff --git a/libunwind/test/configs/apple-libunwind-backdeployment.cfg.in b/libunwind/test/configs/apple-libunwind-backdeployment.cfg.in new file mode 100644 index 00000000000000..d4777e2f6917a7 --- /dev/null +++ b/libunwind/test/configs/apple-libunwind-backdeployment.cfg.in @@ -0,0 +1,64 @@ +# Testing configuration for back-deployment against older Apple system libunwind. +# +# Under this configuration, we compile and link all the test suite against the latest libunwind, +# however we run against the libunwind on a different platform. This emulates the workflow of +# a developer building their application using recent tools but with the goal of deploying +# on existing devices running an older OS (and hence an older dylib). + +import os, site +site.addsitedir(os.path.join('@LIBUNWIND_LIBCXX_PATH@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig, libcxx.test.dsl + +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +BACKDEPLOYMENT_PARAMETERS = [ + libcxx.test.dsl.Parameter(name='cxx_runtime_root', type=str, + actions=lambda root: [libcxx.test.dsl.AddSubstitution('%{cxx-runtime-root}', root)], + help=""" + The simulated root of the system (for libc++) when running tests. + + This should be a directory hierarchy under which the libc++ dylib can be found. 
+ The dylib in that hierarchy is the one that will be used at runtime when running + the tests. + """), + libcxx.test.dsl.Parameter(name='abi_runtime_root', type=str, + actions=lambda root: [libcxx.test.dsl.AddSubstitution('%{abi-runtime-root}', root)], + help=""" + The simulated root of the system (for libc++abi) when running tests. + + This should be a directory hierarchy under which the libc++abi dylib can be found. + The dylib in that hierarchy is the one that will be used at runtime when running + the tests. + """), + libcxx.test.dsl.Parameter(name='unwind_runtime_root', type=str, + actions=lambda root: [libcxx.test.dsl.AddSubstitution('%{unwind-runtime-root}', root)], + help=""" + The simulated root of the system (for libunwind) when running tests. + + This should be a directory hierarchy under which the libunwind dylib can be found. + The dylib in that hierarchy is the one that will be used at runtime when running + the tests. + """), +] + +config.substitutions.append(('%{flags}', + '-isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '' +)) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -I %{include}' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib++ -L %{lib} -lc++ -lc++abi -lunwind' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env DYLD_LIBRARY_PATH="%{cxx-runtime-root}:%{abi-runtime-root}:%{unwind-runtime-root}" -- ' +)) + +import os, site +import libcxx.test.params, libcxx.test.newconfig, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS + BACKDEPLOYMENT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libunwind/test/libunwind_01.pass.cpp b/libunwind/test/libunwind_01.pass.cpp index 8b4825ac30dca8..bbfaffd153cc28 100644 --- a/libunwind/test/libunwind_01.pass.cpp +++ b/libunwind/test/libunwind_01.pass.cpp @@ -7,8 +7,8 @@ // 
//===----------------------------------------------------------------------===// -// TODO: Investigate these failures on x86_64 macOS back deployment -// UNSUPPORTED: target=x86_64-apple-darwin{{.+}} +// TODO: Investigate this failure on x86_64 macOS back deployment +// XFAIL: use_system_cxx_lib && target=x86_64-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0}} // TODO: Figure out why this fails with Memory Sanitizer. // XFAIL: msan diff --git a/libunwind/test/signal_frame.pass.cpp b/libunwind/test/signal_frame.pass.cpp index d9fb439cd1e7f4..b17804efa08c8e 100644 --- a/libunwind/test/signal_frame.pass.cpp +++ b/libunwind/test/signal_frame.pass.cpp @@ -9,8 +9,8 @@ // Ensure that functions marked as signal frames are reported as such. -// TODO: Investigate this failure on macOS -// XFAIL: target={{.+}}-apple-darwin{{.+}} +// TODO: Investigate this failure on Apple +// XFAIL: target={{.+}}-apple-{{.+}} // TODO: Figure out why this fails with Memory Sanitizer. // XFAIL: msan From 628b2bfad828edebca8f3ab02aa97cc82c6af7d2 Mon Sep 17 00:00:00 2001 From: python3kgae Date: Tue, 24 May 2022 11:04:51 -0700 Subject: [PATCH 626/908] [DirectX] Update test for dxil-dis. Update metadata index which changed for dx.valver. 
Reviewed By: beanz Differential Revision: https://reviews.llvm.org/D126311 --- llvm/test/tools/dxil-dis/debug-info.ll | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/test/tools/dxil-dis/debug-info.ll b/llvm/test/tools/dxil-dis/debug-info.ll index 4799bc2f79bbea..067279a006efaa 100644 --- a/llvm/test/tools/dxil-dis/debug-info.ll +++ b/llvm/test/tools/dxil-dis/debug-info.ll @@ -2,17 +2,17 @@ target triple = "dxil-unknown-unknown" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -; CHECK: define float @fma(float, float, float) unnamed_addr #0 !dbg !6 +; CHECK: define float @fma(float, float, float) unnamed_addr #0 !dbg !7 ; Function Attrs: norecurse nounwind readnone willreturn define dso_local float @fma(float %0, float %1, float %2) local_unnamed_addr #0 !dbg !6 { -; CHECK-NEXT: call void @llvm.dbg.value(metadata float %0, metadata !11, metadata !14), !dbg !15 -; CHECK-NEXT: call void @llvm.dbg.value(metadata float %1, metadata !12, metadata !14), !dbg !15 -; CHECK-NEXT: call void @llvm.dbg.value(metadata float %2, metadata !13, metadata !14), !dbg !15 +; CHECK-NEXT: call void @llvm.dbg.value(metadata float %0, metadata !12, metadata !15), !dbg !16 +; CHECK-NEXT: call void @llvm.dbg.value(metadata float %1, metadata !13, metadata !15), !dbg !16 +; CHECK-NEXT: call void @llvm.dbg.value(metadata float %2, metadata !14, metadata !15), !dbg !16 call void @llvm.dbg.value(metadata float %0, metadata !11, metadata !DIExpression()), !dbg !14 call void @llvm.dbg.value(metadata float %1, metadata !12, metadata !DIExpression()), !dbg !14 call void @llvm.dbg.value(metadata float %2, metadata !13, metadata !DIExpression()), !dbg !14 -; CHECK-NEXT: %4 = fmul float %0, %1, !dbg !16 -; CHECK-NEXT: %5 = fadd float %4, %2, !dbg !17 +; CHECK-NEXT: %4 = fmul float %0, %1, !dbg !17 +; CHECK-NEXT: %5 = fadd float %4, %2, !dbg !18 %4 = fmul float %0, %1, !dbg !15 %5 = fadd float %4, %2, !dbg !16 ret float %5, !dbg 
!17 @@ -36,15 +36,15 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } ; CHECK: !0 = distinct !DICompileUnit ; CHECK-NEXT: !1 = !DIFile(filename: -; CHECK: !6 = distinct !DISubprogram(name: "fma", -; CHECK: !11 = !DILocalVariable(tag: -; CHECK-NEXT: !12 = !DILocalVariable(tag: +; CHECK: !7 = distinct !DISubprogram(name: "fma", +; CHECK: !12 = !DILocalVariable(tag: ; CHECK-NEXT: !13 = !DILocalVariable(tag: -; CHECK-NEXT: !14 = !DIExpression() -; CHECK-NEXT: !15 = !DILocation(line: +; CHECK-NEXT: !14 = !DILocalVariable(tag: +; CHECK-NEXT: !15 = !DIExpression() ; CHECK-NEXT: !16 = !DILocation(line: ; CHECK-NEXT: !17 = !DILocation(line: ; CHECK-NEXT: !18 = !DILocation(line: +; CHECK-NEXT: !19 = !DILocation(line: !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "in.c", directory: "dir") From b58a420ff4f92b085fd718600fda162059171a58 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Wed, 11 May 2022 21:21:17 -0700 Subject: [PATCH 627/908] [Tooling/DependencyScanning] Rename refactorings towards transitioning dependency scanning to use pre-lexed preprocessor directive tokens This is first of a series of patches for making the special lexing for dependency scanning a first-class feature of the `Preprocessor` and `Lexer`. This patch only includes NFC renaming changes to make reviewing of the functionality changing parts easier. 
Differential Revision: https://reviews.llvm.org/D125484 --- .../include/clang/Basic/DiagnosticLexKinds.td | 6 +- ...imizer.h => DependencyDirectivesScanner.h} | 37 ++-- .../DependencyScanningFilesystem.h | 58 +++--- .../DependencyScanningService.h | 10 +- clang/lib/Frontend/FrontendActions.cpp | 8 +- clang/lib/Lex/CMakeLists.txt | 2 +- ...er.cpp => DependencyDirectivesScanner.cpp} | 170 +++++++++--------- .../DependencyScanningFilesystem.cpp | 65 +++---- .../DependencyScanningWorker.cpp | 10 +- .../ClangScanDeps/has_include_if_elif.cpp | 2 +- clang/test/ClangScanDeps/macro-expansions.cpp | 40 +++++ clang/test/ClangScanDeps/modulemap-via-vfs.m | 2 +- .../modules-fmodule-name-no-module-built.m | 2 +- .../modules-full-by-mod-name.cpp | 4 +- clang/test/ClangScanDeps/modules-full.cpp | 8 +- .../modules-inferred-explicit-build.m | 2 +- clang/test/ClangScanDeps/modules-inferred.m | 2 +- clang/test/ClangScanDeps/modules.cpp | 12 +- .../preprocess_minimized_pragmas.cpp | 2 +- clang/test/ClangScanDeps/regular_cdb.cpp | 12 +- clang/tools/clang-scan-deps/ClangScanDeps.cpp | 14 +- clang/unittests/Lex/CMakeLists.txt | 2 +- ...pp => DependencyDirectivesScannerTest.cpp} | 139 +++++++------- .../Tooling/DependencyScannerTest.cpp | 8 +- 24 files changed, 328 insertions(+), 289 deletions(-) rename clang/include/clang/Lex/{DependencyDirectivesSourceMinimizer.h => DependencyDirectivesScanner.h} (73%) rename clang/lib/Lex/{DependencyDirectivesSourceMinimizer.cpp => DependencyDirectivesScanner.cpp} (84%) create mode 100644 clang/test/ClangScanDeps/macro-expansions.cpp rename clang/unittests/Lex/{DependencyDirectivesSourceMinimizerTest.cpp => DependencyDirectivesScannerTest.cpp} (89%) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 4cfd314c012700..398fb15352fe64 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -899,11 +899,11 @@ def 
err_pp_eof_in_assume_nonnull : Error< } -let CategoryName = "Dependency Directive Source Minimization Issue" in { +let CategoryName = "Dependency Directive Source Scanner Issue" in { -def err_dep_source_minimizer_missing_sema_after_at_import : Error< +def err_dep_source_scanner_missing_semi_after_at_import : Error< "could not find ';' after @import">; -def err_dep_source_minimizer_unexpected_tokens_at_import : Error< +def err_dep_source_scanner_unexpected_tokens_at_import : Error< "unexpected extra tokens at end of @import declaration">; } diff --git a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h similarity index 73% rename from clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h rename to clang/include/clang/Lex/DependencyDirectivesScanner.h index 56025c8a3ed5d6..b65891c6b8aba1 100644 --- a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h +++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h @@ -1,4 +1,4 @@ -//===- clang/Lex/DependencyDirectivesSourceMinimizer.h - ----------*- C++ -*-// +//===- clang/Lex/DependencyDirectivesScanner.h ---------------------*- C++ -*-// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,15 +7,15 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This is the interface for minimizing header and source files to the +/// This is the interface for scanning header and source files to get the /// minimum necessary preprocessor directives for evaluating includes. It /// reduces the source down to #define, #include, #import, @import, and any /// conditional preprocessor logic that contains one of those. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H -#define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H +#ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H +#define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/ArrayRef.h" @@ -26,11 +26,11 @@ namespace clang { class DiagnosticsEngine; -namespace minimize_source_to_dependency_directives { +namespace dependency_directives_scan { /// Represents the kind of preprocessor directive or a module declaration that -/// is tracked by the source minimizer in its token output. -enum TokenKind { +/// is tracked by the scanner in its token output. +enum DirectiveKind : uint8_t { pp_none, pp_include, pp___include_macros, @@ -58,17 +58,17 @@ enum TokenKind { pp_eof, }; -/// Represents a simplified token that's lexed as part of the source -/// minimization. It's used to track the location of various preprocessor -/// directives that could potentially have an effect on the depedencies. -struct Token { +/// Represents a directive that's lexed as part of the dependency directives +/// scanning. It's used to track various preprocessor directives that could +/// potentially have an effect on the depedencies. +struct Directive { /// The kind of token. - TokenKind K = pp_none; + DirectiveKind Kind = pp_none; /// Offset into the output byte stream of where the directive begins. int Offset = -1; - Token(TokenKind K, int Offset) : K(K), Offset(Offset) {} + Directive(DirectiveKind K, int Offset) : Kind(K), Offset(Offset) {} }; /// Simplified token range to track the range of a potentially skippable PP @@ -86,10 +86,10 @@ struct SkippedRange { /// when skipping a directive like #if, #ifdef or #elsif. /// /// \returns false on success, true on error. 
-bool computeSkippedRanges(ArrayRef Input, +bool computeSkippedRanges(ArrayRef Input, llvm::SmallVectorImpl &Range); -} // end namespace minimize_source_to_dependency_directives +} // end namespace dependency_directives_scan /// Minimize the input down to the preprocessor directives that might have /// an effect on the dependencies for a compilation unit. @@ -103,13 +103,12 @@ bool computeSkippedRanges(ArrayRef Input, /// \returns false on success, true on error. If the diagnostic engine is not /// null, an appropriate error is reported using the given input location /// with the offset that corresponds to the minimizer's current buffer offset. -bool minimizeSourceToDependencyDirectives( +bool scanSourceForDependencyDirectives( llvm::StringRef Input, llvm::SmallVectorImpl &Output, - llvm::SmallVectorImpl - &Tokens, + llvm::SmallVectorImpl &Directives, DiagnosticsEngine *Diags = nullptr, SourceLocation InputSourceLoc = SourceLocation()); } // end namespace clang -#endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H +#endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 021a158530537f..c5f239a2e4bff7 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -28,14 +28,14 @@ struct CachedFileContents { CachedFileContents(std::unique_ptr Original) : Original(std::move(Original)), MinimizedAccess(nullptr) {} - /// Owning storage for the minimized contents. + /// Owning storage for the original contents. std::unique_ptr Original; - /// The mutex that must be locked before mutating minimized contents. + /// The mutex that must be locked before mutating directive tokens. std::mutex ValueLock; /// Owning storage for the minimized contents. 
std::unique_ptr MinimizedStorage; - /// Accessor to the minimized contents that's atomic to avoid data races. + /// Accessor to the directive tokens that's atomic to avoid data races. std::atomic MinimizedAccess; /// Skipped range mapping of the minimized contents. /// This is initialized iff `MinimizedAccess != nullptr`. @@ -46,8 +46,8 @@ struct CachedFileContents { /// the dependency scanning filesystem. /// /// It represents one of the following: -/// - opened file with original contents and a stat value, -/// - opened file with original contents, minimized contents and a stat value, +/// - opened file with contents and a stat value, +/// - opened file with contents, directive tokens and a stat value, /// - directory entry with its stat value, /// - filesystem error. /// @@ -84,8 +84,9 @@ class CachedFileSystemEntry { return Contents->Original->getBuffer(); } - /// \returns Minimized contents of the file. - StringRef getMinimizedContents() const { + /// \returns The scanned preprocessor directive tokens of the file that are + /// used to speed up preprocessing, if available. + StringRef getDirectiveTokens() const { assert(!isError() && "error"); assert(!MaybeStat->isDirectory() && "not a file"); assert(Contents && "contents not initialized"); @@ -119,8 +120,8 @@ class CachedFileSystemEntry { return Contents->PPSkippedRangeMapping; } - /// \returns The data structure holding both original and minimized contents. - CachedFileContents *getContents() const { + /// \returns The data structure holding both contents and directive tokens. + CachedFileContents *getCachedContents() const { assert(!isError() && "error"); assert(!isDirectory() && "not a file"); return Contents; @@ -145,7 +146,7 @@ class CachedFileSystemEntry { }; /// This class is a shared cache, that caches the 'stat' and 'open' calls to the -/// underlying real file system. 
It distinguishes between minimized and original +/// underlying real file system, and the scanned preprocessor directives of /// files. /// /// It is sharded based on the hash of the key to reduce the lock contention for @@ -210,8 +211,7 @@ class DependencyScanningFilesystemSharedCache { }; /// This class is a local cache, that caches the 'stat' and 'open' calls to the -/// underlying real file system. It distinguishes between minimized and original -/// files. +/// underlying real file system. class DependencyScanningFilesystemLocalCache { llvm::StringMap Cache; @@ -234,9 +234,8 @@ class DependencyScanningFilesystemLocalCache { }; /// Reference to a CachedFileSystemEntry. -/// If the underlying entry is an opened file, this wrapper returns the correct -/// contents (original or minimized) and ensures consistency with file size -/// reported by status. +/// If the underlying entry is an opened file, this wrapper returns the file +/// contents and the scanned preprocessor directives. class EntryRef { /// For entry that is an opened file, this bit signifies whether its contents /// are minimized. @@ -270,8 +269,7 @@ class EntryRef { } StringRef getContents() const { - return Minimized ? Entry.getMinimizedContents() - : Entry.getOriginalContents(); + return Minimized ? Entry.getDirectiveTokens() : Entry.getOriginalContents(); } const PreprocessorSkippedRangeMapping *getPPSkippedRangeMapping() const { @@ -301,14 +299,14 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { llvm::ErrorOr> openFileForRead(const Twine &Path) override; - /// Disable minimization of the given file. - void disableMinimization(StringRef Filename); - /// Enable minimization of all files. - void enableMinimizationOfAllFiles() { NotToBeMinimized.clear(); } + /// Disable directives scanning of the given file. + void disableDirectivesScanning(StringRef Filename); + /// Enable directives scanning of all files. 
+ void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); } private: - /// Check whether the file should be minimized. - bool shouldMinimize(StringRef Filename, llvm::sys::fs::UniqueID UID); + /// Check whether the file should be scanned for preprocessor directives. + bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID); /// Returns entry for the given filename. /// @@ -316,7 +314,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// using the underlying filesystem. llvm::ErrorOr getOrCreateFileSystemEntry(StringRef Filename, - bool DisableMinimization = false); + bool DisableDirectivesScanning = false); /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the @@ -324,10 +322,10 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { llvm::ErrorOr computeAndStoreResult(StringRef Filename); - /// Minimizes the given entry if necessary and returns a wrapper object with - /// reference semantics. - EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry, - StringRef Filename, bool Disable); + /// Scan for preprocessor directives for the given entry if necessary and + /// returns a wrapper object with reference semantics. + EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry, + StringRef Filename, bool Disable); /// Represents a filesystem entry that has been stat-ed (and potentially read) /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. @@ -402,8 +400,8 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// excluded conditional directive skip mappings that are used by the /// currently active preprocessor. ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; - /// The set of files that should not be minimized. 
- llvm::DenseSet NotToBeMinimized; + /// The set of files that should not be scanned for PP directives. + llvm::DenseSet NotToBeScanned; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h index 79abe10887298e..c0c8b2348ae430 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -19,15 +19,13 @@ namespace dependencies { /// dependencies. enum class ScanningMode { /// This mode is used to compute the dependencies by running the preprocessor - /// over - /// the unmodified source files. + /// over the source files. CanonicalPreprocessing, /// This mode is used to compute the dependencies by running the preprocessor - /// over - /// the source files that have been minimized to contents that might affect - /// the dependencies. - MinimizedSourcePreprocessing + /// with special kind of lexing after scanning header and source files to get + /// the minimum necessary preprocessor directives for evaluating includes. + DependencyDirectivesScan, }; /// The format that is output by the dependency scanner. 
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 10d6e1f8417bf0..ba492c29d9c02b 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -18,7 +18,7 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/MultiplexConsumer.h" #include "clang/Frontend/Utils.h" -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" @@ -1158,9 +1158,9 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() { llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID()); llvm::SmallString<1024> Output; - llvm::SmallVector Toks; - if (minimizeSourceToDependencyDirectives( - FromFile.getBuffer(), Output, Toks, &CI.getDiagnostics(), + llvm::SmallVector Directives; + if (scanSourceForDependencyDirectives( + FromFile.getBuffer(), Output, Directives, &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) { assert(CI.getDiagnostics().hasErrorOccurred() && "no errors reported for failure"); diff --git a/clang/lib/Lex/CMakeLists.txt b/clang/lib/Lex/CMakeLists.txt index 44f0149628c881..7694eaba6cf10e 100644 --- a/clang/lib/Lex/CMakeLists.txt +++ b/clang/lib/Lex/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangLex - DependencyDirectivesSourceMinimizer.cpp + DependencyDirectivesScanner.cpp HeaderMap.cpp HeaderSearch.cpp InitHeaderSearch.cpp diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp similarity index 84% rename from clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp rename to clang/lib/Lex/DependencyDirectivesScanner.cpp index 10536438e2fc55..dd1ea1cfe11e04 100644 --- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ 
b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -1,4 +1,4 @@ -//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// +//===- DependencyDirectivesScanner.cpp ------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,14 +7,14 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This is the implementation for minimizing header and source files to the +/// This is the interface for scanning header and source files to get the /// minimum necessary preprocessor directives for evaluating includes. It /// reduces the source down to #define, #include, #import, @import, and any /// conditional preprocessor logic that contains one of those. /// //===----------------------------------------------------------------------===// -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Lex/LexDiagnostic.h" @@ -24,26 +24,26 @@ using namespace llvm; using namespace clang; -using namespace clang::minimize_source_to_dependency_directives; +using namespace clang::dependency_directives_scan; namespace { -struct Minimizer { +struct Scanner { /// Minimized output. SmallVectorImpl &Out; /// The known tokens encountered during the minimization. 
- SmallVectorImpl &Tokens; + SmallVectorImpl &Directives; - Minimizer(SmallVectorImpl &Out, SmallVectorImpl &Tokens, - StringRef Input, DiagnosticsEngine *Diags, - SourceLocation InputSourceLoc) - : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), + Scanner(SmallVectorImpl &Out, SmallVectorImpl &Directives, + StringRef Input, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) + : Out(Out), Directives(Directives), Input(Input), Diags(Diags), InputSourceLoc(InputSourceLoc) {} - /// Lex the provided source and emit the minimized output. + /// Lex the provided source and emit the directive tokens. /// /// \returns True on error. - bool minimize(); + bool scan(); private: struct IdInfo { @@ -57,31 +57,33 @@ struct Minimizer { LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, const char *const End); - LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); + LLVM_NODISCARD bool scanImpl(const char *First, const char *const End); LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, + LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, StringRef Directive, const char *&First, const char *const End); - Token &makeToken(TokenKind K) { - Tokens.emplace_back(K, Out.size()); - return Tokens.back(); + Directive &pushDirective(DirectiveKind K) { + Directives.emplace_back(K, Out.size()); + return Directives.back(); } - void popToken() { - Out.resize(Tokens.back().Offset); - Tokens.pop_back(); + void 
popDirective() { + Out.resize(Directives.back().Offset); + Directives.pop_back(); + } + DirectiveKind topDirective() const { + return Directives.empty() ? pp_none : Directives.back().Kind; } - TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; } - Minimizer &put(char Byte) { + Scanner &put(char Byte) { Out.push_back(Byte); return *this; } - Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } - Minimizer &append(const char *First, const char *Last) { + Scanner &append(StringRef S) { return append(S.begin(), S.end()); } + Scanner &append(const char *First, const char *Last) { Out.append(First, Last); return *this; } @@ -106,7 +108,7 @@ struct Minimizer { } // end anonymous namespace -bool Minimizer::reportError(const char *CurPtr, unsigned Err) { +bool Scanner::reportError(const char *CurPtr, unsigned Err) { if (!Diags) return true; assert(CurPtr >= Input.data() && "invalid buffer ptr"); @@ -279,8 +281,7 @@ static const char *findLastNonSpaceNonBackslash(const char *First, return Last; } -static const char *findFirstTrailingSpace(const char *First, - const char *Last) { +static const char *findFirstTrailingSpace(const char *First, const char *Last) { const char *LastNonSpace = findLastNonSpace(First, Last); if (Last == LastNonSpace) return Last; @@ -395,13 +396,14 @@ static void skipDirective(StringRef Name, const char *&First, skipLine(First, End); } -void Minimizer::printToNewline(const char *&First, const char *const End) { +void Scanner::printToNewline(const char *&First, const char *const End) { while (First != End && !isVerticalWhitespace(*First)) { const char *Last = First; do { // Iterate over strings correctly to avoid comments and newlines. 
if (*Last == '"' || *Last == '\'' || - (*Last == '<' && (top() == pp_include || top() == pp_import))) { + (*Last == '<' && + (topDirective() == pp_include || topDirective() == pp_import))) { if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) skipRawString(Last, End); else @@ -487,8 +489,8 @@ static void skipWhitespace(const char *&First, const char *const End) { } } -void Minimizer::printAdjacentModuleNameParts(const char *&First, - const char *const End) { +void Scanner::printAdjacentModuleNameParts(const char *&First, + const char *const End) { // Skip over parts of the body. const char *Last = First; do @@ -498,7 +500,7 @@ void Minimizer::printAdjacentModuleNameParts(const char *&First, First = Last; } -bool Minimizer::printAtImportBody(const char *&First, const char *const End) { +bool Scanner::printAtImportBody(const char *&First, const char *const End) { for (;;) { skipWhitespace(First, End); if (First == End) @@ -523,7 +525,7 @@ bool Minimizer::printAtImportBody(const char *&First, const char *const End) { } } -void Minimizer::printDirectiveBody(const char *&First, const char *const End) { +void Scanner::printDirectiveBody(const char *&First, const char *const End) { skipWhitespace(First, End); // Skip initial whitespace. printToNewline(First, End); while (Out.back() == ' ') @@ -552,8 +554,8 @@ getIdentifierContinuation(const char *First, const char *const End) { return isAsciiIdentifierContinue(First[0]) ? 
First : nullptr; } -Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, - const char *const End) { +Scanner::IdInfo Scanner::lexIdentifier(const char *First, + const char *const End) { const char *Last = lexRawIdentifier(First, End); const char *Next = getIdentifierContinuation(Last, End); if (LLVM_LIKELY(!Next)) @@ -571,8 +573,8 @@ Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; } -void Minimizer::printAdjacentMacroArgs(const char *&First, - const char *const End) { +void Scanner::printAdjacentMacroArgs(const char *&First, + const char *const End) { // Skip over parts of the body. const char *Last = First; do @@ -583,7 +585,7 @@ void Minimizer::printAdjacentMacroArgs(const char *&First, First = Last; } -bool Minimizer::printMacroArgs(const char *&First, const char *const End) { +bool Scanner::printMacroArgs(const char *&First, const char *const End) { assert(*First == '('); put(*First++); for (;;) { @@ -608,8 +610,8 @@ bool Minimizer::printMacroArgs(const char *&First, const char *const End) { /// /// Updates "First" to just past the next identifier, if any. Returns true iff /// the identifier matches "Id". -bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, - const char *const End) { +bool Scanner::isNextIdentifier(StringRef Id, const char *&First, + const char *const End) { skipWhitespace(First, End); if (First == End || !isAsciiIdentifierStart(*First)) return false; @@ -619,29 +621,29 @@ bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, return FoundId.Name == Id; } -bool Minimizer::lexAt(const char *&First, const char *const End) { +bool Scanner::lexAt(const char *&First, const char *const End) { // Handle "@import". 
const char *ImportLoc = First++; if (!isNextIdentifier("import", First, End)) { skipLine(First, End); return false; } - makeToken(decl_at_import); + pushDirective(decl_at_import); append("@import "); if (printAtImportBody(First, End)) return reportError( - ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); + ImportLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); skipWhitespace(First, End); if (First == End) return false; if (!isVerticalWhitespace(*First)) return reportError( - ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); + ImportLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); skipNewline(First, End); return false; } -bool Minimizer::lexModule(const char *&First, const char *const End) { +bool Scanner::lexModule(const char *&First, const char *const End) { IdInfo Id = lexIdentifier(First, End); First = Id.Last; bool Export = false; @@ -679,14 +681,14 @@ bool Minimizer::lexModule(const char *&First, const char *const End) { } if (Export) { - makeToken(cxx_export_decl); + pushDirective(cxx_export_decl); append("export "); } if (Id.Name == "module") - makeToken(cxx_module_decl); + pushDirective(cxx_module_decl); else - makeToken(cxx_import_decl); + pushDirective(cxx_import_decl); append(Id.Name); append(" "); printToNewline(First, End); @@ -694,8 +696,8 @@ bool Minimizer::lexModule(const char *&First, const char *const End) { return false; } -bool Minimizer::lexDefine(const char *&First, const char *const End) { - makeToken(pp_define); +bool Scanner::lexDefine(const char *&First, const char *const End) { + pushDirective(pp_define); append("#define "); skipWhitespace(First, End); @@ -728,7 +730,7 @@ bool Minimizer::lexDefine(const char *&First, const char *const End) { return false; } -bool Minimizer::lexPragma(const char *&First, const char *const End) { +bool Scanner::lexPragma(const char *&First, const char *const End) { // #pragma. 
skipWhitespace(First, End); if (First == End || !isAsciiIdentifierStart(*First)) @@ -739,27 +741,27 @@ bool Minimizer::lexPragma(const char *&First, const char *const End) { if (FoundId.Name == "once") { // #pragma once skipLine(First, End); - makeToken(pp_pragma_once); + pushDirective(pp_pragma_once); append("#pragma once\n"); return false; } if (FoundId.Name == "push_macro") { // #pragma push_macro - makeToken(pp_pragma_push_macro); + pushDirective(pp_pragma_push_macro); append("#pragma push_macro"); printDirectiveBody(First, End); return false; } if (FoundId.Name == "pop_macro") { // #pragma pop_macro - makeToken(pp_pragma_pop_macro); + pushDirective(pp_pragma_pop_macro); append("#pragma pop_macro"); printDirectiveBody(First, End); return false; } if (FoundId.Name == "include_alias") { // #pragma include_alias - makeToken(pp_pragma_include_alias); + pushDirective(pp_pragma_include_alias); append("#pragma include_alias"); printDirectiveBody(First, End); return false; @@ -783,16 +785,16 @@ bool Minimizer::lexPragma(const char *&First, const char *const End) { } // #pragma clang module import. - makeToken(pp_pragma_import); + pushDirective(pp_pragma_import); append("#pragma clang module import "); printDirectiveBody(First, End); return false; } -bool Minimizer::lexEndif(const char *&First, const char *const End) { +bool Scanner::lexEndif(const char *&First, const char *const End) { // Strip out "#else" if it's empty. - if (top() == pp_else) - popToken(); + if (topDirective() == pp_else) + popDirective(); // If "#ifdef" is empty, strip it and skip the "#endif". // @@ -800,8 +802,8 @@ bool Minimizer::lexEndif(const char *&First, const char *const End) { // we can skip empty `#if` and `#elif` blocks as well after scanning for a // literal __has_include in the condition. Even without that rule we could // drop the tokens if we scan for identifiers in the condition and find none. 
- if (top() == pp_ifdef || top() == pp_ifndef) { - popToken(); + if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) { + popDirective(); skipLine(First, End); return false; } @@ -809,9 +811,9 @@ bool Minimizer::lexEndif(const char *&First, const char *const End) { return lexDefault(pp_endif, "endif", First, End); } -bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, - const char *&First, const char *const End) { - makeToken(Kind); +bool Scanner::lexDefault(DirectiveKind Kind, StringRef Directive, + const char *&First, const char *const End) { + pushDirective(Kind); put('#').append(Directive).put(' '); printDirectiveBody(First, End); return false; @@ -829,7 +831,7 @@ static bool isStartOfRelevantLine(char First) { return false; } -bool Minimizer::lexPPLine(const char *&First, const char *const End) { +bool Scanner::lexPPLine(const char *&First, const char *const End) { assert(First != End); skipWhitespace(First, End); @@ -869,7 +871,7 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) { if (Id.Name == "pragma") return lexPragma(First, End); - auto Kind = llvm::StringSwitch(Id.Name) + auto Kind = llvm::StringSwitch(Id.Name) .Case("include", pp_include) .Case("__include_macros", pp___include_macros) .Case("define", pp_define) @@ -906,7 +908,7 @@ static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { First += 3; } -bool Minimizer::minimizeImpl(const char *First, const char *const End) { +bool Scanner::scanImpl(const char *First, const char *const End) { skipUTF8ByteOrderMark(First, End); while (First != End) if (lexPPLine(First, End)) @@ -914,14 +916,14 @@ bool Minimizer::minimizeImpl(const char *First, const char *const End) { return false; } -bool Minimizer::minimize() { - bool Error = minimizeImpl(Input.begin(), Input.end()); +bool Scanner::scan() { + bool Error = scanImpl(Input.begin(), Input.end()); if (!Error) { // Add a trailing newline and an EOF on success. 
if (!Out.empty() && Out.back() != '\n') Out.push_back('\n'); - makeToken(pp_eof); + pushDirective(pp_eof); } // Null-terminate the output. This way the memory buffer that's passed to @@ -931,9 +933,9 @@ bool Minimizer::minimize() { return Error; } -bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( - ArrayRef Input, llvm::SmallVectorImpl &Range) { - struct Directive { +bool clang::dependency_directives_scan::computeSkippedRanges( + ArrayRef Input, llvm::SmallVectorImpl &Range) { + struct IfElseDirective { enum DirectiveKind { If, // if/ifdef/ifndef Else // elif/elifdef/elifndef, else @@ -941,13 +943,13 @@ bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( int Offset; DirectiveKind Kind; }; - llvm::SmallVector Offsets; - for (const Token &T : Input) { - switch (T.K) { + llvm::SmallVector Offsets; + for (const Directive &T : Input) { + switch (T.Kind) { case pp_if: case pp_ifdef: case pp_ifndef: - Offsets.push_back({T.Offset, Directive::If}); + Offsets.push_back({T.Offset, IfElseDirective::If}); break; case pp_elif: @@ -958,7 +960,7 @@ bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( return true; int PreviousOffset = Offsets.back().Offset; Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - Offsets.push_back({T.Offset, Directive::Else}); + Offsets.push_back({T.Offset, IfElseDirective::Else}); break; } @@ -968,8 +970,8 @@ bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( int PreviousOffset = Offsets.back().Offset; Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); do { - Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind; - if (Kind == Directive::If) + IfElseDirective::DirectiveKind Kind = Offsets.pop_back_val().Kind; + if (Kind == IfElseDirective::If) break; } while (!Offsets.empty()); break; @@ -981,11 +983,11 @@ bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( return false; } -bool 
clang::minimizeSourceToDependencyDirectives( +bool clang::scanSourceForDependencyDirectives( StringRef Input, SmallVectorImpl &Output, - SmallVectorImpl &Tokens, DiagnosticsEngine *Diags, + SmallVectorImpl &Directives, DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) { Output.clear(); - Tokens.clear(); - return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); + Directives.clear(); + return Scanner(Output, Directives, Input, Diags, InputSourceLoc).scan(); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 5e0130db722fe7..c47f7d068eb7f2 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/Threading.h" @@ -41,13 +41,13 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { return TentativeEntry(Stat, std::move(Buffer)); } -EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary( +EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { if (Entry.isError() || Entry.isDirectory() || Disable || - !shouldMinimize(Filename, Entry.getUniqueID())) + !shouldScanForDirectives(Filename, Entry.getUniqueID())) return EntryRef(/*Minimized=*/false, Filename, Entry); - CachedFileContents *Contents = Entry.getContents(); + CachedFileContents *Contents = Entry.getCachedContents(); assert(Contents && "contents not initialized"); // 
Double-checked locking. @@ -62,9 +62,9 @@ EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary( llvm::SmallString<1024> MinimizedFileContents; // Minimize the file down to directives that might affect the dependencies. - SmallVector Tokens; - if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(), - MinimizedFileContents, Tokens)) { + SmallVector Tokens; + if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), + MinimizedFileContents, Tokens)) { // FIXME: Propagate the diagnostic if desired by the client. // Use the original file if the minimization failed. Contents->MinimizedStorage = @@ -79,10 +79,8 @@ EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary( // Compute the skipped PP ranges that speedup skipping over inactive // preprocessor blocks. - llvm::SmallVector - SkippedRanges; - minimize_source_to_dependency_directives::computeSkippedRanges(Tokens, - SkippedRanges); + llvm::SmallVector SkippedRanges; + dependency_directives_scan::computeSkippedRanges(Tokens, SkippedRanges); PreprocessorSkippedRangeMapping Mapping; for (const auto &Range : SkippedRanges) { if (Range.Length < 16) { @@ -189,7 +187,7 @@ DependencyScanningFilesystemSharedCache::CacheShard:: /// /// This is kinda hacky, it would be better if we knew what kind of file Clang /// was expecting instead. -static bool shouldMinimizeBasedOnExtension(StringRef Filename) { +static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) { StringRef Ext = llvm::sys::path::extension(Filename); if (Ext.empty()) return true; // C++ standard library @@ -207,22 +205,22 @@ static bool shouldCacheStatFailures(StringRef Filename) { if (Ext.empty()) return false; // This may be the module cache directory. // Only cache stat failures on source files. 
- return shouldMinimizeBasedOnExtension(Filename); + return shouldScanForDirectivesBasedOnExtension(Filename); } -void DependencyScanningWorkerFilesystem::disableMinimization( +void DependencyScanningWorkerFilesystem::disableDirectivesScanning( StringRef Filename) { - // Since we're not done setting up `NotToBeMinimized` yet, we need to disable - // minimization explicitly. - if (llvm::ErrorOr Result = - getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true)) - NotToBeMinimized.insert(Result->getStatus().getUniqueID()); + // Since we're not done setting up `NotToBeScanned` yet, we need to disable + // directive scanning explicitly. + if (llvm::ErrorOr Result = getOrCreateFileSystemEntry( + Filename, /*DisableDirectivesScanning=*/true)) + NotToBeScanned.insert(Result->getStatus().getUniqueID()); } -bool DependencyScanningWorkerFilesystem::shouldMinimize( +bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( StringRef Filename, llvm::sys::fs::UniqueID UID) { - return shouldMinimizeBasedOnExtension(Filename) && - !NotToBeMinimized.contains(UID); + return shouldScanForDirectivesBasedOnExtension(Filename) && + !NotToBeScanned.contains(UID); } const CachedFileSystemEntry & @@ -275,14 +273,16 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( - StringRef Filename, bool DisableMinimization) { + StringRef Filename, bool DisableDirectivesScanning) { if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) - return minimizeIfNecessary(*Entry, Filename, DisableMinimization) + return scanForDirectivesIfNecessary(*Entry, Filename, + DisableDirectivesScanning) .unwrapError(); auto MaybeEntry = computeAndStoreResult(Filename); if (!MaybeEntry) return MaybeEntry.getError(); - return minimizeIfNecessary(*MaybeEntry, Filename, DisableMinimization) + return scanForDirectivesIfNecessary(*MaybeEntry, Filename, + DisableDirectivesScanning) 
.unwrapError(); } @@ -301,10 +301,10 @@ namespace { /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using /// this subclass. -class MinimizedVFSFile final : public llvm::vfs::File { +class DepScanFile final : public llvm::vfs::File { public: - MinimizedVFSFile(std::unique_ptr Buffer, - llvm::vfs::Status Stat) + DepScanFile(std::unique_ptr Buffer, + llvm::vfs::Status Stat) : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} static llvm::ErrorOr> @@ -328,14 +328,15 @@ class MinimizedVFSFile final : public llvm::vfs::File { } // end anonymous namespace -llvm::ErrorOr> MinimizedVFSFile::create( - EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) { +llvm::ErrorOr> +DepScanFile::create(EntryRef Entry, + ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) { assert(!Entry.isError() && "error"); if (Entry.isDirectory()) return std::make_error_code(std::errc::is_a_directory); - auto Result = std::make_unique( + auto Result = std::make_unique( llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getStatus().getName(), /*RequiresNullTerminator=*/false), @@ -357,5 +358,5 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); - return MinimizedVFSFile::create(Result.get(), PPSkipMappings); + return DepScanFile::create(Result.get(), PPSkipMappings); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index a6f078f9496d83..30f5aeae118a63 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -183,19 +183,19 @@ class DependencyScanningAction : public tooling::ToolAction { // Use the dependency scanning optimized file system if requested to do so. 
if (DepFS) { - DepFS->enableMinimizationOfAllFiles(); + DepFS->enableDirectivesScanningOfAllFiles(); // Don't minimize any files that contributed to prebuilt modules. The // implicit build validates the modules by comparing the reported sizes of // their inputs to the current state of the filesystem. Minimization would // throw this mechanism off. for (const auto &File : PrebuiltModulesInputFiles) - DepFS->disableMinimization(File.getKey()); + DepFS->disableDirectivesScanning(File.getKey()); // Don't minimize any files that were explicitly passed in the build // settings and that might be opened. for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries) - DepFS->disableMinimization(E.Path); + DepFS->disableDirectivesScanning(E.Path); for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles) - DepFS->disableMinimization(F); + DepFS->disableDirectivesScanning(F); // Support for virtual file system overlays on top of the caching // filesystem. @@ -287,7 +287,7 @@ DependencyScanningWorker::DependencyScanningWorker( OverlayFS->pushOverlay(InMemoryFS); RealFS = OverlayFS; - if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing) + if (Service.getMode() == ScanningMode::DependencyDirectivesScan) DepFS = new DependencyScanningWorkerFilesystem(Service.getSharedCache(), RealFS, PPSkipMappings); if (Service.canReuseFileManager()) diff --git a/clang/test/ClangScanDeps/has_include_if_elif.cpp b/clang/test/ClangScanDeps/has_include_if_elif.cpp index 17eda40c166290..5813bf08482ece 100644 --- a/clang/test/ClangScanDeps/has_include_if_elif.cpp +++ b/clang/test/ClangScanDeps/has_include_if_elif.cpp @@ -10,7 +10,7 @@ // RUN: cp %S/Inputs/header.h %t.dir/Inputs/header4.h // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/has_include_if_elif.json > %t.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-dependency-directives | \ 
// RUN: FileCheck %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \ // RUN: FileCheck %s diff --git a/clang/test/ClangScanDeps/macro-expansions.cpp b/clang/test/ClangScanDeps/macro-expansions.cpp new file mode 100644 index 00000000000000..78415d6716c897 --- /dev/null +++ b/clang/test/ClangScanDeps/macro-expansions.cpp @@ -0,0 +1,40 @@ +// This checks that there's no issue with the preprocessor handling user or built-in macro +// expansion during dependency scanning. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json | FileCheck %s + +// CHECK: test.o: +// CHECK-NEXT: test.cpp +// CHECK-NEXT: header1.h +// CHECK-NEXT: header2.h + +//--- cdb.json.template +[{ + "directory" : "DIR", + "command" : "clang -c DIR/test.cpp -o DIR/test.o", + "file" : "DIR/test.o" +}] + +//--- test.cpp +#define FN_MACRO(x) 1 +#if FN_MACRO(a) +#include "header1.h" +#endif + +#if __has_cpp_attribute(clang::fallthrough) +#include "header2.h" +#endif + +//--- header1.h +#ifndef _HEADER1_H_ +#define _HEADER1_H_ +#endif + +//--- header2.h +#ifndef _HEADER2_H_ +#define _HEADER2_H_ +#endif diff --git a/clang/test/ClangScanDeps/modulemap-via-vfs.m b/clang/test/ClangScanDeps/modulemap-via-vfs.m index 7958bb8dbe19bd..c0928909e13126 100644 --- a/clang/test/ClangScanDeps/modulemap-via-vfs.m +++ b/clang/test/ClangScanDeps/modulemap-via-vfs.m @@ -3,7 +3,7 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %t.dir/build/compile-commands.json.in > %t.dir/build/compile-commands.json // RUN: sed -e "s|DIR|%/t.dir|g" %t.dir/build/vfs.yaml.in > %t.dir/build/vfs.yaml // RUN: clang-scan-deps -compilation-database %t.dir/build/compile-commands.json -j 1 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -generate-modules-path-args > %t.db +// RUN: -mode preprocess-dependency-directives -generate-modules-path-args > %t.db // RUN: %deps-to-rsp %t.db 
--module-name=A > %t.A.cc1.rsp // RUN: cat %t.A.cc1.rsp | sed 's:\\\\\?:/:g' | FileCheck %s diff --git a/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m b/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m index 518d7040facd83..52868308743eff 100644 --- a/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m +++ b/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m @@ -10,7 +10,7 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/module_fmodule_name_cdb.json > %t.cdb // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -format experimental-full \ -// RUN: -generate-modules-path-args -mode preprocess-minimized-sources > %t.result +// RUN: -generate-modules-path-args -mode preprocess-dependency-directives > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK %s #import "header3.h" diff --git a/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp b/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp index f5d957ffc7a745..5a0829e2797043 100644 --- a/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp +++ b/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp @@ -12,11 +12,11 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl_by_mod_name.json > %t_clangcl.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -module-name=header1 > %t.result +// RUN: -mode preprocess-dependency-directives -module-name=header1 > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK %s // // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -module-name=header1 > %t_clangcl.result +// RUN: -mode preprocess-dependency-directives -module-name=header1 > %t_clangcl.result // RUN: cat %t_clangcl.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir 
--check-prefixes=CHECK %s // CHECK: { diff --git a/clang/test/ClangScanDeps/modules-full.cpp b/clang/test/ClangScanDeps/modules-full.cpp index e628a5fa7a8396..bce764b8952a36 100644 --- a/clang/test/ClangScanDeps/modules-full.cpp +++ b/clang/test/ClangScanDeps/modules-full.cpp @@ -11,20 +11,20 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl.json > %t_clangcl.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources > %t.result +// RUN: -mode preprocess-dependency-directives > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-NO-ABS %s // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ -// RUN: -generate-modules-path-args -mode preprocess-minimized-sources > %t.result +// RUN: -generate-modules-path-args -mode preprocess-dependency-directives > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-ABS %s // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ // RUN: -generate-modules-path-args -module-files-dir %t.dir/custom \ -// RUN: -mode preprocess-minimized-sources > %t.result +// RUN: -mode preprocess-dependency-directives > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-CUSTOM %s // // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources > %t_clangcl.result +// RUN: -mode preprocess-dependency-directives > %t_clangcl.result // RUN: cat %t_clangcl.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-NO-ABS %s #include "header.h" diff --git a/clang/test/ClangScanDeps/modules-inferred-explicit-build.m b/clang/test/ClangScanDeps/modules-inferred-explicit-build.m index 28199a873f3763..697e35b637d0ba 100644 
--- a/clang/test/ClangScanDeps/modules-inferred-explicit-build.m +++ b/clang/test/ClangScanDeps/modules-inferred-explicit-build.m @@ -6,7 +6,7 @@ // RUN: %S/Inputs/modules_inferred_cdb.json > %t.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -generate-modules-path-args > %t.db +// RUN: -mode preprocess-dependency-directives -generate-modules-path-args > %t.db // RUN: %deps-to-rsp %t.db --module-name=Inferred > %t.inferred.cc1.rsp // RUN: %deps-to-rsp %t.db --module-name=System > %t.system.cc1.rsp // RUN: %deps-to-rsp %t.db --tu-index=0 > %t.tu.rsp diff --git a/clang/test/ClangScanDeps/modules-inferred.m b/clang/test/ClangScanDeps/modules-inferred.m index d1e028d7d6fc45..71b0520ee16fd0 100644 --- a/clang/test/ClangScanDeps/modules-inferred.m +++ b/clang/test/ClangScanDeps/modules-inferred.m @@ -6,7 +6,7 @@ // RUN: %/S/Inputs/modules_inferred_cdb.json > %t.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -format experimental-full \ -// RUN: -generate-modules-path-args -mode preprocess-minimized-sources > %t.result +// RUN: -generate-modules-path-args -mode preprocess-dependency-directives > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t.dir -DSOURCEDIR=%/S --check-prefixes=CHECK #include diff --git a/clang/test/ClangScanDeps/modules.cpp b/clang/test/ClangScanDeps/modules.cpp index af7cdcb9d68c24..32b0dac64f93c7 100644 --- a/clang/test/ClangScanDeps/modules.cpp +++ b/clang/test/ClangScanDeps/modules.cpp @@ -13,9 +13,9 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb.json > %t.cdb // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl.json > %t_clangcl.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s 
-// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s // // The output order is non-deterministic when using more than one thread, @@ -23,17 +23,17 @@ // as it might fail if the results for `modules_cdb_input.cpp` are reported before // `modules_cdb_input2.cpp`. // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK1 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK1 %s // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK2 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK2 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK2 %s diff --git a/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp b/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp 
index fa906f94df9ea0..67096d394cfb92 100644 --- a/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp +++ b/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp @@ -11,7 +11,7 @@ // RUN: touch %t.dir/Inputs/c_alias.h // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/preprocess_minimized_pragmas_cdb.json > %t.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-dependency-directives | \ // RUN: FileCheck %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \ // RUN: FileCheck %s diff --git a/clang/test/ClangScanDeps/regular_cdb.cpp b/clang/test/ClangScanDeps/regular_cdb.cpp index 64d54f31578655..2d7edc8c34d5a0 100644 --- a/clang/test/ClangScanDeps/regular_cdb.cpp +++ b/clang/test/ClangScanDeps/regular_cdb.cpp @@ -10,9 +10,9 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb.json > %t.cdb // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb_clangcl.json > %t_clangcl.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \ @@ -31,9 +31,9 @@ // as it might fail if the results for `regular_cdb_input.cpp` are reported before // `regular_cdb_input2.cpp`. 
// -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK1 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ @@ -41,9 +41,9 @@ // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK2 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-dependency-directives | \ // RUN: FileCheck --check-prefix=CHECK2 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 2523e24718e00c..c37d098a70672b 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -116,15 +116,15 @@ static llvm::cl::opt ScanMode( "mode", llvm::cl::desc("The preprocessing mode used to compute the dependencies"), llvm::cl::values( - clEnumValN(ScanningMode::MinimizedSourcePreprocessing, - "preprocess-minimized-sources", - "The set of dependencies is computed by preprocessing the " - "source files that were minimized to only include the " - "contents that might affect the dependencies"), + 
clEnumValN(ScanningMode::DependencyDirectivesScan, + "preprocess-dependency-directives", + "The set of dependencies is computed by preprocessing with " + "special lexing after scanning the source files to get the " + "directives that might affect the dependencies"), clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess", "The set of dependencies is computed by preprocessing the " - "unmodified source files")), - llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing), + "source files")), + llvm::cl::init(ScanningMode::DependencyDirectivesScan), llvm::cl::cat(DependencyScannerCategory)); static llvm::cl::opt Format( diff --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt index 99024ba896ac8b..7aa7ecf92584f7 100644 --- a/clang/unittests/Lex/CMakeLists.txt +++ b/clang/unittests/Lex/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS ) add_clang_unittest(LexTests - DependencyDirectivesSourceMinimizerTest.cpp + DependencyDirectivesScannerTest.cpp HeaderMapTest.cpp HeaderSearchTest.cpp LexerTest.cpp diff --git a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp similarity index 89% rename from clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp rename to clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index f6ef96eaed5c05..cec28eae0f2b80 100644 --- a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -1,4 +1,4 @@ -//===- unittests/Lex/DependencyDirectivesSourceMinimizer.cpp - -----------===// +//===- unittests/Lex/DependencyDirectivesScannerTest.cpp ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,45 +6,48 @@ // //===----------------------------------------------------------------------===// -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/SmallString.h" #include "gtest/gtest.h" using namespace llvm; using namespace clang; -using namespace clang::minimize_source_to_dependency_directives; +using namespace clang::dependency_directives_scan; -namespace clang { - -bool minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out) { - SmallVector Tokens; - return minimizeSourceToDependencyDirectives(Input, Out, Tokens); +static bool minimizeSourceToDependencyDirectives(StringRef Input, + SmallVectorImpl &Out) { + SmallVector Directives; + return scanSourceForDependencyDirectives(Input, Out, Directives); } -} // end namespace clang +static bool +minimizeSourceToDependencyDirectives(StringRef Input, + SmallVectorImpl &Out, + SmallVectorImpl &Directives) { + return scanSourceForDependencyDirectives(Input, Out, Directives); +} namespace { TEST(MinimizeSourceToDependencyDirectivesTest, Empty) { SmallVector Out; - SmallVector Tokens; + SmallVector Directives; - ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Directives)); EXPECT_TRUE(Out.empty()); - ASSERT_EQ(1u, Tokens.size()); - ASSERT_EQ(pp_eof, Tokens.back().K); + ASSERT_EQ(1u, Directives.size()); + ASSERT_EQ(pp_eof, Directives.back().Kind); ASSERT_FALSE( - minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens)); + minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Directives)); EXPECT_TRUE(Out.empty()); - ASSERT_EQ(1u, Tokens.size()); - ASSERT_EQ(pp_eof, Tokens.back().K); + ASSERT_EQ(1u, Directives.size()); + ASSERT_EQ(pp_eof, Directives.back().Kind); } -TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { +TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) { SmallVector Out; - SmallVector 
Tokens; + SmallVector Directives; ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define A\n" @@ -68,41 +71,41 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { "#pragma include_alias(, )\n" "export module m;\n" "import m;\n", - Out, Tokens)); - EXPECT_EQ(pp_define, Tokens[0].K); - EXPECT_EQ(pp_undef, Tokens[1].K); - EXPECT_EQ(pp_endif, Tokens[2].K); - EXPECT_EQ(pp_if, Tokens[3].K); - EXPECT_EQ(pp_ifdef, Tokens[4].K); - EXPECT_EQ(pp_ifndef, Tokens[5].K); - EXPECT_EQ(pp_elifdef, Tokens[6].K); - EXPECT_EQ(pp_elifndef, Tokens[7].K); - EXPECT_EQ(pp_elif, Tokens[8].K); - EXPECT_EQ(pp_else, Tokens[9].K); - EXPECT_EQ(pp_include, Tokens[10].K); - EXPECT_EQ(pp_include_next, Tokens[11].K); - EXPECT_EQ(pp___include_macros, Tokens[12].K); - EXPECT_EQ(pp_import, Tokens[13].K); - EXPECT_EQ(decl_at_import, Tokens[14].K); - EXPECT_EQ(pp_pragma_import, Tokens[15].K); - EXPECT_EQ(pp_pragma_push_macro, Tokens[16].K); - EXPECT_EQ(pp_pragma_pop_macro, Tokens[17].K); - EXPECT_EQ(pp_pragma_include_alias, Tokens[18].K); - EXPECT_EQ(cxx_export_decl, Tokens[19].K); - EXPECT_EQ(cxx_module_decl, Tokens[20].K); - EXPECT_EQ(cxx_import_decl, Tokens[21].K); - EXPECT_EQ(pp_eof, Tokens[22].K); + Out, Directives)); + EXPECT_EQ(pp_define, Directives[0].Kind); + EXPECT_EQ(pp_undef, Directives[1].Kind); + EXPECT_EQ(pp_endif, Directives[2].Kind); + EXPECT_EQ(pp_if, Directives[3].Kind); + EXPECT_EQ(pp_ifdef, Directives[4].Kind); + EXPECT_EQ(pp_ifndef, Directives[5].Kind); + EXPECT_EQ(pp_elifdef, Directives[6].Kind); + EXPECT_EQ(pp_elifndef, Directives[7].Kind); + EXPECT_EQ(pp_elif, Directives[8].Kind); + EXPECT_EQ(pp_else, Directives[9].Kind); + EXPECT_EQ(pp_include, Directives[10].Kind); + EXPECT_EQ(pp_include_next, Directives[11].Kind); + EXPECT_EQ(pp___include_macros, Directives[12].Kind); + EXPECT_EQ(pp_import, Directives[13].Kind); + EXPECT_EQ(decl_at_import, Directives[14].Kind); + EXPECT_EQ(pp_pragma_import, Directives[15].Kind); + EXPECT_EQ(pp_pragma_push_macro, 
Directives[16].Kind); + EXPECT_EQ(pp_pragma_pop_macro, Directives[17].Kind); + EXPECT_EQ(pp_pragma_include_alias, Directives[18].Kind); + EXPECT_EQ(cxx_export_decl, Directives[19].Kind); + EXPECT_EQ(cxx_module_decl, Directives[20].Kind); + EXPECT_EQ(cxx_import_decl, Directives[21].Kind); + EXPECT_EQ(pp_eof, Directives[22].Kind); } TEST(MinimizeSourceToDependencyDirectivesTest, Define) { SmallVector Out; - SmallVector Tokens; + SmallVector Directives; ASSERT_FALSE( - minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens)); + minimizeSourceToDependencyDirectives("#define MACRO", Out, Directives)); EXPECT_STREQ("#define MACRO\n", Out.data()); - ASSERT_EQ(2u, Tokens.size()); - ASSERT_EQ(pp_define, Tokens.front().K); + ASSERT_EQ(2u, Directives.size()); + ASSERT_EQ(pp_define, Directives.front().Kind); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineSpacing) { @@ -679,18 +682,17 @@ int z = 128'78; TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { SmallVector Out; - SmallVector Tokens; + SmallVector Directives; StringRef Source = R"(// comment #pragma once // another comment #include )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); EXPECT_STREQ("#pragma once\n#include \n", Out.data()); - ASSERT_EQ(Tokens.size(), 3u); - EXPECT_EQ(Tokens[0].K, - minimize_source_to_dependency_directives::pp_pragma_once); + ASSERT_EQ(Directives.size(), 3u); + EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_pragma_once); Source = R"(// comment #pragma once extra tokens @@ -758,7 +760,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) { SmallVector Out; - SmallVector Tokens; + SmallVector Directives; StringRef Source = R"( module; @@ -787,29 +789,28 @@ ort \ import f(->a = 3); } )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens)); + 
ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;\n" "export import :l [[rename]];\n" "import <<= 3;\nimport a b d e d e f e;\n" "import foo [[no_unique_address]];\nimport foo();\n" - "import f(:sefse);\nimport f(->a = 3);\n", Out.data()); - ASSERT_EQ(Tokens.size(), 12u); - EXPECT_EQ(Tokens[0].K, - minimize_source_to_dependency_directives::pp_include); - EXPECT_EQ(Tokens[2].K, - minimize_source_to_dependency_directives::cxx_module_decl); + "import f(:sefse);\nimport f(->a = 3);\n", + Out.data()); + ASSERT_EQ(Directives.size(), 12u); + EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_include); + EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::cxx_module_decl); } TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasic) { SmallString<128> Out; - SmallVector Toks; + SmallVector Directives; StringRef Source = "#ifndef GUARD\n" "#define GUARD\n" "void foo();\n" "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Toks)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); + ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); EXPECT_EQ(Ranges.size(), 1u); EXPECT_EQ(Ranges[0].Offset, 0); EXPECT_EQ(Ranges[0].Length, (int)Out.find("#endif")); @@ -817,7 +818,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasic) { TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) { SmallString<128> Out; - SmallVector Toks; + SmallVector Directives; StringRef Source = "#ifdef BLAH\n" "void skip();\n" "#elifdef BLAM\n" @@ -826,9 +827,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) { "#define GUARD\n" "void foo();\n" "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Toks)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, 
Directives)); SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); + ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); EXPECT_EQ(Ranges.size(), 3u); EXPECT_EQ(Ranges[0].Offset, 0); EXPECT_EQ(Ranges[0].Length, (int)Out.find("#elifdef")); @@ -840,7 +841,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) { TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesNested) { SmallString<128> Out; - SmallVector Toks; + SmallVector Directives; StringRef Source = "#ifndef GUARD\n" "#define GUARD\n" "#if FOO\n" @@ -851,9 +852,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesNested) { "#else\n" "#include nothing\n" "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Toks)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); + ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); EXPECT_EQ(Ranges.size(), 4u); EXPECT_EQ(Ranges[0].Offset, (int)Out.find("#if FOO")); EXPECT_EQ(Ranges[0].Offset + Ranges[0].Length, (int)Out.find("#elif")); diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp index 1c52d28b93241e..bb819d9c59ec16 100644 --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -215,9 +215,9 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) { ExcludedPreprocessorDirectiveSkipMapping Mappings; DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - DepFS.enableMinimizationOfAllFiles(); // Let's be explicit for clarity. + DepFS.enableDirectivesScanningOfAllFiles(); // Let's be explicit for clarity. 
auto StatusMinimized0 = DepFS.status("/mod.h"); - DepFS.disableMinimization("/mod.h"); + DepFS.disableDirectivesScanning("/mod.h"); auto StatusFull1 = DepFS.status("/mod.h"); EXPECT_TRUE(StatusMinimized0); @@ -238,9 +238,9 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { ExcludedPreprocessorDirectiveSkipMapping Mappings; DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - DepFS.disableMinimization("/mod.h"); + DepFS.disableDirectivesScanning("/mod.h"); auto StatusFull0 = DepFS.status("/mod.h"); - DepFS.enableMinimizationOfAllFiles(); + DepFS.enableDirectivesScanningOfAllFiles(); auto StatusMinimized1 = DepFS.status("/mod.h"); EXPECT_TRUE(StatusFull0); From b4c83a13f664582015ea22924b9a0c6290d41f5b Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Wed, 11 May 2022 21:56:29 -0700 Subject: [PATCH 628/908] [Tooling/DependencyScanning & Preprocessor] Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources This is a commit with the following changes: * Remove `ExcludedPreprocessorDirectiveSkipMapping` and related functionality Removes `ExcludedPreprocessorDirectiveSkipMapping`; its intended benefit for fast skipping of excluded directive blocks will be superseded by a follow-up patch in the series that will use dependency scanning lexing for the same purpose. * Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources Replaces the "source minimization" mechanism with a mechanism that produces lexed dependency directives tokens. * Make the special lexing for dependency scanning a first-class feature of the `Preprocessor` and `Lexer` This is bringing the following benefits: * Full access to the preprocessor state during dependency scanning. E.g. a component can see what includes were taken and where they were located in the actual sources. * Improved performance for dependency scanning.
Measurements with a release+thin-LTO build show ~ -11% reduction in wall time. * Opportunity to use dependency scanning lexing to speed up skipping of excluded conditional blocks during normal preprocessing (as follow-up, not part of this patch). For normal preprocessing measurements show differences are below the noise level. Since, after this change, we don't minimize sources and pass them in place of the real sources, `DependencyScanningFilesystem` is not technically necessary, but it has valuable performance benefits for caching file `stat`s along with the results of scanning the sources. So the setup of using the `DependencyScanningFilesystem` during a dependency scan remains. Differential Revision: https://reviews.llvm.org/D125486 Differential Revision: https://reviews.llvm.org/D125487 Differential Revision: https://reviews.llvm.org/D125488 --- .../clang/Lex/DependencyDirectivesScanner.h | 90 ++- clang/include/clang/Lex/Lexer.h | 35 +- clang/include/clang/Lex/Preprocessor.h | 10 +- ...rExcludedConditionalDirectiveSkipMapping.h | 30 - clang/include/clang/Lex/PreprocessorOptions.h | 20 +- .../DependencyScanningFilesystem.h | 87 +- .../DependencyScanningWorker.h | 2 - clang/lib/Frontend/FrontendActions.cpp | 7 +- clang/lib/Lex/DependencyDirectivesScanner.cpp | 745 ++++++++---------- clang/lib/Lex/Lexer.cpp | 147 +++- clang/lib/Lex/PPDirectives.cpp | 91 +-- clang/lib/Lex/PPLexerChange.cpp | 19 +- clang/lib/Lex/Preprocessor.cpp | 15 +- .../DependencyScanningFilesystem.cpp | 93 +-- .../DependencyScanningWorker.cpp | 40 +- ...dependency_directives_invalid_macro_name.c | 5 +- ..._source_to_dependency_directives_pragmas.c | 2 +- .../Lex/DependencyDirectivesScannerTest.cpp | 221 +++--- .../Tooling/DependencyScannerTest.cpp | 48 -- 19 files changed, 787 insertions(+), 920 deletions(-) delete mode 100644 clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h
b/clang/include/clang/Lex/DependencyDirectivesScanner.h index b65891c6b8aba1..1ea7e79a0d682c 100644 --- a/clang/include/clang/Lex/DependencyDirectivesScanner.h +++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h @@ -19,15 +19,41 @@ #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" namespace clang { +namespace tok { +enum TokenKind : unsigned short; +} + class DiagnosticsEngine; namespace dependency_directives_scan { +/// Token lexed as part of dependency directive scanning. +struct Token { + /// Offset into the original source input. + unsigned Offset; + unsigned Length; + tok::TokenKind Kind; + unsigned short Flags; + + Token(unsigned Offset, unsigned Length, tok::TokenKind Kind, + unsigned short Flags) + : Offset(Offset), Length(Length), Kind(Kind), Flags(Flags) {} + + unsigned getEnd() const { return Offset + Length; } + + bool is(tok::TokenKind K) const { return Kind == K; } + bool isNot(tok::TokenKind K) const { return Kind != K; } + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { + return is(K1) || is(K2); + } + template bool isOneOf(tok::TokenKind K1, Ts... Ks) const { + return is(K1) || isOneOf(Ks...); + } +}; + /// Represents the kind of preprocessor directive or a module declaration that /// is tracked by the scanner in its token output. enum DirectiveKind : uint8_t { @@ -52,9 +78,10 @@ enum DirectiveKind : uint8_t { pp_else, pp_endif, decl_at_import, - cxx_export_decl, cxx_module_decl, cxx_import_decl, + cxx_export_module_decl, + cxx_export_import_decl, pp_eof, }; @@ -62,53 +89,48 @@ enum DirectiveKind : uint8_t { /// scanning. It's used to track various preprocessor directives that could /// potentially have an effect on the depedencies. struct Directive { + ArrayRef Tokens; + /// The kind of token. DirectiveKind Kind = pp_none; - /// Offset into the output byte stream of where the directive begins. 
- int Offset = -1; - - Directive(DirectiveKind K, int Offset) : Kind(K), Offset(Offset) {} -}; - -/// Simplified token range to track the range of a potentially skippable PP -/// directive. -struct SkippedRange { - /// Offset into the output byte stream of where the skipped directive begins. - int Offset; - - /// The number of bytes that can be skipped before the preprocessing must - /// resume. - int Length; + Directive() = default; + Directive(DirectiveKind K, ArrayRef Tokens) + : Tokens(Tokens), Kind(K) {} }; -/// Computes the potential source ranges that can be skipped by the preprocessor -/// when skipping a directive like #if, #ifdef or #elsif. -/// -/// \returns false on success, true on error. -bool computeSkippedRanges(ArrayRef Input, - llvm::SmallVectorImpl &Range); - } // end namespace dependency_directives_scan -/// Minimize the input down to the preprocessor directives that might have +/// Scan the input for the preprocessor directives that might have /// an effect on the dependencies for a compilation unit. /// -/// This function deletes all non-preprocessor code, and strips anything that -/// can't affect what gets included. It canonicalizes whitespace where -/// convenient to stabilize the output against formatting changes in the input. -/// -/// Clears the output vectors at the beginning of the call. +/// This function ignores all non-preprocessor code and anything that +/// can't affect what gets included. /// /// \returns false on success, true on error. If the diagnostic engine is not /// null, an appropriate error is reported using the given input location -/// with the offset that corresponds to the minimizer's current buffer offset. +/// with the offset that corresponds to the \p Input buffer offset. 
bool scanSourceForDependencyDirectives( - llvm::StringRef Input, llvm::SmallVectorImpl &Output, - llvm::SmallVectorImpl &Directives, + StringRef Input, SmallVectorImpl &Tokens, + SmallVectorImpl &Directives, DiagnosticsEngine *Diags = nullptr, SourceLocation InputSourceLoc = SourceLocation()); +/// Print the previously scanned dependency directives as minimized source text. +/// +/// \param Source The original source text that the dependency directives were +/// scanned from. +/// \param Directives The previously scanned dependency +/// directives. +/// \param OS the stream to print the dependency directives on. +/// +/// This is used primarily for testing purposes, during dependency scanning the +/// \p Lexer uses the tokens directly, not their printed version. +void printDependencyDirectivesAsSource( + StringRef Source, + ArrayRef Directives, + llvm::raw_ostream &OS); + } // end namespace clang #endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 76612184bdffa3..d04b3329346555 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -16,6 +16,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/PreprocessorLexer.h" #include "clang/Lex/Token.h" #include "llvm/ADT/Optional.h" @@ -149,6 +150,13 @@ class Lexer : public PreprocessorLexer { // CurrentConflictMarkerState - The kind of conflict marker we are handling. ConflictMarkerKind CurrentConflictMarkerState; + /// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer(). + ArrayRef DepDirectives; + + /// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the + /// next token to use from the current dependency directive. 
+ unsigned NextDepDirectiveTokenIndex = 0; + void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd); public: @@ -195,6 +203,23 @@ class Lexer : public PreprocessorLexer { /// return the tok::eof token. This implicitly involves the preprocessor. bool Lex(Token &Result); + /// Called when the preprocessor is in 'dependency scanning lexing mode'. + bool LexDependencyDirectiveToken(Token &Result); + + /// Called when the preprocessor is in 'dependency scanning lexing mode' and + /// is skipping a conditional block. + bool LexDependencyDirectiveTokenWhileSkipping(Token &Result); + + /// True when the preprocessor is in 'dependency scanning lexing mode' and + /// created this \p Lexer for lexing a set of dependency directive tokens. + bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); } + + /// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to + /// the position just after the token. + /// \returns the buffer pointer at the beginning of the token. + const char *convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result); + public: /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma. bool isPragmaLexer() const { return Is_PragmaLexer; } @@ -288,14 +313,8 @@ class Lexer : public PreprocessorLexer { return BufferPtr - BufferStart; } - /// Skip over \p NumBytes bytes. - /// - /// If the skip is successful, the next token will be lexed from the new - /// offset. The lexer also assumes that we skipped to the start of the line. - /// - /// \returns true if the skip failed (new offset would have been past the - /// end of the buffer), false otherwise. - bool skipOver(unsigned NumBytes); + /// Set the lexer's buffer pointer to \p Offset. + void seek(unsigned Offset, bool IsAtStartOfLine); /// Stringify - Convert the specified string into a C string by i) escaping /// '\\' and " characters and ii) replacing newline character(s) with "\\n". 
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index d394ad33c23726..6d8f03e3ceb1b2 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -29,7 +29,6 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" @@ -558,6 +557,7 @@ class Preprocessor { CLK_Lexer, CLK_TokenLexer, CLK_CachingLexer, + CLK_DependencyDirectivesLexer, CLK_LexAfterModuleImport } CurLexerKind = CLK_Lexer; @@ -2595,14 +2595,6 @@ class Preprocessor { void emitMacroDeprecationWarning(const Token &Identifier) const; void emitRestrictExpansionWarning(const Token &Identifier) const; void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; - - Optional - getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc); - - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. - ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings; }; /// Abstract base class that describes a handler that will receive diff --git a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h b/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h deleted file mode 100644 index 49687cb5cc8527..00000000000000 --- a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- PreprocessorExcludedConditionalDirectiveSkipMapping.h - --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H -#define LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/MemoryBuffer.h" - -namespace clang { - -/// A mapping from an offset into a buffer to the number of bytes that can be -/// skipped by the preprocessor when skipping over excluded conditional -/// directive ranges. -using PreprocessorSkippedRangeMapping = llvm::DenseMap; - -/// The datastructure that holds the mapping between the active memory buffers -/// and the individual skip mappings. -using ExcludedPreprocessorDirectiveSkipMapping = - llvm::DenseMap; - -} // end namespace clang - -#endif // LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index dc5382ddc7432b..4cf18e98f051f2 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -10,8 +10,9 @@ #define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_ #include "clang/Basic/BitmaskEnum.h" +#include "clang/Basic/FileEntry.h" #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include @@ -200,13 +201,18 @@ class PreprocessorOptions { /// build it again. std::shared_ptr FailedModules; - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. + /// Function for getting the dependency preprocessor directives of a file. /// - /// The pointer is passed to the Preprocessor when it's constructed. 
The - /// pointer is unowned, the client is responsible for its lifetime. - ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings = nullptr; + /// These are directives derived from a special form of lexing where the + /// source input is scanned for the preprocessor directives that might have an + /// effect on the dependencies for a compilation unit. + /// + /// Enables a client to cache the directives for a file and provide them + /// across multiple compiler invocations. + /// FIXME: Allow returning an error. + std::function>( + FileEntryRef)> + DependencyDirectivesForFile; /// Set up preprocessor for RunAnalysis action. bool SetUpStaticAnalyzer = false; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index c5f239a2e4bff7..e0d16df92e1a4a 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -10,7 +10,7 @@ #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" @@ -22,24 +22,26 @@ namespace clang { namespace tooling { namespace dependencies { -/// Original and minimized contents of a cached file entry. Single instance can +using DependencyDirectivesTy = + SmallVector; + +/// Contents and directive tokens of a cached file entry. Single instance can /// be shared between multiple entries. 
struct CachedFileContents { - CachedFileContents(std::unique_ptr Original) - : Original(std::move(Original)), MinimizedAccess(nullptr) {} + CachedFileContents(std::unique_ptr Contents) + : Original(std::move(Contents)), DepDirectives(nullptr) {} /// Owning storage for the original contents. std::unique_ptr Original; /// The mutex that must be locked before mutating directive tokens. std::mutex ValueLock; - /// Owning storage for the minimized contents. - std::unique_ptr MinimizedStorage; + SmallVector DepDirectiveTokens; /// Accessor to the directive tokens that's atomic to avoid data races. - std::atomic MinimizedAccess; - /// Skipped range mapping of the minimized contents. - /// This is initialized iff `MinimizedAccess != nullptr`. - PreprocessorSkippedRangeMapping PPSkippedRangeMapping; + /// \p CachedFileContents has ownership of the pointer. + std::atomic *> DepDirectives; + + ~CachedFileContents() { delete DepDirectives.load(); } }; /// An in-memory representation of a file system entity that is of interest to @@ -86,13 +88,17 @@ class CachedFileSystemEntry { /// \returns The scanned preprocessor directive tokens of the file that are /// used to speed up preprocessing, if available. - StringRef getDirectiveTokens() const { + Optional> + getDirectiveTokens() const { assert(!isError() && "error"); - assert(!MaybeStat->isDirectory() && "not a file"); + assert(!isDirectory() && "not a file"); assert(Contents && "contents not initialized"); - llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load(); - assert(Buffer && "not minimized"); - return Buffer->getBuffer(); + if (auto *Directives = Contents->DepDirectives.load()) { + if (Directives->hasValue()) + return ArrayRef( + Directives->getValue()); + } + return None; } /// \returns The error. @@ -111,15 +117,6 @@ class CachedFileSystemEntry { return MaybeStat->getUniqueID(); } - /// \returns The mapping between location -> distance that is used to speed up - /// the block skipping in the preprocessor. 
- const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { - assert(!isError() && "error"); - assert(!isDirectory() && "not a file"); - assert(Contents && "contents not initialized"); - return Contents->PPSkippedRangeMapping; - } - /// \returns The data structure holding both contents and directive tokens. CachedFileContents *getCachedContents() const { assert(!isError() && "error"); @@ -237,10 +234,6 @@ class DependencyScanningFilesystemLocalCache { /// If the underlying entry is an opened file, this wrapper returns the file /// contents and the scanned preprocessor directives. class EntryRef { - /// For entry that is an opened file, this bit signifies whether its contents - /// are minimized. - bool Minimized; - /// The filename used to access this entry. std::string Filename; @@ -248,8 +241,8 @@ class EntryRef { const CachedFileSystemEntry &Entry; public: - EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry) - : Minimized(Minimized), Filename(Name), Entry(Entry) {} + EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) + : Filename(Name), Entry(Entry) {} llvm::vfs::Status getStatus() const { llvm::vfs::Status Stat = Entry.getStatus(); @@ -268,12 +261,11 @@ class EntryRef { return *this; } - StringRef getContents() const { - return Minimized ? Entry.getDirectiveTokens() : Entry.getOriginalContents(); - } + StringRef getContents() const { return Entry.getOriginalContents(); } - const PreprocessorSkippedRangeMapping *getPPSkippedRangeMapping() const { - return Minimized ? 
&Entry.getPPSkippedRangeMapping() : nullptr; + Optional> + getDirectiveTokens() const { + return Entry.getDirectiveTokens(); } }; @@ -290,24 +282,13 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { public: DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, - IntrusiveRefCntPtr FS, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) - : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), - PPSkipMappings(PPSkipMappings) {} + IntrusiveRefCntPtr FS) + : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {} llvm::ErrorOr status(const Twine &Path) override; llvm::ErrorOr> openFileForRead(const Twine &Path) override; - /// Disable directives scanning of the given file. - void disableDirectivesScanning(StringRef Filename); - /// Enable directives scanning of all files. - void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); } - -private: - /// Check whether the file should be scanned for preprocessor directives. - bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID); - /// Returns entry for the given filename. /// /// Attempts to use the local and shared caches first, then falls back to @@ -316,6 +297,10 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { getOrCreateFileSystemEntry(StringRef Filename, bool DisableDirectivesScanning = false); +private: + /// Check whether the file should be scanned for preprocessor directives. + bool shouldScanForDirectives(StringRef Filename); + /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the /// shared cache indexed by unique ID, or creates new entry from scratch. 
@@ -396,12 +381,6 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// The local cache is used by the worker thread to cache file system queries /// locally instead of querying the global cache every time. DependencyScanningFilesystemLocalCache LocalCache; - /// The mapping structure which records information about the - /// excluded conditional directive skip mappings that are used by the - /// currently active preprocessor. - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; - /// The set of files that should not be scanned for PP directives. - llvm::DenseSet NotToBeScanned; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h index 84c74ac66359d2..337bba2e72da94 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -13,7 +13,6 @@ #include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" #include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" #include "llvm/Support/Error.h" @@ -69,7 +68,6 @@ class DependencyScanningWorker { private: std::shared_ptr PCHContainerOps; - ExcludedPreprocessorDirectiveSkipMapping PPSkipMappings; /// The physical filesystem overlaid by `InMemoryFS`. 
llvm::IntrusiveRefCntPtr RealFS; diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index ba492c29d9c02b..f61c83a2a465e7 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -1157,10 +1157,10 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() { SourceManager &SM = CI.getPreprocessor().getSourceManager(); llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID()); - llvm::SmallString<1024> Output; + llvm::SmallVector Tokens; llvm::SmallVector Directives; if (scanSourceForDependencyDirectives( - FromFile.getBuffer(), Output, Directives, &CI.getDiagnostics(), + FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) { assert(CI.getDiagnostics().hasErrorOccurred() && "no errors reported for failure"); @@ -1179,7 +1179,8 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() { } return; } - llvm::outs() << Output; + printDependencyDirectivesAsSource(FromFile.getBuffer(), Directives, + llvm::outs()); } void GetDependenciesByModuleNameAction::ExecuteAction() { diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index dd1ea1cfe11e04..4c0105269eebd4 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -18,92 +18,148 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/MemoryBuffer.h" -using namespace llvm; using namespace clang; using namespace clang::dependency_directives_scan; +using namespace llvm; namespace { -struct Scanner { - /// Minimized output. 
- SmallVectorImpl &Out; - /// The known tokens encountered during the minimization. - SmallVectorImpl &Directives; +struct DirectiveWithTokens { + DirectiveKind Kind; + unsigned NumTokens; - Scanner(SmallVectorImpl &Out, SmallVectorImpl &Directives, - StringRef Input, DiagnosticsEngine *Diags, - SourceLocation InputSourceLoc) - : Out(Out), Directives(Directives), Input(Input), Diags(Diags), - InputSourceLoc(InputSourceLoc) {} + DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) + : Kind(Kind), NumTokens(NumTokens) {} +}; + +/// Does an efficient "scan" of the sources to detect the presence of +/// preprocessor (or module import) directives and collects the raw lexed tokens +/// for those directives so that the \p Lexer can "replay" them when the file is +/// included. +/// +/// Note that the behavior of the raw lexer is affected by the language mode, +/// while at this point we want to do a scan and collect tokens once, +/// irrespective of the language mode that the file will get included in. To +/// compensate for that the \p Lexer, while "replaying", will adjust a token +/// where appropriate, when it could affect the preprocessor's state. +/// For example in a directive like +/// +/// \code +/// #if __has_cpp_attribute(clang::fallthrough) +/// \endcode +/// +/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 +/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' +/// while in C++ mode. +struct Scanner { + Scanner(StringRef Input, + SmallVectorImpl &Tokens, + DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) + : Input(Input), Tokens(Tokens), Diags(Diags), + InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), + TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), + Input.end()) {} + + static LangOptions getLangOptsForDepScanning() { + LangOptions LangOpts; + // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. 
+ LangOpts.ObjC = true; + LangOpts.LineComment = true; + return LangOpts; + } /// Lex the provided source and emit the directive tokens. /// /// \returns True on error. - bool scan(); + bool scan(SmallVectorImpl &Directives); private: - struct IdInfo { - const char *Last; - StringRef Name; - }; + /// Lexes next token and advances \p First and the \p Lexer. + LLVM_NODISCARD dependency_directives_scan::Token & + lexToken(const char *&First, const char *const End); + + dependency_directives_scan::Token &lexIncludeFilename(const char *&First, + const char *const End); - /// Lex an identifier. + /// Lexes next token and if it is identifier returns its string, otherwise + /// it skips the current line and returns \p None. /// - /// \pre First points at a valid identifier head. - LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); - LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, - const char *const End); + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. + LLVM_NODISCARD Optional + tryLexIdentifierOrSkipLine(const char *&First, const char *const End); + + /// Used when it is certain that next token is an identifier. + LLVM_NODISCARD StringRef lexIdentifier(const char *&First, + const char *const End); + + /// Lexes next token and returns true iff it is an identifier that matches \p + /// Id, otherwise it skips the current line and returns false. + /// + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. 
+ LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id, + const char *&First, + const char *const End); + LLVM_NODISCARD bool scanImpl(const char *First, const char *const End); LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First, + const char *const End); LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, StringRef Directive, - const char *&First, const char *const End); - Directive &pushDirective(DirectiveKind K) { - Directives.emplace_back(K, Out.size()); - return Directives.back(); + LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First, + const char *const End); + LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind, + const char *&First, + const char *const End); + void lexPPDirectiveBody(const char *&First, const char *const End); + + DirectiveWithTokens &pushDirective(DirectiveKind Kind) { + Tokens.append(CurDirToks); + DirsWithToks.emplace_back(Kind, CurDirToks.size()); + CurDirToks.clear(); + return DirsWithToks.back(); } void popDirective() { - Out.resize(Directives.back().Offset); - Directives.pop_back(); + Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); } DirectiveKind topDirective() const { - return Directives.empty() ? pp_none : Directives.back().Kind; + return DirsWithToks.empty() ? 
pp_none : DirsWithToks.back().Kind; } - Scanner &put(char Byte) { - Out.push_back(Byte); - return *this; - } - Scanner &append(StringRef S) { return append(S.begin(), S.end()); } - Scanner &append(const char *First, const char *Last) { - Out.append(First, Last); - return *this; + unsigned getOffsetAt(const char *CurPtr) const { + return CurPtr - Input.data(); } - void printToNewline(const char *&First, const char *const End); - void printAdjacentModuleNameParts(const char *&First, const char *const End); - LLVM_NODISCARD bool printAtImportBody(const char *&First, - const char *const End); - void printDirectiveBody(const char *&First, const char *const End); - void printAdjacentMacroArgs(const char *&First, const char *const End); - LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); - /// Reports a diagnostic if the diagnostic engine is provided. Always returns /// true at the end. bool reportError(const char *CurPtr, unsigned Err); StringMap SplitIds; StringRef Input; + SmallVectorImpl &Tokens; DiagnosticsEngine *Diags; SourceLocation InputSourceLoc; + + /// Keeps track of the tokens for the currently lexed directive. Once a + /// directive is fully lexed and "committed" then the tokens get appended to + /// \p Tokens and \p CurDirToks is cleared for the next directive. + SmallVector CurDirToks; + /// The directives that were lexed along with the number of tokens that each + /// directive contains. The tokens of all the directives are kept in \p Tokens + /// vector, in the same order as the directives order in \p DirsWithToks. 
+ SmallVector DirsWithToks; + LangOptions LangOpts; + Lexer TheLexer; }; } // end anonymous namespace @@ -112,7 +168,7 @@ bool Scanner::reportError(const char *CurPtr, unsigned Err) { if (!Diags) return true; assert(CurPtr >= Input.data() && "invalid buffer ptr"); - Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); + Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); return true; } @@ -265,30 +321,6 @@ static void skipToNewlineRaw(const char *&First, const char *const End) { } } -static const char *findLastNonSpace(const char *First, const char *Last) { - assert(First <= Last); - while (First != Last && isHorizontalWhitespace(Last[-1])) - --Last; - return Last; -} - -static const char *findLastNonSpaceNonBackslash(const char *First, - const char *Last) { - assert(First <= Last); - while (First != Last && - (isHorizontalWhitespace(Last[-1]) || Last[-1] == '\\')) - --Last; - return Last; -} - -static const char *findFirstTrailingSpace(const char *First, const char *Last) { - const char *LastNonSpace = findLastNonSpace(First, Last); - if (Last == LastNonSpace) - return Last; - assert(isHorizontalWhitespace(LastNonSpace[0])); - return LastNonSpace + 1; -} - static void skipLineComment(const char *&First, const char *const End) { assert(First[0] == '/' && First[1] == '/'); First += 2; @@ -396,67 +428,6 @@ static void skipDirective(StringRef Name, const char *&First, skipLine(First, End); } -void Scanner::printToNewline(const char *&First, const char *const End) { - while (First != End && !isVerticalWhitespace(*First)) { - const char *Last = First; - do { - // Iterate over strings correctly to avoid comments and newlines. 
- if (*Last == '"' || *Last == '\'' || - (*Last == '<' && - (topDirective() == pp_include || topDirective() == pp_import))) { - if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) - skipRawString(Last, End); - else - skipString(Last, End); - continue; - } - if (*Last != '/' || End - Last < 2) { - ++Last; - continue; // Gather the rest up to print verbatim. - } - - if (Last[1] != '/' && Last[1] != '*') { - ++Last; - continue; - } - - // Deal with "//..." and "/*...*/". - append(First, findFirstTrailingSpace(First, Last)); - First = Last; - - if (Last[1] == '/') { - skipLineComment(First, End); - return; - } - - put(' '); - skipBlockComment(First, End); - skipOverSpaces(First, End); - Last = First; - } while (Last != End && !isVerticalWhitespace(*Last)); - - // Print out the string. - const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); - if (Last == End || LastBeforeTrailingSpace == First || - LastBeforeTrailingSpace[-1] != '\\') { - append(First, LastBeforeTrailingSpace); - First = Last; - skipNewline(First, End); - return; - } - - // Print up to the last character that's not a whitespace or backslash. - // Then print exactly one space, which matters when tokens are separated by - // a line continuation. - append(First, findLastNonSpaceNonBackslash(First, Last)); - put(' '); - - First = Last; - skipNewline(First, End); - skipOverSpaces(First, End); - } -} - static void skipWhitespace(const char *&First, const char *const End) { for (;;) { assert(First <= End); @@ -489,176 +460,134 @@ static void skipWhitespace(const char *&First, const char *const End) { } } -void Scanner::printAdjacentModuleNameParts(const char *&First, - const char *const End) { - // Skip over parts of the body. 
- const char *Last = First; - do - ++Last; - while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.')); - append(First, Last); - First = Last; -} - -bool Scanner::printAtImportBody(const char *&First, const char *const End) { +bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, + const char *const End) { + const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; - - if (isVerticalWhitespace(*First)) { - skipNewline(First, End); - continue; - } - - // Found a semicolon. - if (*First == ';') { - put(*First++).put('\n'); - return false; - } - - // Don't handle macro expansions inside @import for now. - if (!isAsciiIdentifierContinue(*First) && *First != '.') - return true; - - printAdjacentModuleNameParts(First, End); + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eof)) + return reportError( + DirectiveLoc, + diag::err_dep_source_scanner_missing_semi_after_at_import); + if (Tok.is(tok::semi)) + break; } + pushDirective(Kind); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); + skipNewline(First, End); + return false; } -void Scanner::printDirectiveBody(const char *&First, const char *const End) { - skipWhitespace(First, End); // Skip initial whitespace. 
- printToNewline(First, End); - while (Out.back() == ' ') - Out.pop_back(); - put('\n'); -} +dependency_directives_scan::Token &Scanner::lexToken(const char *&First, + const char *const End) { + clang::Token Tok; + TheLexer.LexFromRawLexer(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First <= End); -LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, - const char *const End) { - assert(isAsciiIdentifierContinue(*First) && "invalid identifer"); - const char *Last = First + 1; - while (Last != End && isAsciiIdentifierContinue(*Last)) - ++Last; - return Last; + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); } -LLVM_NODISCARD static const char * -getIdentifierContinuation(const char *First, const char *const End) { - if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) - return nullptr; +dependency_directives_scan::Token & +Scanner::lexIncludeFilename(const char *&First, const char *const End) { + clang::Token Tok; + TheLexer.LexIncludeFilename(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First <= End); - ++First; - skipNewline(First, End); - if (First == End) - return nullptr; - return isAsciiIdentifierContinue(First[0]) ? First : nullptr; -} - -Scanner::IdInfo Scanner::lexIdentifier(const char *First, - const char *const End) { - const char *Last = lexRawIdentifier(First, End); - const char *Next = getIdentifierContinuation(Last, End); - if (LLVM_LIKELY(!Next)) - return IdInfo{Last, StringRef(First, Last - First)}; - - // Slow path, where identifiers are split over lines. 
- SmallVector Id(First, Last); - while (Next) { - Last = lexRawIdentifier(Next, End); - Id.append(Next, Last); - Next = getIdentifierContinuation(Last, End); - } - return IdInfo{ - Last, - SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); } -void Scanner::printAdjacentMacroArgs(const char *&First, - const char *const End) { - // Skip over parts of the body. - const char *Last = First; - do - ++Last; - while (Last != End && - (isAsciiIdentifierContinue(*Last) || *Last == '.' || *Last == ',')); - append(First, Last); - First = Last; +void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { + while (true) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eod)) + break; + } } -bool Scanner::printMacroArgs(const char *&First, const char *const End) { - assert(*First == '('); - put(*First++); - for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; +LLVM_NODISCARD Optional +Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.isNot(tok::raw_identifier)) { + if (!Tok.is(tok::eod)) + skipLine(First, End); + return None; + } - if (*First == ')') { - put(*First++); - return false; - } + bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; + if (LLVM_LIKELY(!NeedsCleaning)) + return Input.slice(Tok.Offset, Tok.getEnd()); - // This is intentionally fairly liberal. - if (!(isAsciiIdentifierContinue(*First) || *First == '.' 
|| *First == ',')) - return true; + SmallString<64> Spelling; + Spelling.resize(Tok.Length); - printAdjacentMacroArgs(First, End); + unsigned SpellingLength = 0; + const char *BufPtr = Input.begin() + Tok.Offset; + const char *AfterIdent = Input.begin() + Tok.getEnd(); + while (BufPtr < AfterIdent) { + unsigned Size; + Spelling[SpellingLength++] = + Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + BufPtr += Size; } + + return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) + .first->first(); } -/// Looks for an identifier starting from Last. -/// -/// Updates "First" to just past the next identifier, if any. Returns true iff -/// the identifier matches "Id". -bool Scanner::isNextIdentifier(StringRef Id, const char *&First, - const char *const End) { - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) - return false; +StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { + Optional Id = tryLexIdentifierOrSkipLine(First, End); + assert(Id.hasValue() && "expected identifier token"); + return Id.getValue(); +} - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - return FoundId.Name == Id; +bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, + const char *const End) { + if (Optional FoundId = tryLexIdentifierOrSkipLine(First, End)) { + if (*FoundId == Id) + return true; + skipLine(First, End); + } + return false; } bool Scanner::lexAt(const char *&First, const char *const End) { // Handle "@import". - const char *ImportLoc = First++; - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); - return false; - } - pushDirective(decl_at_import); - append("@import "); - if (printAtImportBody(First, End)) - return reportError( - ImportLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); - skipWhitespace(First, End); - if (First == End) + + // Lex '@'. 
+ const dependency_directives_scan::Token &AtTok = lexToken(First, End); + assert(AtTok.is(tok::at)); + (void)AtTok; + + if (!isNextIdentifierOrSkipLine("import", First, End)) return false; - if (!isVerticalWhitespace(*First)) - return reportError( - ImportLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); - skipNewline(First, End); - return false; + return lexModuleDirectiveBody(decl_at_import, First, End); } bool Scanner::lexModule(const char *&First, const char *const End) { - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; + StringRef Id = lexIdentifier(First, End); bool Export = false; - if (Id.Name == "export") { + if (Id == "export") { Export = true; - skipWhitespace(First, End); - if (!isAsciiIdentifierContinue(*First)) { - skipLine(First, End); + Optional NextId = tryLexIdentifierOrSkipLine(First, End); + if (!NextId) return false; - } - Id = lexIdentifier(First, End); - First = Id.Last; + Id = *NextId; } - if (Id.Name != "module" && Id.Name != "import") { + if (Id != "module" && Id != "import") { skipLine(First, End); return false; } @@ -680,114 +609,51 @@ bool Scanner::lexModule(const char *&First, const char *const End) { } } - if (Export) { - pushDirective(cxx_export_decl); - append("export "); - } + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); - if (Id.Name == "module") - pushDirective(cxx_module_decl); + DirectiveKind Kind; + if (Id == "module") + Kind = Export ? cxx_export_module_decl : cxx_module_decl; else - pushDirective(cxx_import_decl); - append(Id.Name); - append(" "); - printToNewline(First, End); - append("\n"); - return false; -} - -bool Scanner::lexDefine(const char *&First, const char *const End) { - pushDirective(pp_define); - append("#define "); - skipWhitespace(First, End); + Kind = Export ? 
cxx_export_import_decl : cxx_import_decl; - if (!isAsciiIdentifierStart(*First)) - return reportError(First, diag::err_pp_macro_not_identifier); - - IdInfo Id = lexIdentifier(First, End); - const char *Last = Id.Last; - append(Id.Name); - if (Last == End) - return false; - if (*Last == '(') { - size_t Size = Out.size(); - if (printMacroArgs(Last, End)) { - // Be robust to bad macro arguments, since they can show up in disabled - // code. - Out.resize(Size); - append("(/* invalid */\n"); - skipLine(Last, End); - return false; - } - } - skipWhitespace(Last, End); - if (Last == End) - return false; - if (!isVerticalWhitespace(*Last)) - put(' '); - printDirectiveBody(Last, End); - First = Last; - return false; + return lexModuleDirectiveBody(Kind, First, End); } bool Scanner::lexPragma(const char *&First, const char *const End) { - // #pragma. - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) + Optional FoundId = tryLexIdentifierOrSkipLine(First, End); + if (!FoundId) return false; - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - if (FoundId.Name == "once") { - // #pragma once - skipLine(First, End); - pushDirective(pp_pragma_once); - append("#pragma once\n"); - return false; - } - if (FoundId.Name == "push_macro") { - // #pragma push_macro - pushDirective(pp_pragma_push_macro); - append("#pragma push_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "pop_macro") { - // #pragma pop_macro - pushDirective(pp_pragma_pop_macro); - append("#pragma pop_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "include_alias") { - // #pragma include_alias - pushDirective(pp_pragma_include_alias); - append("#pragma include_alias"); - printDirectiveBody(First, End); + StringRef Id = FoundId.getValue(); + auto Kind = llvm::StringSwitch(Id) + .Case("once", pp_pragma_once) + .Case("push_macro", pp_pragma_push_macro) + .Case("pop_macro", pp_pragma_pop_macro) + 
.Case("include_alias", pp_pragma_include_alias) + .Default(pp_none); + if (Kind != pp_none) { + lexPPDirectiveBody(First, End); + pushDirective(Kind); return false; } - if (FoundId.Name != "clang") { + if (Id != "clang") { skipLine(First, End); return false; } // #pragma clang. - if (!isNextIdentifier("module", First, End)) { - skipLine(First, End); + if (!isNextIdentifierOrSkipLine("module", First, End)) return false; - } // #pragma clang module. - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); + if (!isNextIdentifierOrSkipLine("import", First, End)) return false; - } // #pragma clang module import. + lexPPDirectiveBody(First, End); pushDirective(pp_pragma_import); - append("#pragma clang module import "); - printDirectiveBody(First, End); return false; } @@ -808,14 +674,13 @@ bool Scanner::lexEndif(const char *&First, const char *const End) { return false; } - return lexDefault(pp_endif, "endif", First, End); + return lexDefault(pp_endif, First, End); } -bool Scanner::lexDefault(DirectiveKind Kind, StringRef Directive, - const char *&First, const char *const End) { +bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, + const char *const End) { + lexPPDirectiveBody(First, End); pushDirective(Kind); - put('#').append(Directive).put(' '); - printDirectiveBody(First, End); return false; } @@ -845,6 +710,14 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { return false; } + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); + + auto ScEx1 = make_scope_exit([&]() { + /// Clear Scanner's CurDirToks before returning, in case we didn't push a + /// new directive. + CurDirToks.clear(); + }); + // Handle "@import". if (*First == '@') return lexAt(First, End); @@ -853,25 +726,26 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { return lexModule(First, End); // Handle preprocessing directives. - ++First; // Skip over '#'. 
- skipWhitespace(First, End); - if (First == End) - return reportError(First, diag::err_pp_expected_eol); + TheLexer.setParsingPreprocessorDirective(true); + auto ScEx2 = make_scope_exit( + [&]() { TheLexer.setParsingPreprocessorDirective(false); }); - if (!isAsciiIdentifierStart(*First)) { - skipLine(First, End); + // Lex '#'. + const dependency_directives_scan::Token &HashTok = lexToken(First, End); + assert(HashTok.is(tok::hash)); + (void)HashTok; + + Optional FoundId = tryLexIdentifierOrSkipLine(First, End); + if (!FoundId) return false; - } - // Figure out the token. - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; + StringRef Id = FoundId.getValue(); - if (Id.Name == "pragma") + if (Id == "pragma") return lexPragma(First, End); - auto Kind = llvm::StringSwitch(Id.Name) + auto Kind = llvm::StringSwitch(Id) .Case("include", pp_include) .Case("__include_macros", pp___include_macros) .Case("define", pp_define) @@ -888,18 +762,26 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { .Case("endif", pp_endif) .Default(pp_none); if (Kind == pp_none) { - skipDirective(Id.Name, First, End); + skipDirective(Id, First, End); return false; } if (Kind == pp_endif) return lexEndif(First, End); - if (Kind == pp_define) - return lexDefine(First, End); + switch (Kind) { + case pp_include: + case pp___include_macros: + case pp_include_next: + case pp_import: + lexIncludeFilename(First, End); + break; + default: + break; + } // Everything else. - return lexDefault(Kind, Id.Name, First, End); + return lexDefault(Kind, First, End); } static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { @@ -916,78 +798,65 @@ bool Scanner::scanImpl(const char *First, const char *const End) { return false; } -bool Scanner::scan() { +bool Scanner::scan(SmallVectorImpl &Directives) { bool Error = scanImpl(Input.begin(), Input.end()); if (!Error) { - // Add a trailing newline and an EOF on success. 
- if (!Out.empty() && Out.back() != '\n') - Out.push_back('\n'); + // Add an EOF on success. pushDirective(pp_eof); } - // Null-terminate the output. This way the memory buffer that's passed to - // Clang will not have to worry about the terminating '\0'. - Out.push_back(0); - Out.pop_back(); + ArrayRef RemainingTokens = Tokens; + for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { + assert(RemainingTokens.size() >= DirWithToks.NumTokens); + Directives.emplace_back(DirWithToks.Kind, + RemainingTokens.take_front(DirWithToks.NumTokens)); + RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); + } + assert(RemainingTokens.empty()); + return Error; } -bool clang::dependency_directives_scan::computeSkippedRanges( - ArrayRef Input, llvm::SmallVectorImpl &Range) { - struct IfElseDirective { - enum DirectiveKind { - If, // if/ifdef/ifndef - Else // elif/elifdef/elifndef, else - }; - int Offset; - DirectiveKind Kind; +bool clang::scanSourceForDependencyDirectives( + StringRef Input, SmallVectorImpl &Tokens, + SmallVectorImpl &Directives, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) { + return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); +} + +void clang::printDependencyDirectivesAsSource( + StringRef Source, + ArrayRef Directives, + llvm::raw_ostream &OS) { + // Add a space separator where it is convenient for testing purposes. 
+ auto needsSpaceSeparator = + [](tok::TokenKind Prev, + const dependency_directives_scan::Token &Tok) -> bool { + if (Prev == Tok.Kind) + return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, + tok::r_square); + if (Prev == tok::raw_identifier && + Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, + tok::char_constant, tok::header_name)) + return true; + if (Prev == tok::r_paren && + Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, + tok::char_constant, tok::unknown)) + return true; + if (Prev == tok::comma && + Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) + return true; + return false; }; - llvm::SmallVector Offsets; - for (const Directive &T : Input) { - switch (T.Kind) { - case pp_if: - case pp_ifdef: - case pp_ifndef: - Offsets.push_back({T.Offset, IfElseDirective::If}); - break; - - case pp_elif: - case pp_elifdef: - case pp_elifndef: - case pp_else: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - Offsets.push_back({T.Offset, IfElseDirective::Else}); - break; - } - case pp_endif: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - do { - IfElseDirective::DirectiveKind Kind = Offsets.pop_back_val().Kind; - if (Kind == IfElseDirective::If) - break; - } while (!Offsets.empty()); - break; - } - default: - break; + for (const dependency_directives_scan::Directive &Directive : Directives) { + Optional PrevTokenKind; + for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { + if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) + OS << ' '; + PrevTokenKind = Tok.Kind; + OS << Source.slice(Tok.Offset, Tok.getEnd()); } } - return false; -} - -bool clang::scanSourceForDependencyDirectives( - StringRef Input, SmallVectorImpl &Output, - SmallVectorImpl &Directives, DiagnosticsEngine *Diags, 
- SourceLocation InputSourceLoc) { - Output.clear(); - Directives.clear(); - return Scanner(Output, Directives, Input, Diags, InputSourceLoc).scan(); } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index a85b1186359e4f..a0a7a6ae789b4b 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -226,13 +226,11 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, return L; } -bool Lexer::skipOver(unsigned NumBytes) { - IsAtPhysicalStartOfLine = true; - IsAtStartOfLine = true; - if ((BufferPtr + NumBytes) > BufferEnd) - return true; - BufferPtr += NumBytes; - return false; +void Lexer::seek(unsigned Offset, bool IsAtStartOfLine) { + this->IsAtPhysicalStartOfLine = IsAtStartOfLine; + this->IsAtStartOfLine = IsAtStartOfLine; + assert((BufferStart + Offset) <= BufferEnd); + BufferPtr = BufferStart + Offset; } template static void StringifyImpl(T &Str, char Quote) { @@ -2939,6 +2937,13 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { unsigned Lexer::isNextPPTokenLParen() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); + if (isDependencyDirectivesLexer()) { + if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) + return 2; + return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::l_paren); + } + // Switch to 'skipping' mode. This will ensure that we can lex a token // without emitting diagnostics, disables macro expansion, and will cause EOF // to return an EOF token instead of popping the include stack. @@ -3281,6 +3286,8 @@ void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { } bool Lexer::Lex(Token &Result) { + assert(!isDependencyDirectivesLexer()); + // Start a new token. Result.startToken(); @@ -4102,3 +4109,129 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // We parsed the directive; lex a token with the new state. 
return false; } + +const char *Lexer::convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result) { + const char *TokPtr = BufferStart + DDTok.Offset; + Result.startToken(); + Result.setLocation(getSourceLocation(TokPtr)); + Result.setKind(DDTok.Kind); + Result.setFlag((Token::TokenFlags)DDTok.Flags); + Result.setLength(DDTok.Length); + BufferPtr = TokPtr + DDTok.Length; + return TokPtr; +} + +bool Lexer::LexDependencyDirectiveToken(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) { + if (DepDirectives.front().Kind == pp_eof) + return LexEndOfFile(Result, BufferEnd); + NextDepDirectiveTokenIndex = 0; + DepDirectives = DepDirectives.drop_front(); + } + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++]; + + const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result); + + if (Result.is(tok::hash) && Result.isAtStartOfLine()) { + PP->HandleDirective(Result); + return false; + } + if (Result.is(tok::raw_identifier)) { + Result.setRawIdentifierData(TokPtr); + if (!isLexingRawMode()) { + IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (II->isHandleIdentifierCase()) + return PP->HandleIdentifier(Result); + } + return true; + } + if (Result.isLiteral()) { + Result.setLiteralData(TokPtr); + return true; + } + if (Result.is(tok::colon) && + (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) { + // Convert consecutive colons to 'tok::coloncolon'. 
+ if (*BufferPtr == ':') { + assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::colon)); + ++NextDepDirectiveTokenIndex; + Result.setKind(tok::coloncolon); + } + return true; + } + if (Result.is(tok::eod)) + ParsingPreprocessorDirective = false; + + return true; +} + +bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + bool Stop = false; + unsigned NestedIfs = 0; + do { + DepDirectives = DepDirectives.drop_front(); + switch (DepDirectives.front().Kind) { + case pp_none: + llvm_unreachable("unexpected 'pp_none'"); + case pp_include: + case pp___include_macros: + case pp_define: + case pp_undef: + case pp_import: + case pp_pragma_import: + case pp_pragma_once: + case pp_pragma_push_macro: + case pp_pragma_pop_macro: + case pp_pragma_include_alias: + case pp_include_next: + case decl_at_import: + case cxx_module_decl: + case cxx_import_decl: + case cxx_export_module_decl: + case cxx_export_import_decl: + break; + case pp_if: + case pp_ifdef: + case pp_ifndef: + ++NestedIfs; + break; + case pp_elif: + case pp_elifdef: + case pp_elifndef: + case pp_else: + if (!NestedIfs) { + Stop = true; + } + break; + case pp_endif: + if (!NestedIfs) { + Stop = true; + } else { + --NestedIfs; + } + break; + case pp_eof: + return LexEndOfFile(Result, BufferEnd); + } + } while (!Stop); + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens.front(); + assert(DDTok.is(tok::hash)); + NextDepDirectiveTokenIndex = 1; + + convertDependencyDirectiveToken(DDTok, Result); + return false; +} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index d947c1580f5cad..97d7466d79a19e 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -443,41 +443,6 @@ SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, return DiscardUntilEndOfDirective().getEnd(); } -Optional 
Preprocessor::getSkippedRangeForExcludedConditionalBlock( - SourceLocation HashLoc) { - if (!ExcludedConditionalDirectiveSkipMappings) - return None; - if (!HashLoc.isFileID()) - return None; - - std::pair HashFileOffset = - SourceMgr.getDecomposedLoc(HashLoc); - Optional Buf = - SourceMgr.getBufferOrNone(HashFileOffset.first); - if (!Buf) - return None; - auto It = - ExcludedConditionalDirectiveSkipMappings->find(Buf->getBufferStart()); - if (It == ExcludedConditionalDirectiveSkipMappings->end()) - return None; - - const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond(); - // Check if the offset of '#' is mapped in the skipped ranges. - auto MappingIt = SkippedRanges.find(HashFileOffset.second); - if (MappingIt == SkippedRanges.end()) - return None; - - unsigned BytesToSkip = MappingIt->getSecond(); - unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset(); - assert(CurLexerBufferOffset >= HashFileOffset.second && - "lexer is before the hash?"); - // Take into account the fact that the lexer has already advanced, so the - // number of bytes to skip must be adjusted. - unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second; - assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?"); - return BytesToSkip - LengthDiff; -} - void Preprocessor::SuggestTypoedDirective(const Token &Tok, StringRef Directive, const SourceLocation &EndLoc) const { @@ -527,36 +492,42 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, // disabling warnings, etc. CurPPLexer->LexingRawMode = true; Token Tok; - if (auto SkipLength = - getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) { - // Skip to the next '#endif' / '#else' / '#elif'. 
- CurLexer->skipOver(*SkipLength); - } SourceLocation endLoc; while (true) { - CurLexer->Lex(Tok); + if (CurLexer->isDependencyDirectivesLexer()) { + CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok); + } else { + while (true) { + CurLexer->Lex(Tok); - if (Tok.is(tok::code_completion)) { - setCodeCompletionReached(); - if (CodeComplete) - CodeComplete->CodeCompleteInConditionalExclusion(); - continue; - } + if (Tok.is(tok::code_completion)) { + setCodeCompletionReached(); + if (CodeComplete) + CodeComplete->CodeCompleteInConditionalExclusion(); + continue; + } - // If this is the end of the buffer, we have an error. - if (Tok.is(tok::eof)) { - // We don't emit errors for unterminated conditionals here, - // Lexer::LexEndOfFile can do that properly. - // Just return and let the caller lex after this #include. - if (PreambleConditionalStack.isRecording()) - PreambleConditionalStack.SkipInfo.emplace( - HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc); - break; - } + // If this is the end of the buffer, we have an error. + if (Tok.is(tok::eof)) { + // We don't emit errors for unterminated conditionals here, + // Lexer::LexEndOfFile can do that properly. + // Just return and let the caller lex after this #include. + if (PreambleConditionalStack.isRecording()) + PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc, + FoundNonSkipPortion, + FoundElse, ElseLoc); + break; + } - // If this token is not a preprocessor directive, just skip it. - if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) - continue; + // If this token is not a preprocessor directive, just skip it. + if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) + continue; + + break; + } + } + if (Tok.is(tok::eof)) + break; // We just parsed a # character at the start of a line, so we're in // directive mode. 
Tell the lexer this so any newlines we see will be diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index e6a7eb6a03506e..e3379aecba7254 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -91,8 +91,19 @@ bool Preprocessor::EnterSourceFile(FileID FID, ConstSearchDirIterator CurDir, CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset); } - EnterSourceFileWithLexer( - new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir); + Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile); + if (getPreprocessorOpts().DependencyDirectivesForFile && + FID != PredefinesFileID) { + if (Optional File = SourceMgr.getFileEntryRefForID(FID)) { + if (Optional> + DepDirectives = + getPreprocessorOpts().DependencyDirectivesForFile(*File)) { + TheLexer->DepDirectives = *DepDirectives; + } + } + } + + EnterSourceFileWithLexer(TheLexer, CurDir); return false; } @@ -110,7 +121,9 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurDirLookup = CurDir; CurLexerSubmodule = nullptr; if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_Lexer; + CurLexerKind = TheLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; // Notify the client, if desired, that we are in a new source file. 
if (Callbacks && !CurLexer->Is_PragmaLexer) { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index abbd33331e9d33..d077616eb84bc5 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -158,11 +158,6 @@ Preprocessor::Preprocessor(std::shared_ptr PPOpts, if (this->PPOpts->GeneratePreamble) PreambleConditionalStack.startRecording(); - ExcludedConditionalDirectiveSkipMappings = - this->PPOpts->ExcludedConditionalDirectiveSkipMappings; - if (ExcludedConditionalDirectiveSkipMappings) - ExcludedConditionalDirectiveSkipMappings->clear(); - MaxTokens = LangOpts.MaxTokens; } @@ -382,7 +377,9 @@ StringRef Preprocessor::getLastMacroWithSpelling( void Preprocessor::recomputeCurLexerKind() { if (CurLexer) - CurLexerKind = CLK_Lexer; + CurLexerKind = CurLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; else if (CurTokenLexer) CurLexerKind = CLK_TokenLexer; else @@ -645,6 +642,9 @@ void Preprocessor::SkipTokensWhileUsingPCH() { case CLK_CachingLexer: CachingLex(Tok); break; + case CLK_DependencyDirectivesLexer: + CurLexer->LexDependencyDirectiveToken(Tok); + break; case CLK_LexAfterModuleImport: LexAfterModuleImport(Tok); break; @@ -906,6 +906,9 @@ void Preprocessor::Lex(Token &Result) { CachingLex(Result); ReturnedToken = true; break; + case CLK_DependencyDirectivesLexer: + ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result); + break; case CLK_LexAfterModuleImport: ReturnedToken = LexAfterModuleImport(Result); break; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index c47f7d068eb7f2..026bdfe03f28cf 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include 
"clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" -#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/Threading.h" @@ -44,64 +43,41 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { if (Entry.isError() || Entry.isDirectory() || Disable || - !shouldScanForDirectives(Filename, Entry.getUniqueID())) - return EntryRef(/*Minimized=*/false, Filename, Entry); + !shouldScanForDirectives(Filename)) + return EntryRef(Filename, Entry); CachedFileContents *Contents = Entry.getCachedContents(); assert(Contents && "contents not initialized"); // Double-checked locking. - if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); std::lock_guard GuardLock(Contents->ValueLock); // Double-checked locking. - if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); - llvm::SmallString<1024> MinimizedFileContents; - // Minimize the file down to directives that might affect the dependencies. - SmallVector Tokens; + SmallVector Directives; + // Scan the file for preprocessor directives that might affect the + // dependencies. if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), - MinimizedFileContents, Tokens)) { + Contents->DepDirectiveTokens, + Directives)) { + Contents->DepDirectiveTokens.clear(); // FIXME: Propagate the diagnostic if desired by the client. - // Use the original file if the minimization failed. 
- Contents->MinimizedStorage = - llvm::MemoryBuffer::getMemBuffer(*Contents->Original); - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + Contents->DepDirectives.store(new Optional()); + return EntryRef(Filename, Entry); } - // The contents produced by the minimizer must be null terminated. - assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && - "not null terminated contents"); - - // Compute the skipped PP ranges that speedup skipping over inactive - // preprocessor blocks. - llvm::SmallVector SkippedRanges; - dependency_directives_scan::computeSkippedRanges(Tokens, SkippedRanges); - PreprocessorSkippedRangeMapping Mapping; - for (const auto &Range : SkippedRanges) { - if (Range.Length < 16) { - // Ignore small ranges as non-profitable. - // FIXME: This is a heuristic, its worth investigating the tradeoffs - // when it should be applied. - continue; - } - Mapping[Range.Offset] = Range.Length; - } - Contents->PPSkippedRangeMapping = std::move(Mapping); - - Contents->MinimizedStorage = std::make_unique( - std::move(MinimizedFileContents)); - // This function performed double-checked locking using `MinimizedAccess`. - // Assigning it must be the last thing this function does. If we were to - // assign it before `PPSkippedRangeMapping`, other threads may skip the - // critical section (`MinimizedAccess != nullptr`) and access the mappings - // that are about to be initialized, leading to a data race. - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + // This function performed double-checked locking using `DepDirectives`. + // Assigning it must be the last thing this function does, otherwise other + // threads may skip the + // critical section (`DepDirectives != nullptr`), leading to a data race. 
+ Contents->DepDirectives.store( + new Optional(std::move(Directives))); + return EntryRef(Filename, Entry); } DependencyScanningFilesystemSharedCache:: @@ -208,19 +184,9 @@ static bool shouldCacheStatFailures(StringRef Filename) { return shouldScanForDirectivesBasedOnExtension(Filename); } -void DependencyScanningWorkerFilesystem::disableDirectivesScanning( - StringRef Filename) { - // Since we're not done setting up `NotToBeScanned` yet, we need to disable - // directive scanning explicitly. - if (llvm::ErrorOr Result = getOrCreateFileSystemEntry( - Filename, /*DisableDirectivesScanning=*/true)) - NotToBeScanned.insert(Result->getStatus().getUniqueID()); -} - bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( - StringRef Filename, llvm::sys::fs::UniqueID UID) { - return shouldScanForDirectivesBasedOnExtension(Filename) && - !NotToBeScanned.contains(UID); + StringRef Filename) { + return shouldScanForDirectivesBasedOnExtension(Filename); } const CachedFileSystemEntry & @@ -307,9 +273,7 @@ class DepScanFile final : public llvm::vfs::File { llvm::vfs::Status Stat) : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} - static llvm::ErrorOr> - create(EntryRef Entry, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings); + static llvm::ErrorOr> create(EntryRef Entry); llvm::ErrorOr status() override { return Stat; } @@ -329,8 +293,7 @@ class DepScanFile final : public llvm::vfs::File { } // end anonymous namespace llvm::ErrorOr> -DepScanFile::create(EntryRef Entry, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) { +DepScanFile::create(EntryRef Entry) { assert(!Entry.isError() && "error"); if (Entry.isDirectory()) @@ -342,10 +305,6 @@ DepScanFile::create(EntryRef Entry, /*RequiresNullTerminator=*/false), Entry.getStatus()); - const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); - if (EntrySkipMappings && !EntrySkipMappings->empty()) - PPSkipMappings[Result->Buffer->getBufferStart()] = EntrySkipMappings; - return 
llvm::ErrorOr>( std::unique_ptr(std::move(Result))); } @@ -358,5 +317,5 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); - return DepScanFile::create(Result.get(), PPSkipMappings); + return DepScanFile::create(Result.get()); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index 30f5aeae118a63..04f7044bc42369 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -137,12 +137,11 @@ class DependencyScanningAction : public tooling::ToolAction { DependencyScanningAction( StringRef WorkingDirectory, DependencyConsumer &Consumer, llvm::IntrusiveRefCntPtr DepFS, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings, ScanningOutputFormat Format, bool OptimizeArgs, llvm::Optional ModuleName = None) : WorkingDirectory(WorkingDirectory), Consumer(Consumer), - DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format), - OptimizeArgs(OptimizeArgs), ModuleName(ModuleName) {} + DepFS(std::move(DepFS)), Format(Format), OptimizeArgs(OptimizeArgs), + ModuleName(ModuleName) {} bool runInvocation(std::shared_ptr Invocation, FileManager *FileMgr, @@ -183,29 +182,21 @@ class DependencyScanningAction : public tooling::ToolAction { // Use the dependency scanning optimized file system if requested to do so. if (DepFS) { - DepFS->enableDirectivesScanningOfAllFiles(); - // Don't minimize any files that contributed to prebuilt modules. The - // implicit build validates the modules by comparing the reported sizes of - // their inputs to the current state of the filesystem. Minimization would - // throw this mechanism off. 
- for (const auto &File : PrebuiltModulesInputFiles) - DepFS->disableDirectivesScanning(File.getKey()); - // Don't minimize any files that were explicitly passed in the build - // settings and that might be opened. - for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries) - DepFS->disableDirectivesScanning(E.Path); - for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles) - DepFS->disableDirectivesScanning(F); - // Support for virtual file system overlays on top of the caching // filesystem. FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation( ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS)); - // Pass the skip mappings which should speed up excluded conditional block - // skipping in the preprocessor. - ScanInstance.getPreprocessorOpts() - .ExcludedConditionalDirectiveSkipMappings = &PPSkipMappings; + llvm::IntrusiveRefCntPtr LocalDepFS = + DepFS; + ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile = + [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File) + -> Optional> { + if (llvm::ErrorOr Entry = + LocalDepFS->getOrCreateFileSystemEntry(File.getName())) + return Entry->getDirectiveTokens(); + return None; + }; } // Create the dependency collector that will collect the produced @@ -262,7 +253,6 @@ class DependencyScanningAction : public tooling::ToolAction { StringRef WorkingDirectory; DependencyConsumer &Consumer; llvm::IntrusiveRefCntPtr DepFS; - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; ScanningOutputFormat Format; bool OptimizeArgs; llvm::Optional ModuleName; @@ -289,7 +279,7 @@ DependencyScanningWorker::DependencyScanningWorker( if (Service.getMode() == ScanningMode::DependencyDirectivesScan) DepFS = new DependencyScanningWorkerFilesystem(Service.getSharedCache(), - RealFS, PPSkipMappings); + RealFS); if (Service.canReuseFileManager()) Files = new FileManager(FileSystemOptions(), RealFS); } @@ -340,8 +330,8 @@ llvm::Error DependencyScanningWorker::computeDependencies( 
return runWithDiags(CreateAndPopulateDiagOpts(FinalCCommandLine).release(), [&](DiagnosticConsumer &DC, DiagnosticOptions &DiagOpts) { DependencyScanningAction Action( - WorkingDirectory, Consumer, DepFS, PPSkipMappings, - Format, OptimizeArgs, ModuleName); + WorkingDirectory, Consumer, DepFS, Format, + OptimizeArgs, ModuleName); // Create an invocation that uses the underlying file // system to ensure that any file system requests that // are made by the driver do not go through the diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c index fa4ff7dcb8bb81..cb4525e38711ce 100644 --- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c @@ -1,3 +1,4 @@ -// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 +// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s -#define 0 0 // expected-error {{macro name must be an identifier}} +#define 0 0 +// CHECK: #define 0 0 diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c index 98b1cc88e7c18e..0971649caf6738 100644 --- a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c @@ -15,7 +15,7 @@ #pragma include_alias(, "mystring.h") // CHECK: #pragma once -// CHECK-NEXT: #pragma push_macro( "MYMACRO" ) +// CHECK-NEXT: #pragma push_macro("MYMACRO") // CHECK-NEXT: #pragma pop_macro("MYMACRO") // CHECK-NEXT: #pragma clang module import mymodule // CHECK-NEXT: #pragma include_alias(, "mystring.h") diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index cec28eae0f2b80..8c575398528303 
100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -14,39 +14,58 @@ using namespace llvm; using namespace clang; using namespace clang::dependency_directives_scan; -static bool minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out) { - SmallVector Directives; - return scanSourceForDependencyDirectives(Input, Out, Directives); +static bool minimizeSourceToDependencyDirectives( + StringRef Input, SmallVectorImpl &Out, + SmallVectorImpl &Tokens, + SmallVectorImpl &Directives) { + Out.clear(); + Tokens.clear(); + Directives.clear(); + if (scanSourceForDependencyDirectives(Input, Tokens, Directives)) + return true; + + raw_svector_ostream OS(Out); + printDependencyDirectivesAsSource(Input, Directives, OS); + if (!Out.empty() && Out.back() != '\n') + Out.push_back('\n'); + Out.push_back('\0'); + Out.pop_back(); + + return false; } -static bool -minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out, - SmallVectorImpl &Directives) { - return scanSourceForDependencyDirectives(Input, Out, Directives); +static bool minimizeSourceToDependencyDirectives(StringRef Input, + SmallVectorImpl &Out) { + SmallVector Tokens; + SmallVector Directives; + return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives); } namespace { TEST(MinimizeSourceToDependencyDirectivesTest, Empty) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; - ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Directives)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("", Out, Tokens, Directives)); EXPECT_TRUE(Out.empty()); + EXPECT_TRUE(Tokens.empty()); ASSERT_EQ(1u, Directives.size()); ASSERT_EQ(pp_eof, Directives.back().Kind); - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Directives)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens, + Directives)); EXPECT_TRUE(Out.empty()); + 
EXPECT_TRUE(Tokens.empty()); ASSERT_EQ(1u, Directives.size()); ASSERT_EQ(pp_eof, Directives.back().Kind); } -TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) { +TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; ASSERT_FALSE( @@ -71,7 +90,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) { "#pragma include_alias(, )\n" "export module m;\n" "import m;\n", - Out, Directives)); + Out, Tokens, Directives)); EXPECT_EQ(pp_define, Directives[0].Kind); EXPECT_EQ(pp_undef, Directives[1].Kind); EXPECT_EQ(pp_endif, Directives[2].Kind); @@ -91,19 +110,28 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) { EXPECT_EQ(pp_pragma_push_macro, Directives[16].Kind); EXPECT_EQ(pp_pragma_pop_macro, Directives[17].Kind); EXPECT_EQ(pp_pragma_include_alias, Directives[18].Kind); - EXPECT_EQ(cxx_export_decl, Directives[19].Kind); - EXPECT_EQ(cxx_module_decl, Directives[20].Kind); - EXPECT_EQ(cxx_import_decl, Directives[21].Kind); - EXPECT_EQ(pp_eof, Directives[22].Kind); + EXPECT_EQ(cxx_export_module_decl, Directives[19].Kind); + EXPECT_EQ(cxx_import_decl, Directives[20].Kind); + EXPECT_EQ(pp_eof, Directives[21].Kind); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, EmptyHash) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#\n#define MACRO a\n", Out)); + EXPECT_STREQ("#define MACRO a\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, Define) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("#define MACRO", Out, Directives)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out, + Tokens, Directives)); EXPECT_STREQ("#define MACRO\n", Out.data()); + ASSERT_EQ(4u, Tokens.size()); ASSERT_EQ(2u, Directives.size()); ASSERT_EQ(pp_define, Directives.front().Kind); } @@ -144,25 +172,25 @@ TEST(MinimizeSourceToDependencyDirectivesTest, 
DefineMacroArguments) { ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO con tent ", Out)); - EXPECT_STREQ("#define MACRO con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO() con tent ", Out)); - EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO((a))\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(\n", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(a*b)\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) { @@ -170,19 +198,19 @@ TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) { ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\t)\tcon \t tent\t", Out)); - EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\f)\fcon \f tent\f", Out)); - EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\v)\vcon \v tent\v", Out)); - EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO \t\v\f\v\t 
con\f\t\vtent\v\f \v", Out)); - EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) { @@ -255,25 +283,27 @@ TEST(MinimizeSourceToDependencyDirectivesTest, TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define &\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND&\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n" "&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND\\\n" + "&\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) { @@ -303,6 +333,14 @@ TEST(MinimizeSourceToDependencyDirectivesTest, MultilineCommentInStrings) { Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, CommentSlashSlashStar) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO 1 //* blah */\n", Out)); + EXPECT_STREQ("#define MACRO 1\n", Out.data()); +} + TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) { SmallVector Out; @@ -481,6 +519,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Include) { ASSERT_FALSE( minimizeSourceToDependencyDirectives("#__include_macros \n", Out)); EXPECT_STREQ("#__include_macros \n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include MACRO\n", Out)); + EXPECT_STREQ("#include 
MACRO\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { @@ -507,8 +548,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) { SmallVector Out; ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) { @@ -559,7 +601,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) { "#define GUARD\n" "#endif\n", Out)); - EXPECT_STREQ("#ifndef GUARD\n" + EXPECT_STREQ("#if\\\n" + "ndef GUARD\n" "#define GUARD\n" "#endif\n", Out.data()); @@ -567,12 +610,16 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) { ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\n" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\r" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" " RD\n", @@ -588,7 +635,10 @@ TEST(MinimizeSourceToDependencyDirectivesTest, "2 + \\\t\n" "3\n", Out)); - EXPECT_STREQ("#define A 1 + 2 + 3\n", Out.data()); + EXPECT_STREQ("#define A 1+\\ \n" + "2+\\\t\n" + "3\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) { @@ -682,6 +732,7 @@ int z = 128'78; TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; StringRef Source = R"(// comment @@ -689,7 +740,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { 
// another comment #include )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); EXPECT_STREQ("#pragma once\n#include \n", Out.data()); ASSERT_EQ(Directives.size(), 3u); EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_pragma_once); @@ -700,7 +752,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { #include )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#pragma once\n#include \n", Out.data()); + EXPECT_STREQ("#pragma once extra tokens\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, @@ -755,11 +807,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Source = "#define X \"\\ \r\nx\n#include \n"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#define X \"\\ \r\nx\n#include \n", Out.data()); + EXPECT_STREQ("#define X\"\\ \r\nx\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; StringRef Source = R"( @@ -789,81 +842,17 @@ ort \ import f(->a = 3); } )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;\n" - "export import :l [[rename]];\n" - "import <<= 3;\nimport a b d e d e f e;\n" - "import foo [[no_unique_address]];\nimport foo();\n" - "import f(:sefse);\nimport f(->a = 3);\n", + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); + EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;" + "exp\\\nort import:l[[rename]];" + "import<<=3;import a b d e d e f e;" + "import foo[[no_unique_address]];import foo();" + "import f(:sefse);import f(->a=3);\n", Out.data()); - ASSERT_EQ(Directives.size(), 12u); - EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_include); - 
EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::cxx_module_decl); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasic) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - EXPECT_EQ(Ranges.size(), 1u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifdef BLAH\n" - "void skip();\n" - "#elifdef BLAM\n" - "void skip();\n" - "#elifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - EXPECT_EQ(Ranges.size(), 3u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset + Ranges[2].Length, (int)Out.rfind("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesNested) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "#if FOO\n" - "#include hello\n" - "#elif BAR\n" - "#include bye\n" - "#endif\n" - "#else\n" - "#include nothing\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - EXPECT_EQ(Ranges.size(), 4u); - EXPECT_EQ(Ranges[0].Offset, (int)Out.find("#if FOO")); 
- EXPECT_EQ(Ranges[0].Offset + Ranges[0].Length, (int)Out.find("#elif")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elif BAR")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#endif")); - EXPECT_EQ(Ranges[2].Offset, 0); - EXPECT_EQ(Ranges[2].Length, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset + Ranges[3].Length, (int)Out.rfind("#endif")); + ASSERT_EQ(Directives.size(), 10u); + EXPECT_EQ(Directives[0].Kind, pp_include); + EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl); } } // end anonymous namespace diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp index bb819d9c59ec16..a9d6e6e7fb6fd8 100644 --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -204,53 +204,5 @@ TEST(DependencyScanner, ScanDepsReuseFilemanagerHasInclude) { EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h"); } -namespace dependencies { -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - - DepFS.enableDirectivesScanningOfAllFiles(); // Let's be explicit for clarity. 
- auto StatusMinimized0 = DepFS.status("/mod.h"); - DepFS.disableDirectivesScanning("/mod.h"); - auto StatusFull1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusMinimized0); - EXPECT_TRUE(StatusFull1); - EXPECT_EQ(StatusMinimized0->getSize(), 17u); - EXPECT_EQ(StatusFull1->getSize(), 30u); - EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h")); -} - -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - - DepFS.disableDirectivesScanning("/mod.h"); - auto StatusFull0 = DepFS.status("/mod.h"); - DepFS.enableDirectivesScanningOfAllFiles(); - auto StatusMinimized1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusFull0); - EXPECT_TRUE(StatusMinimized1); - EXPECT_EQ(StatusFull0->getSize(), 30u); - EXPECT_EQ(StatusMinimized1->getSize(), 17u); - EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h")); -} - -} // end namespace dependencies } // end namespace tooling } // end namespace clang From 75ac914b148d6156732a814b8491285c7d15aca5 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 May 2022 20:01:22 +0000 Subject: [PATCH 629/908] [gn build] Port b58a420ff4f9 --- llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn | 2 +- llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn index 145e0b5a97d2c5..52488344403611 100644 --- a/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn @@ -6,7 +6,7 @@ 
static_library("Lex") { "//llvm/lib/Support", ] sources = [ - "DependencyDirectivesSourceMinimizer.cpp", + "DependencyDirectivesScanner.cpp", "HeaderMap.cpp", "HeaderSearch.cpp", "InitHeaderSearch.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn index a69e58ab4b824d..397c12baadaae5 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn @@ -11,7 +11,7 @@ unittest("LexTests") { "//llvm/lib/Support", ] sources = [ - "DependencyDirectivesSourceMinimizerTest.cpp", + "DependencyDirectivesScannerTest.cpp", "HeaderMapTest.cpp", "HeaderSearchTest.cpp", "LexerTest.cpp", From e9ac99b6090c125091b61240c75dcc403d569472 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 26 May 2022 12:13:37 -0700 Subject: [PATCH 630/908] [RISCV] Simplfy creation of IndexVT in lowerMaskedGather/lowerMaskedScatter. NFC The scalar element width is not a factor in how ContainerVT is determined. We don't need to check the relative size of VT and IndexVT. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 28 +++++---------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d9829298c4ace8..defbfce203f29b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6385,17 +6385,9 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, MVT ContainerVT = VT; if (VT.isFixedLengthVector()) { - // We need to use the larger of the result and index type to determine the - // scalable type to use so we don't increase LMUL for any operand/result. 
- if (VT.bitsGE(IndexVT)) { - ContainerVT = getContainerForFixedLengthVector(VT); - IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), - ContainerVT.getVectorElementCount()); - } else { - IndexVT = getContainerForFixedLengthVector(IndexVT); - ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(), - IndexVT.getVectorElementCount()); - } + ContainerVT = getContainerForFixedLengthVector(VT); + IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), + ContainerVT.getVectorElementCount()); Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); @@ -6495,17 +6487,9 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, MVT ContainerVT = VT; if (VT.isFixedLengthVector()) { - // We need to use the larger of the value and index type to determine the - // scalable type to use so we don't increase LMUL for any operand/result. - if (VT.bitsGE(IndexVT)) { - ContainerVT = getContainerForFixedLengthVector(VT); - IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), - ContainerVT.getVectorElementCount()); - } else { - IndexVT = getContainerForFixedLengthVector(IndexVT); - ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), - IndexVT.getVectorElementCount()); - } + ContainerVT = getContainerForFixedLengthVector(VT); + IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), + ContainerVT.getVectorElementCount()); Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); From 460781feef56bdff3e3a88fb76b427a88795641b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 26 May 2022 12:15:04 -0700 Subject: [PATCH 631/908] [LegalizeTypes] Fix bug in expensive checks verification With a fix for an expensive checks build failure exposed by new RISC-V tests. Something about expanding two rotates in type legalization caused a change in the remapping tables that the expensive checks verifying wasn't expecting. See comment in the code for how it was fixed. 
Tests came from this commit that exposed the bug [RISCV] Add test cases showing failure to remove mask on rotate amounts. If the masking AND has multiple users we fail to remove it. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D126036 --- .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 12 +- llvm/test/CodeGen/RISCV/rotl-rotr.ll | 325 +++++++++++++++++- 2 files changed, 334 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 7b7df0b4f62893..8fe9a83b9c3d6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -143,8 +143,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { } } else { if (Mapped == 0) { - dbgs() << "Processed value not in any map!"; - Failed = true; + SDValue NodeById = IdToValueMap.lookup(ResId); + // It is possible the node has been remapped to another node and had + // its Id updated in the Value to Id table. The node it remapped to + // may not have been processed yet. Look up the Id in the Id to Value + // table and re-check the Processed state. If the node hasn't been + // remapped we'll get the same state as we got earlier. 
+ if (NodeById->getNodeId() == Processed) { + dbgs() << "Processed value not in any map!"; + Failed = true; + } } else if (Mapped & (Mapped - 1)) { dbgs() << "Value in multiple maps!"; Failed = true; diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index 58ec0b0b0c170a..6e8b73fc987982 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -1,13 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -enable-legalize-types-checking < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32ZBB ; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64ZBB +; NOTE: -enable-legalize-types-checking is on one command line due to a previous +; assertion failure on an expensive checks build for @rotr_32_mask_multiple. + ; These IR sequences are idioms for rotates. If rotate instructions are ; supported, they will be turned into ISD::ROTL or ISD::ROTR. 
@@ -856,3 +859,323 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ret i64 %3 } declare i64 @llvm.fshr.i64(i64, i64, i64) + +define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; RV32I-LABEL: rotl_32_mask_multiple: +; RV32I: # %bb.0: +; RV32I-NEXT: sll a3, a0, a2 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: sll a2, a1, a2 +; RV32I-NEXT: srl a1, a1, a4 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotl_32_mask_multiple: +; RV64I: # %bb.0: +; RV64I-NEXT: sllw a3, a0, a2 +; RV64I-NEXT: negw a4, a2 +; RV64I-NEXT: srlw a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: sllw a2, a1, a2 +; RV64I-NEXT: srlw a1, a1, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotl_32_mask_multiple: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rol a0, a0, a2 +; RV32ZBB-NEXT: rol a1, a1, a2 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: rotl_32_mask_multiple: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rolw a0, a0, a2 +; RV64ZBB-NEXT: rolw a1, a1, a2 +; RV64ZBB-NEXT: addw a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt) + %3 = add i32 %1, %2 + ret i32 %3 +} + +define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { +; RV32I-LABEL: rotl_64_mask_multiple: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a5, a4, 26 +; RV32I-NEXT: srli a5, a5, 31 +; RV32I-NEXT: mv a6, a1 +; RV32I-NEXT: bnez a5, .LBB13_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: .LBB13_2: +; RV32I-NEXT: bnez a5, .LBB13_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB13_4: +; RV32I-NEXT: sll a7, a6, a4 +; RV32I-NEXT: srli t0, a0, 1 +; RV32I-NEXT: not a1, a4 +; 
RV32I-NEXT: srl t0, t0, a1 +; RV32I-NEXT: sll t1, a0, a4 +; RV32I-NEXT: srli a0, a6, 1 +; RV32I-NEXT: srl t2, a0, a1 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: bnez a5, .LBB13_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: .LBB13_6: +; RV32I-NEXT: or a6, a7, t0 +; RV32I-NEXT: or a7, t1, t2 +; RV32I-NEXT: sll t0, a0, a4 +; RV32I-NEXT: bnez a5, .LBB13_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB13_8: +; RV32I-NEXT: srli a3, a2, 1 +; RV32I-NEXT: srl a3, a3, a1 +; RV32I-NEXT: or a3, t0, a3 +; RV32I-NEXT: sll a2, a2, a4 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: add a1, a7, a0 +; RV32I-NEXT: add a0, a6, a3 +; RV32I-NEXT: sltu a2, a0, a6 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotl_64_mask_multiple: +; RV64I: # %bb.0: +; RV64I-NEXT: sll a3, a0, a2 +; RV64I-NEXT: neg a4, a2 +; RV64I-NEXT: srl a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: sll a2, a1, a2 +; RV64I-NEXT: srl a1, a1, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotl_64_mask_multiple: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: slli a5, a4, 26 +; RV32ZBB-NEXT: srli a5, a5, 31 +; RV32ZBB-NEXT: mv a6, a1 +; RV32ZBB-NEXT: bnez a5, .LBB13_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: mv a6, a0 +; RV32ZBB-NEXT: .LBB13_2: +; RV32ZBB-NEXT: bnez a5, .LBB13_4 +; RV32ZBB-NEXT: # %bb.3: +; RV32ZBB-NEXT: mv a0, a1 +; RV32ZBB-NEXT: .LBB13_4: +; RV32ZBB-NEXT: sll a7, a6, a4 +; RV32ZBB-NEXT: srli t0, a0, 1 +; RV32ZBB-NEXT: not a1, a4 +; RV32ZBB-NEXT: srl t0, t0, a1 +; RV32ZBB-NEXT: sll t1, a0, a4 +; RV32ZBB-NEXT: srli a0, a6, 1 +; RV32ZBB-NEXT: srl t2, a0, a1 +; RV32ZBB-NEXT: mv a0, a3 +; RV32ZBB-NEXT: bnez a5, .LBB13_6 +; RV32ZBB-NEXT: # %bb.5: +; RV32ZBB-NEXT: mv a0, a2 +; RV32ZBB-NEXT: .LBB13_6: +; RV32ZBB-NEXT: or a6, a7, t0 +; RV32ZBB-NEXT: or a7, t1, t2 +; RV32ZBB-NEXT: sll t0, a0, a4 +; RV32ZBB-NEXT: bnez a5, .LBB13_8 +; 
RV32ZBB-NEXT: # %bb.7: +; RV32ZBB-NEXT: mv a2, a3 +; RV32ZBB-NEXT: .LBB13_8: +; RV32ZBB-NEXT: srli a3, a2, 1 +; RV32ZBB-NEXT: srl a3, a3, a1 +; RV32ZBB-NEXT: or a3, t0, a3 +; RV32ZBB-NEXT: sll a2, a2, a4 +; RV32ZBB-NEXT: srli a0, a0, 1 +; RV32ZBB-NEXT: srl a0, a0, a1 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: add a1, a7, a0 +; RV32ZBB-NEXT: add a0, a6, a3 +; RV32ZBB-NEXT: sltu a2, a0, a6 +; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: rotl_64_mask_multiple: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rol a0, a0, a2 +; RV64ZBB-NEXT: rol a1, a1, a2 +; RV64ZBB-NEXT: add a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i64 %amt, 63 + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt) + %2 = tail call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 %maskedamt) + %3 = add i64 %1, %2 + ret i64 %3 +} + +define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; RV32I-LABEL: rotr_32_mask_multiple: +; RV32I: # %bb.0: +; RV32I-NEXT: srl a3, a0, a2 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: sll a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: srl a2, a1, a2 +; RV32I-NEXT: sll a1, a1, a4 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotr_32_mask_multiple: +; RV64I: # %bb.0: +; RV64I-NEXT: srlw a3, a0, a2 +; RV64I-NEXT: negw a4, a2 +; RV64I-NEXT: sllw a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: srlw a2, a1, a2 +; RV64I-NEXT: sllw a1, a1, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotr_32_mask_multiple: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: ror a0, a0, a2 +; RV32ZBB-NEXT: ror a1, a1, a2 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: rotr_32_mask_multiple: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rorw a0, a0, a2 +; RV64ZBB-NEXT: rorw a1, a1, a2 +; RV64ZBB-NEXT: addw a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 
@llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt) + %3 = add i32 %1, %2 + ret i32 %3 +} + +define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { +; RV32I-LABEL: rotr_64_mask_multiple: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a5, a4, 32 +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: beqz a5, .LBB15_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a6, a1 +; RV32I-NEXT: .LBB15_2: +; RV32I-NEXT: beqz a5, .LBB15_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB15_4: +; RV32I-NEXT: srl a7, a6, a4 +; RV32I-NEXT: slli t0, a1, 1 +; RV32I-NEXT: not a0, a4 +; RV32I-NEXT: sll t0, t0, a0 +; RV32I-NEXT: srl t1, a1, a4 +; RV32I-NEXT: slli a1, a6, 1 +; RV32I-NEXT: sll t2, a1, a0 +; RV32I-NEXT: mv a6, a2 +; RV32I-NEXT: beqz a5, .LBB15_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: .LBB15_6: +; RV32I-NEXT: or a1, t0, a7 +; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: srl t0, a6, a4 +; RV32I-NEXT: beqz a5, .LBB15_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: mv a3, a2 +; RV32I-NEXT: .LBB15_8: +; RV32I-NEXT: slli a2, a3, 1 +; RV32I-NEXT: sll a2, a2, a0 +; RV32I-NEXT: or a2, a2, t0 +; RV32I-NEXT: srl a3, a3, a4 +; RV32I-NEXT: slli a4, a6, 1 +; RV32I-NEXT: sll a0, a4, a0 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: add a3, a7, a0 +; RV32I-NEXT: add a0, a1, a2 +; RV32I-NEXT: sltu a1, a0, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rotr_64_mask_multiple: +; RV64I: # %bb.0: +; RV64I-NEXT: srl a3, a0, a2 +; RV64I-NEXT: neg a4, a2 +; RV64I-NEXT: sll a0, a0, a4 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: srl a2, a1, a2 +; RV64I-NEXT: sll a1, a1, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: rotr_64_mask_multiple: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi a5, a4, 32 +; RV32ZBB-NEXT: mv a6, a0 +; RV32ZBB-NEXT: beqz a5, .LBB15_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: mv a6, a1 +; RV32ZBB-NEXT: .LBB15_2: +; 
RV32ZBB-NEXT: beqz a5, .LBB15_4 +; RV32ZBB-NEXT: # %bb.3: +; RV32ZBB-NEXT: mv a1, a0 +; RV32ZBB-NEXT: .LBB15_4: +; RV32ZBB-NEXT: srl a7, a6, a4 +; RV32ZBB-NEXT: slli t0, a1, 1 +; RV32ZBB-NEXT: not a0, a4 +; RV32ZBB-NEXT: sll t0, t0, a0 +; RV32ZBB-NEXT: srl t1, a1, a4 +; RV32ZBB-NEXT: slli a1, a6, 1 +; RV32ZBB-NEXT: sll t2, a1, a0 +; RV32ZBB-NEXT: mv a6, a2 +; RV32ZBB-NEXT: beqz a5, .LBB15_6 +; RV32ZBB-NEXT: # %bb.5: +; RV32ZBB-NEXT: mv a6, a3 +; RV32ZBB-NEXT: .LBB15_6: +; RV32ZBB-NEXT: or a1, t0, a7 +; RV32ZBB-NEXT: or a7, t2, t1 +; RV32ZBB-NEXT: srl t0, a6, a4 +; RV32ZBB-NEXT: beqz a5, .LBB15_8 +; RV32ZBB-NEXT: # %bb.7: +; RV32ZBB-NEXT: mv a3, a2 +; RV32ZBB-NEXT: .LBB15_8: +; RV32ZBB-NEXT: slli a2, a3, 1 +; RV32ZBB-NEXT: sll a2, a2, a0 +; RV32ZBB-NEXT: or a2, a2, t0 +; RV32ZBB-NEXT: srl a3, a3, a4 +; RV32ZBB-NEXT: slli a4, a6, 1 +; RV32ZBB-NEXT: sll a0, a4, a0 +; RV32ZBB-NEXT: or a0, a0, a3 +; RV32ZBB-NEXT: add a3, a7, a0 +; RV32ZBB-NEXT: add a0, a1, a2 +; RV32ZBB-NEXT: sltu a1, a0, a1 +; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: rotr_64_mask_multiple: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ror a0, a0, a2 +; RV64ZBB-NEXT: ror a1, a1, a2 +; RV64ZBB-NEXT: add a0, a0, a1 +; RV64ZBB-NEXT: ret + %maskedamt = and i64 %amt, 63 + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt) + %2 = tail call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 %maskedamt) + %3 = add i64 %1, %2 + ret i64 %3 +} From 60682a0b85d5bdd74dd7e4a46bc88c901d884c75 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 26 May 2022 11:29:59 -0400 Subject: [PATCH 632/908] [InstCombine] add tests for icmp with sdiv operand; NFC Adapted from unsigned division tests from: ea6171c108c47c1ee4863 --- .../InstCombine/icmp-div-constant.ll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index e9b26e6b4bca62..9d7f25fecfd2c4 100644 --- 
a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -284,3 +284,82 @@ define i1 @udiv_eq_umax_use(i32 %x, i32 %y) { %r = icmp eq i32 %d, -1 ret i1 %r } + +define i1 @sdiv_eq_smin(i8 %x, i8 %y) { +; CHECK-LABEL: @sdiv_eq_smin( +; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], -128 +; CHECK-NEXT: ret i1 [[R]] +; + %d = sdiv i8 %x, %y + %r = icmp eq i8 %d, -128 + ret i1 %r +} + +define <2 x i1> @sdiv_ne_smin(<2 x i5> %x, <2 x i5> %y) { +; CHECK-LABEL: @sdiv_ne_smin( +; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i5> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[D]], <i5 -16, i5 -16> +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %d = sdiv <2 x i5> %x, %y + %r = icmp ne <2 x i5> %d, <i5 -16, i5 -16> + ret <2 x i1> %r +} + +define i1 @sdiv_eq_small(i8 %x, i8 %y) { +; CHECK-LABEL: @sdiv_eq_small( +; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], -127 +; CHECK-NEXT: ret i1 [[R]] +; + %d = sdiv i8 %x, %y + %r = icmp eq i8 %d, -127 + ret i1 %r +} + +define i1 @sdiv_ne_big(i8 %x, i8 %y) { +; CHECK-LABEL: @sdiv_ne_big( +; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[D]], 127 +; CHECK-NEXT: ret i1 [[R]] +; + %d = sdiv i8 %x, %y + %r = icmp ne i8 %d, 127 + ret i1 %r +} + +define i1 @sdiv_eq_not_big(i8 %x, i8 %y) { +; CHECK-LABEL: @sdiv_eq_not_big( +; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], 100 +; CHECK-NEXT: ret i1 [[R]] +; + %d = sdiv i8 %x, %y + %r = icmp eq i8 %d, 100 + ret i1 %r +} + +define i1 @sdiv_ult_smin(i8 %x, i8 %y) { +; CHECK-LABEL: @sdiv_ult_smin( +; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[D]], -1 +; CHECK-NEXT: ret i1 [[R]] +; + %d = sdiv i8 %x, %y + %r = icmp ult i8 %d, 128 + ret i1 %r +} + +define i1 @sdiv_eq_smin_use(i32 %x, i32 %y) { +; CHECK-LABEL: @sdiv_eq_smin_use( 
+; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i32 [[D]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[D]], -2147483648 +; CHECK-NEXT: ret i1 [[R]] +; + %d = sdiv i32 %x, %y + call void @use(i32 %d) + %r = icmp eq i32 %d, -2147483648 + ret i1 %r +} From ed5be1523fe1650a0e52e526eb430011fac60434 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 26 May 2022 12:02:30 -0400 Subject: [PATCH 633/908] [InstCombine] reduce code duplication in icmp+div folds; NFC --- .../InstCombine/InstCombineCompares.cpp | 113 +++++++++--------- 1 file changed, 55 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f5f65cf3bb2d8..c64463da9a16ac 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2427,6 +2427,11 @@ Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp, Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div, const APInt &C) { + ICmpInst::Predicate Pred = Cmp.getPredicate(); + Value *X = Div->getOperand(0); + Value *Y = Div->getOperand(1); + Type *Ty = Div->getType(); + // Fold: icmp pred ([us]div X, C2), C -> range test // Fold this div into the comparison, producing a range check. // Determine, based on the divide type, what the range is being @@ -2434,7 +2439,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. const APInt *C2; - if (!match(Div->getOperand(1), m_APInt(C2))) + if (!match(Y, m_APInt(C2))) return nullptr; // FIXME: If the operand types don't match the type of the divide @@ -2467,8 +2472,6 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, // instruction that we're folding. bool ProdOV = (DivIsSigned ? 
Prod.sdiv(*C2) : Prod.udiv(*C2)) != C; - ICmpInst::Predicate Pred = Cmp.getPredicate(); - // If the division is known to be exact, then there is no remainder from the // divide, so the covered range size is unit, otherwise it is the divisor. APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2; @@ -2483,7 +2486,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, int LoOverflow = 0, HiOverflow = 0; APInt LoBound, HiBound; - if (!DivIsSigned) { // udiv + if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) LoBound = Prod; HiOverflow = LoOverflow = ProdOV; @@ -2498,7 +2501,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, LoBound = -(RangeSize - 1); HiBound = RangeSize; } else if (C.isStrictlyPositive()) { // (X / pos) op pos - LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) + LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) HiOverflow = LoOverflow = ProdOV; if (!HiOverflow) HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true); @@ -2518,18 +2521,19 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, // e.g. X/-5 op 0 --> [-4, 5) LoBound = RangeSize + 1; HiBound = -RangeSize; - if (HiBound == *C2) { // -INTMIN = INTMIN - HiOverflow = 1; // [INTMIN+1, overflow) - HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN + if (HiBound == *C2) { // -INTMIN = INTMIN + HiOverflow = 1; // [INTMIN+1, overflow) + HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN } } else if (C.isStrictlyPositive()) { // (X / neg) op pos // e.g. X/-5 op 3 --> [-19, -14) HiBound = Prod + 1; HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) - LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; - } else { // (X / neg) op neg - LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) + LoOverflow = + addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1 : 0; + } else { // (X / neg) op neg + LoBound = Prod; // e.g. 
X/-5 op -3 --> [15, 20) LoOverflow = HiOverflow = ProdOV; if (!HiOverflow) HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true); @@ -2539,54 +2543,47 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, Pred = ICmpInst::getSwappedPredicate(Pred); } - Value *X = Div->getOperand(0); switch (Pred) { - default: llvm_unreachable("Unhandled icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder.getFalse()); - if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, - ConstantInt::get(Div->getType(), LoBound)); - if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, - ConstantInt::get(Div->getType(), HiBound)); - return replaceInstUsesWith( - Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true)); - case ICmpInst::ICMP_NE: - if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder.getTrue()); - if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, - ConstantInt::get(Div->getType(), LoBound)); - if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, - ConstantInt::get(Div->getType(), HiBound)); - return replaceInstUsesWith(Cmp, - insertRangeTest(X, LoBound, HiBound, - DivIsSigned, false)); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - if (LoOverflow == +1) // Low bound is greater than input range. - return replaceInstUsesWith(Cmp, Builder.getTrue()); - if (LoOverflow == -1) // Low bound is less than input range. - return replaceInstUsesWith(Cmp, Builder.getFalse()); - return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound)); - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - if (HiOverflow == +1) // High bound greater than input range. - return replaceInstUsesWith(Cmp, Builder.getFalse()); - if (HiOverflow == -1) // High bound less than input range. 
- return replaceInstUsesWith(Cmp, Builder.getTrue()); - if (Pred == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, - ConstantInt::get(Div->getType(), HiBound)); - return new ICmpInst(ICmpInst::ICMP_SGE, X, - ConstantInt::get(Div->getType(), HiBound)); + default: + llvm_unreachable("Unhandled icmp predicate!"); + case ICmpInst::ICMP_EQ: + if (LoOverflow && HiOverflow) + return replaceInstUsesWith(Cmp, Builder.getFalse()); + if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, + X, ConstantInt::get(Ty, LoBound)); + if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + X, ConstantInt::get(Ty, HiBound)); + return replaceInstUsesWith( + Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true)); + case ICmpInst::ICMP_NE: + if (LoOverflow && HiOverflow) + return replaceInstUsesWith(Cmp, Builder.getTrue()); + if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + X, ConstantInt::get(Ty, LoBound)); + if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, + X, ConstantInt::get(Ty, HiBound)); + return replaceInstUsesWith( + Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, false)); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + if (LoOverflow == +1) // Low bound is greater than input range. + return replaceInstUsesWith(Cmp, Builder.getTrue()); + if (LoOverflow == -1) // Low bound is less than input range. + return replaceInstUsesWith(Cmp, Builder.getFalse()); + return new ICmpInst(Pred, X, ConstantInt::get(Ty, LoBound)); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + if (HiOverflow == +1) // High bound greater than input range. + return replaceInstUsesWith(Cmp, Builder.getFalse()); + if (HiOverflow == -1) // High bound less than input range. 
+ return replaceInstUsesWith(Cmp, Builder.getTrue()); + if (Pred == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, HiBound)); + return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, HiBound)); } return nullptr; From 49f8b0513763c2621d8d872ec19a33cc7a00d4c8 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 26 May 2022 12:21:48 -0400 Subject: [PATCH 634/908] [InstCombine] fold icmp equality with sdiv and SMIN This extends the fold from D126410 / 3952c905ef08 to allow for the only case where it works with signed division: https://alive2.llvm.org/ce/z/k7_ypu (X s/ Y) == SMIN --> (X == SMIN) && (Y == 1) (X s/ Y) != SMIN --> (X != SMIN) || (Y != 1) This is another improvement based on #55695. --- .../InstCombine/InstCombineCompares.cpp | 29 +++++++++++-------- .../InstCombine/icmp-div-constant.ll | 20 ++++++++++--- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c64463da9a16ac..1df61f0624bc2c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2388,17 +2388,6 @@ Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp, Value *Y = UDiv->getOperand(1); Type *Ty = UDiv->getType(); - // If the compare constant is bigger than UMAX/2 (negative), there's only one - // pair of values that satisfies an equality check, so eliminate the division: - // (X u/ Y) == C --> (X == C) && (Y == 1) - // (X u/ Y) != C --> (X != C) || (Y != 1) - if (Cmp.isEquality() && UDiv->hasOneUse() && C.isSignBitSet()) { - Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C)); - Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1)); - auto Logic = Pred == ICmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or; - return BinaryOperator::Create(Logic, XBig, YOne); - } - const APInt *C2; if (!match(X, m_APInt(C2))) return nullptr; @@ -2431,6 +2420,23 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, Value *X = Div->getOperand(0); Value *Y = Div->getOperand(1); Type *Ty = Div->getType(); + bool DivIsSigned = Div->getOpcode() == Instruction::SDiv; + + // If unsigned division and the compare constant is bigger than + // UMAX/2 (negative), there's only one pair of values that satisfies an + // equality check, so eliminate the division: + // (X u/ Y) == C --> (X == C) && (Y == 1) + // (X u/ Y) != C --> (X != C) || (Y != 1) + // Similarly, if signed division and the compare constant is exactly SMIN: + // (X s/ Y) == SMIN --> (X == SMIN) && (Y == 1) + // (X s/ Y) != SMIN --> (X != SMIN) || (Y != 1) + if (Cmp.isEquality() && Div->hasOneUse() && C.isSignBitSet() && + (!DivIsSigned || C.isMinSignedValue())) { + Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C)); + Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1)); + auto Logic = Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; + return BinaryOperator::Create(Logic, XBig, YOne); + } // Fold: icmp pred ([us]div X, C2), C -> range test // Fold this div into the comparison, producing a range check. 
@@ -2450,7 +2456,6 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, // (x /u C2) <u C. Simply casting the operands and result won't // work. :( The if statement below tests that condition and bails // if it finds it. - bool DivIsSigned = Div->getOpcode() == Instruction::SDiv; if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned()) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index 9d7f25fecfd2c4..9348af51f4ae67 100644 --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -287,8 +287,9 @@ define i1 @udiv_eq_umax_use(i32 %x, i32 %y) { define i1 @sdiv_eq_smin(i8 %x, i8 %y) { ; CHECK-LABEL: @sdiv_eq_smin( -; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[D]], -128 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[Y:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i1 [[R]] ; %d = sdiv i8 %x, %y @@ -298,8 +299,9 @@ define i1 @sdiv_eq_smin(i8 %x, i8 %y) { define <2 x i1> @sdiv_ne_smin(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @sdiv_ne_smin( -; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i5> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[D]], <i5 -16, i5 -16> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], <i5 -16, i5 -16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], <i5 1, i5 1> +; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %d = sdiv <2 x i5> %x, %y @@ -307,6 +309,8 @@ define <2 x i1> @sdiv_ne_smin(<2 x i5> %x, <2 x i5> %y) { ret <2 x i1> %r } +; negative test - must be SMIN + define i1 @sdiv_eq_small(i8 %x, i8 %y) { ; CHECK-LABEL: @sdiv_eq_small( ; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] @@ -318,6 +322,8 @@ define i1 @sdiv_eq_small(i8 %x, i8 %y) { ret i1 %r } +; negative test - must be SMIN + define i1 @sdiv_ne_big(i8 %x, i8 %y) { ; CHECK-LABEL: @sdiv_ne_big( ; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] @@ -329,6 +335,8 @@ define i1 @sdiv_ne_big(i8 %x, i8 %y) { ret i1 
%r } +; negative test - must be SMIN + define i1 @sdiv_eq_not_big(i8 %x, i8 %y) { ; CHECK-LABEL: @sdiv_eq_not_big( ; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] @@ -340,6 +348,8 @@ define i1 @sdiv_eq_not_big(i8 %x, i8 %y) { ret i1 %r } +; negative test - must be equality predicate + define i1 @sdiv_ult_smin(i8 %x, i8 %y) { ; CHECK-LABEL: @sdiv_ult_smin( ; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] @@ -351,6 +361,8 @@ define i1 @sdiv_ult_smin(i8 %x, i8 %y) { ret i1 %r } +; negative test - extra use + define i1 @sdiv_eq_smin_use(i32 %x, i32 %y) { ; CHECK-LABEL: @sdiv_eq_smin_use( ; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]] From 899af021f48fa6f8c7acb51343faf7c75b1041c5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 26 May 2022 14:03:51 -0400 Subject: [PATCH 635/908] [InstCombine] add tests for mul with sign-splat operand; NFC --- llvm/test/Transforms/InstCombine/mul.ll | 47 +++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 43957a0e2a0a19..47fd5fc87b10ea 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -462,6 +462,52 @@ define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) { ret <2 x i32> %e } +define i32 @signsplat_mul(i32 %x) { +; CHECK-LABEL: @signsplat_mul( +; CHECK-NEXT: [[ASH:%.*]] = ashr i32 [[X:%.*]], 31 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ASH]], 42 +; CHECK-NEXT: ret i32 [[MUL]] +; + %ash = ashr i32 %x, 31 + %mul = mul i32 %ash, 42 + ret i32 %mul +} + +define <2 x i32> @signsplat_mul_vec(<2 x i32> %x) { +; CHECK-LABEL: @signsplat_mul_vec( +; CHECK-NEXT: [[ASH:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <2 x i32> [[ASH]], +; CHECK-NEXT: ret <2 x i32> [[MUL]] +; + %ash = ashr <2 x i32> %x, + %mul = mul <2 x i32> %ash, + ret <2 x i32> %mul +} + +define i32 @not_signsplat_mul(i32 %x) { +; CHECK-LABEL: 
@not_signsplat_mul( +; CHECK-NEXT: [[ASH:%.*]] = ashr i32 [[X:%.*]], 30 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ASH]], 42 +; CHECK-NEXT: ret i32 [[MUL]] +; + %ash = ashr i32 %x, 30 + %mul = mul i32 %ash, 42 + ret i32 %mul +} + +define i32 @signsplat_mul_use(i32 %x) { +; CHECK-LABEL: @signsplat_mul_use( +; CHECK-NEXT: [[ASH:%.*]] = ashr i32 [[X:%.*]], 31 +; CHECK-NEXT: call void @use32(i32 [[ASH]]) +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ASH]], -42 +; CHECK-NEXT: ret i32 [[MUL]] +; + %ash = ashr i32 %x, 31 + call void @use32(i32 %ash) + %mul = mul i32 %ash, -42 + ret i32 %mul +} + define i32 @test18(i32 %A, i32 %B) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: ret i32 0 @@ -1199,6 +1245,7 @@ define i32 @mulnot(i32 %a0) { %mul = mul i32 %add, -4 ret i32 %mul } + define i32 @mulnot_extrause(i32 %a0) { ; CHECK-LABEL: @mulnot_extrause( ; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A0:%.*]], -1 From c4c750058f3e3b85c24a9e3ad356b76290e7c259 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 26 May 2022 14:28:05 -0400 Subject: [PATCH 636/908] [InstCombine] fold mul of signbit directly to X < 0 ? Y : 0 This is effectively NFC (intentionally no test diffs) because we already have the related fold that converts the 'and' pattern to select. So this is just an efficiency improvement. --- .../InstCombine/InstCombineMulDivRem.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index ea40f088c130f9..0441203e249a24 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -363,17 +363,15 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { return SelectInst::Create(X, NegC, ConstantInt::getNullValue(I.getType())); } - // (lshr X, 31) * Y --> (ashr X, 31) & Y - // Y * (lshr X, 31) --> (ashr X, 31) & Y + // (lshr X, 31) * Y --> (X < 0) ? 
Y : 0 // TODO: We are not checking one-use because the elimination of the multiply // is better for analysis? - // TODO: Should we canonicalize to '(X < 0) ? Y : 0' instead? That would be - // more similar to what we're doing above. const APInt *C; - if (match(Op0, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1) - return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op1); - if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1) - return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0); + if (match(&I, m_c_Mul(m_LShr(m_Value(X), m_APInt(C)), m_Value(Y))) && + *C == C->getBitWidth() - 1) { + Value *IsNeg = Builder.CreateIsNeg(X, "isneg"); + return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(I.getType())); + } // ((ashr X, 31) | 1) * X --> abs(X) // X * ((ashr X, 31) | 1) --> abs(X) From 35b0955aa59d3cf39678046ccff3728f6b7a91b0 Mon Sep 17 00:00:00 2001 From: William Huang Date: Thu, 26 May 2022 20:15:00 +0000 Subject: [PATCH 637/908] [ValueTracking] Added support to deduce PHI Nodes values being a power of 2 Add Value Tracking support to deduce induction variable being a power of 2, allowing urem optimizations Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D126018 --- llvm/lib/Analysis/ValueTracking.cpp | 19 +++++++++++++++++++ .../ValueTracking/known-power-of-two-urem.ll | 6 ++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index c7586f271df103..473b3cb74be832 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2128,6 +2128,25 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, } } + // A PHI node is power of two if all incoming values are power of two. + if (const PHINode *PN = dyn_cast(V)) { + Query RecQ = Q; + + // Recursively check all incoming values. 
Limit recursion to 2 levels, so + // that search complexity is limited to number of operands^2. + unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); + return llvm::all_of(PN->operands(), [&](const Use &U) { + // Value is power of 2 if it is coming from PHI node itself by induction. + if (U.get() == PN) + return true; + + // Change the context instruction to the incoming block where it is + // evaluated. + RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); + return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); + }); + } + // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll index 880e14dae8778b..e25c769d6052b6 100644 --- a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll +++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll @@ -19,7 +19,8 @@ define i64 @known_power_of_two_urem_phi(i64 %size, i1 %cmp, i1 %cmp1) { ; CHECK-NEXT: br label [[COND_END]] ; CHECK: cond.end: ; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 4096, [[ENTRY:%.*]] ], [ [[PHI]], [[COND_TRUE_END]] ] -; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI1]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[PHI1]], -1 +; CHECK-NEXT: [[UREM:%.*]] = and i64 [[TMP0]], [[SIZE:%.*]] ; CHECK-NEXT: ret i64 [[UREM]] ; entry: @@ -56,7 +57,8 @@ define i64 @known_power_of_two_urem_nested_expr(i64 %size, i1 %cmp, i1 %cmp1, i6 ; CHECK-NEXT: br label [[COND_END]] ; CHECK: cond.end: ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[COND_FALSE]] ], [ [[TMP1]], [[COND_TRUE]] ], [ [[PHI]], [[COND_END]] ] -; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[PHI]], -1 +; CHECK-NEXT: [[UREM:%.*]] = and i64 [[TMP2]], [[SIZE:%.*]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp 
ult i64 [[UREM]], 10 ; CHECK-NEXT: br i1 [[CMP2]], label [[COND_END]], label [[END:%.*]] ; CHECK: end: From 966427b8471d204b3df1412041af9aa9702db869 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 26 May 2022 13:11:20 -0700 Subject: [PATCH 638/908] [lld][WebAssembly] Check for command line flags with missing arguments I'm really not sure how this was overlooked when we first ported lld to Wasm. The upstream code in the ELF backend has these two lines but for some reason they never made it into the Wasm version. Differential Revision: https://reviews.llvm.org/D126497 --- lld/test/wasm/driver.s | 3 +++ lld/wasm/Driver.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/lld/test/wasm/driver.s b/lld/test/wasm/driver.s index 46d59e362a6a8c..6247eda93b5ba9 100644 --- a/lld/test/wasm/driver.s +++ b/lld/test/wasm/driver.s @@ -5,6 +5,9 @@ _start: .functype _start () -> () end_function +# RUN: not wasm-ld %t -o 2>&1 | FileCheck --check-prefix=NO_O_VAL %s +# NO_O_VAL: error: -o: missing argument + # RUN: not wasm-ld -o %t.exe 2>&1 | FileCheck -check-prefix=IN %s # IN: error: no input files diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 1dc6b9dd67aca6..ca32d9a49da0b4 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -184,6 +184,9 @@ opt::InputArgList WasmOptTable::parse(ArrayRef argv) { args = this->ParseArgs(vec, missingIndex, missingCount); handleColorDiagnostics(args); + if (missingCount) + error(Twine(args.getArgString(missingIndex)) + ": missing argument"); + for (auto *arg : args.filtered(OPT_UNKNOWN)) error("unknown argument: " + arg->getAsString(args)); return args; From 5221875a957da927a889a705276d4e073ff737b1 Mon Sep 17 00:00:00 2001 From: owenca Date: Wed, 25 May 2022 19:27:54 -0700 Subject: [PATCH 639/908] [clang-format] Fix an invalid code generation in RemoveBracesLLVM Fixes #55706. 
Differential Revision: https://reviews.llvm.org/D126438 --- clang/lib/Format/UnwrappedLineParser.cpp | 11 +++++++---- clang/unittests/Format/FormatTest.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 0611e9eace476f..fe57141407c0d8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2587,10 +2587,13 @@ FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, FormatTok->setFinalizedType(TT_ElseLBrace); ElseLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); - if (parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, - /*MunchSemi=*/true, - KeepElseBraces) == IfStmtKind::IfOnly) { - Kind = IfStmtKind::IfElseIf; + const IfStmtKind ElseBlockKind = + parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, + /*MunchSemi=*/true, KeepElseBraces); + if ((ElseBlockKind == IfStmtKind::IfOnly || + ElseBlockKind == IfStmtKind::IfElseIf) && + FormatTok->is(tok::kw_else)) { + KeepElseBraces = true; } addUnwrappedLine(); } else if (FormatTok->is(tok::kw_if)) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 44ef882fe7db4c..c1257059c86492 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -25385,6 +25385,30 @@ TEST_F(FormatTest, RemoveBraces) { "}", Style); + verifyFormat("if (a)\n" + " if (b)\n" + " c;\n" + " else {\n" + " if (d)\n" + " e;\n" + " }\n" + "else\n" + " f;", + Style); + + verifyFormat("if (a)\n" + " if (b)\n" + " c;\n" + " else {\n" + " if (d)\n" + " e;\n" + " else if (f)\n" + " g;\n" + " }\n" + "else\n" + " h;", + Style); + verifyFormat("if (a)\n" " b;\n" "else if (c)\n" From 198815e18dad31a9ff0f6c1b30f356db2fefe300 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 26 May 2022 10:32:04 -0700 Subject: [PATCH 640/908] 
[lld][WebAssembly] Avoid importing/exporting hidden symbols in shared libraries We have some special handling for weakly defined symbols where we both import and export them, but this is not needed for hidden symbols which should never be imported or exported. See https://github.com/emscripten-core/emscripten/pull/16972 This should also help with: https://github.com/emscripten-core/emscripten/issues/15487 Differential Revision: https://reviews.llvm.org/D126491 --- lld/test/wasm/shared-weak-symbols.s | 36 +++++++++++++++++++++++++---- lld/wasm/Symbols.cpp | 8 +++---- lld/wasm/Writer.cpp | 3 ++- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/lld/test/wasm/shared-weak-symbols.s b/lld/test/wasm/shared-weak-symbols.s index 00b5d3a9212b3e..f22aaa5431539e 100644 --- a/lld/test/wasm/shared-weak-symbols.s +++ b/lld/test/wasm/shared-weak-symbols.s @@ -3,10 +3,9 @@ # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump -d %t.wasm | FileCheck %s -check-prefix=ASM -# Verify the weakly defined fuctions (weak_func) are both -# imported and exported, and that internal usage (direct call) -# always uses the imported version. - +# Verify the weakly defined fuctions (weak_func) are both imported and exported, +# and that internal usage (direct call) always uses the imported version. +# Hidden functions, even if weak, should not be imported or exported. 
.globl weak_func .weak weak_func @@ -15,12 +14,23 @@ weak_func: i32.const 0 end_function +.globl hidden_weak_func +.hidden hidden_weak_func +.weak hidden_weak_func +hidden_weak_func: + .functype hidden_weak_func () -> (i32) + i32.const 42 + end_function + .globl call_weak call_weak: # ASM: : .functype call_weak () -> (i32) call weak_func # ASM: 10 80 80 80 80 00 call 0 + drop + call hidden_weak_func +# ASM: 10 84 80 80 80 00 call 4 end_function # ASM-NEXT: 0b end @@ -45,6 +55,20 @@ call_weak: # CHECK-NEXT: Field: weak_func # CHECK-NEXT: Kind: FUNCTION # CHECK-NEXT: SigIndex: 0 +# CHECK-NEXT: - Type: FUNCTION + +# CHECK: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: __wasm_call_ctors +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 1 +# CHECK-NEXT: - Name: weak_func +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 3 +# CHECK-NEXT: - Name: call_weak +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 5 +# CHECK-NEXT: - Type: START # CHECK: - Type: CUSTOM # CHECK-NEXT: Name: name @@ -57,3 +81,7 @@ call_weak: # CHECK-NEXT: Name: __wasm_apply_data_relocs # CHECK-NEXT: - Index: 3 # CHECK-NEXT: Name: weak_func +# CHECK-NEXT: - Index: 4 +# CHECK-NEXT: Name: hidden_weak_func +# CHECK-NEXT: - Index: 5 +# CHECK-NEXT: Name: call_weak diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 2ad21c8947f365..e0670cea6425e3 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -217,15 +217,15 @@ void Symbol::setHidden(bool isHidden) { } bool Symbol::isExported() const { + if (!isDefined() || isLocal()) + return false; + // Shared libraries must export all weakly defined symbols // in case they contain the version that will be chosen by // the dynamic linker. 
- if (config->shared && isLive() && isDefined() && isWeak()) + if (config->shared && isLive() && isWeak() && !isHidden()) return true; - if (!isDefined() || isLocal()) - return false; - if (config->exportAll || (config->exportDynamic && !isHidden())) return true; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 9e3e32c183f19e..4e254717e3b9c4 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -592,7 +592,8 @@ static bool shouldImport(Symbol *sym) { // When a symbol is weakly defined in a shared library we need to allow // it to be overridden by another module so need to both import // and export the symbol. - if (config->shared && sym->isDefined() && sym->isWeak()) + if (config->shared && sym->isWeak() && !sym->isUndefined() && + !sym->isHidden()) return true; if (!sym->isUndefined()) return false; From eb1b1f2411bf3c4b7108493a527cd001af3d7b56 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 May 2022 16:52:16 -0400 Subject: [PATCH 641/908] [libc++][NFC] Fix whitespace --- libcxx/src/memory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/libcxx/src/memory.cpp b/libcxx/src/memory.cpp index 5ee9cbff93e5c4..faf3189e5ee274 100644 --- a/libcxx/src/memory.cpp +++ b/libcxx/src/memory.cpp @@ -79,7 +79,6 @@ __shared_weak_count::__release_shared() noexcept if (__shared_count::__release_shared()) __release_weak(); } - #endif // _LIBCPP_SHARED_PTR_DEFINE_LEGACY_INLINE_FUNCTIONS void From 5bf44aa434ffe4d2e49806be20683e32135f4e16 Mon Sep 17 00:00:00 2001 From: owenca Date: Wed, 25 May 2022 00:55:03 -0700 Subject: [PATCH 642/908] [clang-format][NFC] Refactor UnwrappedLineParser::parseBlock() Differential Revision: https://reviews.llvm.org/D126358 --- clang/lib/Format/UnwrappedLineParser.cpp | 41 +++++++++++++----------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index fe57141407c0d8..9a5d85cead8891 100644 --- 
a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -871,30 +871,33 @@ UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock( return IfKind; } - if (SimpleBlock && !KeepBraces) { + auto RemoveBraces = [=]() mutable { + if (KeepBraces || !SimpleBlock) + return false; assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); assert(FormatTok->is(tok::r_brace)); + const bool WrappedOpeningBrace = !Tok->Previous; + if (WrappedOpeningBrace && FollowedByComment) + return false; const FormatToken *Previous = Tokens->getPreviousToken(); assert(Previous); - if (Previous->isNot(tok::r_brace) || Previous->Optional) { - assert(!CurrentLines->empty()); - const FormatToken *OpeningBrace = Tok; - if (!Tok->Previous) { // Wrapped l_brace. - if (FollowedByComment) { - KeepBraces = true; - } else { - assert(Index > 0); - --Index; // The line above the wrapped l_brace. - OpeningBrace = nullptr; - } - } - if (!KeepBraces && mightFitOnOneLine(CurrentLines->back()) && - (Tok->is(TT_ElseLBrace) || - mightFitOnOneLine((*CurrentLines)[Index], OpeningBrace))) { - Tok->MatchingParen = FormatTok; - FormatTok->MatchingParen = Tok; - } + if (Previous->is(tok::r_brace) && !Previous->Optional) + return false; + assert(!CurrentLines->empty()); + if (!mightFitOnOneLine(CurrentLines->back())) + return false; + if (Tok->is(TT_ElseLBrace)) + return true; + if (WrappedOpeningBrace) { + assert(Index > 0); + --Index; // The line above the wrapped l_brace. + Tok = nullptr; } + return mightFitOnOneLine((*CurrentLines)[Index], Tok); + }; + if (RemoveBraces()) { + Tok->MatchingParen = FormatTok; + FormatTok->MatchingParen = Tok; } size_t PPEndHash = computePPHash(); From cc871cf6b50f6a76f2083d192e2254a16832224b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 26 May 2022 15:38:04 -0500 Subject: [PATCH 643/908] [Polly][Test] Fix race condition while printing dot files. 
The tests dot-scops.ll and dot-scops-npm.ll both wrote to the same file scops.func.dot. If they are executed in parallel they will race for the file. Fix by renaming func to func_npm in dot-scops-npm.ll so this test writes to scops.func_npm.dot. Long-term, we will probably pass a file name (prefix) to the printer pass such that we can use the guaranteed-unique LIT %t placeholder in tests. --- polly/test/ScopDetect/dot-scops-npm.ll | 13 +++++++++---- polly/test/ScopDetect/dot-scops.ll | 5 +++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/polly/test/ScopDetect/dot-scops-npm.ll b/polly/test/ScopDetect/dot-scops-npm.ll index 3911d0fb97cff5..c426d0209694db 100644 --- a/polly/test/ScopDetect/dot-scops-npm.ll +++ b/polly/test/ScopDetect/dot-scops-npm.ll @@ -1,15 +1,20 @@ ; RUN: opt %loadNPMPolly "-passes=polly-scop-printer" -disable-output < %s -; RUN: FileCheck %s -input-file=scops.func.dot +; RUN: FileCheck %s -input-file=scops.func_npm.dot ; ; Check that the ScopPrinter does not crash. ; ScopPrinter needs the ScopDetection pass, which should depend on ; ScalarEvolution transitively. +; +; FIXME: polly-scop-printer always prints to the same hardcoded filename +; scops..dot. If there is another test with the same +; function name and printing a dot file there will be a race condition +; when running tests in parallel. 
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define void @func(i32 %n, i32 %m, double* noalias nonnull %A) { -; CHECK: digraph "Scop Graph for 'func' function" -; CHECK-NEXT: label="Scop Graph for 'func' function" +define void @func_npm(i32 %n, i32 %m, double* noalias nonnull %A) { +; CHECK: digraph "Scop Graph for 'func_npm' function" +; CHECK-NEXT: label="Scop Graph for 'func_npm' function" ; CHECK: Node0x[[EntryID:.*]] [shape=record,label="{entry:\l br label %outer.for\l}"]; ; CHECK-NEXT: Node0x[[EntryID]] -> Node0x[[OUTER_FOR_ID:.*]]; ; CHECK-NEXT: Node0x[[OUTER_FOR_ID]] [shape=record,label="{outer.for: diff --git a/polly/test/ScopDetect/dot-scops.ll b/polly/test/ScopDetect/dot-scops.ll index 3b26d6c9464e27..590f222c4690b5 100644 --- a/polly/test/ScopDetect/dot-scops.ll +++ b/polly/test/ScopDetect/dot-scops.ll @@ -3,6 +3,11 @@ ; Check that the ScopPrinter does not crash. ; ScopPrinter needs the ScopDetection pass, which should depend on ; ScalarEvolution transitively. +; +; FIXME: -dot-scops always prints to the same hardcoded filename +; scops..dot. If there is another test with the same +; function name and printing a dot file there will be a race condition +; when running tests in parallel. 
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" From da3b8200e59987fc21bfb0741f05af4cba8b9bd3 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 24 May 2022 00:02:05 +0000 Subject: [PATCH 644/908] Apply clang-tidy fixes for performance-unnecessary-value-param in Bufferize.cpp (NFC) --- mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 64152273a17c6d..4f621979a5e35a 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -152,7 +152,7 @@ struct FinalizingBufferizePass }; static BufferizationOptions::LayoutMapOption -parseLayoutMapOption(std::string s) { +parseLayoutMapOption(const std::string &s) { if (s == "fully-dynamic-layout-map") return BufferizationOptions::LayoutMapOption::FullyDynamicLayoutMap; if (s == "identity-layout-map") From c067a9dee5c1c115f6b11ec12f5719613ef08391 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 24 May 2022 00:20:55 +0000 Subject: [PATCH 645/908] Apply clang-tidy fixes for llvm-else-after-return in OpenMPDialect.cpp (NFC) --- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 23a25fc4260b8d..19615c06b08dc4 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -985,7 +985,8 @@ LogicalResult CancelOp::verify() { if (cast(parentOp).nowaitAttr()) { return emitError() << "A worksharing construct that is canceled " << "must not have a nowait clause"; - } else if (cast(parentOp).ordered_valAttr()) { + } + if (cast(parentOp).ordered_valAttr()) { return emitError() << "A worksharing construct that is canceled " << "must not have an ordered 
clause"; } From 0f64945352f7df4eb2c884c34a822952b6b69bff Mon Sep 17 00:00:00 2001 From: Sebastian Peryt Date: Thu, 26 May 2022 14:19:13 -0700 Subject: [PATCH 646/908] [DOC] Refactor Functions section in LangRef This change is a small refactor of Functions section to update placement of define syntax. Reviewed By: RKSimon Differential revision: https://reviews.llvm.org/D125831 --- llvm/docs/LangRef.rst | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 46549e1175ae64..8d5a5f0e316331 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -782,14 +782,31 @@ an optional ``unnamed_addr`` attribute, a return type, an optional :ref:`parameter attribute ` for the return type, a function name, a (possibly empty) argument list (each with optional :ref:`parameter attributes `), optional :ref:`function attributes `, -an optional address space, an optional section, an optional alignment, -an optional :ref:`comdat `, +an optional address space, an optional section, an optional partition, +an optional alignment, an optional :ref:`comdat `, an optional :ref:`garbage collector name `, an optional :ref:`prefix `, an optional :ref:`prologue `, an optional :ref:`personality `, an optional list of attached :ref:`metadata `, an opening curly brace, a list of basic blocks, and a closing curly brace. +Syntax:: + + define [linkage] [PreemptionSpecifier] [visibility] [DLLStorageClass] + [cconv] [ret attrs] + @ ([argument list]) + [(unnamed_addr|local_unnamed_addr)] [AddrSpace] [fn Attrs] + [section "name"] [partition "name"] [comdat [($name)]] [align N] + [gc] [prefix Constant] [prologue Constant] [personality Constant] + (!name !N)* { ... 
} + +The argument list is a comma separated sequence of arguments where each +argument is of the following form: + +Syntax:: + + [parameter Attrs] [name] + LLVM function declarations consist of the "``declare``" keyword, an optional :ref:`linkage type `, an optional :ref:`visibility style `, an optional :ref:`DLL storage class `, an @@ -837,24 +854,6 @@ not be significant within the module. If an explicit address space is not given, it will default to the program address space from the :ref:`datalayout string`. -Syntax:: - - define [linkage] [PreemptionSpecifier] [visibility] [DLLStorageClass] - [cconv] [ret attrs] - @ ([argument list]) - [(unnamed_addr|local_unnamed_addr)] [AddrSpace] [fn Attrs] - [section "name"] [partition "name"] [comdat [($name)]] [align N] - [gc] [prefix Constant] [prologue Constant] [personality Constant] - (!name !N)* { ... } - -The argument list is a comma separated sequence of arguments where each -argument is of the following form: - -Syntax:: - - [parameter Attrs] [name] - - .. _langref_aliases: Aliases From d1c5da34a7eab1906112963d53bdc310a47a243d Mon Sep 17 00:00:00 2001 From: Sebastian Peryt Date: Thu, 26 May 2022 14:27:25 -0700 Subject: [PATCH 647/908] [DOC] Improve LangRef description of declare This patch fixes formatting inside Functions section of declare by making it consistent with the way define is written. Fixes #39844 Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D125581 --- llvm/docs/LangRef.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8d5a5f0e316331..aae95fb83d9a20 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -817,6 +817,14 @@ empty list of arguments, an optional alignment, an optional :ref:`garbage collector name `, an optional :ref:`prefix `, and an optional :ref:`prologue `. 
+Syntax:: + + declare [linkage] [visibility] [DLLStorageClass] + [cconv] [ret attrs] + @ ([argument list]) + [(unnamed_addr|local_unnamed_addr)] [align N] [gc] + [prefix Constant] [prologue Constant] + A function definition contains a list of basic blocks, forming the CFG (Control Flow Graph) for the function. Each basic block may optionally start with a label (giving the basic block a symbol table entry), contains a list of instructions, From 056dec5dc821114d5b6cfee482a1c7a49bb41902 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 26 May 2022 14:41:03 -0700 Subject: [PATCH 648/908] [Driver][test] Change -target i386-unknown-linux-gnu to --target=i386 and remove unused -o %t.o Use a generic ELF triple to demonstrate that these apply to all ELF OSes. Also migrate away from the legacy `-target ` spelling. --- clang/test/Driver/x86-mcrc32.c | 40 ++-- clang/test/Driver/x86-target-features.c | 246 ++++++++++++------------ 2 files changed, 143 insertions(+), 143 deletions(-) diff --git a/clang/test/Driver/x86-mcrc32.c b/clang/test/Driver/x86-mcrc32.c index 1b589ca49ab4f1..dc78d12afd398d 100644 --- a/clang/test/Driver/x86-mcrc32.c +++ b/clang/test/Driver/x86-mcrc32.c @@ -1,34 +1,34 @@ // Test interaction between -mcrc32 and other SIMD ISA options on x86 -// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=i386 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=x86_64 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang 
--target=i386 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=x86_64 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=i386 -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=x86_64 -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=i386 -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=x86_64 -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: not %clang -target i386-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: not %clang -target x86_64-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=i386 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=x86_64 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=i386 -msse4.2 
-mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=x86_64 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=i386 -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=x86_64 -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=i386 -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang --target=x86_64 -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s -// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=i386 -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=x86_64 -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target i386-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 
| FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=i386 -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang --target=x86_64 -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s unsigned int test__crc32b(unsigned int CRC, unsigned char V) { // CHECK-LABEL: test__crc32b diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 078680c1ea81b0..09c17aceaa4d61 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -1,306 +1,306 @@ -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mx87 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=X87 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-x87 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-X87 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -m80387 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=X87 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-80387 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-X87 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-fp-ret-in-387 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-X87 %s +// RUN: %clang --target=i386 -march=i386 -mx87 %s -### 2>&1 | FileCheck -check-prefix=X87 %s +// RUN: %clang --target=i386 -march=i386 -mno-x87 %s -### 2>&1 | FileCheck -check-prefix=NO-X87 %s +// RUN: %clang --target=i386 -march=i386 -m80387 %s -### 2>&1 | FileCheck -check-prefix=X87 %s +// RUN: %clang --target=i386 -march=i386 -mno-80387 %s -### 2>&1 | FileCheck -check-prefix=NO-X87 %s +// RUN: %clang --target=i386 -march=i386 -mno-fp-ret-in-387 %s -### 2>&1 | FileCheck -check-prefix=NO-X87 %s // X87: "-target-feature" "+x87" // NO-X87: "-target-feature" "-x87" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mmmx -m3dnow -m3dnowa %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MMX %s -// RUN: %clang -target i386-unknown-linux-gnu 
-march=i386 -mno-mmx -mno-3dnow -mno-3dnowa %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MMX %s +// RUN: %clang --target=i386 -march=i386 -mmmx -m3dnow -m3dnowa %s -### 2>&1 | FileCheck -check-prefix=MMX %s +// RUN: %clang --target=i386 -march=i386 -mno-mmx -mno-3dnow -mno-3dnowa %s -### 2>&1 | FileCheck -check-prefix=NO-MMX %s // MMX: "-target-feature" "+mmx" "-target-feature" "+3dnow" "-target-feature" "+3dnowa" // NO-MMX: "-target-feature" "-mmx" "-target-feature" "-3dnow" "-target-feature" "-3dnowa" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -msse -msse2 -msse3 -mssse3 -msse4a -msse4.1 -msse4.2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SSE %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-sse -mno-sse2 -mno-sse3 -mno-ssse3 -mno-sse4a -mno-sse4.1 -mno-sse4.2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SSE %s +// RUN: %clang --target=i386 -march=i386 -msse -msse2 -msse3 -mssse3 -msse4a -msse4.1 -msse4.2 %s -### 2>&1 | FileCheck -check-prefix=SSE %s +// RUN: %clang --target=i386 -march=i386 -mno-sse -mno-sse2 -mno-sse3 -mno-ssse3 -mno-sse4a -mno-sse4.1 -mno-sse4.2 %s -### 2>&1 | FileCheck -check-prefix=NO-SSE %s // SSE: "-target-feature" "+sse" "-target-feature" "+sse2" "-target-feature" "+sse3" "-target-feature" "+ssse3" "-target-feature" "+sse4a" "-target-feature" "+sse4.1" "-target-feature" "+sse4.2" // NO-SSE: "-target-feature" "-sse" "-target-feature" "-sse2" "-target-feature" "-sse3" "-target-feature" "-ssse3" "-target-feature" "-sse4a" "-target-feature" "-sse4.1" "-target-feature" "-sse4.2" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -msse4 -maes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SSE4-AES %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-sse4 -mno-aes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SSE4-AES %s +// RUN: %clang --target=i386 -march=i386 -msse4 -maes %s -### 2>&1 | FileCheck -check-prefix=SSE4-AES %s +// RUN: %clang --target=i386 -march=i386 
-mno-sse4 -mno-aes %s -### 2>&1 | FileCheck -check-prefix=NO-SSE4-AES %s // SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes" // NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX %s +// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s +// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s // AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma" // NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 
-mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BMI %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BMI %s +// RUN: %clang --target=i386 -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### 2>&1 | FileCheck -check-prefix=BMI %s +// RUN: %clang --target=i386 -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### 2>&1 | FileCheck -check-prefix=NO-BMI %s // BMI: "-target-feature" "+pclmul" "-target-feature" "+rdrnd" "-target-feature" "+fsgsbase" "-target-feature" "+bmi" "-target-feature" "+bmi2" // NO-BMI: "-target-feature" "-pclmul" "-target-feature" "-rdrnd" "-target-feature" "-fsgsbase" "-target-feature" "-bmi" "-target-feature" "-bmi2" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mlzcnt -mpopcnt -mtbm -mfma -mfma4 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=FMA %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-lzcnt -mno-popcnt -mno-tbm -mno-fma -mno-fma4 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-FMA %s +// RUN: %clang --target=i386 -march=i386 -mlzcnt -mpopcnt -mtbm -mfma -mfma4 %s -### 2>&1 | FileCheck -check-prefix=FMA %s +// RUN: %clang --target=i386 -march=i386 -mno-lzcnt -mno-popcnt -mno-tbm -mno-fma -mno-fma4 %s -### 2>&1 | FileCheck -check-prefix=NO-FMA %s // FMA: "-target-feature" "+lzcnt" "-target-feature" "+popcnt" "-target-feature" "+tbm" "-target-feature" "+fma" "-target-feature" "+fma4" // NO-FMA: "-target-feature" "-lzcnt" "-target-feature" "-popcnt" "-target-feature" "-tbm" "-target-feature" "-fma" "-target-feature" "-fma4" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mxop -mf16c -mrtm -mprfchw -mrdseed %s -### -o %t.o 2>&1 | FileCheck -check-prefix=XOP %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-xop -mno-f16c -mno-rtm -mno-prfchw -mno-rdseed %s -### -o %t.o 2>&1 | FileCheck 
-check-prefix=NO-XOP %s +// RUN: %clang --target=i386 -march=i386 -mxop -mf16c -mrtm -mprfchw -mrdseed %s -### 2>&1 | FileCheck -check-prefix=XOP %s +// RUN: %clang --target=i386 -march=i386 -mno-xop -mno-f16c -mno-rtm -mno-prfchw -mno-rdseed %s -### 2>&1 | FileCheck -check-prefix=NO-XOP %s // XOP: "-target-feature" "+xop" "-target-feature" "+f16c" "-target-feature" "+rtm" "-target-feature" "+prfchw" "-target-feature" "+rdseed" // NO-XOP: "-target-feature" "-xop" "-target-feature" "-f16c" "-target-feature" "-rtm" "-target-feature" "-prfchw" "-target-feature" "-rdseed" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -msha -mpku -madx -mcx16 -mfxsr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SHA %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-sha -mno-pku -mno-adx -mno-cx16 -mno-fxsr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SHA %s +// RUN: %clang --target=i386 -march=i386 -msha -mpku -madx -mcx16 -mfxsr %s -### 2>&1 | FileCheck -check-prefix=SHA %s +// RUN: %clang --target=i386 -march=i386 -mno-sha -mno-pku -mno-adx -mno-cx16 -mno-fxsr %s -### 2>&1 | FileCheck -check-prefix=NO-SHA %s // SHA: "-target-feature" "+sha" "-target-feature" "+pku" "-target-feature" "+adx" "-target-feature" "+cx16" "-target-feature" "+fxsr" // NO-SHA: "-target-feature" "-sha" "-target-feature" "-pku" "-target-feature" "-adx" "-target-feature" "-cx16" "-target-feature" "-fxsr" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mxsave -mxsaveopt -mxsavec -mxsaves %s -### -o %t.o 2>&1 | FileCheck -check-prefix=XSAVE %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-xsave -mno-xsaveopt -mno-xsavec -mno-xsaves %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-XSAVE %s +// RUN: %clang --target=i386 -march=i386 -mxsave -mxsaveopt -mxsavec -mxsaves %s -### 2>&1 | FileCheck -check-prefix=XSAVE %s +// RUN: %clang --target=i386 -march=i386 -mno-xsave -mno-xsaveopt -mno-xsavec -mno-xsaves %s -### 2>&1 | FileCheck -check-prefix=NO-XSAVE 
%s // XSAVE: "-target-feature" "+xsave" "-target-feature" "+xsaveopt" "-target-feature" "+xsavec" "-target-feature" "+xsaves" // NO-XSAVE: "-target-feature" "-xsave" "-target-feature" "-xsaveopt" "-target-feature" "-xsavec" "-target-feature" "-xsaves" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mclflushopt %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CLFLUSHOPT %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clflushopt %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLFLUSHOPT %s +// RUN: %clang --target=i386 -march=i386 -mclflushopt %s -### 2>&1 | FileCheck -check-prefix=CLFLUSHOPT %s +// RUN: %clang --target=i386 -march=i386 -mno-clflushopt %s -### 2>&1 | FileCheck -check-prefix=NO-CLFLUSHOPT %s // CLFLUSHOPT: "-target-feature" "+clflushopt" // NO-CLFLUSHOPT: "-target-feature" "-clflushopt" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mclwb %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CLWB %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clwb %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLWB %s +// RUN: %clang --target=i386 -march=i386 -mclwb %s -### 2>&1 | FileCheck -check-prefix=CLWB %s +// RUN: %clang --target=i386 -march=i386 -mno-clwb %s -### 2>&1 | FileCheck -check-prefix=NO-CLWB %s // CLWB: "-target-feature" "+clwb" // NO-CLWB: "-target-feature" "-clwb" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mwbnoinvd %s -### -o %t.o 2>&1 | FileCheck -check-prefix=WBNOINVD %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-wbnoinvd %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-WBNOINVD %s +// RUN: %clang --target=i386 -march=i386 -mwbnoinvd %s -### 2>&1 | FileCheck -check-prefix=WBNOINVD %s +// RUN: %clang --target=i386 -march=i386 -mno-wbnoinvd %s -### 2>&1 | FileCheck -check-prefix=NO-WBNOINVD %s // WBNOINVD: "-target-feature" "+wbnoinvd" // NO-WBNOINVD: "-target-feature" "-wbnoinvd" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mmovbe %s 
-### -o %t.o 2>&1 | FileCheck -check-prefix=MOVBE %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-movbe %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVBE %s +// RUN: %clang --target=i386 -march=i386 -mmovbe %s -### 2>&1 | FileCheck -check-prefix=MOVBE %s +// RUN: %clang --target=i386 -march=i386 -mno-movbe %s -### 2>&1 | FileCheck -check-prefix=NO-MOVBE %s // MOVBE: "-target-feature" "+movbe" // NO-MOVBE: "-target-feature" "-movbe" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mmpx %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MPX %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-mpx %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MPX %s +// RUN: %clang --target=i386 -march=i386 -mmpx %s -### 2>&1 | FileCheck -check-prefix=MPX %s +// RUN: %clang --target=i386 -march=i386 -mno-mpx %s -### 2>&1 | FileCheck -check-prefix=NO-MPX %s // MPX: the flag '-mmpx' has been deprecated and will be ignored // NO-MPX: the flag '-mno-mpx' has been deprecated and will be ignored -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mshstk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CETSS %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-shstk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CETSS %s +// RUN: %clang --target=i386 -march=i386 -mshstk %s -### 2>&1 | FileCheck -check-prefix=CETSS %s +// RUN: %clang --target=i386 -march=i386 -mno-shstk %s -### 2>&1 | FileCheck -check-prefix=NO-CETSS %s // CETSS: "-target-feature" "+shstk" // NO-CETSS: "-target-feature" "-shstk" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -msgx %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SGX %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-sgx %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SGX %s +// RUN: %clang --target=i386 -march=i386 -msgx %s -### 2>&1 | FileCheck -check-prefix=SGX %s +// RUN: %clang --target=i386 -march=i386 -mno-sgx %s -### 2>&1 | FileCheck 
-check-prefix=NO-SGX %s // SGX: "-target-feature" "+sgx" // NO-SGX: "-target-feature" "-sgx" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mprefetchwt1 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHWT1 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-prefetchwt1 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHWT1 %s +// RUN: %clang --target=i386 -march=i386 -mprefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=PREFETCHWT1 %s +// RUN: %clang --target=i386 -march=i386 -mno-prefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=NO-PREFETCHWT1 %s // PREFETCHWT1: "-target-feature" "+prefetchwt1" // NO-PREFETCHWT1: "-target-feature" "-prefetchwt1" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mclzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CLZERO %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s +// RUN: %clang --target=i386 -march=i386 -mclzero %s -### 2>&1 | FileCheck -check-prefix=CLZERO %s +// RUN: %clang --target=i386 -march=i386 -mno-clzero %s -### 2>&1 | FileCheck -check-prefix=NO-CLZERO %s // CLZERO: "-target-feature" "+clzero" // NO-CLZERO: "-target-feature" "-clzero" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VAES %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VAES %s +// RUN: %clang --target=i386 -march=i386 -mvaes %s -### 2>&1 | FileCheck -check-prefix=VAES %s +// RUN: %clang --target=i386 -march=i386 -mno-vaes %s -### 2>&1 | FileCheck -check-prefix=NO-VAES %s // VAES: "-target-feature" "+vaes" // NO-VAES: "-target-feature" "-vaes" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mgfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=GFNI %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-gfni %s -### -o %t.o 2>&1 | FileCheck 
-check-prefix=NO-GFNI %s +// RUN: %clang --target=i386 -march=i386 -mgfni %s -### 2>&1 | FileCheck -check-prefix=GFNI %s +// RUN: %clang --target=i386 -march=i386 -mno-gfni %s -### 2>&1 | FileCheck -check-prefix=NO-GFNI %s // GFNI: "-target-feature" "+gfni" // NO-GFNI: "-target-feature" "-gfni -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VPCLMULQDQ %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VPCLMULQDQ %s +// RUN: %clang --target=i386 -march=i386 -mvpclmulqdq %s -### 2>&1 | FileCheck -check-prefix=VPCLMULQDQ %s +// RUN: %clang --target=i386 -march=i386 -mno-vpclmulqdq %s -### 2>&1 | FileCheck -check-prefix=NO-VPCLMULQDQ %s // VPCLMULQDQ: "-target-feature" "+vpclmulqdq" // NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BITALG %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BITALG %s +// RUN: %clang --target=i386 -march=i386 -mavx512bitalg %s -### 2>&1 | FileCheck -check-prefix=BITALG %s +// RUN: %clang --target=i386 -march=i386 -mno-avx512bitalg %s -### 2>&1 | FileCheck -check-prefix=NO-BITALG %s // BITALG: "-target-feature" "+avx512bitalg" // NO-BITALG: "-target-feature" "-avx512bitalg" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VNNI %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VNNI %s +// RUN: %clang --target=i386 -march=i386 -mavx512vnni %s -### 2>&1 | FileCheck -check-prefix=VNNI %s +// RUN: %clang --target=i386 -march=i386 -mno-avx512vnni %s -### 2>&1 | FileCheck -check-prefix=NO-VNNI %s // VNNI: "-target-feature" "+avx512vnni" // NO-VNNI: 
"-target-feature" "-avx512vnni" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VBMI2 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VBMI2 %s +// RUN: %clang --target=i386 -march=i386 -mavx512vbmi2 %s -### 2>&1 | FileCheck -check-prefix=VBMI2 %s +// RUN: %clang --target=i386 -march=i386 -mno-avx512vbmi2 %s -### 2>&1 | FileCheck -check-prefix=NO-VBMI2 %s // VBMI2: "-target-feature" "+avx512vbmi2" // NO-VBMI2: "-target-feature" "-avx512vbmi2" -// RUN: %clang -target i386-linux-gnu -mavx512vp2intersect %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VP2INTERSECT %s -// RUN: %clang -target i386-linux-gnu -mno-avx512vp2intersect %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VP2INTERSECT %s +// RUN: %clang -target i386-linux-gnu -mavx512vp2intersect %s -### 2>&1 | FileCheck -check-prefix=VP2INTERSECT %s +// RUN: %clang -target i386-linux-gnu -mno-avx512vp2intersect %s -### 2>&1 | FileCheck -check-prefix=NO-VP2INTERSECT %s // VP2INTERSECT: "-target-feature" "+avx512vp2intersect" // NO-VP2INTERSECT: "-target-feature" "-avx512vp2intersect" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mrdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RDPID %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-rdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RDPID %s +// RUN: %clang --target=i386 -march=i386 -mrdpid %s -### 2>&1 | FileCheck -check-prefix=RDPID %s +// RUN: %clang --target=i386 -march=i386 -mno-rdpid %s -### 2>&1 | FileCheck -check-prefix=NO-RDPID %s // RDPID: "-target-feature" "+rdpid" // NO-RDPID: "-target-feature" "-rdpid" -// RUN: %clang -target i386-linux-gnu -mretpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RETPOLINE %s -// RUN: %clang -target i386-linux-gnu -mno-retpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RETPOLINE %s +// RUN: %clang -target 
i386-linux-gnu -mretpoline %s -### 2>&1 | FileCheck -check-prefix=RETPOLINE %s +// RUN: %clang -target i386-linux-gnu -mno-retpoline %s -### 2>&1 | FileCheck -check-prefix=NO-RETPOLINE %s // RETPOLINE: "-target-feature" "+retpoline-indirect-calls" "-target-feature" "+retpoline-indirect-branches" // NO-RETPOLINE-NOT: retpoline -// RUN: %clang -target i386-linux-gnu -mretpoline -mretpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RETPOLINE-EXTERNAL-THUNK %s -// RUN: %clang -target i386-linux-gnu -mretpoline -mno-retpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RETPOLINE-EXTERNAL-THUNK %s +// RUN: %clang -target i386-linux-gnu -mretpoline -mretpoline-external-thunk %s -### 2>&1 | FileCheck -check-prefix=RETPOLINE-EXTERNAL-THUNK %s +// RUN: %clang -target i386-linux-gnu -mretpoline -mno-retpoline-external-thunk %s -### 2>&1 | FileCheck -check-prefix=NO-RETPOLINE-EXTERNAL-THUNK %s // RETPOLINE-EXTERNAL-THUNK: "-target-feature" "+retpoline-external-thunk" // NO-RETPOLINE-EXTERNAL-THUNK: "-target-feature" "-retpoline-external-thunk" -// RUN: %clang -target i386-linux-gnu -mspeculative-load-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SLH %s -// RUN: %clang -target i386-linux-gnu -mretpoline -mspeculative-load-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RETPOLINE %s -// RUN: %clang -target i386-linux-gnu -mno-speculative-load-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SLH %s +// RUN: %clang -target i386-linux-gnu -mspeculative-load-hardening %s -### 2>&1 | FileCheck -check-prefix=SLH %s +// RUN: %clang -target i386-linux-gnu -mretpoline -mspeculative-load-hardening %s -### 2>&1 | FileCheck -check-prefix=RETPOLINE %s +// RUN: %clang -target i386-linux-gnu -mno-speculative-load-hardening %s -### 2>&1 | FileCheck -check-prefix=NO-SLH %s // SLH-NOT: retpoline // SLH: "-target-feature" "+retpoline-indirect-calls" // SLH-NOT: retpoline // SLH: "-mspeculative-load-hardening" // NO-SLH-NOT: 
retpoline -// RUN: %clang -target i386-linux-gnu -mlvi-cfi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVICFI %s -// RUN: %clang -target i386-linux-gnu -mno-lvi-cfi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-LVICFI %s +// RUN: %clang -target i386-linux-gnu -mlvi-cfi %s -### 2>&1 | FileCheck -check-prefix=LVICFI %s +// RUN: %clang -target i386-linux-gnu -mno-lvi-cfi %s -### 2>&1 | FileCheck -check-prefix=NO-LVICFI %s // LVICFI: "-target-feature" "+lvi-cfi" // NO-LVICFI-NOT: lvi-cfi -// RUN: %clang -target i386-linux-gnu -mlvi-cfi -mspeculative-load-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVICFI-SLH %s +// RUN: %clang -target i386-linux-gnu -mlvi-cfi -mspeculative-load-hardening %s -### 2>&1 | FileCheck -check-prefix=LVICFI-SLH %s // LVICFI-SLH: error: invalid argument 'mspeculative-load-hardening' not allowed with 'mlvi-cfi' -// RUN: %clang -target i386-linux-gnu -mlvi-cfi -mretpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVICFI-RETPOLINE %s +// RUN: %clang -target i386-linux-gnu -mlvi-cfi -mretpoline %s -### 2>&1 | FileCheck -check-prefix=LVICFI-RETPOLINE %s // LVICFI-RETPOLINE: error: invalid argument 'mretpoline' not allowed with 'mlvi-cfi' -// RUN: %clang -target i386-linux-gnu -mlvi-cfi -mretpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVICFI-RETPOLINE-EXTERNAL-THUNK %s +// RUN: %clang -target i386-linux-gnu -mlvi-cfi -mretpoline-external-thunk %s -### 2>&1 | FileCheck -check-prefix=LVICFI-RETPOLINE-EXTERNAL-THUNK %s // LVICFI-RETPOLINE-EXTERNAL-THUNK: error: invalid argument 'mretpoline-external-thunk' not allowed with 'mlvi-cfi' -// RUN: %clang -target i386-linux-gnu -mlvi-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVIHARDENING %s -// RUN: %clang -target i386-linux-gnu -mno-lvi-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-LVIHARDENING %s +// RUN: %clang -target i386-linux-gnu -mlvi-hardening %s -### 2>&1 | FileCheck -check-prefix=LVIHARDENING %s +// RUN: %clang -target 
i386-linux-gnu -mno-lvi-hardening %s -### 2>&1 | FileCheck -check-prefix=NO-LVIHARDENING %s // LVIHARDENING: "-target-feature" "+lvi-load-hardening" "-target-feature" "+lvi-cfi" // NO-LVIHARDENING-NOT: "+lvi- -// RUN: %clang -target i386-linux-gnu -mlvi-hardening -mspeculative-load-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVIHARDENING-SLH %s +// RUN: %clang -target i386-linux-gnu -mlvi-hardening -mspeculative-load-hardening %s -### 2>&1 | FileCheck -check-prefix=LVIHARDENING-SLH %s // LVIHARDENING-SLH: error: invalid argument 'mspeculative-load-hardening' not allowed with 'mlvi-hardening' -// RUN: %clang -target i386-linux-gnu -mlvi-hardening -mretpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVIHARDENING-RETPOLINE %s +// RUN: %clang -target i386-linux-gnu -mlvi-hardening -mretpoline %s -### 2>&1 | FileCheck -check-prefix=LVIHARDENING-RETPOLINE %s // LVIHARDENING-RETPOLINE: error: invalid argument 'mretpoline' not allowed with 'mlvi-hardening' -// RUN: %clang -target i386-linux-gnu -mlvi-hardening -mretpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=LVIHARDENING-RETPOLINE-EXTERNAL-THUNK %s +// RUN: %clang -target i386-linux-gnu -mlvi-hardening -mretpoline-external-thunk %s -### 2>&1 | FileCheck -check-prefix=LVIHARDENING-RETPOLINE-EXTERNAL-THUNK %s // LVIHARDENING-RETPOLINE-EXTERNAL-THUNK: error: invalid argument 'mretpoline-external-thunk' not allowed with 'mlvi-hardening' -// RUN: %clang -target i386-linux-gnu -mseses %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SESES %s -// RUN: %clang -target i386-linux-gnu -mno-seses %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SESES %s +// RUN: %clang -target i386-linux-gnu -mseses %s -### 2>&1 | FileCheck -check-prefix=SESES %s +// RUN: %clang -target i386-linux-gnu -mno-seses %s -### 2>&1 | FileCheck -check-prefix=NO-SESES %s // SESES: "-target-feature" "+seses" // SESES: "-target-feature" "+lvi-cfi" // NO-SESES-NOT: seses // NO-SESES-NOT: lvi-cfi -// RUN: %clang -target 
i386-linux-gnu -mseses -mno-lvi-cfi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SESES-NOLVICFI %s +// RUN: %clang -target i386-linux-gnu -mseses -mno-lvi-cfi %s -### 2>&1 | FileCheck -check-prefix=SESES-NOLVICFI %s // SESES-NOLVICFI: "-target-feature" "+seses" // SESES-NOLVICFI-NOT: lvi-cfi -// RUN: %clang -target i386-linux-gnu -mseses -mspeculative-load-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SESES-SLH %s +// RUN: %clang -target i386-linux-gnu -mseses -mspeculative-load-hardening %s -### 2>&1 | FileCheck -check-prefix=SESES-SLH %s // SESES-SLH: error: invalid argument 'mspeculative-load-hardening' not allowed with 'mseses' -// RUN: %clang -target i386-linux-gnu -mseses -mretpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SESES-RETPOLINE %s +// RUN: %clang -target i386-linux-gnu -mseses -mretpoline %s -### 2>&1 | FileCheck -check-prefix=SESES-RETPOLINE %s // SESES-RETPOLINE: error: invalid argument 'mretpoline' not allowed with 'mseses' -// RUN: %clang -target i386-linux-gnu -mseses -mretpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SESES-RETPOLINE-EXTERNAL-THUNK %s +// RUN: %clang -target i386-linux-gnu -mseses -mretpoline-external-thunk %s -### 2>&1 | FileCheck -check-prefix=SESES-RETPOLINE-EXTERNAL-THUNK %s // SESES-RETPOLINE-EXTERNAL-THUNK: error: invalid argument 'mretpoline-external-thunk' not allowed with 'mseses' -// RUN: %clang -target i386-linux-gnu -mseses -mlvi-hardening %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SESES-LVIHARDENING %s +// RUN: %clang -target i386-linux-gnu -mseses -mlvi-hardening %s -### 2>&1 | FileCheck -check-prefix=SESES-LVIHARDENING %s // SESES-LVIHARDENING: error: invalid argument 'mlvi-hardening' not allowed with 'mseses' -// RUN: %clang -target i386-linux-gnu -mwaitpkg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=WAITPKG %s -// RUN: %clang -target i386-linux-gnu -mno-waitpkg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-WAITPKG %s +// RUN: %clang -target i386-linux-gnu 
-mwaitpkg %s -### 2>&1 | FileCheck -check-prefix=WAITPKG %s +// RUN: %clang -target i386-linux-gnu -mno-waitpkg %s -### 2>&1 | FileCheck -check-prefix=NO-WAITPKG %s // WAITPKG: "-target-feature" "+waitpkg" // NO-WAITPKG: "-target-feature" "-waitpkg" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mmovdiri %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVDIRI %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-movdiri %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVDIRI %s +// RUN: %clang --target=i386 -march=i386 -mmovdiri %s -### 2>&1 | FileCheck -check-prefix=MOVDIRI %s +// RUN: %clang --target=i386 -march=i386 -mno-movdiri %s -### 2>&1 | FileCheck -check-prefix=NO-MOVDIRI %s // MOVDIRI: "-target-feature" "+movdiri" // NO-MOVDIRI: "-target-feature" "-movdiri" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mmovdir64b %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVDIR64B %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-movdir64b %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVDIR64B %s +// RUN: %clang --target=i386 -march=i386 -mmovdir64b %s -### 2>&1 | FileCheck -check-prefix=MOVDIR64B %s +// RUN: %clang --target=i386 -march=i386 -mno-movdir64b %s -### 2>&1 | FileCheck -check-prefix=NO-MOVDIR64B %s // MOVDIR64B: "-target-feature" "+movdir64b" // NO-MOVDIR64B: "-target-feature" "-movdir64b" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mpconfig %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PCONFIG %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-pconfig %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PCONFIG %s +// RUN: %clang --target=i386 -march=i386 -mpconfig %s -### 2>&1 | FileCheck -check-prefix=PCONFIG %s +// RUN: %clang --target=i386 -march=i386 -mno-pconfig %s -### 2>&1 | FileCheck -check-prefix=NO-PCONFIG %s // PCONFIG: "-target-feature" "+pconfig" // NO-PCONFIG: "-target-feature" "-pconfig" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 
-mptwrite %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PTWRITE %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-ptwrite %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PTWRITE %s +// RUN: %clang --target=i386 -march=i386 -mptwrite %s -### 2>&1 | FileCheck -check-prefix=PTWRITE %s +// RUN: %clang --target=i386 -march=i386 -mno-ptwrite %s -### 2>&1 | FileCheck -check-prefix=NO-PTWRITE %s // PTWRITE: "-target-feature" "+ptwrite" // NO-PTWRITE: "-target-feature" "-ptwrite" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -minvpcid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=INVPCID %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-invpcid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-INVPCID %s +// RUN: %clang --target=i386 -march=i386 -minvpcid %s -### 2>&1 | FileCheck -check-prefix=INVPCID %s +// RUN: %clang --target=i386 -march=i386 -mno-invpcid %s -### 2>&1 | FileCheck -check-prefix=NO-INVPCID %s // INVPCID: "-target-feature" "+invpcid" // NO-INVPCID: "-target-feature" "-invpcid" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX512BF16 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512BF16 %s +// RUN: %clang --target=i386 -march=i386 -mavx512bf16 %s -### 2>&1 | FileCheck -check-prefix=AVX512BF16 %s +// RUN: %clang --target=i386 -march=i386 -mno-avx512bf16 %s -### 2>&1 | FileCheck -check-prefix=NO-AVX512BF16 %s // AVX512BF16: "-target-feature" "+avx512bf16" // NO-AVX512BF16: "-target-feature" "-avx512bf16" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -menqcmd %s -### -o %t.o 2>&1 | FileCheck --check-prefix=ENQCMD %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-enqcmd %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-ENQCMD %s +// RUN: %clang --target=i386 -march=i386 -menqcmd %s -### 2>&1 | FileCheck --check-prefix=ENQCMD 
%s +// RUN: %clang --target=i386 -march=i386 -mno-enqcmd %s -### 2>&1 | FileCheck --check-prefix=NO-ENQCMD %s // ENQCMD: "-target-feature" "+enqcmd" // NO-ENQCMD: "-target-feature" "-enqcmd" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvzeroupper %s -### -o %t.o 2>&1 | FileCheck --check-prefix=VZEROUPPER %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vzeroupper %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-VZEROUPPER %s +// RUN: %clang --target=i386 -march=i386 -mvzeroupper %s -### 2>&1 | FileCheck --check-prefix=VZEROUPPER %s +// RUN: %clang --target=i386 -march=i386 -mno-vzeroupper %s -### 2>&1 | FileCheck --check-prefix=NO-VZEROUPPER %s // VZEROUPPER: "-target-feature" "+vzeroupper" // NO-VZEROUPPER: "-target-feature" "-vzeroupper" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mserialize %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SERIALIZE %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-serialize %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SERIALIZE %s +// RUN: %clang --target=i386 -march=i386 -mserialize %s -### 2>&1 | FileCheck -check-prefix=SERIALIZE %s +// RUN: %clang --target=i386 -march=i386 -mno-serialize %s -### 2>&1 | FileCheck -check-prefix=NO-SERIALIZE %s // SERIALIZE: "-target-feature" "+serialize" // NO-SERIALIZE: "-target-feature" "-serialize" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mtsxldtrk %s -### -o %t.o 2>&1 | FileCheck --check-prefix=TSXLDTRK %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-tsxldtrk %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-TSXLDTRK %s +// RUN: %clang --target=i386 -march=i386 -mtsxldtrk %s -### 2>&1 | FileCheck --check-prefix=TSXLDTRK %s +// RUN: %clang --target=i386 -march=i386 -mno-tsxldtrk %s -### 2>&1 | FileCheck --check-prefix=NO-TSXLDTRK %s // TSXLDTRK: "-target-feature" "+tsxldtrk" // NO-TSXLDTRK: "-target-feature" "-tsxldtrk" -// RUN: %clang -target i386-linux-gnu -mkl %s -### -o %t.o 
2>&1 | FileCheck -check-prefix=KL %s -// RUN: %clang -target i386-linux-gnu -mno-kl %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-KL %s +// RUN: %clang -target i386-linux-gnu -mkl %s -### 2>&1 | FileCheck -check-prefix=KL %s +// RUN: %clang -target i386-linux-gnu -mno-kl %s -### 2>&1 | FileCheck -check-prefix=NO-KL %s // KL: "-target-feature" "+kl" // NO-KL: "-target-feature" "-kl" -// RUN: %clang -target i386-linux-gnu -mwidekl %s -### -o %t.o 2>&1 | FileCheck -check-prefix=WIDE_KL %s -// RUN: %clang -target i386-linux-gnu -mno-widekl %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-WIDE_KL %s +// RUN: %clang -target i386-linux-gnu -mwidekl %s -### 2>&1 | FileCheck -check-prefix=WIDE_KL %s +// RUN: %clang -target i386-linux-gnu -mno-widekl %s -### 2>&1 | FileCheck -check-prefix=NO-WIDE_KL %s // WIDE_KL: "-target-feature" "+widekl" // NO-WIDE_KL: "-target-feature" "-widekl" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-tile %s -### -o %t.o 2>&1 | FileCheck --check-prefix=AMX-TILE %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-tile %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AMX-TILE %s +// RUN: %clang --target=i386 -march=i386 -mamx-tile %s -### 2>&1 | FileCheck --check-prefix=AMX-TILE %s +// RUN: %clang --target=i386 -march=i386 -mno-amx-tile %s -### 2>&1 | FileCheck --check-prefix=NO-AMX-TILE %s // AMX-TILE: "-target-feature" "+amx-tile" // NO-AMX-TILE: "-target-feature" "-amx-tile" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-bf16 %s -### -o %t.o 2>&1 | FileCheck --check-prefix=AMX-BF16 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AMX-BF16 %s +// RUN: %clang --target=i386 -march=i386 -mamx-bf16 %s -### 2>&1 | FileCheck --check-prefix=AMX-BF16 %s +// RUN: %clang --target=i386 -march=i386 -mno-amx-bf16 %s -### 2>&1 | FileCheck -check-prefix=NO-AMX-BF16 %s // AMX-BF16: "-target-feature" "+amx-bf16" // 
NO-AMX-BF16: "-target-feature" "-amx-bf16" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-int8 %s -### -o %t.o 2>&1 | FileCheck --check-prefix=AMX-INT8 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-int8 %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AMX-INT8 %s +// RUN: %clang --target=i386 -march=i386 -mamx-int8 %s -### 2>&1 | FileCheck --check-prefix=AMX-INT8 %s +// RUN: %clang --target=i386 -march=i386 -mno-amx-int8 %s -### 2>&1 | FileCheck --check-prefix=NO-AMX-INT8 %s // AMX-INT8: "-target-feature" "+amx-int8" // NO-AMX-INT8: "-target-feature" "-amx-int8" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mhreset %s -### -o %t.o 2>&1 | FileCheck -check-prefix=HRESET %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-hreset %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-HRESET %s +// RUN: %clang --target=i386 -march=i386 -mhreset %s -### 2>&1 | FileCheck -check-prefix=HRESET %s +// RUN: %clang --target=i386 -march=i386 -mno-hreset %s -### 2>&1 | FileCheck -check-prefix=NO-HRESET %s // HRESET: "-target-feature" "+hreset" // NO-HRESET: "-target-feature" "-hreset" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -muintr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=UINTR %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-uintr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-UINTR %s +// RUN: %clang --target=i386 -march=i386 -muintr %s -### 2>&1 | FileCheck -check-prefix=UINTR %s +// RUN: %clang --target=i386 -march=i386 -mno-uintr %s -### 2>&1 | FileCheck -check-prefix=NO-UINTR %s // UINTR: "-target-feature" "+uintr" // NO-UINTR: "-target-feature" "-uintr" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavxvnni %s -### -o %t.o 2>&1 | FileCheck --check-prefix=AVX-VNNI %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avxvnni %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AVX-VNNI %s +// RUN: %clang --target=i386 -march=i386 
-mavxvnni %s -### 2>&1 | FileCheck --check-prefix=AVX-VNNI %s +// RUN: %clang --target=i386 -march=i386 -mno-avxvnni %s -### 2>&1 | FileCheck --check-prefix=NO-AVX-VNNI %s // AVX-VNNI: "-target-feature" "+avxvnni" // NO-AVX-VNNI: "-target-feature" "-avxvnni" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512fp16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX512FP16 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512fp16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512FP16 %s +// RUN: %clang --target=i386 -march=i386 -mavx512fp16 %s -### 2>&1 | FileCheck -check-prefix=AVX512FP16 %s +// RUN: %clang --target=i386 -march=i386 -mno-avx512fp16 %s -### 2>&1 | FileCheck -check-prefix=NO-AVX512FP16 %s // AVX512FP16: "-target-feature" "+avx512fp16" // NO-AVX512FP16: "-target-feature" "-avx512fp16" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CRC32 %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CRC32 %s +// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s +// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s // CRC32: "-target-feature" "+crc32" // NO-CRC32: "-target-feature" "-crc32" From 87628f5804e23a40986692f6cdcf66654ce3f017 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 26 May 2022 14:05:05 -0700 Subject: [PATCH 649/908] [lld][WebAssembly] Require double dash for modern linker flags This matches the behaviour of the ELF backend (in fact this change is mostly just copying directly from ELF/Options.td). 
Differential Revision: https://reviews.llvm.org/D126500 --- lld/test/wasm/lto/atomics.ll | 2 +- lld/test/wasm/lto/pic-empty.s | 2 +- lld/wasm/Options.td | 73 +++++++++++++++++++++-------------- 3 files changed, 45 insertions(+), 32 deletions(-) diff --git a/lld/test/wasm/lto/atomics.ll b/lld/test/wasm/lto/atomics.ll index f231b3225fe8bd..7e367157675cd6 100644 --- a/lld/test/wasm/lto/atomics.ll +++ b/lld/test/wasm/lto/atomics.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as %s -o %t.o -; RUN: wasm-ld %t.o -o %t.wasm -lto-O0 +; RUN: wasm-ld %t.o -o %t.wasm --lto-O0 ; Atomic operations will not fail to compile if atomics are not ; enabled because LLVM atomics will be lowered to regular ops. diff --git a/lld/test/wasm/lto/pic-empty.s b/lld/test/wasm/lto/pic-empty.s index 88e0f42542e247..51938e1ad429dc 100644 --- a/lld/test/wasm/lto/pic-empty.s +++ b/lld/test/wasm/lto/pic-empty.s @@ -6,7 +6,7 @@ ; See https://bugs.llvm.org/show_bug.cgi?id=52339 ; RUN: llvm-as %s -o %t.o -; RUN: wasm-ld -lto-O2 --experimental-pic -shared --no-gc-sections --export=tls_int %t.o -o %t.so +; RUN: wasm-ld --lto-O2 --experimental-pic -shared --no-gc-sections --export=tls_int %t.o -o %t.so ; RUN: obj2yaml %t.so | FileCheck %s target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-f128:64-n32:64-S128-ni:1:10:20" diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index a5395bbda1c7a2..b0c4a811e85a0d 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -1,5 +1,23 @@ include "llvm/Option/OptParser.td" +// Convenience classes for long options which only accept two dashes. For lld +// specific or newer long options, we prefer two dashes to avoid collision with +// short options. For many others, we have to accept both forms to be compatible +// with GNU ld. 
+class FF : Flag<["--"], name>; +class JJ: Joined<["--"], name>; + +multiclass EEq { + def NAME: Separate<["--"], name>; + def NAME # _eq: Joined<["--"], name # "=">, Alias(NAME)>, + HelpText; +} + +multiclass BB { + def NAME: Flag<["--"], name>, HelpText; + def no_ # NAME: Flag<["--"], "no-" # name>, HelpText; +} + // For options whose names are multiple letters, either one dash or // two can precede the option name except those that start with 'o'. class F: Flag<["--", "-"], name>; @@ -17,11 +35,6 @@ multiclass B { def no_ # NAME: Flag<["--", "-"], "no-" # name>, HelpText; } -multiclass BB { - def NAME: Flag<["--"], name>, HelpText; - def no_ # NAME: Flag<["--"], "no-" # name>, HelpText; -} - // The following flags are shared with the ELF linker def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">; @@ -51,8 +64,8 @@ defm export_dynamic: B<"export-dynamic", def entry: S<"entry">, MetaVarName<"">, HelpText<"Name of entry point symbol">; -def error_limit: J<"error-limit=">, - HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; +defm error_limit: + EEq<"error-limit", "Maximum number of errors to emit before stopping (0 = no limit)">; def fatal_warnings: F<"fatal-warnings">, HelpText<"Treat warnings as errors">; @@ -61,7 +74,7 @@ defm gc_sections: B<"gc-sections", "Enable garbage collection of unused sections", "Disable garbage collection of unused sections">; -defm merge_data_segments: B<"merge-data-segments", +defm merge_data_segments: BB<"merge-data-segments", "Enable merging data segments", "Disable merging data segments">; @@ -75,7 +88,7 @@ def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"">, def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; -def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">; +defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">; defm Map: Eq<"Map", "Print a link map to the specified file">; @@ -99,7 +112,7 @@ def print_map: 
F<"print-map">, def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; -defm reproduce: Eq<"reproduce", "Dump linker invocation and input files for debugging">; +defm reproduce: EEq<"reproduce", "Dump linker invocation and input files for debugging">; defm rsp_quoting: Eq<"rsp-quoting", "Quoting style for response files">, MetaVarName<"[posix,windows]">; @@ -159,44 +172,44 @@ defm export: Eq<"export", "Force a symbol to be exported">; defm export_if_defined: Eq<"export-if-defined", "Force a symbol to be exported, if it is defined in the input">; -def export_all: F<"export-all">, +def export_all: FF<"export-all">, HelpText<"Export all symbols (normally combined with --no-gc-sections)">; -def export_table: F<"export-table">, +def export_table: FF<"export-table">, HelpText<"Export function table to the environment">; -def growable_table: F<"growable-table">, +def growable_table: FF<"growable-table">, HelpText<"Remove maximum size from function table, allowing table to grow">; -def global_base: J<"global-base=">, +def global_base: JJ<"global-base=">, HelpText<"Where to start to place global data">; -def import_memory: F<"import-memory">, +def import_memory: FF<"import-memory">, HelpText<"Import memory from the environment">; -def shared_memory: F<"shared-memory">, +def shared_memory: FF<"shared-memory">, HelpText<"Use shared linear memory">; -def import_table: F<"import-table">, +def import_table: FF<"import-table">, HelpText<"Import function table from the environment">; -def initial_memory: J<"initial-memory=">, +def initial_memory: JJ<"initial-memory=">, HelpText<"Initial size of the linear memory">; -def max_memory: J<"max-memory=">, +def max_memory: JJ<"max-memory=">, HelpText<"Maximum size of the linear memory">; -def no_entry: F<"no-entry">, +def no_entry: FF<"no-entry">, HelpText<"Do not output any entry point">; -def stack_first: F<"stack-first">, +def stack_first: FF<"stack-first">, HelpText<"Place stack at start of linear memory 
rather than after data">; defm whole_archive: B<"whole-archive", "Force load of all members in a static library", "Do not force load of all members in a static library (default)">; -defm check_features: B<"check-features", +defm check_features: BB<"check-features", "Check feature compatibility of linked objects (default)", "Ignore feature compatibility of linked objects">; @@ -217,22 +230,22 @@ def: JoinedOrSeparate<["-"], "y">, Alias, HelpText<"Alias for --tr def: JoinedOrSeparate<["-"], "u">, Alias; // LTO-related options. -def lto_O: J<"lto-O">, MetaVarName<"">, +def lto_O: JJ<"lto-O">, MetaVarName<"">, HelpText<"Optimization level for LTO">; -def lto_partitions: J<"lto-partitions=">, +def lto_partitions: JJ<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; def disable_verify: F<"disable-verify">; def save_temps: F<"save-temps">, HelpText<"Save intermediate LTO compilation results">; -def thinlto_cache_dir: J<"thinlto-cache-dir=">, +def thinlto_cache_dir: JJ<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; -defm thinlto_cache_policy: Eq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; -def thinlto_jobs: J<"thinlto-jobs=">, +defm thinlto_cache_policy: EEq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; +def thinlto_jobs: JJ<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs. Default to --threads=">; -def no_lto_legacy_pass_manager: F<"no-lto-legacy-pass-manager">, +def no_lto_legacy_pass_manager: FF<"no-lto-legacy-pass-manager">, HelpText<"Use new pass manager">; -def lto_debug_pass_manager: F<"lto-debug-pass-manager">, +def lto_debug_pass_manager: FF<"lto-debug-pass-manager">, HelpText<"Debug new pass manager">; // Experimental PIC mode. 
-def experimental_pic: F<"experimental-pic">, +def experimental_pic: FF<"experimental-pic">, HelpText<"Enable Experimental PIC">; From e267df8ce8d86ba14187b316ad2495afaebe9a44 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 26 May 2022 14:47:49 -0700 Subject: [PATCH 650/908] Use cmake Python3_EXECUTABLE variable instead of hardcoding --- llvm/cmake/modules/TensorFlowCompile.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/cmake/modules/TensorFlowCompile.cmake b/llvm/cmake/modules/TensorFlowCompile.cmake index 4f79f6653c88f0..861e559a3813af 100644 --- a/llvm/cmake/modules/TensorFlowCompile.cmake +++ b/llvm/cmake/modules/TensorFlowCompile.cmake @@ -31,7 +31,7 @@ function(generate_mock_model generator output) tf_get_absolute_path(${generator} ${CMAKE_CURRENT_SOURCE_DIR} generator_absolute_path) tf_get_absolute_path(${output} ${CMAKE_CURRENT_BINARY_DIR} output_absolute_path) message(WARNING "Autogenerated mock models should not be used in production builds.") - execute_process(COMMAND python3 + execute_process(COMMAND ${Python3_EXECUTABLE} ${generator_absolute_path} ${output_absolute_path} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} From 134d7f9a4b97e9035150d970bd9e376043c4577e Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Wed, 18 May 2022 14:39:03 -0700 Subject: [PATCH 651/908] Store a by name list of signals with their actions in the Target so that they can be used to prime new Process runs. "process handle" was also changed to populate the dummy target if there's no selected target, so that the settings will get copied into new targets. 
Differential Revision: https://reviews.llvm.org/D126259 --- lldb/include/lldb/Target/Target.h | 41 +++++ lldb/include/lldb/Target/UnixSignals.h | 5 + .../Python/lldbsuite/test/lldbutil.py | 36 +++++ lldb/source/Commands/CommandObjectProcess.cpp | 153 ++++++++++++++---- lldb/source/Commands/Options.td | 6 + lldb/source/Target/Process.cpp | 13 ++ lldb/source/Target/Target.cpp | 130 ++++++++++++++- lldb/source/Target/UnixSignals.cpp | 24 ++- .../test/API/commands/process/handle/Makefile | 3 + .../process/handle/TestProcessHandle.py | 136 ++++++++++++++++ .../test/API/commands/process/handle/main.cpp | 3 + .../functionalities/signal/raise/TestRaise.py | 37 ++--- lldb/unittests/Signals/UnixSignalsTest.cpp | 23 +++ 13 files changed, 556 insertions(+), 54 deletions(-) create mode 100644 lldb/test/API/commands/process/handle/Makefile create mode 100644 lldb/test/API/commands/process/handle/TestProcessHandle.py create mode 100644 lldb/test/API/commands/process/handle/main.cpp diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index d6193ea7166f3d..fb2914064ce451 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -1414,6 +1414,42 @@ class Target : public std::enable_shared_from_this, return *m_frame_recognizer_manager_up; } + /// Add a signal for the target. This will get copied over to the process + /// if the signal exists on that target. Only the values with Yes and No are + /// set, Calculate values will be ignored. 
+protected: + struct DummySignalValues { + LazyBool pass = eLazyBoolCalculate; + LazyBool notify = eLazyBoolCalculate; + LazyBool stop = eLazyBoolCalculate; + DummySignalValues(LazyBool pass, LazyBool notify, LazyBool stop) : + pass(pass), notify(notify), stop(stop) {} + DummySignalValues() = default; + }; + using DummySignalElement = llvm::StringMapEntry; + static bool UpdateSignalFromDummy(lldb::UnixSignalsSP signals_sp, + const DummySignalElement &element); + static bool ResetSignalFromDummy(lldb::UnixSignalsSP signals_sp, + const DummySignalElement &element); + +public: + /// Add a signal to the Target's list of stored signals/actions. These + /// values will get copied into any processes launched from + /// this target. + void AddDummySignal(llvm::StringRef name, LazyBool pass, LazyBool print, + LazyBool stop); + /// Updates the signals in signals_sp using the stored dummy signals. + /// If warning_stream_sp is not null, if any stored signals are not found in + /// the current process, a warning will be emitted here. + void UpdateSignalsFromDummy(lldb::UnixSignalsSP signals_sp, + lldb::StreamSP warning_stream_sp); + /// Clear the dummy signals in signal_names from the target, or all signals + /// if signal_names is empty. Also remove the behaviors they set from the + /// process's signals if it exists. + void ClearDummySignals(Args &signal_names); + /// Print all the signals set in this target. + void PrintDummySignals(Stream &strm, Args &signals); + protected: /// Implementing of ModuleList::Notifier. @@ -1443,6 +1479,7 @@ class Target : public std::enable_shared_from_this, ArchSpec m_spec; std::unique_ptr m_plugin_up; }; + // Member variables. Debugger &m_debugger; lldb::PlatformSP m_platform_sp; ///< The platform for this target. @@ -1493,6 +1530,10 @@ class Target : public std::enable_shared_from_this, lldb::TraceSP m_trace_sp; /// Stores the frame recognizers of this target. 
lldb::StackFrameRecognizerManagerUP m_frame_recognizer_manager_up; + /// These are used to set the signal state when you don't have a process and + /// more usefully in the Dummy target where you can't know exactly what + /// signals you will have. + llvm::StringMap m_dummy_signals; static void ImageSearchPathsChanged(const PathMappingList &path_list, void *baton); diff --git a/lldb/include/lldb/Target/UnixSignals.h b/lldb/include/lldb/Target/UnixSignals.h index 1c91c9fdd48957..34f4e30d135056 100644 --- a/lldb/include/lldb/Target/UnixSignals.h +++ b/lldb/include/lldb/Target/UnixSignals.h @@ -55,6 +55,9 @@ class UnixSignals { bool SetShouldNotify(int32_t signo, bool value); bool SetShouldNotify(const char *signal_name, bool value); + + bool ResetSignal(int32_t signo, bool reset_stop = true, + bool reset_notify = true, bool reset_suppress = true); // These provide an iterator through the signals available on this system. // Call GetFirstSignalNumber to get the first entry, then iterate on @@ -114,11 +117,13 @@ class UnixSignals { std::string m_description; uint32_t m_hit_count = 0; bool m_suppress : 1, m_stop : 1, m_notify : 1; + bool m_default_suppress : 1, m_default_stop : 1, m_default_notify : 1; Signal(const char *name, bool default_suppress, bool default_stop, bool default_notify, const char *description, const char *alias); ~Signal() = default; + void Reset(bool reset_stop, bool reset_notify, bool reset_suppress); }; virtual void Reset(); diff --git a/lldb/packages/Python/lldbsuite/test/lldbutil.py b/lldb/packages/Python/lldbsuite/test/lldbutil.py index af2c41d1afa61c..e3e81a7f593581 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbutil.py @@ -1527,6 +1527,42 @@ def get_signal_number(signal_name): # No remote platform; fall back to using local python signals. 
return getattr(signal, signal_name) +def get_actions_for_signal(testcase, signal_name, from_target=False, expected_absent=False): + """Returns a triple of (pass, stop, notify)""" + return_obj = lldb.SBCommandReturnObject() + command = "process handle {0}".format(signal_name) + if from_target: + command += " -t" + testcase.dbg.GetCommandInterpreter().HandleCommand( + command, return_obj) + match = re.match( + 'NAME *PASS *STOP *NOTIFY.*(false|true|not set) *(false|true|not set) *(false|true|not set)', + return_obj.GetOutput(), + re.IGNORECASE | re.DOTALL) + if match and expected_absent: + testcase.fail('Signal "{0}" was supposed to be absent'.format(signal_name)) + if not match: + if expected_absent: + return (None, None, None) + testcase.fail('Unable to retrieve default signal disposition.') + return (match.group(1), match.group(2), match.group(3)) + + + +def set_actions_for_signal(testcase, signal_name, pass_action, stop_action, notify_action, expect_success=True): + return_obj = lldb.SBCommandReturnObject() + command = "process handle {0}".format(signal_name) + if pass_action != None: + command += " -p {0}".format(pass_action) + if stop_action != None: + command += " -s {0}".format(stop_action) + if notify_action != None: + command +=" -n {0}".format(notify_action) + + testcase.dbg.GetCommandInterpreter().HandleCommand(command, return_obj) + testcase.assertEqual(expect_success, + return_obj.Succeeded(), + "Setting signal handling for {0} worked as expected".format(signal_name)) class PrintableRegex(object): diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 484ef6aa6036bf..f5961d9b1c00d2 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -1432,6 +1432,12 @@ class CommandObjectProcessHandle : public CommandObjectParsed { const int short_option = m_getopt_table[option_idx].val; switch (short_option) { + case 'c': + do_clear = true; + break; + 
case 'd': + dummy = true; + break; case 's': stop = std::string(option_arg); break; @@ -1441,6 +1447,9 @@ class CommandObjectProcessHandle : public CommandObjectParsed { case 'p': pass = std::string(option_arg); break; + case 't': + only_target_values = true; + break; default: llvm_unreachable("Unimplemented option"); } @@ -1451,6 +1460,9 @@ class CommandObjectProcessHandle : public CommandObjectParsed { stop.clear(); notify.clear(); pass.clear(); + only_target_values = false; + do_clear = false; + dummy = false; } llvm::ArrayRef GetDefinitions() override { @@ -1462,6 +1474,9 @@ class CommandObjectProcessHandle : public CommandObjectParsed { std::string stop; std::string notify; std::string pass; + bool only_target_values = false; + bool do_clear = false; + bool dummy = false; }; CommandObjectProcessHandle(CommandInterpreter &interpreter) @@ -1469,9 +1484,19 @@ class CommandObjectProcessHandle : public CommandObjectParsed { "Manage LLDB handling of OS signals for the " "current target process. Defaults to showing " "current policy.", - nullptr, eCommandRequiresTarget) { - SetHelpLong("\nIf no signals are specified, update them all. If no update " - "option is specified, list the current values."); + nullptr) { + SetHelpLong("\nIf no signals are specified but one or more actions are, " + "and there is a live process, update them all. If no action " + "is specified, list the current values.\n" + "If you specify actions with no target (e.g. in an init file) " + "or in a target with no process " + "the values will get copied into subsequent targets, but " + "lldb won't be able to spell-check the options since it can't " + "know which signal set will later be in force." + "\nYou can see the signal modifications held by the target" + "by passing the -t option." 
+ "\nYou can also clear the target modification for a signal" + "by passing the -c option"); CommandArgumentEntry arg; CommandArgumentData signal_arg; @@ -1554,15 +1579,13 @@ class CommandObjectProcessHandle : public CommandObjectParsed { protected: bool DoExecute(Args &signal_args, CommandReturnObject &result) override { - Target *target_sp = &GetSelectedTarget(); + Target &target = GetSelectedOrDummyTarget(); - ProcessSP process_sp = target_sp->GetProcessSP(); - - if (!process_sp) { - result.AppendError("No current process; cannot handle signals until you " - "have a valid process.\n"); - return false; - } + // Any signals that are being set should be added to the Target's + // DummySignals so they will get applied on rerun, etc. + // If we have a process, however, we can do a more accurate job of vetting + // the user's options. + ProcessSP process_sp = target.GetProcessSP(); int stop_action = -1; // -1 means leave the current setting alone int pass_action = -1; // -1 means leave the current setting alone @@ -1588,35 +1611,99 @@ class CommandObjectProcessHandle : public CommandObjectParsed { "true or false.\n"); return false; } + + bool no_actions = (stop_action == -1 && pass_action == -1 + && notify_action == -1); + if (m_options.only_target_values && !no_actions) { + result.AppendError("-t is for reporting, not setting, target values."); + return false; + } size_t num_args = signal_args.GetArgumentCount(); - UnixSignalsSP signals_sp = process_sp->GetUnixSignals(); + UnixSignalsSP signals_sp; + if (process_sp) + signals_sp = process_sp->GetUnixSignals(); + int num_signals_set = 0; + // If we were just asked to print the target values, do that here and + // return: + if (m_options.only_target_values) { + target.PrintDummySignals(result.GetOutputStream(), signal_args); + result.SetStatus(eReturnStatusSuccessFinishResult); + return true; + } + + // This handles clearing values: + if (m_options.do_clear) { + target.ClearDummySignals(signal_args); + if 
(m_options.dummy) + GetDummyTarget().ClearDummySignals(signal_args); + result.SetStatus(eReturnStatusSuccessFinishNoResult); + return true; + } + + // This rest handles setting values: if (num_args > 0) { for (const auto &arg : signal_args) { - int32_t signo = signals_sp->GetSignalNumberFromName(arg.c_str()); - if (signo != LLDB_INVALID_SIGNAL_NUMBER) { - // Casting the actions as bools here should be okay, because - // VerifyCommandOptionValue guarantees the value is either 0 or 1. - if (stop_action != -1) - signals_sp->SetShouldStop(signo, stop_action); - if (pass_action != -1) { - bool suppress = !pass_action; - signals_sp->SetShouldSuppress(signo, suppress); + // Do the process first. If we have a process we can catch + // invalid signal names, which we do here. + if (signals_sp) { + int32_t signo = signals_sp->GetSignalNumberFromName(arg.c_str()); + if (signo != LLDB_INVALID_SIGNAL_NUMBER) { + // Casting the actions as bools here should be okay, because + // VerifyCommandOptionValue guarantees the value is either 0 or 1. + if (stop_action != -1) + signals_sp->SetShouldStop(signo, stop_action); + if (pass_action != -1) { + bool suppress = !pass_action; + signals_sp->SetShouldSuppress(signo, suppress); + } + if (notify_action != -1) + signals_sp->SetShouldNotify(signo, notify_action); + ++num_signals_set; + } else { + result.AppendErrorWithFormat("Invalid signal name '%s'\n", + arg.c_str()); + continue; } - if (notify_action != -1) - signals_sp->SetShouldNotify(signo, notify_action); - ++num_signals_set; } else { - result.AppendErrorWithFormat("Invalid signal name '%s'\n", - arg.c_str()); + // If there's no process we can't check, so we just set them all. + // But since the map signal name -> signal number across all platforms + // is not 1-1, we can't sensibly set signal actions by number before + // we have a process. 
Check that here: + int32_t signo; + if (llvm::to_integer(arg.c_str(), signo)) { + result.AppendErrorWithFormat("Can't set signal handling by signal " + "number with no process"); + return false; + } + num_signals_set = num_args; } + auto set_lazy_bool = [] (int action) -> LazyBool { + LazyBool lazy; + if (action == -1) + lazy = eLazyBoolCalculate; + else if (action) + lazy = eLazyBoolYes; + else + lazy = eLazyBoolNo; + return lazy; + }; + + // If there were no actions, we're just listing, don't add the dummy: + if (!no_actions) + target.AddDummySignal(arg.ref(), + set_lazy_bool(pass_action), + set_lazy_bool(notify_action), + set_lazy_bool(stop_action)); } } else { // No signal specified, if any command options were specified, update ALL - // signals. - if ((notify_action != -1) || (stop_action != -1) || (pass_action != -1)) { + // signals. But we can't do this without a process since we don't know + // all the possible signals that might be valid for this target. + if (((notify_action != -1) || (stop_action != -1) || (pass_action != -1)) + && process_sp) { if (m_interpreter.Confirm( "Do you really want to update all the signals?", false)) { int32_t signo = signals_sp->GetFirstSignalNumber(); @@ -1635,11 +1722,15 @@ class CommandObjectProcessHandle : public CommandObjectParsed { } } - PrintSignalInformation(result.GetOutputStream(), signal_args, - num_signals_set, signals_sp); + if (signals_sp) + PrintSignalInformation(result.GetOutputStream(), signal_args, + num_signals_set, signals_sp); + else + target.PrintDummySignals(result.GetOutputStream(), + signal_args); if (num_signals_set > 0) - result.SetStatus(eReturnStatusSuccessFinishNoResult); + result.SetStatus(eReturnStatusSuccessFinishResult); else result.SetStatus(eReturnStatusFailed); diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index c326f8a3207488..5a1c4bcd2f3a52 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -742,6 +742,8 @@ let Command 
= "process load" in { } let Command = "process handle" in { + def process_handle_clear : Option<"clear", "c">, Group<2>, + Desc<"Removes the signals listed from the Target signal handlers">; def process_handle_stop : Option<"stop", "s">, Group<1>, Arg<"Boolean">, Desc<"Whether or not the process should be stopped if the signal is " "received.">; @@ -750,6 +752,10 @@ let Command = "process handle" in { "received.">; def process_handle_pass : Option<"pass", "p">, Group<1>, Arg<"Boolean">, Desc<"Whether or not the signal should be passed to the process.">; + def process_handle_only_target : Option<"target", "t">, Group<1>, + Desc<"Show only the signals with behaviors modified in this target">; + def process_handle_dummy : Option<"dummy", "d">, Group<2>, + Desc<"Also clear the values in the dummy target so they won't be inherited by new targets.">; } let Command = "process status" in { diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 1f62e95377f4d9..0467066376913b 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -2566,6 +2566,13 @@ Status Process::LaunchPrivate(ProcessLaunchInfo &launch_info, StateType &state, if (state == eStateStopped || state == eStateCrashed) { DidLaunch(); + + // Now that we know the process type, update its signal responses from the + // ones stored in the Target: + if (m_unix_signals_sp) { + StreamSP warning_strm = GetTarget().GetDebugger().GetAsyncErrorStream(); + GetTarget().UpdateSignalsFromDummy(m_unix_signals_sp, warning_strm); + } DynamicLoader *dyld = GetDynamicLoader(); if (dyld) @@ -2928,6 +2935,12 @@ void Process::CompleteAttach() { } } } + // Now that we know the process type, update its signal responses from the + // ones stored in the Target: + if (m_unix_signals_sp) { + StreamSP warning_strm = GetTarget().GetDebugger().GetAsyncErrorStream(); + GetTarget().UpdateSignalsFromDummy(m_unix_signals_sp, warning_strm); + } // We have completed the attach, now it is time 
to find the dynamic loader // plug-in diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 993e7efd02ba7a..ca9754d40af072 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -51,6 +51,7 @@ #include "lldb/Target/SystemRuntime.h" #include "lldb/Target/Thread.h" #include "lldb/Target/ThreadSpec.h" +#include "lldb/Target/UnixSignals.h" #include "lldb/Utility/Event.h" #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/LLDBAssert.h" @@ -106,7 +107,7 @@ Target::Target(Debugger &debugger, const ArchSpec &target_arch, SetEventName(eBroadcastBitModulesUnloaded, "modules-unloaded"); SetEventName(eBroadcastBitWatchpointChanged, "watchpoint-changed"); SetEventName(eBroadcastBitSymbolsLoaded, "symbols-loaded"); - + CheckInWithManager(); LLDB_LOG(GetLog(LLDBLog::Object), "{0} Target::Target()", @@ -147,6 +148,8 @@ void Target::PrimeFromDummyTarget(Target &target) { m_frame_recognizer_manager_up = std::make_unique( *target.m_frame_recognizer_manager_up); + + m_dummy_signals = target.m_dummy_signals; } void Target::Dump(Stream *s, lldb::DescriptionLevel description_level) { @@ -287,6 +290,8 @@ void Target::Destroy() { m_stop_hooks.clear(); m_stop_hook_next_id = 0; m_suppress_stop_hooks = false; + Args signal_args; + ClearDummySignals(signal_args); } llvm::StringRef Target::GetABIName() const { @@ -3357,6 +3362,129 @@ void Target::FinalizeFileActions(ProcessLaunchInfo &info) { } } +void Target::AddDummySignal(llvm::StringRef name, LazyBool pass, LazyBool notify, + LazyBool stop) { + if (name.empty()) + return; + // Don't add a signal if all the actions are trivial: + if (pass == eLazyBoolCalculate && notify == eLazyBoolCalculate + && stop == eLazyBoolCalculate) + return; + + auto& elem = m_dummy_signals[name]; + elem.pass = pass; + elem.notify = notify; + elem.stop = stop; +} + +bool Target::UpdateSignalFromDummy(UnixSignalsSP signals_sp, + const DummySignalElement &elem) { + if (!signals_sp) + return false; + + 
int32_t signo + = signals_sp->GetSignalNumberFromName(elem.first().str().c_str()); + if (signo == LLDB_INVALID_SIGNAL_NUMBER) + return false; + + if (elem.second.pass == eLazyBoolYes) + signals_sp->SetShouldSuppress(signo, false); + else if (elem.second.pass == eLazyBoolNo) + signals_sp->SetShouldSuppress(signo, true); + + if (elem.second.notify == eLazyBoolYes) + signals_sp->SetShouldNotify(signo, true); + else if (elem.second.notify == eLazyBoolNo) + signals_sp->SetShouldNotify(signo, false); + + if (elem.second.stop == eLazyBoolYes) + signals_sp->SetShouldStop(signo, true); + else if (elem.second.stop == eLazyBoolNo) + signals_sp->SetShouldStop(signo, false); + return true; +} + +bool Target::ResetSignalFromDummy(UnixSignalsSP signals_sp, + const DummySignalElement &elem) { + if (!signals_sp) + return false; + int32_t signo + = signals_sp->GetSignalNumberFromName(elem.first().str().c_str()); + if (signo == LLDB_INVALID_SIGNAL_NUMBER) + return false; + bool do_pass = elem.second.pass != eLazyBoolCalculate; + bool do_stop = elem.second.stop != eLazyBoolCalculate; + bool do_notify = elem.second.notify != eLazyBoolCalculate; + signals_sp->ResetSignal(signo, do_stop, do_notify, do_pass); + return true; +} + +void Target::UpdateSignalsFromDummy(UnixSignalsSP signals_sp, + StreamSP warning_stream_sp) { + if (!signals_sp) + return; + + for (const auto &elem : m_dummy_signals) { + if (!UpdateSignalFromDummy(signals_sp, elem)) + warning_stream_sp->Printf("Target signal '%s' not found in process\n", + elem.first().str().c_str()); + } +} + +void Target::ClearDummySignals(Args &signal_names) { + ProcessSP process_sp = GetProcessSP(); + // The simplest case, delete them all with no process to update. 
+ if (signal_names.GetArgumentCount() == 0 && !process_sp) { + m_dummy_signals.clear(); + return; + } + UnixSignalsSP signals_sp; + if (process_sp) + signals_sp = process_sp->GetUnixSignals(); + + for (const Args::ArgEntry &entry : signal_names) { + const char *signal_name = entry.c_str(); + auto elem = m_dummy_signals.find(signal_name); + // If we didn't find it go on. + // FIXME: Should I pipe error handling through here? + if (elem == m_dummy_signals.end()) { + continue; + } + if (signals_sp) + ResetSignalFromDummy(signals_sp, *elem); + m_dummy_signals.erase(elem); + } +} + +void Target::PrintDummySignals(Stream &strm, Args &signal_args) { + strm.Printf("NAME PASS STOP NOTIFY\n"); + strm.Printf("=========== ======= ======= =======\n"); + + auto str_for_lazy = [] (LazyBool lazy) -> const char * { + switch (lazy) { + case eLazyBoolCalculate: return "not set"; + case eLazyBoolYes: return "true "; + case eLazyBoolNo: return "false "; + } + }; + size_t num_args = signal_args.GetArgumentCount(); + for (const auto &elem : m_dummy_signals) { + bool print_it = false; + for (size_t idx = 0; idx < num_args; idx++) { + if (elem.first() == signal_args.GetArgumentAtIndex(idx)) { + print_it = true; + break; + } + } + if (print_it) { + strm.Printf("%-11s ", elem.first().str().c_str()); + strm.Printf("%s %s %s\n", str_for_lazy(elem.second.pass), + str_for_lazy(elem.second.stop), + str_for_lazy(elem.second.notify)); + } + } +} + // Target::StopHook Target::StopHook::StopHook(lldb::TargetSP target_sp, lldb::user_id_t uid) : UserID(uid), m_target_sp(target_sp), m_specifier_sp(), diff --git a/lldb/source/Target/UnixSignals.cpp b/lldb/source/Target/UnixSignals.cpp index 26ff0bbd3825f2..de1fdb8cc4202a 100644 --- a/lldb/source/Target/UnixSignals.cpp +++ b/lldb/source/Target/UnixSignals.cpp @@ -22,7 +22,9 @@ UnixSignals::Signal::Signal(const char *name, bool default_suppress, const char *description, const char *alias) : m_name(name), m_alias(alias), m_description(), 
m_suppress(default_suppress), m_stop(default_stop), - m_notify(default_notify) { + m_notify(default_notify), + m_default_suppress(default_suppress), m_default_stop(default_stop), + m_default_notify(default_notify) { if (description) m_description.assign(description); } @@ -330,3 +332,23 @@ json::Value UnixSignals::GetHitCountStatistics() const { } return std::move(json_signals); } + +void UnixSignals::Signal::Reset(bool reset_stop, bool reset_notify, + bool reset_suppress) { + if (reset_stop) + m_stop = m_default_stop; + if (reset_notify) + m_notify = m_default_notify; + if (reset_suppress) + m_suppress = m_default_suppress; +} + +bool UnixSignals::ResetSignal(int32_t signo, bool reset_stop, + bool reset_notify, bool reset_suppress) { + auto elem = m_signals.find(signo); + if (elem == m_signals.end()) + return false; + (*elem).second.Reset(reset_stop, reset_notify, reset_suppress); + return true; +} + diff --git a/lldb/test/API/commands/process/handle/Makefile b/lldb/test/API/commands/process/handle/Makefile new file mode 100644 index 00000000000000..99998b20bcb050 --- /dev/null +++ b/lldb/test/API/commands/process/handle/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/process/handle/TestProcessHandle.py b/lldb/test/API/commands/process/handle/TestProcessHandle.py new file mode 100644 index 00000000000000..e026d4b74a35ba --- /dev/null +++ b/lldb/test/API/commands/process/handle/TestProcessHandle.py @@ -0,0 +1,136 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +from lldbsuite.test.decorators import * + +class TestProcessHandle(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + @skipIfWindows + def test_process_handle(self): + """Test that calling process handle before we have a target, and before we + have a process will affect the process. 
Also that the signal settings + are preserved on rerun.""" + self.build() + + # Make sure we don't accept signal values by signo with no process - we don't know what the + # mapping will be so we can't do the right thing with bare numbers: + lldbutil.set_actions_for_signal(self, "9", "true", None, None, expect_success=False) + + # First, I need a reference value so I can see whether changes actually took: + (target, process, _, bkpt) = lldbutil.run_to_source_breakpoint(self, '// break here', lldb.SBFileSpec("main.cpp")) + (default_pass, default_stop, default_notify) = lldbutil.get_actions_for_signal(self, "SIGSEGV") + + # Let's change the value here, then exit and make sure the changed value sticks: + new_value = "true" + if default_pass == "true": + new_value = "false" + + # First make sure we get an error for bogus values when running: + lldbutil.set_actions_for_signal(self, "NOTSIGSEGV", new_value, None, None, expect_success=False) + + # Then set the one we intend to change. + lldbutil.set_actions_for_signal(self, "SIGSEGV", new_value, None, None) + + process.Continue() + + self.assertEqual(process.GetState(), lldb.eStateExited) + self.assertEqual(process.GetExitStatus(), 0) + + # Check that we preserved the setting: + (curr_pass, curr_stop, curr_notify) = lldbutil.get_actions_for_signal(self, "SIGSEGV",from_target=True) + self.assertEqual(curr_pass, new_value, "Pass was set correctly") + self.assertEqual(curr_stop, "not set", "Stop was not set by us") + self.assertEqual(curr_notify, "not set", "Notify was not set by us") + + # Run again and make sure that we prime the new process with these settings: + process = lldbutil.run_to_breakpoint_do_run(self, target, bkpt) + + # We check the process settings now, to see what got copied into the process: + (curr_pass, curr_stop, curr_notify) = lldbutil.get_actions_for_signal(self, "SIGSEGV") + self.assertEqual(curr_pass, new_value, "Pass was set correctly") + self.assertEqual(curr_stop, default_stop, "Stop was its 
default value") + self.assertEqual(curr_notify, default_notify, "Notify was its default value") + + # Now kill this target, set the handling and make sure the values get copied from the dummy into the new target. + success = self.dbg.DeleteTarget(target) + self.assertTrue(success, "Deleted the target") + self.assertEqual(self.dbg.GetNumTargets(), 0, "We did delete all the targets.") + + # The signal settings should be back at their default - we were only setting this on the target: + lldbutil.get_actions_for_signal(self, "SIGSEGV", from_target=True, expected_absent=True) + # Set a valid one: + lldbutil.set_actions_for_signal(self, "SIGSEGV", new_value, None, None) + # Set a bogus one - we don't have a way to check pre-run so this is allowed + # but we should get an error message when launching: + lldbutil.set_actions_for_signal(self, "SIGNOTSIG", new_value, None, None) + + out_filename = self.getBuildArtifact('output') + success = True + try: + f = open(out_filename, 'w') + except: + success = False + + if not success: + self.fail("Couldn't open error output file for writing.") + + self.dbg.SetErrorFileHandle(f, False) + # Now make a new process and make sure the right values got copied into the new target + (target, process, _, bkpt) = lldbutil.run_to_source_breakpoint(self, '// break here', lldb.SBFileSpec("main.cpp")) + f.write("TESTPATTERN\n") + f.flush() + f.close() + + try: + f = open(out_filename, 'r') + except: + success = False + + if not success: + self.fail("Couldn't open error output file for reading") + errors = f.read() + f.close() + + self.assertIn("SIGNOTSIG", errors, "We warned about the unset signal") + # Also make sure we didn't accidentally add this bogus setting to the process. 
+ lldbutil.set_actions_for_signal(self, "SIGNOTSIG", "true", "true", "true", expect_success=False) + + # Check that they went into the target: + (curr_pass, curr_stop, curr_notify) = lldbutil.get_actions_for_signal(self, "SIGSEGV",from_target=True) + self.assertEqual(curr_pass, new_value, "Pass was set correctly") + self.assertEqual(curr_stop, "not set", "Stop was not set by us") + self.assertEqual(curr_notify, "not set", "Notify was not set by us") + + # And the process: + # Check that they went into the target: + (curr_pass, curr_stop, curr_notify) = lldbutil.get_actions_for_signal(self, "SIGSEGV") + self.assertEqual(curr_pass, new_value, "Pass was set correctly") + self.assertEqual(curr_stop, default_stop, "Stop was its default value") + self.assertEqual(curr_notify, default_notify, "Notify was its default value") + + # Now clear the handling, and make sure that we get the right signal values again: + self.runCmd("process handle -c SIGSEGV") + # Check that there is no longer configuration for SIGSEGV in the target: + lldbutil.get_actions_for_signal(self, "SIGSEGV",from_target=True, expected_absent=True) + # Make a new process, to make sure we did indeed reset the values: + (target, process, _, bkpt) = lldbutil.run_to_source_breakpoint(self, '// break here', lldb.SBFileSpec("main.cpp")) + (curr_pass, curr_stop, curr_notify) = lldbutil.get_actions_for_signal(self, "SIGSEGV") + self.assertEqual(curr_pass, new_value, "Pass was set correctly") + self.assertEqual(curr_stop, default_stop, "Stop was its default value") + self.assertEqual(curr_notify, default_notify, "Notify was its default value") + + # Finally remove this from the dummy target as well, and make sure it was cleared from there: + self.runCmd("process handle -c -d SIGSEGV") + error = process.Kill() + self.assertSuccess(error, "Killed the process") + success = self.dbg.DeleteTarget(target) + self.assertTrue(success, "Destroyed the target.") + + (target, process, _, bkpt) = 
lldbutil.run_to_source_breakpoint(self, '// break here', lldb.SBFileSpec("main.cpp")) + (curr_pass, curr_stop, curr_notify) = lldbutil.get_actions_for_signal(self, "SIGSEGV") + self.assertEqual(curr_pass, default_pass, "Pass was set correctly") + self.assertEqual(curr_stop, default_stop, "Stop was its default value") + self.assertEqual(curr_notify, default_notify, "Notify was its default value") diff --git a/lldb/test/API/commands/process/handle/main.cpp b/lldb/test/API/commands/process/handle/main.cpp new file mode 100644 index 00000000000000..ba45ee316cd42b --- /dev/null +++ b/lldb/test/API/commands/process/handle/main.cpp @@ -0,0 +1,3 @@ +int main() { + return 0; // break here +} diff --git a/lldb/test/API/functionalities/signal/raise/TestRaise.py b/lldb/test/API/functionalities/signal/raise/TestRaise.py index 70271e43cff59b..4a81c695775379 100644 --- a/lldb/test/API/functionalities/signal/raise/TestRaise.py +++ b/lldb/test/API/functionalities/signal/raise/TestRaise.py @@ -36,6 +36,10 @@ def test_sigtrap(self): def launch(self, target, signal): # launch the process, do not stop at entry point. + # If we have gotten the default for this signal, reset that as well. 
+ if len(self.default_pass) != 0: + lldbutil.set_actions_for_signal(self, signal, self.default_pass, self.default_stop, self.default_notify) + process = target.LaunchSimple( [signal], None, self.get_process_working_directory()) self.assertTrue(process, PROCESS_IS_VALID) @@ -64,27 +68,19 @@ def signal_test(self, signal, test_passing): target = self.dbg.CreateTarget(exe) self.assertTrue(target, VALID_TARGET) lldbutil.run_break_set_by_symbol(self, "main") + self.default_pass = "" + self.default_stop = "" + self.default_notify = "" # launch process = self.launch(target, signal) signo = process.GetUnixSignals().GetSignalNumberFromName(signal) # retrieve default signal disposition - return_obj = lldb.SBCommandReturnObject() - self.dbg.GetCommandInterpreter().HandleCommand( - "process handle %s " % signal, return_obj) - match = re.match( - 'NAME *PASS *STOP *NOTIFY.*(false|true) *(false|true) *(false|true)', - return_obj.GetOutput(), - re.IGNORECASE | re.DOTALL) - if not match: - self.fail('Unable to retrieve default signal disposition.') - default_pass = match.group(1) - default_stop = match.group(2) - default_notify = match.group(3) + (self.default_pass, self.default_stop, self.default_notify) = lldbutil.get_actions_for_signal(self, signal) # Make sure we stop at the signal - self.set_handle(signal, "false", "true", "true") + lldbutil.set_actions_for_signal(self, signal, "false", "true", "true") process.Continue() self.assertEqual(process.GetState(), lldb.eStateStopped) thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonSignal) @@ -102,12 +98,11 @@ def signal_test(self, signal, test_passing): self.assertEqual(process.GetState(), lldb.eStateExited) self.assertEqual(process.GetExitStatus(), 0) - # launch again process = self.launch(target, signal) # Make sure we do not stop at the signal. We should still get the # notification. 
- self.set_handle(signal, "false", "false", "true") + lldbutil.set_actions_for_signal(self, signal, "false", "false", "true") self.expect( "process continue", substrs=[ @@ -121,7 +116,7 @@ def signal_test(self, signal, test_passing): # Make sure we do not stop at the signal, and we do not get the # notification. - self.set_handle(signal, "false", "false", "false") + lldbutil.set_actions_for_signal(self, signal, "false", "false", "false") self.expect( "process continue", substrs=["stopped and restarted"], @@ -131,14 +126,14 @@ def signal_test(self, signal, test_passing): if not test_passing: # reset signal handling to default - self.set_handle(signal, default_pass, default_stop, default_notify) + lldbutil.set_actions_for_signal(self, signal, self.default_pass, self.default_stop, self.default_notify) return # launch again process = self.launch(target, signal) # Make sure we stop at the signal - self.set_handle(signal, "true", "true", "true") + lldbutil.set_actions_for_signal(self, signal, "true", "true", "true") process.Continue() self.assertEqual(process.GetState(), lldb.eStateStopped) thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonSignal) @@ -164,7 +159,7 @@ def signal_test(self, signal, test_passing): # Make sure we do not stop at the signal. We should still get the notification. Process # should receive the signal. - self.set_handle(signal, "true", "false", "true") + lldbutil.set_actions_for_signal(self, signal, "true", "false", "true") self.expect( "process continue", substrs=[ @@ -178,7 +173,7 @@ def signal_test(self, signal, test_passing): # Make sure we do not stop at the signal, and we do not get the notification. Process # should receive the signal. 
- self.set_handle(signal, "true", "false", "false") + lldbutil.set_actions_for_signal(self, signal, "true", "false", "false") self.expect( "process continue", substrs=["stopped and restarted"], @@ -187,4 +182,4 @@ def signal_test(self, signal, test_passing): self.assertEqual(process.GetExitStatus(), signo) # reset signal handling to default - self.set_handle(signal, default_pass, default_stop, default_notify) + lldbutil.set_actions_for_signal(self, signal, self.default_pass, self.default_stop, self.default_notify) diff --git a/lldb/unittests/Signals/UnixSignalsTest.cpp b/lldb/unittests/Signals/UnixSignalsTest.cpp index 1a53f0a249516c..5b07cf45d099b7 100644 --- a/lldb/unittests/Signals/UnixSignalsTest.cpp +++ b/lldb/unittests/Signals/UnixSignalsTest.cpp @@ -53,6 +53,29 @@ TEST(UnixSignalsTest, Iteration) { EXPECT_EQ(LLDB_INVALID_SIGNAL_NUMBER, signals.GetNextSignalNumber(16)); } +TEST(UnixSignalsTest, Reset) { + TestSignals signals; + bool stop_val = signals.GetShouldStop(2); + bool notify_val = signals.GetShouldNotify(2); + bool suppress_val = signals.GetShouldSuppress(2); + + // Change two, then reset one and make sure only that one was reset: + EXPECT_EQ(true, signals.SetShouldNotify(2, !notify_val)); + EXPECT_EQ(true, signals.SetShouldSuppress(2, !suppress_val)); + EXPECT_EQ(true, signals.ResetSignal(2, false, true, false)); + EXPECT_EQ(stop_val, signals.GetShouldStop(2)); + EXPECT_EQ(notify_val, signals.GetShouldStop(2)); + EXPECT_EQ(!suppress_val, signals.GetShouldNotify(2)); + + // Make sure reset with no arguments resets them all: + EXPECT_EQ(true, signals.SetShouldSuppress(2, !suppress_val)); + EXPECT_EQ(true, signals.SetShouldNotify(2, !notify_val)); + EXPECT_EQ(true, signals.ResetSignal(2)); + EXPECT_EQ(stop_val, signals.GetShouldStop(2)); + EXPECT_EQ(notify_val, signals.GetShouldNotify(2)); + EXPECT_EQ(suppress_val, signals.GetShouldSuppress(2)); +} + TEST(UnixSignalsTest, GetInfo) { TestSignals signals; From 98ff205fd86eb2d1b26e91c6f731f1b307310f95 Mon 
Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 26 May 2022 14:56:36 -0700 Subject: [PATCH 652/908] [lld][WebAssembly] Update test after 87628f5804e2 --- lld/test/wasm/data-segment-merging.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/test/wasm/data-segment-merging.ll b/lld/test/wasm/data-segment-merging.ll index 99f1a95c585440..e6f3c5ee469f22 100644 --- a/lld/test/wasm/data-segment-merging.ll +++ b/lld/test/wasm/data-segment-merging.ll @@ -102,9 +102,9 @@ ; PASSIVE-MERGE-NEXT: - Index: 2 ; PASSIVE-MERGE-NEXT: Name: __wasm_init_memory -; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 -no-merge-data-segments -o %t.separate.passive.wasm %t.passive.o +; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --no-merge-data-segments -o %t.separate.passive.wasm %t.passive.o ; RUN: obj2yaml %t.separate.passive.wasm | FileCheck %s --check-prefix=PASSIVE-SEPARATE -; RUN: wasm-ld -mwasm64 -no-gc-sections --no-entry --shared-memory --max-memory=131072 -no-merge-data-segments -o %t.separate.passive64.wasm %t.passive64.o +; RUN: wasm-ld -mwasm64 -no-gc-sections --no-entry --shared-memory --max-memory=131072 --no-merge-data-segments -o %t.separate.passive64.wasm %t.passive64.o ; RUN: obj2yaml %t.separate.passive64.wasm | FileCheck %s --check-prefix=PASSIVE-SEPARATE ; PASSIVE-SEPARATE-LABEL: - Type: DATACOUNT From c09cd64e5c6dea6e97ef7d6cee5f689df2b408d7 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Fri, 20 May 2022 17:42:58 -0700 Subject: [PATCH 653/908] [BOLT] Fix AND evaluation bug in shrink wrapping Fix a bug where shrink-wrapping would use wrong stack offsets because the stack was being aligned with an AND instruction, hence, making its true offsets only available during runtime (we can't statically determine where are the stack elements and we must give up on this case). 
Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D126110 --- bolt/include/bolt/Core/MCPlusBuilder.h | 22 ++++++-- .../bolt/Passes/StackPointerTracking.h | 4 +- bolt/lib/Passes/AllocCombiner.cpp | 6 +- bolt/lib/Passes/ShrinkWrapping.cpp | 4 +- bolt/lib/Passes/StackAllocationAnalysis.cpp | 2 +- bolt/lib/Passes/ValidateInternalCalls.cpp | 4 +- bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 17 ++---- bolt/test/X86/shrinkwrapping-and-rsp.s | 55 +++++++++++++++++++ 8 files changed, 85 insertions(+), 29 deletions(-) create mode 100644 bolt/test/X86/shrinkwrapping-and-rsp.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 1b4159dd932d64..d4f5c4202292f2 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -909,12 +909,22 @@ class MCPlusBuilder { return false; } - /// Use \p Input1 or Input2 as the current value for the input register and - /// put in \p Output the changes incurred by executing \p Inst. Return false - /// if it was not possible to perform the evaluation. - virtual bool evaluateSimple(const MCInst &Inst, int64_t &Output, - std::pair Input1, - std::pair Input2) const { + /// Use \p Input1 or Input2 as the current value for the input + /// register and put in \p Output the changes incurred by executing + /// \p Inst. Return false if it was not possible to perform the + /// evaluation. evaluateStackOffsetExpr is restricted to operations + /// that have associativity with addition. Its intended usage is for + /// evaluating stack offset changes. In these cases, expressions + /// appear in the form of (x + offset) OP constant, where x is an + /// unknown base (such as stack base) but offset and constant are + /// known. In these cases, \p Output represents the new stack offset + /// after executing \p Inst. Because we don't know x, we can't + /// evaluate operations such as multiply or AND/OR, e.g. 
(x + + /// offset) OP constant is not the same as x + (offset OP constant). + virtual bool + evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output, + std::pair Input1, + std::pair Input2) const { llvm_unreachable("not implemented"); return false; } diff --git a/bolt/include/bolt/Passes/StackPointerTracking.h b/bolt/include/bolt/Passes/StackPointerTracking.h index 535977a2ba4a8b..cbb4f88302bc65 100644 --- a/bolt/include/bolt/Passes/StackPointerTracking.h +++ b/bolt/include/bolt/Passes/StackPointerTracking.h @@ -115,7 +115,7 @@ class StackPointerTrackingBase else FP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) { + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) { if (SPVal == EMPTY && FPVal == EMPTY) return SPVal; return SUPERPOSITION; @@ -150,7 +150,7 @@ class StackPointerTrackingBase else SP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) { + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) { if (SPVal == EMPTY && FPVal == EMPTY) return FPVal; return SUPERPOSITION; diff --git a/bolt/lib/Passes/AllocCombiner.cpp b/bolt/lib/Passes/AllocCombiner.cpp index 652111cf34a90b..e92b74e130efc5 100644 --- a/bolt/lib/Passes/AllocCombiner.cpp +++ b/bolt/lib/Passes/AllocCombiner.cpp @@ -29,9 +29,9 @@ namespace { bool getStackAdjustmentSize(const BinaryContext &BC, const MCInst &Inst, int64_t &Adjustment) { - return BC.MIB->evaluateSimple(Inst, Adjustment, - std::make_pair(BC.MIB->getStackPointer(), 0LL), - std::make_pair(0, 0LL)); + return BC.MIB->evaluateStackOffsetExpr( + Inst, Adjustment, std::make_pair(BC.MIB->getStackPointer(), 0LL), + std::make_pair(0, 0LL)); } bool isIndifferentToSP(const MCInst &Inst, const BinaryContext &BC) { diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp index f150965a67e099..f032735f8981f9 100644 --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -246,7 +246,7 @@ void 
StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { SP = std::make_pair(0, 0); int64_t Output; - if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) + if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return; // Not your regular frame pointer initialization... bail @@ -294,7 +294,7 @@ void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { SP = std::make_pair(0, 0); int64_t Output; - if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) + if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return; // If the value is the same of FP, no need to adjust it diff --git a/bolt/lib/Passes/StackAllocationAnalysis.cpp b/bolt/lib/Passes/StackAllocationAnalysis.cpp index 92ad2ccac85467..25975537a12ab0 100644 --- a/bolt/lib/Passes/StackAllocationAnalysis.cpp +++ b/bolt/lib/Passes/StackAllocationAnalysis.cpp @@ -134,7 +134,7 @@ BitVector StackAllocationAnalysis::computeNext(const MCInst &Point, else FP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return Next; if (SPOffset < Output) { diff --git a/bolt/lib/Passes/ValidateInternalCalls.cpp b/bolt/lib/Passes/ValidateInternalCalls.cpp index 7faff7676ba172..d1246a2abf2847 100644 --- a/bolt/lib/Passes/ValidateInternalCalls.cpp +++ b/bolt/lib/Passes/ValidateInternalCalls.cpp @@ -261,8 +261,8 @@ bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const { int64_t Output; std::pair Input1 = std::make_pair(Reg, 0); std::pair Input2 = std::make_pair(0, 0); - if (!BC.MIB->evaluateSimple(Use, Output, Input1, Input2)) { - LLVM_DEBUG(dbgs() << "Evaluate simple failed.\n"); + if (!BC.MIB->evaluateStackOffsetExpr(Use, Output, Input1, Input2)) { + LLVM_DEBUG(dbgs() << "Evaluate stack offset expr failed.\n"); return false; } if (Offset + Output < 0 || diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 
610eafc60971ff..d6debda767071f 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -1244,9 +1244,10 @@ class X86MCPlusBuilder : public MCPlusBuilder { return false; } - bool evaluateSimple(const MCInst &Inst, int64_t &Output, - std::pair Input1, - std::pair Input2) const override { + bool + evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output, + std::pair Input1, + std::pair Input2) const override { auto getOperandVal = [&](MCPhysReg Reg) -> ErrorOr { if (Reg == Input1.first) @@ -1260,16 +1261,6 @@ class X86MCPlusBuilder : public MCPlusBuilder { default: return false; - case X86::AND64ri32: - case X86::AND64ri8: - if (!Inst.getOperand(2).isImm()) - return false; - if (ErrorOr InputVal = - getOperandVal(Inst.getOperand(1).getReg())) - Output = *InputVal & Inst.getOperand(2).getImm(); - else - return false; - break; case X86::SUB64ri32: case X86::SUB64ri8: if (!Inst.getOperand(2).isImm()) diff --git a/bolt/test/X86/shrinkwrapping-and-rsp.s b/bolt/test/X86/shrinkwrapping-and-rsp.s new file mode 100644 index 00000000000000..2228c6037e9c51 --- /dev/null +++ b/bolt/test/X86/shrinkwrapping-and-rsp.s @@ -0,0 +1,55 @@ +# This checks that shrink wrapping does not attempt to access stack elements +# using RSP when the function is aligning RSP and changing offsets. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata \ +# RUN: -frame-opt=all -simplify-conditional-tail-calls=false \ +# RUN: -eliminate-unreachable=false | FileCheck %s + +# Here we have a function that aligns the stack at prologue. Stack pointer +# analysis can't try to infer offset positions after AND because that depends +# on the runtime value of the stack pointer of callee (whether it is misaligned +# or not). 
+ .globl _start + .type _start, %function +_start: + .cfi_startproc +# FDATA: 0 [unknown] 0 1 _start 0 0 1 + push %rbp + mov %rsp, %rbp + push %rbx + push %r14 + and $0xffffffffffffffe0,%rsp + subq $0x20, %rsp +b: je hot_path +# FDATA: 1 _start #b# 1 _start #hot_path# 0 1 +cold_path: + mov %r14, %rdi + mov %rbx, %rdi + # Block push-pop mode by using an instruction that requires the + # stack to be aligned to 16B (128 bits). This will force the pass + # to try to index stack elements by using RSP +offset directly, but + # we do not know how to access individual elements of the stack thanks + # to the alignment. + movdqa %xmm8, (%rsp) + addq $0x20, %rsp + pop %r14 + pop %rbx + pop %rbp + ret +hot_path: + addq $0x20, %rsp + pop %r14 + pop %rbx + pop %rbp + ret + .cfi_endproc + .size _start, .-_start + +# CHECK: BOLT-INFO: Shrink wrapping moved 0 spills inserting load/stores and 0 spills inserting push/pops From 8a3b6ba7569e80782dbd1893902bd6134bd6ff01 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 26 May 2022 15:24:41 -0700 Subject: [PATCH 654/908] [RISCV] Add a subtarget feature to enable unaligned scalar loads and stores A RISCV implementation can choose to implement unaligned load/store support. We currently don't have a way for such a processor to indicate a preference for unaligned load/stores, so add a subtarget feature. There doesn't appear to be a formal extension for unaligned support. The RISCV Profiles (https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva20u64-profile) docs use the name Zicclsm, but a) that doesn't appear to have actually been standardized, and b) isn't quite what we want here anyway due to the perf comment. Instead, we can follow precedent from other backends and have a feature flag for the existence of misaligned load/stores with sufficient performance that user code should actually use them. 
Differential Revision: https://reviews.llvm.org/D126085 --- llvm/lib/Target/RISCV/RISCV.td | 5 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 2 + .../CodeGen/RISCV/unaligned-load-store.ll | 164 ++++++++++++------ 4 files changed, 126 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 478773625f9710..2303578a280126 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -430,6 +430,11 @@ foreach i = {1-31} in def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore", "true", "Enable save/restore.">; +def FeatureUnalignedScalarMem + : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", + "true", "Has reasonably performant unaligned scalar " + "loads and stores">; + def TuneNoDefaultUnroll : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false", "Disable default unroll preference.">; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index defbfce203f29b..4f74f3f01795c1 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11787,9 +11787,13 @@ bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { - if (!VT.isVector()) - return false; + if (!VT.isVector()) { + if (Fast) + *Fast = false; + return Subtarget.enableUnalignedScalarMem(); + } + // All vector implementations must support element alignment EVT ElemVT = VT.getVectorElementType(); if (Alignment >= ElemVT.getStoreSize()) { if (Fast) diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index c24f2bb10353df..664de38d62f54c 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ 
b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -92,6 +92,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool EnableRVCHintInstrs = true; bool EnableDefaultUnroll = true; bool EnableSaveRestore = false; + bool EnableUnalignedScalarMem = false; unsigned XLen = 32; unsigned ZvlLen = 0; MVT XLenVT = MVT::i32; @@ -182,6 +183,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; } bool enableDefaultUnroll() const { return EnableDefaultUnroll; } bool enableSaveRestore() const { return EnableSaveRestore; } + bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; } MVT getXLenVT() const { return XLenVT; } unsigned getXLen() const { return XLen; } unsigned getFLen() const { diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 3bf0d0a0744e48..c5bb7e4b5be016 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -1,43 +1,60 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=BOTH,RV32I %s +; RUN: | FileCheck -check-prefixes=ALL,NOMISALIGN,RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=BOTH,RV64I %s +; RUN: | FileCheck -check-prefixes=ALL,NOMISALIGN,RV64I %s +; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=ALL,MISALIGN,MISALIGN-RV32I %s +; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=ALL,MISALIGN,MISALIGN-RV64I %s ; A collection of cases showing codegen for unaligned loads and stores define i8 @load_i8(i8* %p) { -; BOTH-LABEL: load_i8: -; BOTH: # %bb.0: -; BOTH-NEXT: lb a0, 0(a0) -; BOTH-NEXT: ret +; ALL-LABEL: load_i8: +; ALL: # %bb.0: +; 
ALL-NEXT: lb a0, 0(a0) +; ALL-NEXT: ret %res = load i8, i8* %p, align 1 ret i8 %res } define i16 @load_i16(i16* %p) { -; BOTH-LABEL: load_i16: -; BOTH: # %bb.0: -; BOTH-NEXT: lb a1, 1(a0) -; BOTH-NEXT: lbu a0, 0(a0) -; BOTH-NEXT: slli a1, a1, 8 -; BOTH-NEXT: or a0, a1, a0 -; BOTH-NEXT: ret +; NOMISALIGN-LABEL: load_i16: +; NOMISALIGN: # %bb.0: +; NOMISALIGN-NEXT: lb a1, 1(a0) +; NOMISALIGN-NEXT: lbu a0, 0(a0) +; NOMISALIGN-NEXT: slli a1, a1, 8 +; NOMISALIGN-NEXT: or a0, a1, a0 +; NOMISALIGN-NEXT: ret +; +; MISALIGN-LABEL: load_i16: +; MISALIGN: # %bb.0: +; MISALIGN-NEXT: lh a0, 0(a0) +; MISALIGN-NEXT: ret %res = load i16, i16* %p, align 1 ret i16 %res } define i24 @load_i24(i24* %p) { -; BOTH-LABEL: load_i24: -; BOTH: # %bb.0: -; BOTH-NEXT: lbu a1, 1(a0) -; BOTH-NEXT: lbu a2, 0(a0) -; BOTH-NEXT: lb a0, 2(a0) -; BOTH-NEXT: slli a1, a1, 8 -; BOTH-NEXT: or a1, a1, a2 -; BOTH-NEXT: slli a0, a0, 16 -; BOTH-NEXT: or a0, a1, a0 -; BOTH-NEXT: ret +; NOMISALIGN-LABEL: load_i24: +; NOMISALIGN: # %bb.0: +; NOMISALIGN-NEXT: lbu a1, 1(a0) +; NOMISALIGN-NEXT: lbu a2, 0(a0) +; NOMISALIGN-NEXT: lb a0, 2(a0) +; NOMISALIGN-NEXT: slli a1, a1, 8 +; NOMISALIGN-NEXT: or a1, a1, a2 +; NOMISALIGN-NEXT: slli a0, a0, 16 +; NOMISALIGN-NEXT: or a0, a1, a0 +; NOMISALIGN-NEXT: ret +; +; MISALIGN-LABEL: load_i24: +; MISALIGN: # %bb.0: +; MISALIGN-NEXT: lb a1, 2(a0) +; MISALIGN-NEXT: lhu a0, 0(a0) +; MISALIGN-NEXT: slli a1, a1, 16 +; MISALIGN-NEXT: or a0, a0, a1 +; MISALIGN-NEXT: ret %res = load i24, i24* %p, align 1 ret i24 %res } @@ -70,6 +87,11 @@ define i32 @load_i32(i32* %p) { ; RV64I-NEXT: slli a0, a0, 16 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret +; +; MISALIGN-LABEL: load_i32: +; MISALIGN: # %bb.0: +; MISALIGN-NEXT: lw a0, 0(a0) +; MISALIGN-NEXT: ret %res = load i32, i32* %p, align 1 ret i32 %res } @@ -125,54 +147,83 @@ define i64 @load_i64(i64* %p) { ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret +; +; MISALIGN-RV32I-LABEL: load_i64: +; MISALIGN-RV32I: # 
%bb.0: +; MISALIGN-RV32I-NEXT: lw a2, 0(a0) +; MISALIGN-RV32I-NEXT: lw a1, 4(a0) +; MISALIGN-RV32I-NEXT: mv a0, a2 +; MISALIGN-RV32I-NEXT: ret +; +; MISALIGN-RV64I-LABEL: load_i64: +; MISALIGN-RV64I: # %bb.0: +; MISALIGN-RV64I-NEXT: ld a0, 0(a0) +; MISALIGN-RV64I-NEXT: ret %res = load i64, i64* %p, align 1 ret i64 %res } define void @store_i8(i8* %p, i8 %v) { -; BOTH-LABEL: store_i8: -; BOTH: # %bb.0: -; BOTH-NEXT: sb a1, 0(a0) -; BOTH-NEXT: ret +; ALL-LABEL: store_i8: +; ALL: # %bb.0: +; ALL-NEXT: sb a1, 0(a0) +; ALL-NEXT: ret store i8 %v, i8* %p, align 1 ret void } define void @store_i16(i16* %p, i16 %v) { -; BOTH-LABEL: store_i16: -; BOTH: # %bb.0: -; BOTH-NEXT: sb a1, 0(a0) -; BOTH-NEXT: srli a1, a1, 8 -; BOTH-NEXT: sb a1, 1(a0) -; BOTH-NEXT: ret +; NOMISALIGN-LABEL: store_i16: +; NOMISALIGN: # %bb.0: +; NOMISALIGN-NEXT: sb a1, 0(a0) +; NOMISALIGN-NEXT: srli a1, a1, 8 +; NOMISALIGN-NEXT: sb a1, 1(a0) +; NOMISALIGN-NEXT: ret +; +; MISALIGN-LABEL: store_i16: +; MISALIGN: # %bb.0: +; MISALIGN-NEXT: sh a1, 0(a0) +; MISALIGN-NEXT: ret store i16 %v, i16* %p, align 1 ret void } define void @store_i24(i24* %p, i24 %v) { -; BOTH-LABEL: store_i24: -; BOTH: # %bb.0: -; BOTH-NEXT: sb a1, 0(a0) -; BOTH-NEXT: srli a2, a1, 8 -; BOTH-NEXT: sb a2, 1(a0) -; BOTH-NEXT: srli a1, a1, 16 -; BOTH-NEXT: sb a1, 2(a0) -; BOTH-NEXT: ret +; NOMISALIGN-LABEL: store_i24: +; NOMISALIGN: # %bb.0: +; NOMISALIGN-NEXT: sb a1, 0(a0) +; NOMISALIGN-NEXT: srli a2, a1, 8 +; NOMISALIGN-NEXT: sb a2, 1(a0) +; NOMISALIGN-NEXT: srli a1, a1, 16 +; NOMISALIGN-NEXT: sb a1, 2(a0) +; NOMISALIGN-NEXT: ret +; +; MISALIGN-LABEL: store_i24: +; MISALIGN: # %bb.0: +; MISALIGN-NEXT: sh a1, 0(a0) +; MISALIGN-NEXT: srli a1, a1, 16 +; MISALIGN-NEXT: sb a1, 2(a0) +; MISALIGN-NEXT: ret store i24 %v, i24* %p, align 1 ret void } define void @store_i32(i32* %p, i32 %v) { -; BOTH-LABEL: store_i32: -; BOTH: # %bb.0: -; BOTH-NEXT: sb a1, 0(a0) -; BOTH-NEXT: srli a2, a1, 24 -; BOTH-NEXT: sb a2, 3(a0) -; BOTH-NEXT: srli a2, a1, 
16 -; BOTH-NEXT: sb a2, 2(a0) -; BOTH-NEXT: srli a1, a1, 8 -; BOTH-NEXT: sb a1, 1(a0) -; BOTH-NEXT: ret +; NOMISALIGN-LABEL: store_i32: +; NOMISALIGN: # %bb.0: +; NOMISALIGN-NEXT: sb a1, 0(a0) +; NOMISALIGN-NEXT: srli a2, a1, 24 +; NOMISALIGN-NEXT: sb a2, 3(a0) +; NOMISALIGN-NEXT: srli a2, a1, 16 +; NOMISALIGN-NEXT: sb a2, 2(a0) +; NOMISALIGN-NEXT: srli a1, a1, 8 +; NOMISALIGN-NEXT: sb a1, 1(a0) +; NOMISALIGN-NEXT: ret +; +; MISALIGN-LABEL: store_i32: +; MISALIGN: # %bb.0: +; MISALIGN-NEXT: sw a1, 0(a0) +; MISALIGN-NEXT: ret store i32 %v, i32* %p, align 1 ret void } @@ -214,6 +265,17 @@ define void @store_i64(i64* %p, i64 %v) { ; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: sb a1, 1(a0) ; RV64I-NEXT: ret +; +; MISALIGN-RV32I-LABEL: store_i64: +; MISALIGN-RV32I: # %bb.0: +; MISALIGN-RV32I-NEXT: sw a2, 4(a0) +; MISALIGN-RV32I-NEXT: sw a1, 0(a0) +; MISALIGN-RV32I-NEXT: ret +; +; MISALIGN-RV64I-LABEL: store_i64: +; MISALIGN-RV64I: # %bb.0: +; MISALIGN-RV64I-NEXT: sd a1, 0(a0) +; MISALIGN-RV64I-NEXT: ret store i64 %v, i64* %p, align 1 ret void } From 36096c2b383ec78030aad47c6b9f479f34d571d7 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 26 May 2022 14:18:58 -0700 Subject: [PATCH 655/908] [NFC][JumpThreading] Remove InsertFreezeWhenUnfoldingSelect pass parameter All callers pass true. select-unfold-freeze.ll is now a subset of select.ll so delete it. 
Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D126501 --- llvm/include/llvm/Transforms/Scalar.h | 6 +- .../llvm/Transforms/Scalar/JumpThreading.h | 3 +- llvm/lib/Passes/PassBuilderPipelines.cpp | 4 +- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 4 +- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 18 +- .../JumpThreading/select-unfold-freeze.ll | 244 ------------------ 6 files changed, 12 insertions(+), 267 deletions(-) delete mode 100644 llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index f903f64f57a584..edd492b0343d24 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -240,12 +240,10 @@ FunctionPass *createReassociatePass(); //===----------------------------------------------------------------------===// // // JumpThreading - Thread control through mult-pred/multi-succ blocks where some -// preds always go to some succ. If FreezeSelectCond is true, unfold the -// condition of a select that unfolds to branch. Thresholds other than minus one +// preds always go to some succ. Thresholds other than minus one // override the internal BB duplication default threshold. 
// -FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false, - int Threshold = -1); +FunctionPass *createJumpThreadingPass(int Threshold = -1); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 07db880a6d74bf..09d08bf423a6a8 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -92,10 +92,9 @@ class JumpThreadingPass : public PassInfoMixin { unsigned BBDupThreshold; unsigned DefaultBBDupThreshold; - bool InsertFreezeWhenUnfoldingSelect; public: - JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = true, int T = -1); + JumpThreadingPass(int T = -1); // Glue for old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 7a2960a669eafc..eef964442ee7d0 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1617,7 +1617,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); - FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); + FPM.addPass(JumpThreadingPass()); // Do a post inline PGO instrumentation and use pass. This is a context // sensitive PGO pass. 
@@ -1701,7 +1701,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass())); invokePeepholeEPCallbacks(MainFPM, Level); - MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); + MainFPM.addPass(JumpThreadingPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), PTO.EagerlyInvalidateAnalyses)); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index beac7d6ff8e8b6..c4e660fe09b573 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1011,7 +1011,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // The IPO passes may leave cruft around. Clean up after them. PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); - PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); + PM.add(createJumpThreadingPass()); // Break up allocas PM.add(createSROAPass()); @@ -1056,7 +1056,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { addExtensionsToPM(EP_Peephole, PM); - PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); + PM.add(createJumpThreadingPass()); } void PassManagerBuilder::addLateLTOOptimizationPasses( diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 5b1f703ffef0b2..a9e5b811f15d46 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -104,11 +104,6 @@ static cl::opt PrintLVIAfterJumpThreading( cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden); -static cl::opt JumpThreadingFreezeSelectCond( - "jump-threading-freeze-select-cond", - cl::desc("Freeze the condition when unfolding select"), cl::init(true), - cl::Hidden); - static cl::opt ThreadAcrossLoopHeaders( 
"jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), @@ -138,8 +133,7 @@ namespace { public: static char ID; // Pass identification - JumpThreading(bool InsertFreezeWhenUnfoldingSelect = true, int T = -1) - : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) { + JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) { initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); } @@ -173,12 +167,11 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) // Public interface to the Jump Threading pass -FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) { - return new JumpThreading(InsertFr, Threshold); +FunctionPass *llvm::createJumpThreadingPass(int Threshold) { + return new JumpThreading(Threshold); } -JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) { - InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr; +JumpThreadingPass::JumpThreadingPass(int T) { DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T); } @@ -2911,8 +2904,7 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { continue; // Expand the select. 
Value *Cond = SI->getCondition(); - if (InsertFreezeWhenUnfoldingSelect && - !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI)) + if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI)) Cond = new FreezeInst(Cond, "cond.fr", SI); Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false); BasicBlock *SplitBB = SI->getParent(); diff --git a/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll b/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll deleted file mode 100644 index 5f6f5ddcd7cfd3..00000000000000 --- a/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll +++ /dev/null @@ -1,244 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -jump-threading-freeze-select-cond -jump-threading < %s | FileCheck %s - -declare void @foo() -declare void @bar() -declare void @baz() -declare void @quux() - - -define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind { -; CHECK-LABEL: @test_switch_cmp( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[L0:%.*]], label [[L0_THREAD:%.*]] -; CHECK: L0: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[VAL:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VAL_PHI]], 0 -; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[L1:%.*]], label [[TMP0:%.*]] -; CHECK: 0: -; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ] -; CHECK-NEXT: switch i8 [[TMP1]], label [[L3:%.*]] [ -; CHECK-NEXT: i8 1, label [[L1]] -; CHECK-NEXT: i8 2, label [[L2:%.*]] -; CHECK-NEXT: ] -; CHECK: L1: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: ret void -; CHECK: L2: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: ret void -; CHECK: L3: -; CHECK-NEXT: call void @baz() -; CHECK-NEXT: ret void -; CHECK: L0.thread: -; CHECK-NEXT: call void @quux() -; CHECK-NEXT: br label [[L1]] -; -entry: - br i1 %cond, label %L0, label %L4 -L0: - %val.phi = phi i32 [%val, %entry], [-1, %L4] - %cmp = icmp slt i32 %val.phi, 0 
- %expr = select i1 %cmp, i8 1, i8 %value - switch i8 %expr, label %L3 [i8 1, label %L1 i8 2, label %L2] - -L1: - call void @foo() - ret void -L2: - call void @bar() - ret void -L3: - call void @baz() - ret void -L4: - call void @quux() - br label %L0 -} - -define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { -; CHECK-LABEL: @unfold3( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 -; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] -; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]] -; CHECK: cond.false.i: -; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] -; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]] -; CHECK: cond.false.6.i: -; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]] -; CHECK: cond.false.10.i: -; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] -; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]] -; CHECK: .exit: -; CHECK-NEXT: [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[PHITMP]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]] -; CHECK: .exit.thread: -; CHECK-NEXT: br label [[DOTEXIT_THREAD4]] -; CHECK: .exit.thread4: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ] -; CHECK-NEXT: ret i32 [[TMP0]] -; -entry: - %add3 = add nsw i32 %j, 2 - %cmp.i = icmp slt i32 %u, %v - br i1 %cmp.i, label %.exit, label %cond.false.i - -cond.false.i: ; preds = %entry - %cmp4.i = icmp sgt i32 %u, %v - br i1 %cmp4.i, label %.exit, label %cond.false.6.i - -cond.false.6.i: ; preds = %cond.false.i - %cmp8.i = icmp slt i32 %w, %x - br i1 %cmp8.i, label %.exit, label 
%cond.false.10.i - -cond.false.10.i: ; preds = %cond.false.6.i - %cmp13.i = icmp sgt i32 %w, %x - br i1 %cmp13.i, label %.exit, label %cond.false.15.i - -cond.false.15.i: ; preds = %cond.false.10.i - %phitmp = icmp sge i32 %y, %z - br label %.exit - -.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i - %cond23.i = phi i1 [ false, %entry ], [ true, %cond.false.i ], [ false, %cond.false.6.i ], [ %phitmp, %cond.false.15.i ], [ true, %cond.false.10.i ] - %j.add3 = select i1 %cond23.i, i32 %j, i32 %add3 - ret i32 %j.add3 -} - -define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { -; CHECK-LABEL: @unfold4( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 -; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] -; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]] -; CHECK: cond.false.i: -; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] -; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]] -; CHECK: cond.false.6.i: -; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]] -; CHECK: cond.false.10.i: -; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] -; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]] -; CHECK: .exit: -; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 -; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1 -; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]] -; CHECK: .exit.thread: -; CHECK-NEXT: br label [[DOTEXIT_THREAD5]] -; CHECK: .exit.thread5: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], 
[ [[ADD3]], [[COND_FALSE_10_I]] ] -; CHECK-NEXT: ret i32 [[TMP0]] -; -entry: - %add3 = add nsw i32 %j, 2 - %cmp.i = icmp slt i32 %u, %v - br i1 %cmp.i, label %.exit, label %cond.false.i - -cond.false.i: ; preds = %entry - %cmp4.i = icmp sgt i32 %u, %v - br i1 %cmp4.i, label %.exit, label %cond.false.6.i - -cond.false.6.i: ; preds = %cond.false.i - %cmp8.i = icmp slt i32 %w, %x - br i1 %cmp8.i, label %.exit, label %cond.false.10.i - -cond.false.10.i: ; preds = %cond.false.6.i - %cmp13.i = icmp sgt i32 %w, %x - br i1 %cmp13.i, label %.exit, label %cond.false.15.i - -cond.false.15.i: ; preds = %cond.false.10.i - %cmp19.i = icmp sge i32 %y, %z - %conv = zext i1 %cmp19.i to i32 - br label %.exit - -.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i - %cond23.i = phi i32 [ 1, %entry ], [ 0, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 0, %cond.false.10.i ] - %lnot.i18 = icmp eq i32 %cond23.i, 1 - %j.add3 = select i1 %lnot.i18, i32 %j, i32 %add3 - ret i32 %j.add3 -} - -; TODO: cond23_i should be constant-folded. 
-define i32 @unfold5(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { -; CHECK-LABEL: @unfold5( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 -; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] -; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT:%.*]], label [[COND_FALSE_I:%.*]] -; CHECK: cond.false.i: -; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] -; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT]], label [[COND_FALSE_6_I:%.*]] -; CHECK: cond.false.6.i: -; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT]], label [[COND_FALSE_10_I:%.*]] -; CHECK: cond.false.10.i: -; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] -; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT]], label [[COND_FALSE_15_I:%.*]] -; CHECK: cond.false.15.i: -; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 -; CHECK-NEXT: br label [[DOTEXIT]] -; CHECK: .exit: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[COND_FALSE_10_I]] ], [ [[CONV]], [[COND_FALSE_15_I]] ], [ 1, [[COND_FALSE_6_I]] ], [ 3, [[COND_FALSE_I]] ], [ 2, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[TMP0]] -; -entry: - %add3 = add nsw i32 %j, 2 - %cmp.i = icmp slt i32 %u, %v - br i1 %cmp.i, label %.exit, label %cond.false.i - -cond.false.i: ; preds = %entry - %cmp4.i = icmp sgt i32 %u, %v - br i1 %cmp4.i, label %.exit, label %cond.false.6.i - -cond.false.6.i: ; preds = %cond.false.i - %cmp8.i = icmp slt i32 %w, %x - br i1 %cmp8.i, label %.exit, label %cond.false.10.i - -cond.false.10.i: ; preds = %cond.false.6.i - %cmp13.i = icmp sgt i32 %w, %x - br i1 %cmp13.i, label %.exit, label %cond.false.15.i - -cond.false.15.i: ; preds = %cond.false.10.i - %cmp19.i = icmp sge i32 %y, %z - %conv = zext i1 %cmp19.i to i32 - br label %.exit - -.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i - %cond23.i 
= phi i32 [ 2, %entry ], [ 3, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 7, %cond.false.10.i ] - %lnot.i18 = icmp sgt i32 %cond23.i, 5 - %j.add3 = select i1 %lnot.i18, i32 %j, i32 %cond23.i - ret i32 %j.add3 -} - -define i32 @TryToUnfoldSelectInCurrBB(i1 %b, i1 %ui, i32 %s, i1 %x) { -; CHECK-LABEL: @TryToUnfoldSelectInCurrBB( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[B:%.*]], label [[IF_END_THREAD:%.*]], label [[IF_END:%.*]] -; CHECK: if.end: -; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[X:%.*]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP0:%.*]], label [[IF_END_THREAD]] -; CHECK: 0: -; CHECK-NEXT: br label [[IF_END_THREAD]] -; CHECK: if.end.thread: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[S:%.*]], [[TMP0]] ], [ 42, [[IF_END]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[TMP1]] -; -entry: - br i1 %b, label %if.end, label %if.else - -if.else: - br label %if.end - -if.end: - %v = phi i1 [ %x, %if.else ], [ false, %entry ] - %v1 = select i1 %v, i32 %s, i32 42 - ret i32 %v1 -} From 4d8d2580c53e130c3c3dd3877384301e3c495554 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Tue, 24 May 2022 10:54:36 -0700 Subject: [PATCH 656/908] [Propeller] Promote functions with propeller profiles to .text.hot. Today, text section prefixes (none, .unlikely, .hot, and .unknown) are determined based on the PGO profile. However, Propeller may deem a function hot when PGO doesn't. Besides, when `-Wl,-keep-text-section-prefix=true` is used, Propeller cannot enforce a global section ordering as the linker can only reorder sections within each output section (.text, .text.hot, .text.unlikely). This patch promotes all functions with Propeller profiles (functions listed in the basic-block-sections profile) to .text.hot. The feature is hidden behind the flag `--bbsections-guided-section-prefix` which defaults to `true`. The new implementation refactors the parsing of basic block sections profile into a new `BasicBlockSectionsProfileReader` analysis pass. 
This allows us to use the information earlier in `CodeGenPrepare` in order to set the functions text prefix. `BasicBlockSectionsProfileReader` will be used both by `BasicBlockSections` pass and `CodeGenPrepare`. Differential Revision: https://reviews.llvm.org/D122930 --- .../CodeGen/BasicBlockSectionsProfileReader.h | 109 +++++++++++ llvm/include/llvm/CodeGen/Passes.h | 6 +- llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/CodeGen/BasicBlockSections.cpp | 169 ++---------------- .../BasicBlockSectionsProfileReader.cpp | 144 +++++++++++++++ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGenPrepare.cpp | 21 ++- llvm/lib/CodeGen/TargetPassConfig.cpp | 7 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 + llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + .../CodeGen/X86/basic-block-sections-cold.ll | 6 +- .../CodeGen/X86/basic-block-sections-list.ll | 18 +- .../X86/basic-block-sections-listbb.ll | 8 +- .../X86/basic-block-sections-source-drift.ll | 4 +- llvm/test/CodeGen/X86/opt-pipeline.ll | 1 + 17 files changed, 326 insertions(+), 176 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h create mode 100644 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h new file mode 100644 index 00000000000000..7ae1304cced9ad --- /dev/null +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -0,0 +1,109 @@ +//===-- BasicBlockSectionsProfileReader.h - BB sections profile reader pass ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass creates the basic block cluster info by reading the basic block +// sections profile. The cluster info will be used by the basic-block-sections +// pass to arrange basic blocks in their sections. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H +#define LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +namespace llvm { + +// The cluster information for a machine basic block. +struct BBClusterInfo { + // MachineBasicBlock ID. + unsigned MBBNumber; + // Cluster ID this basic block belongs to. + unsigned ClusterID; + // Position of basic block within the cluster. + unsigned PositionInCluster; +}; + +using ProgramBBClusterInfoMapTy = StringMap>; + +class BasicBlockSectionsProfileReader : public ImmutablePass { +public: + static char ID; + + BasicBlockSectionsProfileReader(const MemoryBuffer *Buf) + : ImmutablePass(ID), MBuf(Buf) { + initializeBasicBlockSectionsProfileReaderPass( + *PassRegistry::getPassRegistry()); + }; + + BasicBlockSectionsProfileReader() : ImmutablePass(ID) { + initializeBasicBlockSectionsProfileReaderPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Basic Block Sections Profile Reader"; + } + + // Returns true if basic block sections profile exist for function \p + // FuncName. 
+ bool isFunctionHot(StringRef FuncName) const; + + // Returns a pair with first element representing whether basic block sections + // profile exist for the function \p FuncName, and the second element + // representing the basic block sections profile (cluster info) for this + // function. If the first element is true and the second element is empty, it + // means unique basic block sections are desired for all basic blocks of the + // function. + std::pair> + getBBClusterInfoForFunction(StringRef FuncName) const; + + /// Read profiles of basic blocks if available here. + void initializePass() override; + +private: + StringRef getAliasName(StringRef FuncName) const { + auto R = FuncAliasMap.find(FuncName); + return R == FuncAliasMap.end() ? FuncName : R->second; + } + + // This contains the basic-block-sections profile. + const MemoryBuffer *MBuf = nullptr; + + // This encapsulates the BB cluster information for the whole program. + // + // For every function name, it contains the cluster information for (all or + // some of) its basic blocks. The cluster information for every basic block + // includes its cluster ID along with the position of the basic block in that + // cluster. + ProgramBBClusterInfoMapTy ProgramBBClusterInfo; + + // Some functions have alias names. We use this map to find the main alias + // name for which we have mapping in ProgramBBClusterInfo. + StringMap FuncAliasMap; +}; + +// Creates a BasicBlockSectionsProfileReader pass to parse the basic block +// sections profile. \p Buf is a memory buffer that contains the list of +// functions and basic block ids to selectively enable basic block sections. 
+ImmutablePass * +createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf); + +} // namespace llvm +#endif // LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index bfd774f578ac88..6e37d42f0d2944 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -51,10 +51,8 @@ namespace llvm { FunctionPass *createUnreachableBlockEliminationPass(); /// createBasicBlockSections Pass - This pass assigns sections to machine - /// basic blocks and is enabled with -fbasic-block-sections. Buf is a memory - /// buffer that contains the list of functions and basic block ids to - /// selectively enable basic block sections. - MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf); + /// basic blocks and is enabled with -fbasic-block-sections. + MachineFunctionPass *createBasicBlockSectionsPass(); /// createMachineFunctionSplitterPass - This pass splits machine functions /// using profile information. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 41e1d99b536136..ece2f20bd4b86c 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -76,6 +76,7 @@ void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeAttributorLegacyPassPass(PassRegistry&); void initializeAttributorCGSCCLegacyPassPass(PassRegistry &); +void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 295d557cd72713..38ba651d126faf 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -69,25 +69,17 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetMachine.h" -using llvm::SmallSet; -using llvm::SmallVector; -using llvm::StringMap; -using llvm::StringRef; using namespace llvm; // Placing the cold clusters in a separate section mitigates against poor @@ -107,41 +99,11 @@ cl::opt BBSectionsDetectSourceDrift( namespace { -// This struct represents the cluster information for a machine basic block. 
-struct BBClusterInfo { - // MachineBasicBlock ID. - unsigned MBBNumber; - // Cluster ID this basic block belongs to. - unsigned ClusterID; - // Position of basic block within the cluster. - unsigned PositionInCluster; -}; - -using ProgramBBClusterInfoMapTy = StringMap>; - class BasicBlockSections : public MachineFunctionPass { public: static char ID; - // This contains the basic-block-sections profile. - const MemoryBuffer *MBuf = nullptr; - - // This encapsulates the BB cluster information for the whole program. - // - // For every function name, it contains the cluster information for (all or - // some of) its basic blocks. The cluster information for every basic block - // includes its cluster ID along with the position of the basic block in that - // cluster. - ProgramBBClusterInfoMapTy ProgramBBClusterInfo; - - // Some functions have alias names. We use this map to find the main alias - // name for which we have mapping in ProgramBBClusterInfo. - StringMap FuncAliasMap; - - BasicBlockSections(const MemoryBuffer *Buf) - : MachineFunctionPass(ID), MBuf(Buf) { - initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); - }; + BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); @@ -153,9 +115,6 @@ class BasicBlockSections : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override; - /// Read profiles of basic blocks if available here. - bool doInitialization(Module &M) override; - /// Identify basic blocks that need separate sections and prepare to emit them /// accordingly. bool runOnMachineFunction(MachineFunction &MF) override; @@ -205,21 +164,18 @@ static void updateBranches( // This function provides the BBCluster information associated with a function. // Returns true if a valid association exists and false otherwise. 
-static bool getBBClusterInfoForFunction( - const MachineFunction &MF, const StringMap FuncAliasMap, - const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, +bool getBBClusterInfoForFunction( + const MachineFunction &MF, + BasicBlockSectionsProfileReader *BBSectionsProfileReader, std::vector> &V) { - // Get the main alias name for the function. - auto FuncName = MF.getName(); - auto R = FuncAliasMap.find(FuncName); - StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second; // Find the assoicated cluster information. - auto P = ProgramBBClusterInfo.find(AliasName); - if (P == ProgramBBClusterInfo.end()) + std::pair> P = + BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName()); + if (!P.first) return false; - if (P->second.empty()) { + if (P.second.empty()) { // This indicates that sections are desired for all basic blocks of this // function. We clear the BBClusterInfo vector to denote this. V.clear(); @@ -227,7 +183,7 @@ static bool getBBClusterInfoForFunction( } V.resize(MF.getNumBlockIDs()); - for (auto bbClusterInfo : P->second) { + for (auto bbClusterInfo : P.second) { // Bail out if the cluster information contains invalid MBB numbers. if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) return false; @@ -376,9 +332,11 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } + BBSectionsProfileReader = &getAnalysis(); + std::vector> FuncBBClusterInfo; if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, + !getBBClusterInfoForFunction(MF, BBSectionsProfileReader, FuncBBClusterInfo)) return true; MF.setBBSectionsType(BBSectionsType); @@ -426,107 +384,12 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } -// Basic Block Sections can be enabled for a subset of machine basic blocks. -// This is done by passing a file containing names of functions for which basic -// block sections are desired. 
Additionally, machine basic block ids of the -// functions can also be specified for a finer granularity. Moreover, a cluster -// of basic blocks could be assigned to the same section. -// A file with basic block sections for all of function main and three blocks -// for function foo (of which 1 and 2 are placed in a cluster) looks like this: -// ---------------------------- -// list.txt: -// !main -// !foo -// !!1 2 -// !!4 -static Error getBBClusterInfo(const MemoryBuffer *MBuf, - ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, - StringMap &FuncAliasMap) { - assert(MBuf); - line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); - - auto invalidProfileError = [&](auto Message) { - return make_error( - Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + - Twine(LineIt.line_number()) + ": " + Message), - inconvertibleErrorCode()); - }; - - auto FI = ProgramBBClusterInfo.end(); - - // Current cluster ID corresponding to this function. - unsigned CurrentCluster = 0; - // Current position in the current cluster. - unsigned CurrentPosition = 0; - - // Temporary set to ensure every basic block ID appears once in the clusters - // of a function. - SmallSet FuncBBIDs; - - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef S(*LineIt); - if (S[0] == '@') - continue; - // Check for the leading "!" - if (!S.consume_front("!") || S.empty()) - break; - // Check for second "!" which indicates a cluster of basic blocks. - if (S.consume_front("!")) { - if (FI == ProgramBBClusterInfo.end()) - return invalidProfileError( - "Cluster list does not follow a function name specifier."); - SmallVector BBIndexes; - S.split(BBIndexes, ' '); - // Reset current cluster position. 
- CurrentPosition = 0; - for (auto BBIndexStr : BBIndexes) { - unsigned long long BBIndex; - if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) - return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIndexStr + "'."); - if (!FuncBBIDs.insert(BBIndex).second) - return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIndexStr + "'."); - if (!BBIndex && CurrentPosition) - return invalidProfileError("Entry BB (0) does not begin a cluster."); - - FI->second.emplace_back(BBClusterInfo{ - ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); - } - CurrentCluster++; - } else { // This is a function name specifier. - // Function aliases are separated using '/'. We use the first function - // name for the cluster info mapping and delegate all other aliases to - // this one. - SmallVector Aliases; - S.split(Aliases, '/'); - for (size_t i = 1; i < Aliases.size(); ++i) - FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); - - // Prepare for parsing clusters of this function name. - // Start a new cluster map for this function name. 
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; - CurrentCluster = 0; - FuncBBIDs.clear(); - } - } - return Error::success(); -} - -bool BasicBlockSections::doInitialization(Module &M) { - if (!MBuf) - return false; - if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) - report_fatal_error(std::move(Err)); - return false; -} - void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } -MachineFunctionPass * -llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) { - return new BasicBlockSections(Buf); +MachineFunctionPass *llvm::createBasicBlockSectionsPass() { + return new BasicBlockSections(); } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp new file mode 100644 index 00000000000000..c2acf115998b63 --- /dev/null +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -0,0 +1,144 @@ +//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the basic block sections profile reader pass. It parses +// and stores the basic block sections profile file (which is specified via the +// `-basic-block-sections` flag). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +char BasicBlockSectionsProfileReader::ID = 0; +INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", + "Reads and parses a basic block sections profile.", false, + false) + +bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { + return getBBClusterInfoForFunction(FuncName).first; +} + +std::pair> +BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( + StringRef FuncName) const { + std::pair> cluster_info(false, {}); + auto R = ProgramBBClusterInfo.find(getAliasName(FuncName)); + if (R != ProgramBBClusterInfo.end()) { + cluster_info.second = R->second; + cluster_info.first = true; + } + return cluster_info; +} + +// Basic Block Sections can be enabled for a subset of machine basic blocks. +// This is done by passing a file containing names of functions for which basic +// block sections are desired. Additionally, machine basic block ids of the +// functions can also be specified for a finer granularity. Moreover, a cluster +// of basic blocks could be assigned to the same section. 
+// A file with basic block sections for all of function main and three blocks +// for function foo (of which 1 and 2 are placed in a cluster) looks like this: +// ---------------------------- +// list.txt: +// !main +// !foo +// !!1 2 +// !!4 +static Error getBBClusterInfo(const MemoryBuffer *MBuf, + ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, + StringMap &FuncAliasMap) { + assert(MBuf); + line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); + + auto invalidProfileError = [&](auto Message) { + return make_error( + Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + }; + + auto FI = ProgramBBClusterInfo.end(); + + // Current cluster ID corresponding to this function. + unsigned CurrentCluster = 0; + // Current position in the current cluster. + unsigned CurrentPosition = 0; + + // Temporary set to ensure every basic block ID appears once in the clusters + // of a function. + SmallSet FuncBBIDs; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef S(*LineIt); + if (S[0] == '@') + continue; + // Check for the leading "!" + if (!S.consume_front("!") || S.empty()) + break; + // Check for second "!" which indicates a cluster of basic blocks. + if (S.consume_front("!")) { + if (FI == ProgramBBClusterInfo.end()) + return invalidProfileError( + "Cluster list does not follow a function name specifier."); + SmallVector BBIndexes; + S.split(BBIndexes, ' '); + // Reset current cluster position. 
+ CurrentPosition = 0; + for (auto BBIndexStr : BBIndexes) { + unsigned long long BBIndex; + if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + return invalidProfileError(Twine("Unsigned integer expected: '") + + BBIndexStr + "'."); + if (!FuncBBIDs.insert(BBIndex).second) + return invalidProfileError(Twine("Duplicate basic block id found '") + + BBIndexStr + "'."); + if (!BBIndex && CurrentPosition) + return invalidProfileError("Entry BB (0) does not begin a cluster."); + + FI->second.emplace_back(BBClusterInfo{ + ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + } else { // This is a function name specifier. + // Function aliases are separated using '/'. We use the first function + // name for the cluster info mapping and delegate all other aliases to + // this one. + SmallVector Aliases; + S.split(Aliases, '/'); + for (size_t i = 1; i < Aliases.size(); ++i) + FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); + + // Prepare for parsing clusters of this function name. + // Start a new cluster map for this function name. 
+ FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; + CurrentCluster = 0; + FuncBBIDs.clear(); + } + } + return Error::success(); +} + +void BasicBlockSectionsProfileReader::initializePass() { + if (!MBuf) + return; + if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) + report_fatal_error(std::move(Err)); +} + +ImmutablePass * +llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { + return new BasicBlockSectionsProfileReader(Buf); +} diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 6fb6322dfb9832..2ef8380d578eda 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_component_library(LLVMCodeGen BranchRelaxation.cpp BreakFalseDeps.cpp BasicBlockSections.cpp + BasicBlockSectionsProfileReader.cpp CalcSpillWeights.cpp CallingConvLower.cpp CFGuardLongjmp.cpp diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index c71ca729391c80..b4bb37a66a74dd 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -186,6 +187,15 @@ static cl::opt ProfileUnknownInSpecialSection( "to handle it in a different way than .text section, to save " "RAM for example. ")); +static cl::opt BBSectionsGuidedSectionPrefix( + "bbsections-guided-section-prefix", cl::Hidden, cl::init(true), + cl::desc("Use the basic-block-sections profile to determine the text " + "section prefix for hot functions. Functions with " + "basic-block-sections profile will be placed in `.text.hot` " + "regardless of their FDO profile info. 
Other functions won't be " + "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " + "profiles.")); + static cl::opt FreqRatioToSkipMerge( "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " @@ -272,6 +282,7 @@ class TypePromotionTransaction; const TargetLowering *TLI = nullptr; const TargetRegisterInfo *TRI; const TargetTransformInfo *TTI = nullptr; + const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; std::unique_ptr BFI; @@ -347,6 +358,7 @@ class TypePromotionTransaction; AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } private: @@ -447,6 +459,7 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) @@ -473,8 +486,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); PSI = &getAnalysis().getPSI(); + BBSectionsProfileReader = + getAnalysisIfAvailable(); OptSize = F.hasOptSize(); - if (ProfileGuidedSectionPrefix) { + // Use the basic-block-sections profile to promote hot functions to .text.hot if requested. + if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && + BBSectionsProfileReader->isFunctionHot(F.getName())) { + F.setSectionPrefix("hot"); + } else if (ProfileGuidedSectionPrefix) { // The hot attribute overwrites profile count based hotness while profile // counts based hotness overwrite the cold attribute. // This is a conservative behabvior. 
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index be255c83a95975..d659a44c183743 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePassRegistry.h" @@ -1284,7 +1285,11 @@ void TargetPassConfig::addMachinePasses() { // FIXME: In principle, BasicBlockSection::Labels and splitting can used // together. Update this check once we have addressed any issues. if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { - addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); + if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { + addPass(llvm::createBasicBlockSectionsProfileReaderPass( + TM->getBBSectionsFuncListBuf())); + } + addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter) { addPass(createMachineFunctionSplitterPass()); diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 708c8bef290a1b..4b99fd24fd7d9c 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 684774b18308e0..31e747deb8e1df 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -157,6 +157,7 @@ ; GCN-O1-NEXT:Scoped NoAlias Alias Analysis ; GCN-O1-NEXT:Argument Register Usage Information Storage ; GCN-O1-NEXT:Create Garbage Collector Module Metadata +; GCN-O1-NEXT:Basic Block Sections Profile Reader ; GCN-O1-NEXT:Machine Branch Probability Analysis ; GCN-O1-NEXT:Register Usage Information Storage ; GCN-O1-NEXT:Default Regalloc Eviction Advisor @@ -411,6 +412,7 @@ ; GCN-O1-OPTS-NEXT:Scoped NoAlias Alias Analysis ; GCN-O1-OPTS-NEXT:Argument Register Usage Information Storage ; GCN-O1-OPTS-NEXT:Create Garbage Collector Module Metadata +; GCN-O1-OPTS-NEXT:Basic Block Sections Profile Reader ; GCN-O1-OPTS-NEXT:Machine Branch Probability Analysis ; GCN-O1-OPTS-NEXT:Register Usage Information Storage ; GCN-O1-OPTS-NEXT:Default Regalloc Eviction Advisor @@ -698,6 +700,7 @@ ; GCN-O2-NEXT:Scoped NoAlias Alias Analysis ; GCN-O2-NEXT:Argument Register Usage Information Storage ; GCN-O2-NEXT:Create Garbage Collector Module Metadata +; GCN-O2-NEXT:Basic Block Sections Profile Reader ; GCN-O2-NEXT:Machine Branch Probability Analysis ; GCN-O2-NEXT:Register Usage Information Storage ; GCN-O2-NEXT:Default Regalloc Eviction Advisor @@ -987,6 +990,7 @@ ; GCN-O3-NEXT:Scoped NoAlias Alias Analysis ; GCN-O3-NEXT:Argument Register Usage Information Storage ; GCN-O3-NEXT:Create Garbage Collector Module Metadata +; GCN-O3-NEXT:Basic Block Sections Profile Reader ; GCN-O3-NEXT:Machine Branch Probability Analysis ; GCN-O3-NEXT:Register Usage Information Storage ; GCN-O3-NEXT:Default Regalloc Eviction Advisor diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index eff5184e9c84c8..a94d0560efbe45 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector 
Module Metadata +; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index d0bbe886f67968..75bc489ab942e1 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -17,6 +17,7 @@ ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll index be016df7061b52..c635b73a45b986 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll @@ -24,14 +24,14 @@ declare i32 @_Z3barv() #1 declare i32 @_Z3foov() #1 -; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. -; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.1,"ax",@progbits,unique +; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3bazb.1,"ax",@progbits,unique ; Check that a single cold section is started here and id 1 and 2 blocks are placed here. 
; LINUX-SECTIONS: .section .text.split._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb.cold: -; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.2,"ax",@progbits,unique +; LINUX-SECTIONS-NOT: .section .text.hot._Z3bazb._Z3bazb.2,"ax",@progbits,unique ; LINUX-SECTIONS: .LBB0_2: ; LINUX-SECTIONS: .size _Z3bazb, .Lfunc_end{{[0-9]}}-_Z3bazb diff --git a/llvm/test/CodeGen/X86/basic-block-sections-list.ll b/llvm/test/CodeGen/X86/basic-block-sections-list.ll index d2408a0b3e4b38..f9f7f8892d194d 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-list.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-list.ll @@ -1,7 +1,8 @@ ; Check the basic block sections list option. ; RUN: echo '!_Z3foob' > %t ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-FUNCTION-SECTION -; llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names --bbsections-guided-section-prefix=false | FileCheck %s -check-prefix=LINUX-SECTIONS-NO-GUIDED-PREFIX define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -58,17 +59,18 @@ define i32 @_Z3zipb(i1 zeroext %0) nounwind { ret i32 %14 } -; LINUX-SECTIONS: .section .text._Z3foob,"ax",@progbits +; LINUX-SECTIONS-NO-GUIDED-PREFIX: .section .text._Z3foob,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3foob,"ax",@progbits ; LINUX-SECTIONS: _Z3foob: -; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.1,"ax",@progbits +; LINUX-SECTIONS: .section 
.text.hot._Z3foob._Z3foob.__part.1,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.1: -; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.2,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.2,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.2: -; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.3,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.3,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.3: -; LINUX-SECTIONS-FUNCTION-SECTION: .section .text._Z3zipb,"ax",@progbits -; LINUX-SECIONS-NO-FUNCTION-SECTION-NOT: .section .text._Z3zipb,"ax",@progbits +; LINUX-SECTIONS-FUNCTION-SECTION: .section .text._Z3zipb,"ax",@progbits +; LINUX-SECTIONS-NO-FUNCTION-SECTION-NOT: .section .text{{.*}}._Z3zipb,"ax",@progbits ; LINUX-SECTIONS: _Z3zipb: -; LINUX-SECTIONS-NOT: .section .text._Z3zipb._Z3zipb.__part.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3zipb.__part.{{[0-9]+}},"ax",@progbits ; LINUX-SECTIONS-NOT: _Z3zipb.__part.{{[0-9]+}}: diff --git a/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll index fd6522236987d9..32288095361fef 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll @@ -30,17 +30,17 @@ declare i32 @_Z3foov() #1 ; Check that the correct block is found using the call insts for foo and bar. ; -; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. 
-; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.__part.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3bazb.__part.{{[0-9]+}},"ax",@progbits ; LINUX-SECTIONS-LABEL: # %bb.1: ; LINUX-SECTIONS-NEXT: callq _Z3barv -; LINUX-SECTIONS: .section .text._Z3bazb.[[SECTION_LABEL:_Z3bazb.__part.[0-9]+]],"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb.[[SECTION_LABEL:_Z3bazb.__part.[0-9]+]],"ax",@progbits ; LINUX-SECTIONS-NEXT: [[SECTION_LABEL]]: ; LINUX-SECTIONS-NEXT: callq _Z3foov ; LINUX-SECTIONS: .LBB_END0_2: ; LINUX-SECTIONS-NEXT: .size [[SECTION_LABEL]], .LBB_END0_2-[[SECTION_LABEL]] -; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: .Lfunc_end0: ; LINUX-SECTIONS-NEXT: .size _Z3bazb, .Lfunc_end0-_Z3bazb diff --git a/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll b/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll index 7c5bddedf3ec5c..d481b147662dce 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll @@ -15,5 +15,5 @@ define dso_local i32 @foo(i1 zeroext %0, i1 zeroext %1) !annotation !1 { !1 = !{!"instr_prof_hash_mismatch"} -; SOURCE-DRIFT-NOT: .section .text -; HASH-CHECK-DISABLED: .section .text +; SOURCE-DRIFT-NOT: .section .text{{.*}}.foo +; HASH-CHECK-DISABLED: .section .text.hot.foo diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 8552ebc348cbb9..704641087667fc 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -21,6 +21,7 @@ ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager From 
d1440ccaa5bce1726da283759e95ef99a25d4276 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Thu, 31 Mar 2022 13:55:43 -0700 Subject: [PATCH 657/908] [flang] expand the this_image test coverage Add a test with a range of this_image() intrinsic function invocations, including a comprehensive set of standard-conforming keyword and non-keyword arguments with and without optional arguments present and with argument positions covering all possible orderings. Also test that several non-conforming this_image() invocations generate the correct error messages. Differential Revision: https://reviews.llvm.org/D123331 --- .../{this_image.f90 => this_image01.f90} | 0 flang/test/Semantics/this_image02.f90 | 128 ++++++++++++++++++ 2 files changed, 128 insertions(+) rename flang/test/Semantics/{this_image.f90 => this_image01.f90} (100%) create mode 100644 flang/test/Semantics/this_image02.f90 diff --git a/flang/test/Semantics/this_image.f90 b/flang/test/Semantics/this_image01.f90 similarity index 100% rename from flang/test/Semantics/this_image.f90 rename to flang/test/Semantics/this_image01.f90 diff --git a/flang/test/Semantics/this_image02.f90 b/flang/test/Semantics/this_image02.f90 new file mode 100644 index 00000000000000..b5871c3d4b7273 --- /dev/null +++ b/flang/test/Semantics/this_image02.f90 @@ -0,0 +1,128 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! XFAIL: * +! 
Check for semantic errors in this_image() function calls + +program this_image_tests + use iso_fortran_env, only : team_type + implicit none + + !ERROR: Coarray 'team_coarray' may not have type TEAM_TYPE, C_PTR, or C_FUNPTR + type(team_type) team_coarray[*] + type(team_type) home, league(2) + integer n, i, array(1), non_coarray(1), co_array[*] + integer, allocatable :: images(:) + logical non_integer + + !___ standard-conforming statement with no optional arguments present ___ + n = this_image() + + !___ standard-conforming statements with team argument present ___ + n = this_image(home) + n = this_image(team=home) + n = this_image(league(1)) + + !___ standard-conforming statements with coarray argument present ___ + images = this_image(co_array) + images = this_image(coarray=co_array) + + !___ standard-conforming statements with coarray and team arguments present ___ + images = this_image(co_array, home) + images = this_image(co_array, team=home) + images = this_image(team_coarray, team=home) + images = this_image(team_coarray[1], team=home) + images = this_image(coarray=co_array, team=home) + images = this_image(team=home, coarray=co_array) + + !___ standard-conforming statements with coarray and dim arguments present ___ + n = this_image(co_array, i) + n = this_image(co_array, dim=i) + n = this_image(coarray=co_array, dim=i) + n = this_image(dim=i, coarray=co_array) + + !___ standard-conforming statements with all arguments present ___ + n = this_image(co_array, i, home) + n = this_image(co_array, i, team=home) + n = this_image(co_array, dim=i, team=home) + n = this_image(co_array, team=home, dim=i) + + n = this_image(coarray=co_array, dim=i, team=home) + n = this_image(coarray=co_array, team=home, dim=i) + + n = this_image(dim=i, coarray=co_array, team=home) + n = this_image(dim=i, team=home, coarray=co_array) + + n = this_image(team=home, dim=i, coarray=co_array) + n = this_image(team=home, coarray=co_array, dim=i) + + !___ non-conforming statements ___ + + 
!ERROR: TBD + n = this_image(co_array) + + !ERROR: missing mandatory 'dim=' argument + n = this_image(i) + + !ERROR: missing mandatory 'coarray=' argument + n = this_image(dim=i) + + !ERROR: Actual argument for 'dim=' has bad type 'team_type' + n = this_image(i, home) + + !ERROR: missing mandatory 'dim=' argument + n = this_image(i, team=home) + + !ERROR: TBD + n = this_image(coarray=co_array, dim=2) + + !ERROR: missing mandatory 'coarray=' argument + n = this_image(dim=i, team=home) + + !ERROR: missing mandatory 'coarray=' argument + n = this_image(team=home, dim=i) + + ! Doesn't produce an error + n = this_image(coarray=co_array, i) + + !ERROR: No explicit type declared for 'team' + images = this_image(coarray=co_array, team) + + ! non-scalar team_type argument + !ERROR: missing mandatory 'coarray=' argument + n = this_image(team=league) + + ! incorrectly typed argument + !ERROR: missing mandatory 'dim=' argument + n = this_image(3.4) + + !ERROR: too many actual arguments for intrinsic 'this_image' + n = this_image(co_array, i, home, 0) + + ! keyword argument with incorrect type + !ERROR: missing mandatory 'dim=' argument + images = this_image(coarray=non_coarray) + + ! incorrect keyword argument name but valid type (type number) + !ERROR: unknown keyword argument to intrinsic 'this_image' + images = this_image(co_array=co_array) + + ! incorrect keyword argument name but valid type (team_type) + !ERROR: unknown keyword argument to intrinsic 'this_image' + n = this_image(my_team=home) + + ! correct keyword argument name but mismatched type + !ERROR: Actual argument for 'team=' has bad type 'INTEGER(4)' + n = this_image(co_array, i, team=-1) + + !ERROR: 'dim=' argument has unacceptable rank 1 + n = this_image(co_array, array ) + + !ERROR: unknown keyword argument to intrinsic 'this_image' + n = this_image(co_array, dims=i) + + !ERROR: Actual argument for 'dim=' has bad type 'LOGICAL(4)' + n = this_image(co_array, non_integer) + + ! 
A this_image reference with a coarray argument of team type shall also have a team argument + ! Doesn't produce an error + images = this_image(team_coarray) +end program this_image_tests From 5451c4b4fb5448ac256327db4f78c996114ca4ec Mon Sep 17 00:00:00 2001 From: Chelsea Cassanova Date: Thu, 26 May 2022 16:57:57 -0700 Subject: [PATCH 658/908] [lldb/fuzzer] Moving target fuzzer into separate subdirectory Moving lldb-target-fuzzer into its own subdirectory for better organization and modularity. Differential revision: https://reviews.llvm.org/D126507 --- lldb/tools/lldb-fuzzer/CMakeLists.txt | 18 +----------------- .../lldb-target-fuzzer/CMakeLists.txt | 17 +++++++++++++++++ .../lldb-target-fuzzer.cpp | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) create mode 100644 lldb/tools/lldb-fuzzer/lldb-target-fuzzer/CMakeLists.txt rename lldb/tools/lldb-fuzzer/{ => lldb-target-fuzzer}/lldb-target-fuzzer.cpp (97%) diff --git a/lldb/tools/lldb-fuzzer/CMakeLists.txt b/lldb/tools/lldb-fuzzer/CMakeLists.txt index b87e08720b59b1..326c69a29dac19 100644 --- a/lldb/tools/lldb-fuzzer/CMakeLists.txt +++ b/lldb/tools/lldb-fuzzer/CMakeLists.txt @@ -1,18 +1,2 @@ +add_subdirectory(lldb-target-fuzzer) add_subdirectory(utils) - -set(LLVM_LINK_COMPONENTS - Support - ) - -add_llvm_fuzzer(lldb-target-fuzzer - EXCLUDE_FROM_ALL - lldb-target-fuzzer.cpp - ) - -if(TARGET lldb-target-fuzzer) - target_link_libraries(lldb-target-fuzzer - PRIVATE - liblldb - lldbFuzzerUtils - ) -endif() diff --git a/lldb/tools/lldb-fuzzer/lldb-target-fuzzer/CMakeLists.txt b/lldb/tools/lldb-fuzzer/lldb-target-fuzzer/CMakeLists.txt new file mode 100644 index 00000000000000..2c5b09a2571100 --- /dev/null +++ b/lldb/tools/lldb-fuzzer/lldb-target-fuzzer/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_llvm_fuzzer(lldb-target-fuzzer + EXCLUDE_FROM_ALL + lldb-target-fuzzer.cpp + ) + +if(TARGET lldb-target-fuzzer) + target_include_directories(lldb-target-fuzzer PRIVATE ..) 
+ target_link_libraries(lldb-target-fuzzer + PRIVATE + liblldb + lldbFuzzerUtils + ) +endif() diff --git a/lldb/tools/lldb-fuzzer/lldb-target-fuzzer.cpp b/lldb/tools/lldb-fuzzer/lldb-target-fuzzer/lldb-target-fuzzer.cpp similarity index 97% rename from lldb/tools/lldb-fuzzer/lldb-target-fuzzer.cpp rename to lldb/tools/lldb-fuzzer/lldb-target-fuzzer/lldb-target-fuzzer.cpp index 82487bb224f09d..b7103789f5a570 100644 --- a/lldb/tools/lldb-fuzzer/lldb-target-fuzzer.cpp +++ b/lldb/tools/lldb-fuzzer/lldb-target-fuzzer/lldb-target-fuzzer.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include +#include "utils/TempFile.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBTarget.h" From edcd06ba8b4123af83274845316b037b3864f8cb Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Thu, 26 May 2022 17:18:32 -0700 Subject: [PATCH 659/908] [test/ClangScanDeps] Add a target triple for `macro-expansions.cpp` This should fix the `clang-ppc64-aix` builder. --- clang/test/ClangScanDeps/macro-expansions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/ClangScanDeps/macro-expansions.cpp b/clang/test/ClangScanDeps/macro-expansions.cpp index 78415d6716c897..c8c21f87e6007f 100644 --- a/clang/test/ClangScanDeps/macro-expansions.cpp +++ b/clang/test/ClangScanDeps/macro-expansions.cpp @@ -15,7 +15,7 @@ //--- cdb.json.template [{ "directory" : "DIR", - "command" : "clang -c DIR/test.cpp -o DIR/test.o", + "command" : "clang -target x86_64-apple-macosx10.7 -c DIR/test.cpp -o DIR/test.o", "file" : "DIR/test.o" }] From 2046e11ac493818ef782d71a841275c10f3834c5 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Thu, 26 May 2022 12:31:58 -0700 Subject: [PATCH 660/908] [mlir][sparse] Improving ExecutionEngine/SparseTensorUtils.h This change makes the public API of SparseTensorUtils.cpp explicit, whereas before the publicity of these functions was only implicit. 
Implicit publicity is sufficient for mlir-opt to generate calls to these functions, but it's not enough to enable C/C++ code to call them directly in the usual way (i.e., without going through codegen). Thus, leaving the publicity implicit prevents development of other tools (e.g., microbenchmarks). In addition this change also marks the functions MLIR_CRUNNERUTILS_EXPORT, which is required by the JIT under certain configurations (albeit not for anything in our test suite). Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126105 --- .../mlir/ExecutionEngine/SparseTensorUtils.h | 261 +++++++++++++++++- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 160 ++--------- .../llvm-project-overlay/mlir/BUILD.bazel | 1 + 3 files changed, 285 insertions(+), 137 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h index 85549121ff03c7..e733544eea35f7 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h @@ -6,22 +6,38 @@ // //===----------------------------------------------------------------------===// // -// This header file defines several enums shared between -// Transforms/SparseTensorConversion.cpp and ExecutionEngine/SparseUtils.cpp +// This header file provides the enums and functions which comprise the +// public API of `ExecutionEngine/SparseUtils.cpp`. // //===----------------------------------------------------------------------===// #ifndef MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_ #define MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_ +#include "mlir/ExecutionEngine/CRunnerUtils.h" + #include +#include +#include extern "C" { +//===----------------------------------------------------------------------===// +// +// Typedefs and enums. 
These are required to be public so that they +// can be shared with `Transforms/SparseTensorConversion.cpp`, since +// they define the arguments to the public functions declared later on. +// +// This section also defines x-macros +// so that we can generate variations of the public functions for each +// supported primary- and/or overhead-type. +// +//===----------------------------------------------------------------------===// + /// This type is used in the public API at all places where MLIR expects /// values with the built-in type "index". For now, we simply assume that -/// type is 64-bit, but targets with different "index" bit widths should link -/// with an alternatively built runtime support library. +/// type is 64-bit, but targets with different "index" bit widths should +/// link with an alternatively built runtime support library. // TODO: support such targets? using index_type = uint64_t; @@ -35,6 +51,32 @@ enum class OverheadType : uint32_t { kU8 = 4 }; +// This x-macro calls its argument on every overhead type which has +// fixed-width. It excludes `index_type` because that type is often +// handled specially (e.g., by translating it into the architecture-dependent +// equivalent fixed-width overhead type). +#define FOREVERY_FIXED_O(DO) \ + DO(64, uint64_t) \ + DO(32, uint32_t) \ + DO(16, uint16_t) \ + DO(8, uint8_t) + +// This x-macro calls its argument on every overhead type, including +// `index_type`. Our naming convention uses an empty suffix for +// `index_type`, so the missing first argument when we call `DO` +// gets resolved to the empty token which can then be concatenated +// as intended. (This behavior is standard per C99 6.10.3/4 and +// C++11 N3290 16.3/4; whereas in C++03 16.3/10 it was undefined behavior.) 
+#define FOREVERY_O(DO) \ + FOREVERY_FIXED_O(DO) \ + DO(, index_type) + +// These are not just shorthands but indicate the particular +// implementation used (e.g., as opposed to C99's `complex double`, +// or MLIR's `ComplexType`). +using complex64 = std::complex; +using complex32 = std::complex; + /// Encoding of the elemental type, for "overloading" @newSparseTensor. enum class PrimaryType : uint32_t { kF64 = 1, @@ -47,6 +89,30 @@ enum class PrimaryType : uint32_t { kC32 = 8 }; +// This x-macro only specifies the non-complex `V` types, because the ABI +// for complex types has compiler-/architecture-dependent details we need +// to work around. Namely, when a function takes a parameter of C/C++ +// type `complex32` (per se), then there is additional padding that causes +// it not to match the LLVM type `!llvm.struct<(f32, f32)>`. This only +// happens with the `complex32` type itself, not with pointers/arrays +// of complex values. We also exclude `complex64` because it is in +// principle susceptible to analogous ABI issues (even though we haven't +// yet encountered them in practice). +#define FOREVERY_SIMPLEX_V(DO) \ + DO(F64, double) \ + DO(F32, float) \ + DO(I64, int64_t) \ + DO(I32, int32_t) \ + DO(I16, int16_t) \ + DO(I8, int8_t) + +// This x-macro includes all `V` types, for when the aforementioned ABI +// issues don't apply (e.g., because the functions take pointers/arrays). +#define FOREVERY_V(DO) \ + FOREVERY_SIMPLEX_V(DO) \ + DO(C64, complex64) \ + DO(C32, complex32) + /// The actions performed by @newSparseTensor. enum class Action : uint32_t { kEmpty = 0, @@ -67,6 +133,193 @@ enum class DimLevelType : uint8_t { kSingleton = 2 }; +//===----------------------------------------------------------------------===// +// +// Public functions which operate on MLIR buffers (memrefs) to interact +// with sparse tensors (which are only visible as opaque pointers externally). 
+// Because these functions deal with memrefs, they should only be used +// by MLIR compiler-generated code (or code similarly guaranteed to remain +// in sync with MLIR; e.g., internal development tools like benchmarks). +// +// Where appropriate, we use macros to generate all variations of these +// functions for each supported primary- and overhead-type. +// +//===----------------------------------------------------------------------===// + +/// The @newSparseTensor function for constructing a new sparse tensor. +/// This is the "swiss army knife" method for materializing sparse +/// tensors into the computation. The types of the `ptr` argument and +/// the result depend on the action, as explained in the following table +/// (where "STS" means a sparse-tensor-storage object, and "COO" means +/// a coordinate-scheme object). +/// +/// Action: `ptr`: Returns: +/// kEmpty unused STS, empty +/// kEmptyCOO unused COO, empty +/// kFromFile char* filename STS, read from the file +/// kFromCOO COO STS, copied from the COO source +/// kToCOO STS COO, copied from the STS source +/// kSparseToSparse STS STS, copied from the STS source +/// kToIterator STS COO-Iterator, call @getNext to use +MLIR_CRUNNERUTILS_EXPORT void * +_mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT + StridedMemRefType *sref, + StridedMemRefType *pref, + OverheadType ptrTp, OverheadType indTp, + PrimaryType valTp, Action action, void *ptr); + +/// Tensor-storage method to obtain direct access to the values array. +#define DECL_SPARSEVALUES(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_sparseValues##VNAME( \ + StridedMemRefType *out, void *tensor); +FOREVERY_V(DECL_SPARSEVALUES) +#undef DECL_SPARSEVALUES + +/// Tensor-storage method to obtain direct access to the pointers array +/// for the given dimension. 
+#define DECL_SPARSEPOINTERS(PNAME, P) \ + MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_sparsePointers##PNAME( \ + StridedMemRefType *out, void *tensor, index_type d); +FOREVERY_O(DECL_SPARSEPOINTERS) +#undef DECL_SPARSEPOINTERS + +/// Tensor-storage method to obtain direct access to the indices array +/// for the given dimension. +#define DECL_SPARSEINDICES(INAME, I) \ + MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_sparseIndices##INAME( \ + StridedMemRefType *out, void *tensor, index_type d); +FOREVERY_O(DECL_SPARSEINDICES) +#undef DECL_SPARSEINDICES + +/// Coordinate-scheme method for adding a new element. +#define DECL_ADDELT(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_addElt##VNAME( \ + void *coo, V value, StridedMemRefType *iref, \ + StridedMemRefType *pref); +FOREVERY_SIMPLEX_V(DECL_ADDELT) +DECL_ADDELT(C64, complex64) +#undef DECL_ADDELT +// Explicitly unpack the `complex32` into a pair of `float` arguments, +// to work around ABI issues. +// TODO: cleaner way to avoid ABI padding problem? +MLIR_CRUNNERUTILS_EXPORT void * +_mlir_ciface_addEltC32(void *coo, float r, float i, + StridedMemRefType *iref, + StridedMemRefType *pref); + +/// Coordinate-scheme method for getting the next element while iterating. +#define DECL_GETNEXT(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT bool _mlir_ciface_getNext##VNAME( \ + void *coo, StridedMemRefType *iref, \ + StridedMemRefType *vref); +FOREVERY_V(DECL_GETNEXT) +#undef DECL_GETNEXT + +/// Tensor-storage method to insert elements in lexicographical index order. +#define DECL_LEXINSERT(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_lexInsert##VNAME( \ + void *tensor, StridedMemRefType *cref, V val); +FOREVERY_SIMPLEX_V(DECL_LEXINSERT) +DECL_LEXINSERT(C64, complex64) +#undef DECL_LEXINSERT +// Explicitly unpack the `complex32` into a pair of `float` arguments, +// to work around ABI issues. +// TODO: cleaner way to avoid ABI padding problem? 
+MLIR_CRUNNERUTILS_EXPORT void +_mlir_ciface_lexInsertC32(void *tensor, StridedMemRefType *cref, + float r, float i); + +/// Tensor-storage method to insert using expansion. +#define DECL_EXPINSERT(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_expInsert##VNAME( \ + void *tensor, StridedMemRefType *cref, \ + StridedMemRefType *vref, StridedMemRefType *fref, \ + StridedMemRefType *aref, index_type count); +FOREVERY_V(DECL_EXPINSERT) +#undef DECL_EXPINSERT + +//===----------------------------------------------------------------------===// +// +// Public functions which accept only C-style data structures to interact +// with sparse tensors (which are only visible as opaque pointers externally). +// These functions can be used both by MLIR compiler-generated code +// as well as by any external runtime that wants to interact with MLIR +// compiler-generated code. +// +//===----------------------------------------------------------------------===// + +/// Tensor-storage method to get the size of the given dimension. +MLIR_CRUNNERUTILS_EXPORT index_type sparseDimSize(void *tensor, index_type d); + +/// Tensor-storage method to finalize lexicographic insertions. +MLIR_CRUNNERUTILS_EXPORT void endInsert(void *tensor); + +/// Coordinate-scheme method to write to file in extended FROSTT format. +#define DECL_OUTSPARSETENSOR(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void outSparseTensor##VNAME(void *coo, void *dest, \ + bool sort); +FOREVERY_V(DECL_OUTSPARSETENSOR) +#undef DECL_OUTSPARSETENSOR + +/// Releases the memory for the tensor-storage object. +void delSparseTensor(void *tensor); + +/// Releases the memory for the coordinate-scheme object. +#define DECL_DELCOO(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void delSparseTensorCOO##VNAME(void *coo); +FOREVERY_V(DECL_DELCOO) +#undef DECL_DELCOO + +/// Helper function to read a sparse tensor filename from the environment, +/// defined with the naming convention ${TENSOR0}, ${TENSOR1}, etc. 
+MLIR_CRUNNERUTILS_EXPORT char *getTensorFilename(index_type id); + +/// Initializes sparse tensor from a COO-flavored format expressed using +/// C-style data structures. The expected parameters are: +/// +/// rank: rank of tensor +/// nse: number of specified elements (usually the nonzeros) +/// shape: array with dimension size for each rank +/// values: a "nse" array with values for all specified elements +/// indices: a flat "nse * rank" array with indices for all specified elements +/// perm: the permutation of the dimensions in the storage +/// sparse: the sparsity for the dimensions +/// +/// For example, the sparse matrix +/// | 1.0 0.0 0.0 | +/// | 0.0 5.0 3.0 | +/// can be passed as +/// rank = 2 +/// nse = 3 +/// shape = [2, 3] +/// values = [1.0, 5.0, 3.0] +/// indices = [ 0, 0, 1, 1, 1, 2] +#define DECL_CONVERTTOMLIRSPARSETENSOR(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void *convertToMLIRSparseTensor##VNAME( \ + uint64_t rank, uint64_t nse, uint64_t *shape, V *values, \ + uint64_t *indices, uint64_t *perm, uint8_t *sparse); +FOREVERY_V(DECL_CONVERTTOMLIRSPARSETENSOR) +#undef DECL_CONVERTTOMLIRSPARSETENSOR + +/// Converts a sparse tensor to COO-flavored format expressed using +/// C-style data structures. The expected output parameters are pointers +/// for these values: +/// +/// rank: rank of tensor +/// nse: number of specified elements (usually the nonzeros) +/// shape: array with dimension size for each rank +/// values: a "nse" array with values for all specified elements +/// indices: a flat "nse * rank" array with indices for all specified elements +/// +/// The input is a pointer to `SparseTensorStorage`, typically +/// returned from `convertToMLIRSparseTensor`. 
+#define DECL_CONVERTFROMMLIRSPARSETENSOR(VNAME, V) \ + MLIR_CRUNNERUTILS_EXPORT void convertFromMLIRSparseTensor##VNAME( \ + void *tensor, uint64_t *pRank, uint64_t *pNse, uint64_t **pShape, \ + V **pValues, uint64_t **pIndices); +FOREVERY_V(DECL_CONVERTFROMMLIRSPARSETENSOR) +#undef DECL_CONVERTFROMMLIRSPARSETENSOR + } // extern "C" #endif // MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_ diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index c46ad463c3cfdc..f66b87c1c74d78 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -15,15 +15,12 @@ //===----------------------------------------------------------------------===// #include "mlir/ExecutionEngine/SparseTensorUtils.h" -#include "mlir/ExecutionEngine/CRunnerUtils.h" #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS #include #include -#include #include -#include #include #include #include @@ -32,10 +29,6 @@ #include #include #include -#include - -using complex64 = std::complex; -using complex32 = std::complex; //===----------------------------------------------------------------------===// // @@ -246,49 +239,6 @@ struct SparseTensorCOO final { unsigned iteratorPos = 0; }; -// See -// -// `FOREVERY_SIMPLEX_V` only specifies the non-complex `V` types, because -// the ABI for complex types has compiler/architecture dependent complexities -// we need to work around. Namely, when a function takes a parameter of -// C/C++ type `complex32` (per se), then there is additional padding that -// causes it not to match the LLVM type `!llvm.struct<(f32, f32)>`. This -// only happens with the `complex32` type itself, not with pointers/arrays -// of complex values. So far `complex64` doesn't exhibit this ABI -// incompatibility, but we exclude it anyways just to be safe. 
-#define FOREVERY_SIMPLEX_V(DO) \ - DO(F64, double) \ - DO(F32, float) \ - DO(I64, int64_t) \ - DO(I32, int32_t) \ - DO(I16, int16_t) \ - DO(I8, int8_t) - -#define FOREVERY_V(DO) \ - FOREVERY_SIMPLEX_V(DO) \ - DO(C64, complex64) \ - DO(C32, complex32) - -// This x-macro calls its argument on every overhead type which has -// fixed-width. It excludes `index_type` because that type is often -// handled specially (e.g., by translating it into the architecture-dependent -// equivalent fixed-width overhead type). -#define FOREVERY_FIXED_O(DO) \ - DO(64, uint64_t) \ - DO(32, uint32_t) \ - DO(16, uint16_t) \ - DO(8, uint8_t) - -// This x-macro calls its argument on every overhead type, including -// `index_type`. Our naming convention uses an empty suffix for -// `index_type`, so the missing first argument when we call `DO` -// gets resolved to the empty token which can then be concatenated -// as intended. (This behavior is standard per C99 6.10.3/4 and -// C++11 N3290 16.3/4; whereas in C++03 16.3/10 it was undefined behavior.) -#define FOREVERY_O(DO) \ - FOREVERY_FIXED_O(DO) \ - DO(, index_type) - // Forward. template class SparseTensorEnumeratorBase; @@ -1267,7 +1217,7 @@ static SparseTensorCOO *openSparseTensorCOO(char *filename, uint64_t rank, return tensor; } -/// Writes the sparse tensor to extended FROSTT format. +/// Writes the sparse tensor to `dest` in extended FROSTT format. template static void outSparseTensor(void *tensor, void *dest, bool sort) { assert(tensor && dest); @@ -1372,18 +1322,14 @@ static void fromMLIRSparseTensor(void *tensor, uint64_t *pRank, uint64_t *pNse, *pIndices = indices; } -} // namespace +} // anonymous namespace extern "C" { //===----------------------------------------------------------------------===// // -// Public API with methods that operate on MLIR buffers (memrefs) to interact -// with sparse tensors, which are only visible as opaque pointers externally. 
-// These methods should be used exclusively by MLIR compiler-generated code. -// -// Some macro magic is used to generate implementations for all required type -// combinations that can be called from MLIR compiler-generated code. +// Public functions which operate on MLIR buffers (memrefs) to interact +// with sparse tensors (which are only visible as opaque pointers externally). // //===----------------------------------------------------------------------===// @@ -1429,16 +1375,6 @@ extern "C" { static_assert(std::is_same::value, "Expected index_type == uint64_t"); -/// Constructs a new sparse tensor. This is the "swiss army knife" -/// method for materializing sparse tensors into the computation. -/// -/// Action: -/// kEmpty = returns empty storage to fill later -/// kFromFile = returns storage, where ptr contains filename to read -/// kFromCOO = returns storage, where ptr contains coordinate scheme to assign -/// kEmptyCOO = returns empty coordinate scheme to fill and use with kFromCOO -/// kToCOO = returns coordinate scheme from storage in ptr to use with kFromCOO -/// kToIterator = returns iterator from storage in ptr (call getNext() to use) void * _mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT StridedMemRefType *sref, @@ -1534,12 +1470,15 @@ _mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT CASE_SECSAME(OverheadType::kU64, PrimaryType::kI32, uint64_t, int32_t); CASE_SECSAME(OverheadType::kU64, PrimaryType::kI16, uint64_t, int16_t); CASE_SECSAME(OverheadType::kU64, PrimaryType::kI8, uint64_t, int8_t); + CASE_SECSAME(OverheadType::kU32, PrimaryType::kI64, uint32_t, int64_t); CASE_SECSAME(OverheadType::kU32, PrimaryType::kI32, uint32_t, int32_t); CASE_SECSAME(OverheadType::kU32, PrimaryType::kI16, uint32_t, int16_t); CASE_SECSAME(OverheadType::kU32, PrimaryType::kI8, uint32_t, int8_t); + CASE_SECSAME(OverheadType::kU16, PrimaryType::kI64, uint16_t, int64_t); CASE_SECSAME(OverheadType::kU16, PrimaryType::kI32, uint16_t, 
int32_t); CASE_SECSAME(OverheadType::kU16, PrimaryType::kI16, uint16_t, int16_t); CASE_SECSAME(OverheadType::kU16, PrimaryType::kI8, uint16_t, int8_t); + CASE_SECSAME(OverheadType::kU8, PrimaryType::kI64, uint8_t, int64_t); CASE_SECSAME(OverheadType::kU8, PrimaryType::kI32, uint8_t, int32_t); CASE_SECSAME(OverheadType::kU8, PrimaryType::kI16, uint8_t, int16_t); CASE_SECSAME(OverheadType::kU8, PrimaryType::kI8, uint8_t, int8_t); @@ -1557,7 +1496,6 @@ _mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT #undef CASE #undef CASE_SECSAME -/// Methods that provide direct access to values. #define IMPL_SPARSEVALUES(VNAME, V) \ void _mlir_ciface_sparseValues##VNAME(StridedMemRefType *ref, \ void *tensor) { \ @@ -1583,20 +1521,17 @@ FOREVERY_V(IMPL_SPARSEVALUES) ref->sizes[0] = v->size(); \ ref->strides[0] = 1; \ } -/// Methods that provide direct access to pointers. #define IMPL_SPARSEPOINTERS(PNAME, P) \ IMPL_GETOVERHEAD(sparsePointers##PNAME, P, getPointers) FOREVERY_O(IMPL_SPARSEPOINTERS) #undef IMPL_SPARSEPOINTERS -/// Methods that provide direct access to indices. #define IMPL_SPARSEINDICES(INAME, I) \ IMPL_GETOVERHEAD(sparseIndices##INAME, I, getIndices) FOREVERY_O(IMPL_SPARSEINDICES) #undef IMPL_SPARSEINDICES #undef IMPL_GETOVERHEAD -/// Helper to add value to coordinate scheme, one per value type. #define IMPL_ADDELT(VNAME, V) \ void *_mlir_ciface_addElt##VNAME(void *coo, V value, \ StridedMemRefType *iref, \ @@ -1614,18 +1549,19 @@ FOREVERY_O(IMPL_SPARSEINDICES) return coo; \ } FOREVERY_SIMPLEX_V(IMPL_ADDELT) -// `complex64` apparently doesn't encounter any ABI issues (yet). IMPL_ADDELT(C64, complex64) -// TODO: cleaner way to avoid ABI padding problem? -IMPL_ADDELT(C32ABI, complex32) +// Marked static because it's not part of the public API. +// (The `static` keyword confuses clang-format, so the extraneous trailing +// semicolon is required to teach clang-format not to indent the prototype +// of `_mlir_ciface_addEltC32` below.) 
+static IMPL_ADDELT(C32ABI, complex32); +#undef IMPL_ADDELT void *_mlir_ciface_addEltC32(void *coo, float r, float i, StridedMemRefType *iref, StridedMemRefType *pref) { return _mlir_ciface_addEltC32ABI(coo, complex32(r, i), iref, pref); } -#undef IMPL_ADDELT -/// Helper to enumerate elements of coordinate scheme, one per value type. #define IMPL_GETNEXT(VNAME, V) \ bool _mlir_ciface_getNext##VNAME(void *coo, \ StridedMemRefType *iref, \ @@ -1647,7 +1583,6 @@ void *_mlir_ciface_addEltC32(void *coo, float r, float i, FOREVERY_V(IMPL_GETNEXT) #undef IMPL_GETNEXT -/// Insert elements in lexicographical index order, one per value type. #define IMPL_LEXINSERT(VNAME, V) \ void _mlir_ciface_lexInsert##VNAME( \ void *tensor, StridedMemRefType *cref, V val) { \ @@ -1658,18 +1593,19 @@ FOREVERY_V(IMPL_GETNEXT) static_cast(tensor)->lexInsert(cursor, val); \ } FOREVERY_SIMPLEX_V(IMPL_LEXINSERT) -// `complex64` apparently doesn't encounter any ABI issues (yet). IMPL_LEXINSERT(C64, complex64) -// TODO: cleaner way to avoid ABI padding problem? -IMPL_LEXINSERT(C32ABI, complex32) +// Marked static because it's not part of the public API. +// (The `static` keyword confuses clang-format, so the extraneous trailing +// semicolon is required to teach clang-format not to indent the prototype +// of `_mlir_ciface_lexInsertC32` below.) +static IMPL_LEXINSERT(C32ABI, complex32); +#undef IMPL_LEXINSERT void _mlir_ciface_lexInsertC32(void *tensor, StridedMemRefType *cref, float r, float i) { _mlir_ciface_lexInsertC32ABI(tensor, cref, complex32(r, i)); } -#undef IMPL_LEXINSERT -/// Insert using expansion, one per value type. #define IMPL_EXPINSERT(VNAME, V) \ void _mlir_ciface_expInsert##VNAME( \ void *tensor, StridedMemRefType *cref, \ @@ -1691,7 +1627,7 @@ void _mlir_ciface_lexInsertC32(void *tensor, FOREVERY_V(IMPL_EXPINSERT) #undef IMPL_EXPINSERT -/// Output a sparse tensor, one per value type. +// TODO: move this into the next section, since it doesn't depend on memrefs. 
#define IMPL_OUTSPARSETENSOR(VNAME, V) \ void outSparseTensor##VNAME(void *coo, void *dest, bool sort) { \ return outSparseTensor(coo, dest, sort); \ @@ -1701,15 +1637,13 @@ FOREVERY_V(IMPL_OUTSPARSETENSOR) //===----------------------------------------------------------------------===// // -// Public API with methods that accept C-style data structures to interact -// with sparse tensors, which are only visible as opaque pointers externally. -// These methods can be used both by MLIR compiler-generated code as well as by -// an external runtime that wants to interact with MLIR compiler-generated code. +// Public functions which accept only C-style data structures to interact +// with sparse tensors (which are only visible as opaque pointers externally). // //===----------------------------------------------------------------------===// -/// Helper method to read a sparse tensor filename from the environment, -/// defined with the naming convention ${TENSOR0}, ${TENSOR1}, etc. +// TODO: move this lower down (after `delSparseTensorCOO`) since it's +// independent of our sparse-tensor formats. char *getTensorFilename(index_type id) { char var[80]; sprintf(var, "TENSOR%" PRIu64, id); @@ -1719,22 +1653,18 @@ char *getTensorFilename(index_type id) { return env; } -/// Returns size of sparse tensor in given dimension. index_type sparseDimSize(void *tensor, index_type d) { return static_cast(tensor)->getDimSize(d); } -/// Finalizes lexicographic insertions. void endInsert(void *tensor) { return static_cast(tensor)->endInsert(); } -/// Releases sparse tensor storage. void delSparseTensor(void *tensor) { delete static_cast(tensor); } -/// Releases sparse tensor coordinate scheme. 
#define IMPL_DELCOO(VNAME, V) \ void delSparseTensorCOO##VNAME(void *coo) { \ delete static_cast *>(coo); \ @@ -1742,29 +1672,7 @@ void delSparseTensor(void *tensor) { FOREVERY_V(IMPL_DELCOO) #undef IMPL_DELCOO -/// Initializes sparse tensor from a COO-flavored format expressed using C-style -/// data structures. The expected parameters are: -/// -/// rank: rank of tensor -/// nse: number of specified elements (usually the nonzeros) -/// shape: array with dimension size for each rank -/// values: a "nse" array with values for all specified elements -/// indices: a flat "nse x rank" array with indices for all specified elements -/// perm: the permutation of the dimensions in the storage -/// sparse: the sparsity for the dimensions -/// -/// For example, the sparse matrix -/// | 1.0 0.0 0.0 | -/// | 0.0 5.0 3.0 | -/// can be passed as -/// rank = 2 -/// nse = 3 -/// shape = [2, 3] -/// values = [1.0, 5.0, 3.0] -/// indices = [ 0, 0, 1, 1, 1, 2] -// // TODO: generalize beyond 64-bit indices. -// #define IMPL_CONVERTTOMLIRSPARSETENSOR(VNAME, V) \ void *convertToMLIRSparseTensor##VNAME( \ uint64_t rank, uint64_t nse, uint64_t *shape, V *values, \ @@ -1775,26 +1683,12 @@ FOREVERY_V(IMPL_DELCOO) FOREVERY_V(IMPL_CONVERTTOMLIRSPARSETENSOR) #undef IMPL_CONVERTTOMLIRSPARSETENSOR -/// Converts a sparse tensor to COO-flavored format expressed using C-style -/// data structures. The expected output parameters are pointers for these -/// values: -/// -/// rank: rank of tensor -/// nse: number of specified elements (usually the nonzeros) -/// shape: array with dimension size for each rank -/// values: a "nse" array with values for all specified elements -/// indices: a flat "nse x rank" array with indices for all specified elements -/// -/// The input is a pointer to SparseTensorStorage, typically returned -/// from convertToMLIRSparseTensor. -/// -// TODO: Currently, values are copied from SparseTensorStorage to -// SparseTensorCOO, then to the output. 
We may want to reduce the number of -// copies. +// TODO: Currently, values are copied from SparseTensorStorage to +// SparseTensorCOO, then to the output. We may want to reduce the number +// of copies. // // TODO: generalize beyond 64-bit indices, no dim ordering, all dimensions // compressed -// #define IMPL_CONVERTFROMMLIRSPARSETENSOR(VNAME, V) \ void convertFromMLIRSparseTensor##VNAME(void *tensor, uint64_t *pRank, \ uint64_t *pNse, uint64_t **pShape, \ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 61171f32174be0..6dc36aeff09591 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2050,6 +2050,7 @@ cc_library( ":TensorDialect", ":Transforms", ":VectorOps", + ":mlir_c_runner_utils", "//llvm:Support", ], ) From 05c17bc4bb632a3a6080550447b5001736979c2a Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Thu, 26 May 2022 12:38:58 -0700 Subject: [PATCH 661/908] [mlir][sparse] Moving some functions around This is a followup to D126105 to move functions in SparseTensorUtils.cpp to match their locations in SparseTensorUtils.h Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126106 --- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index f66b87c1c74d78..ccc8c7a19387ae 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -1627,14 +1627,6 @@ void _mlir_ciface_lexInsertC32(void *tensor, FOREVERY_V(IMPL_EXPINSERT) #undef IMPL_EXPINSERT -// TODO: move this into the next section, since it doesn't depend on memrefs. 
-#define IMPL_OUTSPARSETENSOR(VNAME, V) \ - void outSparseTensor##VNAME(void *coo, void *dest, bool sort) { \ - return outSparseTensor(coo, dest, sort); \ - } -FOREVERY_V(IMPL_OUTSPARSETENSOR) -#undef IMPL_OUTSPARSETENSOR - //===----------------------------------------------------------------------===// // // Public functions which accept only C-style data structures to interact @@ -1642,17 +1634,6 @@ FOREVERY_V(IMPL_OUTSPARSETENSOR) // //===----------------------------------------------------------------------===// -// TODO: move this lower down (after `delSparseTensorCOO`) since it's -// independent of our sparse-tensor formats. -char *getTensorFilename(index_type id) { - char var[80]; - sprintf(var, "TENSOR%" PRIu64, id); - char *env = getenv(var); - if (!env) - FATAL("Environment variable %s is not set\n", var); - return env; -} - index_type sparseDimSize(void *tensor, index_type d) { return static_cast(tensor)->getDimSize(d); } @@ -1661,6 +1642,13 @@ void endInsert(void *tensor) { return static_cast(tensor)->endInsert(); } +#define IMPL_OUTSPARSETENSOR(VNAME, V) \ + void outSparseTensor##VNAME(void *coo, void *dest, bool sort) { \ + return outSparseTensor(coo, dest, sort); \ + } +FOREVERY_V(IMPL_OUTSPARSETENSOR) +#undef IMPL_OUTSPARSETENSOR + void delSparseTensor(void *tensor) { delete static_cast(tensor); } @@ -1672,6 +1660,15 @@ void delSparseTensor(void *tensor) { FOREVERY_V(IMPL_DELCOO) #undef IMPL_DELCOO +char *getTensorFilename(index_type id) { + char var[80]; + sprintf(var, "TENSOR%" PRIu64, id); + char *env = getenv(var); + if (!env) + FATAL("Environment variable %s is not set\n", var); + return env; +} + // TODO: generalize beyond 64-bit indices. 
#define IMPL_CONVERTTOMLIRSPARSETENSOR(VNAME, V) \ void *convertToMLIRSparseTensor##VNAME( \ From cef377d75df478350e0af6b0c70516a8aa8fa800 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 26 May 2022 18:13:38 -0700 Subject: [PATCH 662/908] [RISCV] Simplify code after D125905 --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 52e6af1224f596..e27456076b4bd6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -232,9 +232,7 @@ class RISCVELFStreamer : public MCELFStreamer { } void reset() override { - MCTargetStreamer &TS = *getTargetStreamer(); - RISCVTargetStreamer &RTS = static_cast(TS); - RTS.reset(); + static_cast(getTargetStreamer())->reset(); MCELFStreamer::reset(); } From 3aa249329f3859dc433213f1224b2dd6a5cc8c7f Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 26 May 2022 18:43:58 -0700 Subject: [PATCH 663/908] Revert "[Propeller] Promote functions with propeller profiles to .text.hot." This reverts commit 4d8d2580c53e130c3c3dd3877384301e3c495554. 
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 109 ----------- llvm/include/llvm/CodeGen/Passes.h | 6 +- llvm/include/llvm/InitializePasses.h | 1 - llvm/lib/CodeGen/BasicBlockSections.cpp | 169 ++++++++++++++++-- .../BasicBlockSectionsProfileReader.cpp | 144 --------------- llvm/lib/CodeGen/CMakeLists.txt | 1 - llvm/lib/CodeGen/CodeGenPrepare.cpp | 21 +-- llvm/lib/CodeGen/TargetPassConfig.cpp | 7 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 - llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 - llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 - llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 - .../CodeGen/X86/basic-block-sections-cold.ll | 6 +- .../CodeGen/X86/basic-block-sections-list.ll | 18 +- .../X86/basic-block-sections-listbb.ll | 8 +- .../X86/basic-block-sections-source-drift.ll | 4 +- llvm/test/CodeGen/X86/opt-pipeline.ll | 1 - 17 files changed, 176 insertions(+), 326 deletions(-) delete mode 100644 llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h delete mode 100644 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h deleted file mode 100644 index 7ae1304cced9ad..00000000000000 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ /dev/null @@ -1,109 +0,0 @@ -//===-- BasicBlockSectionsProfileReader.h - BB sections profile reader pass ==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass creates the basic block cluster info by reading the basic block -// sections profile. The cluster info will be used by the basic-block-sections -// pass to arrange basic blocks in their sections. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H -#define LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; - -namespace llvm { - -// The cluster information for a machine basic block. -struct BBClusterInfo { - // MachineBasicBlock ID. - unsigned MBBNumber; - // Cluster ID this basic block belongs to. - unsigned ClusterID; - // Position of basic block within the cluster. - unsigned PositionInCluster; -}; - -using ProgramBBClusterInfoMapTy = StringMap>; - -class BasicBlockSectionsProfileReader : public ImmutablePass { -public: - static char ID; - - BasicBlockSectionsProfileReader(const MemoryBuffer *Buf) - : ImmutablePass(ID), MBuf(Buf) { - initializeBasicBlockSectionsProfileReaderPass( - *PassRegistry::getPassRegistry()); - }; - - BasicBlockSectionsProfileReader() : ImmutablePass(ID) { - initializeBasicBlockSectionsProfileReaderPass( - *PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "Basic Block Sections Profile Reader"; - } - - // Returns true if basic block sections profile exist for function \p - // FuncName. - bool isFunctionHot(StringRef FuncName) const; - - // Returns a pair with first element representing whether basic block sections - // profile exist for the function \p FuncName, and the second element - // representing the basic block sections profile (cluster info) for this - // function. If the first element is true and the second element is empty, it - // means unique basic block sections are desired for all basic blocks of the - // function. 
- std::pair> - getBBClusterInfoForFunction(StringRef FuncName) const; - - /// Read profiles of basic blocks if available here. - void initializePass() override; - -private: - StringRef getAliasName(StringRef FuncName) const { - auto R = FuncAliasMap.find(FuncName); - return R == FuncAliasMap.end() ? FuncName : R->second; - } - - // This contains the basic-block-sections profile. - const MemoryBuffer *MBuf = nullptr; - - // This encapsulates the BB cluster information for the whole program. - // - // For every function name, it contains the cluster information for (all or - // some of) its basic blocks. The cluster information for every basic block - // includes its cluster ID along with the position of the basic block in that - // cluster. - ProgramBBClusterInfoMapTy ProgramBBClusterInfo; - - // Some functions have alias names. We use this map to find the main alias - // name for which we have mapping in ProgramBBClusterInfo. - StringMap FuncAliasMap; -}; - -// Creates a BasicBlockSectionsProfileReader pass to parse the basic block -// sections profile. \p Buf is a memory buffer that contains the list of -// functions and basic block ids to selectively enable basic block sections. -ImmutablePass * -createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf); - -} // namespace llvm -#endif // LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 6e37d42f0d2944..bfd774f578ac88 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -51,8 +51,10 @@ namespace llvm { FunctionPass *createUnreachableBlockEliminationPass(); /// createBasicBlockSections Pass - This pass assigns sections to machine - /// basic blocks and is enabled with -fbasic-block-sections. - MachineFunctionPass *createBasicBlockSectionsPass(); + /// basic blocks and is enabled with -fbasic-block-sections. 
Buf is a memory + /// buffer that contains the list of functions and basic block ids to + /// selectively enable basic block sections. + MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf); /// createMachineFunctionSplitterPass - This pass splits machine functions /// using profile information. diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index ece2f20bd4b86c..41e1d99b536136 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -76,7 +76,6 @@ void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeAttributorLegacyPassPass(PassRegistry&); void initializeAttributorCGSCCLegacyPassPass(PassRegistry &); -void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 38ba651d126faf..295d557cd72713 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -69,17 +69,25 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetMachine.h" +using llvm::SmallSet; +using 
llvm::SmallVector; +using llvm::StringMap; +using llvm::StringRef; using namespace llvm; // Placing the cold clusters in a separate section mitigates against poor @@ -99,11 +107,41 @@ cl::opt BBSectionsDetectSourceDrift( namespace { +// This struct represents the cluster information for a machine basic block. +struct BBClusterInfo { + // MachineBasicBlock ID. + unsigned MBBNumber; + // Cluster ID this basic block belongs to. + unsigned ClusterID; + // Position of basic block within the cluster. + unsigned PositionInCluster; +}; + +using ProgramBBClusterInfoMapTy = StringMap>; + class BasicBlockSections : public MachineFunctionPass { public: static char ID; - BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + // This contains the basic-block-sections profile. + const MemoryBuffer *MBuf = nullptr; + + // This encapsulates the BB cluster information for the whole program. + // + // For every function name, it contains the cluster information for (all or + // some of) its basic blocks. The cluster information for every basic block + // includes its cluster ID along with the position of the basic block in that + // cluster. + ProgramBBClusterInfoMapTy ProgramBBClusterInfo; + + // Some functions have alias names. We use this map to find the main alias + // name for which we have mapping in ProgramBBClusterInfo. + StringMap FuncAliasMap; + + BasicBlockSections(const MemoryBuffer *Buf) + : MachineFunctionPass(ID), MBuf(Buf) { + initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); + }; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); @@ -115,6 +153,9 @@ class BasicBlockSections : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override; + /// Read profiles of basic blocks if available here. + bool doInitialization(Module &M) override; + /// Identify basic blocks that need separate sections and prepare to emit them /// accordingly. 
bool runOnMachineFunction(MachineFunction &MF) override; @@ -164,18 +205,21 @@ static void updateBranches( // This function provides the BBCluster information associated with a function. // Returns true if a valid association exists and false otherwise. -bool getBBClusterInfoForFunction( - const MachineFunction &MF, - BasicBlockSectionsProfileReader *BBSectionsProfileReader, +static bool getBBClusterInfoForFunction( + const MachineFunction &MF, const StringMap FuncAliasMap, + const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, std::vector> &V) { + // Get the main alias name for the function. + auto FuncName = MF.getName(); + auto R = FuncAliasMap.find(FuncName); + StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second; // Find the assoicated cluster information. - std::pair> P = - BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName()); - if (!P.first) + auto P = ProgramBBClusterInfo.find(AliasName); + if (P == ProgramBBClusterInfo.end()) return false; - if (P.second.empty()) { + if (P->second.empty()) { // This indicates that sections are desired for all basic blocks of this // function. We clear the BBClusterInfo vector to denote this. V.clear(); @@ -183,7 +227,7 @@ bool getBBClusterInfoForFunction( } V.resize(MF.getNumBlockIDs()); - for (auto bbClusterInfo : P.second) { + for (auto bbClusterInfo : P->second) { // Bail out if the cluster information contains invalid MBB numbers. 
if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) return false; @@ -332,11 +376,9 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } - BBSectionsProfileReader = &getAnalysis(); - std::vector> FuncBBClusterInfo; if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, BBSectionsProfileReader, + !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, FuncBBClusterInfo)) return true; MF.setBBSectionsType(BBSectionsType); @@ -384,12 +426,107 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } +// Basic Block Sections can be enabled for a subset of machine basic blocks. +// This is done by passing a file containing names of functions for which basic +// block sections are desired. Additionally, machine basic block ids of the +// functions can also be specified for a finer granularity. Moreover, a cluster +// of basic blocks could be assigned to the same section. +// A file with basic block sections for all of function main and three blocks +// for function foo (of which 1 and 2 are placed in a cluster) looks like this: +// ---------------------------- +// list.txt: +// !main +// !foo +// !!1 2 +// !!4 +static Error getBBClusterInfo(const MemoryBuffer *MBuf, + ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, + StringMap &FuncAliasMap) { + assert(MBuf); + line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); + + auto invalidProfileError = [&](auto Message) { + return make_error( + Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + }; + + auto FI = ProgramBBClusterInfo.end(); + + // Current cluster ID corresponding to this function. + unsigned CurrentCluster = 0; + // Current position in the current cluster. + unsigned CurrentPosition = 0; + + // Temporary set to ensure every basic block ID appears once in the clusters + // of a function. 
+ SmallSet FuncBBIDs; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef S(*LineIt); + if (S[0] == '@') + continue; + // Check for the leading "!" + if (!S.consume_front("!") || S.empty()) + break; + // Check for second "!" which indicates a cluster of basic blocks. + if (S.consume_front("!")) { + if (FI == ProgramBBClusterInfo.end()) + return invalidProfileError( + "Cluster list does not follow a function name specifier."); + SmallVector BBIndexes; + S.split(BBIndexes, ' '); + // Reset current cluster position. + CurrentPosition = 0; + for (auto BBIndexStr : BBIndexes) { + unsigned long long BBIndex; + if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + return invalidProfileError(Twine("Unsigned integer expected: '") + + BBIndexStr + "'."); + if (!FuncBBIDs.insert(BBIndex).second) + return invalidProfileError(Twine("Duplicate basic block id found '") + + BBIndexStr + "'."); + if (!BBIndex && CurrentPosition) + return invalidProfileError("Entry BB (0) does not begin a cluster."); + + FI->second.emplace_back(BBClusterInfo{ + ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + } else { // This is a function name specifier. + // Function aliases are separated using '/'. We use the first function + // name for the cluster info mapping and delegate all other aliases to + // this one. + SmallVector Aliases; + S.split(Aliases, '/'); + for (size_t i = 1; i < Aliases.size(); ++i) + FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); + + // Prepare for parsing clusters of this function name. + // Start a new cluster map for this function name. 
+ FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; + CurrentCluster = 0; + FuncBBIDs.clear(); + } + } + return Error::success(); +} + +bool BasicBlockSections::doInitialization(Module &M) { + if (!MBuf) + return false; + if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) + report_fatal_error(std::move(Err)); + return false; +} + void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } -MachineFunctionPass *llvm::createBasicBlockSectionsPass() { - return new BasicBlockSections(); +MachineFunctionPass * +llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) { + return new BasicBlockSections(Buf); } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp deleted file mode 100644 index c2acf115998b63..00000000000000 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ /dev/null @@ -1,144 +0,0 @@ -//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Implementation of the basic block sections profile reader pass. It parses -// and stores the basic block sections profile file (which is specified via the -// `-basic-block-sections` flag). 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; - -char BasicBlockSectionsProfileReader::ID = 0; -INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", - "Reads and parses a basic block sections profile.", false, - false) - -bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { - return getBBClusterInfoForFunction(FuncName).first; -} - -std::pair> -BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( - StringRef FuncName) const { - std::pair> cluster_info(false, {}); - auto R = ProgramBBClusterInfo.find(getAliasName(FuncName)); - if (R != ProgramBBClusterInfo.end()) { - cluster_info.second = R->second; - cluster_info.first = true; - } - return cluster_info; -} - -// Basic Block Sections can be enabled for a subset of machine basic blocks. -// This is done by passing a file containing names of functions for which basic -// block sections are desired. Additionally, machine basic block ids of the -// functions can also be specified for a finer granularity. Moreover, a cluster -// of basic blocks could be assigned to the same section. 
-// A file with basic block sections for all of function main and three blocks -// for function foo (of which 1 and 2 are placed in a cluster) looks like this: -// ---------------------------- -// list.txt: -// !main -// !foo -// !!1 2 -// !!4 -static Error getBBClusterInfo(const MemoryBuffer *MBuf, - ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, - StringMap &FuncAliasMap) { - assert(MBuf); - line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); - - auto invalidProfileError = [&](auto Message) { - return make_error( - Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + - Twine(LineIt.line_number()) + ": " + Message), - inconvertibleErrorCode()); - }; - - auto FI = ProgramBBClusterInfo.end(); - - // Current cluster ID corresponding to this function. - unsigned CurrentCluster = 0; - // Current position in the current cluster. - unsigned CurrentPosition = 0; - - // Temporary set to ensure every basic block ID appears once in the clusters - // of a function. - SmallSet FuncBBIDs; - - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef S(*LineIt); - if (S[0] == '@') - continue; - // Check for the leading "!" - if (!S.consume_front("!") || S.empty()) - break; - // Check for second "!" which indicates a cluster of basic blocks. - if (S.consume_front("!")) { - if (FI == ProgramBBClusterInfo.end()) - return invalidProfileError( - "Cluster list does not follow a function name specifier."); - SmallVector BBIndexes; - S.split(BBIndexes, ' '); - // Reset current cluster position. 
- CurrentPosition = 0; - for (auto BBIndexStr : BBIndexes) { - unsigned long long BBIndex; - if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) - return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIndexStr + "'."); - if (!FuncBBIDs.insert(BBIndex).second) - return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIndexStr + "'."); - if (!BBIndex && CurrentPosition) - return invalidProfileError("Entry BB (0) does not begin a cluster."); - - FI->second.emplace_back(BBClusterInfo{ - ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); - } - CurrentCluster++; - } else { // This is a function name specifier. - // Function aliases are separated using '/'. We use the first function - // name for the cluster info mapping and delegate all other aliases to - // this one. - SmallVector Aliases; - S.split(Aliases, '/'); - for (size_t i = 1; i < Aliases.size(); ++i) - FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); - - // Prepare for parsing clusters of this function name. - // Start a new cluster map for this function name. 
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; - CurrentCluster = 0; - FuncBBIDs.clear(); - } - } - return Error::success(); -} - -void BasicBlockSectionsProfileReader::initializePass() { - if (!MBuf) - return; - if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) - report_fatal_error(std::move(Err)); -} - -ImmutablePass * -llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { - return new BasicBlockSectionsProfileReader(Buf); -} diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 2ef8380d578eda..6fb6322dfb9832 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -35,7 +35,6 @@ add_llvm_component_library(LLVMCodeGen BranchRelaxation.cpp BreakFalseDeps.cpp BasicBlockSections.cpp - BasicBlockSectionsProfileReader.cpp CalcSpillWeights.cpp CallingConvLower.cpp CFGuardLongjmp.cpp diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b4bb37a66a74dd..c71ca729391c80 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -31,7 +31,6 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -187,15 +186,6 @@ static cl::opt ProfileUnknownInSpecialSection( "to handle it in a different way than .text section, to save " "RAM for example. ")); -static cl::opt BBSectionsGuidedSectionPrefix( - "bbsections-guided-section-prefix", cl::Hidden, cl::init(true), - cl::desc("Use the basic-block-sections profile to determine the text " - "section prefix for hot functions. Functions with " - "basic-block-sections profile will be placed in `.text.hot` " - "regardless of their FDO profile info. 
Other functions won't be " - "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " - "profiles.")); - static cl::opt FreqRatioToSkipMerge( "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " @@ -282,7 +272,6 @@ class TypePromotionTransaction; const TargetLowering *TLI = nullptr; const TargetRegisterInfo *TRI; const TargetTransformInfo *TTI = nullptr; - const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; std::unique_ptr BFI; @@ -358,7 +347,6 @@ class TypePromotionTransaction; AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); } private: @@ -459,7 +447,6 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) @@ -486,14 +473,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); PSI = &getAnalysis().getPSI(); - BBSectionsProfileReader = - getAnalysisIfAvailable(); OptSize = F.hasOptSize(); - // Use the basic-block-sections profile to promote hot functions to .text.hot if requested. - if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && - BBSectionsProfileReader->isFunctionHot(F.getName())) { - F.setSectionPrefix("hot"); - } else if (ProfileGuidedSectionPrefix) { + if (ProfileGuidedSectionPrefix) { // The hot attribute overwrites profile count based hotness while profile // counts based hotness overwrite the cold attribute. // This is a conservative behabvior. 
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index d659a44c183743..be255c83a95975 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePassRegistry.h" @@ -1285,11 +1284,7 @@ void TargetPassConfig::addMachinePasses() { // FIXME: In principle, BasicBlockSection::Labels and splitting can used // together. Update this check once we have addressed any issues. if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { - if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { - addPass(llvm::createBasicBlockSectionsProfileReaderPass( - TM->getBBSectionsFuncListBuf())); - } - addPass(llvm::createBasicBlockSectionsPass()); + addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); } else if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter) { addPass(createMachineFunctionSplitterPass()); diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 4b99fd24fd7d9c..708c8bef290a1b 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -13,7 +13,6 @@ ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata -; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 31e747deb8e1df..684774b18308e0 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -157,7 +157,6 @@ ; GCN-O1-NEXT:Scoped NoAlias Alias Analysis ; GCN-O1-NEXT:Argument Register Usage Information Storage ; GCN-O1-NEXT:Create Garbage Collector Module Metadata -; GCN-O1-NEXT:Basic Block Sections Profile Reader ; GCN-O1-NEXT:Machine Branch Probability Analysis ; GCN-O1-NEXT:Register Usage Information Storage ; GCN-O1-NEXT:Default Regalloc Eviction Advisor @@ -412,7 +411,6 @@ ; GCN-O1-OPTS-NEXT:Scoped NoAlias Alias Analysis ; GCN-O1-OPTS-NEXT:Argument Register Usage Information Storage ; GCN-O1-OPTS-NEXT:Create Garbage Collector Module Metadata -; GCN-O1-OPTS-NEXT:Basic Block Sections Profile Reader ; GCN-O1-OPTS-NEXT:Machine Branch Probability Analysis ; GCN-O1-OPTS-NEXT:Register Usage Information Storage ; GCN-O1-OPTS-NEXT:Default Regalloc Eviction Advisor @@ -700,7 +698,6 @@ ; GCN-O2-NEXT:Scoped NoAlias Alias Analysis ; GCN-O2-NEXT:Argument Register Usage Information Storage ; GCN-O2-NEXT:Create Garbage Collector Module Metadata -; GCN-O2-NEXT:Basic Block Sections Profile Reader ; GCN-O2-NEXT:Machine Branch Probability Analysis ; GCN-O2-NEXT:Register Usage Information Storage ; GCN-O2-NEXT:Default Regalloc Eviction Advisor @@ -990,7 +987,6 @@ ; GCN-O3-NEXT:Scoped NoAlias Alias Analysis ; GCN-O3-NEXT:Argument Register Usage Information Storage ; GCN-O3-NEXT:Create Garbage Collector Module Metadata -; GCN-O3-NEXT:Basic Block Sections Profile Reader ; GCN-O3-NEXT:Machine Branch Probability Analysis ; GCN-O3-NEXT:Register Usage Information Storage ; GCN-O3-NEXT:Default Regalloc Eviction Advisor diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index a94d0560efbe45..eff5184e9c84c8 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -13,7 +13,6 @@ ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector 
Module Metadata -; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 75bc489ab942e1..d0bbe886f67968 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -17,7 +17,6 @@ ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata -; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll index c635b73a45b986..be016df7061b52 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll @@ -24,14 +24,14 @@ declare i32 @_Z3barv() #1 declare i32 @_Z3foov() #1 -; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. -; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3bazb.1,"ax",@progbits,unique +; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.1,"ax",@progbits,unique ; Check that a single cold section is started here and id 1 and 2 blocks are placed here. 
; LINUX-SECTIONS: .section .text.split._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb.cold: -; LINUX-SECTIONS-NOT: .section .text.hot._Z3bazb._Z3bazb.2,"ax",@progbits,unique +; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.2,"ax",@progbits,unique ; LINUX-SECTIONS: .LBB0_2: ; LINUX-SECTIONS: .size _Z3bazb, .Lfunc_end{{[0-9]}}-_Z3bazb diff --git a/llvm/test/CodeGen/X86/basic-block-sections-list.ll b/llvm/test/CodeGen/X86/basic-block-sections-list.ll index f9f7f8892d194d..d2408a0b3e4b38 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-list.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-list.ll @@ -1,8 +1,7 @@ ; Check the basic block sections list option. ; RUN: echo '!_Z3foob' > %t ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-FUNCTION-SECTION -; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION -; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names --bbsections-guided-section-prefix=false | FileCheck %s -check-prefix=LINUX-SECTIONS-NO-GUIDED-PREFIX +; llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -59,18 +58,17 @@ define i32 @_Z3zipb(i1 zeroext %0) nounwind { ret i32 %14 } -; LINUX-SECTIONS-NO-GUIDED-PREFIX: .section .text._Z3foob,"ax",@progbits -; LINUX-SECTIONS: .section .text.hot._Z3foob,"ax",@progbits +; LINUX-SECTIONS: .section .text._Z3foob,"ax",@progbits ; LINUX-SECTIONS: _Z3foob: -; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.1,"ax",@progbits +; LINUX-SECTIONS: .section 
.text._Z3foob._Z3foob.__part.1,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.1: -; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.2,"ax",@progbits +; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.2,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.2: -; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.3,"ax",@progbits +; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.3,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.3: -; LINUX-SECTIONS-FUNCTION-SECTION: .section .text._Z3zipb,"ax",@progbits -; LINUX-SECTIONS-NO-FUNCTION-SECTION-NOT: .section .text{{.*}}._Z3zipb,"ax",@progbits +; LINUX-SECTIONS-FUNCTION-SECTION: .section .text._Z3zipb,"ax",@progbits +; LINUX-SECIONS-NO-FUNCTION-SECTION-NOT: .section .text._Z3zipb,"ax",@progbits ; LINUX-SECTIONS: _Z3zipb: -; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3zipb.__part.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-NOT: .section .text._Z3zipb._Z3zipb.__part.{{[0-9]+}},"ax",@progbits ; LINUX-SECTIONS-NOT: _Z3zipb.__part.{{[0-9]+}}: diff --git a/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll index 32288095361fef..fd6522236987d9 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll @@ -30,17 +30,17 @@ declare i32 @_Z3foov() #1 ; Check that the correct block is found using the call insts for foo and bar. ; -; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. 
-; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3bazb.__part.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.__part.{{[0-9]+}},"ax",@progbits ; LINUX-SECTIONS-LABEL: # %bb.1: ; LINUX-SECTIONS-NEXT: callq _Z3barv -; LINUX-SECTIONS: .section .text.hot._Z3bazb.[[SECTION_LABEL:_Z3bazb.__part.[0-9]+]],"ax",@progbits +; LINUX-SECTIONS: .section .text._Z3bazb.[[SECTION_LABEL:_Z3bazb.__part.[0-9]+]],"ax",@progbits ; LINUX-SECTIONS-NEXT: [[SECTION_LABEL]]: ; LINUX-SECTIONS-NEXT: callq _Z3foov ; LINUX-SECTIONS: .LBB_END0_2: ; LINUX-SECTIONS-NEXT: .size [[SECTION_LABEL]], .LBB_END0_2-[[SECTION_LABEL]] -; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: .Lfunc_end0: ; LINUX-SECTIONS-NEXT: .size _Z3bazb, .Lfunc_end0-_Z3bazb diff --git a/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll b/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll index d481b147662dce..7c5bddedf3ec5c 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll @@ -15,5 +15,5 @@ define dso_local i32 @foo(i1 zeroext %0, i1 zeroext %1) !annotation !1 { !1 = !{!"instr_prof_hash_mismatch"} -; SOURCE-DRIFT-NOT: .section .text{{.*}}.foo -; HASH-CHECK-DISABLED: .section .text.hot.foo +; SOURCE-DRIFT-NOT: .section .text +; HASH-CHECK-DISABLED: .section .text diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 704641087667fc..8552ebc348cbb9 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -21,7 +21,6 @@ ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata -; CHECK-NEXT: Basic Block Sections Profile Reader ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager From 
52992f136b3bd421533ac909c520525e34058468 Mon Sep 17 00:00:00 2001 From: Enna1 Date: Fri, 27 May 2022 09:39:04 +0800 Subject: [PATCH 664/908] Add !nosanitize to FixedMetadataKinds This patch adds !nosanitize metadata to FixedMetadataKinds.def, !nosanitize indicates that LLVM should not insert any sanitizer instrumentation. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D126294 --- clang/lib/CodeGen/SanitizerMetadata.cpp | 2 +- llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 3 ++- .../Transforms/Instrumentation/AddressSanitizer.cpp | 5 +++-- .../Transforms/Instrumentation/HWAddressSanitizer.cpp | 2 +- .../lib/Transforms/Instrumentation/MemorySanitizer.cpp | 10 +++++----- .../Transforms/Instrumentation/SanitizerCoverage.cpp | 3 +-- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp index 9e26d242d3a7e6..51f67622c94191 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -87,7 +87,7 @@ void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) { } void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) { - I->setMetadata(CGM.getModule().getMDKindID("nosanitize"), + I->setMetadata(llvm::LLVMContext::MD_nosanitize, llvm::MDNode::get(CGM.getLLVMContext(), None)); } diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 31979cd2f9db03..9a78fd895fe297 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -42,3 +42,4 @@ LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28) LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29) LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) +LLVM_FIXED_MD_KIND(MD_nosanitize, "nosanitize", 31) diff 
--git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 670b67a5131b19..95e8ae0fd22f48 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -113,7 +113,8 @@ bool unlikelyExecuted(BasicBlock &BB) { // mark sanitizer traps as cold. for (Instruction &I : BB) if (auto *CB = dyn_cast(&I)) - if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize")) + if (CB->hasFnAttr(Attribute::Cold) && + !CB->getMetadata(LLVMContext::MD_nosanitize)) return true; // The block is cold if it has an unreachable terminator, unless it's diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 41ef29b2889875..645ca9f257a158 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1331,7 +1331,7 @@ bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { void AddressSanitizer::getInterestingMemoryOperands( Instruction *I, SmallVectorImpl &Interesting) { // Skip memory accesses inserted by another instrumentation. - if (I->hasMetadata("nosanitize")) + if (I->hasMetadata(LLVMContext::MD_nosanitize)) return; // Do not instrument the load fetching the dynamic shadow address. @@ -2771,7 +2771,8 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (auto *CB = dyn_cast(&Inst)) { // A call inside BB. 
TempsToInstrument.clear(); - if (CB->doesNotReturn() && !CB->hasMetadata("nosanitize")) + if (CB->doesNotReturn() && + !CB->hasMetadata(LLVMContext::MD_nosanitize)) NoReturnCalls.push_back(CB); } if (CallInst *CI = dyn_cast(&Inst)) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 39a396f3c147a6..68e93cb934a7e5 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -715,7 +715,7 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { void HWAddressSanitizer::getInterestingMemoryOperands( Instruction *I, SmallVectorImpl &Interesting) { // Skip memory accesses inserted by another instrumentation. - if (I->hasMetadata("nosanitize")) + if (I->hasMetadata(LLVMContext::MD_nosanitize)) return; // Do not instrument the load fetching the dynamic shadow address. diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index bc1a340fdc7e3d..b8e080d3ad24ef 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1620,7 +1620,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// or extracts if from ParamTLS (for function arguments). Value *getShadow(Value *V) { if (Instruction *I = dyn_cast(V)) { - if (!PropagateShadow || I->getMetadata("nosanitize")) + if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize)) return getCleanShadow(V); // For instructions the shadow is already stored in the map. 
Value *Shadow = ShadowMap[V]; @@ -1739,7 +1739,7 @@ struct MemorySanitizerVisitor : public InstVisitor { assert((isa(V) || isa(V)) && "Unexpected value type in getOrigin()"); if (Instruction *I = dyn_cast(V)) { - if (I->getMetadata("nosanitize")) + if (I->getMetadata(LLVMContext::MD_nosanitize)) return getCleanOrigin(); } Value *Origin = OriginMap[V]; @@ -1862,7 +1862,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // ------------------- Visitors. using InstVisitor::visit; void visit(Instruction &I) { - if (I.getMetadata("nosanitize")) + if (I.getMetadata(LLVMContext::MD_nosanitize)) return; // Don't want to visit if we're in the prologue if (isInPrologue(I)) @@ -1876,7 +1876,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// Optionally, checks that the load address is fully defined. void visitLoadInst(LoadInst &I) { assert(I.getType()->isSized() && "Load type must have size"); - assert(!I.getMetadata("nosanitize")); + assert(!I.getMetadata(LLVMContext::MD_nosanitize)); IRBuilder<> IRB(I.getNextNode()); Type *ShadowTy = getShadowTy(&I); Value *Addr = I.getPointerOperand(); @@ -3591,7 +3591,7 @@ struct MemorySanitizerVisitor : public InstVisitor { } void visitCallBase(CallBase &CB) { - assert(!CB.getMetadata("nosanitize")); + assert(!CB.getMetadata(LLVMContext::MD_nosanitize)); if (CB.isInlineAsm()) { // For inline asm (either a call to asm function, or callbr instruction), // do the usual thing: check argument shadow and mark all outputs as diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 61628223ddefae..d9d11cc90d3d31 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -241,8 +241,7 @@ class ModuleSanitizerCoverage { Type *Ty); void SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata(I->getModule()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); + 
I->setMetadata(LLVMContext::MD_nosanitize, MDNode::get(*C, None)); } std::string getSectionName(const std::string &Section) const; From 5aefdafccf3321f0f94674aeb7ae83e9b306829e Mon Sep 17 00:00:00 2001 From: Hanhan Wang Date: Thu, 26 May 2022 19:20:36 -0700 Subject: [PATCH 665/908] [mlir][Linalg] Relax vectorization condition to allow transposed output. Reviewed By: ThomasRaoux, dcaballe Differential Revision: https://reviews.llvm.org/D126454 --- .../Linalg/Transforms/Vectorization.cpp | 2 +- mlir/test/Dialect/Linalg/vectorization.mlir | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 00c08af780201e..bbb9dd71ac3c9a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -441,7 +441,7 @@ static bool isElementwise(Operation *op) { return false; // TODO: relax the restrictions on indexing map. 
for (OpOperand *opOperand : linalgOp.getOutputOperands()) { - if (!linalgOp.getTiedIndexingMap(opOperand).isIdentity()) + if (!linalgOp.getTiedIndexingMap(opOperand).isPermutation()) return false; } return hasOnlyScalarElementwiseOp(linalgOp->getRegion(0)); diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 3414450ceae95c..301e8f0dcd8da6 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -121,6 +121,26 @@ func.func @generic_output_transpose(%A: memref<8x16xf32>, %B: memref<16x32xf32>, // ----- +#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +#map1 = affine_map<(d0, d1, d2) -> (d1, d0, d2)> +// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d1, d0, d2)> +// CHECK: func @generic_interchanged_transpose +func.func @generic_interchanged_transpose(%arg0: tensor<12x128x32xf32>) -> tensor<128x12x32xf32> { + // CHECK: %[[IN:.+]] = vector.transfer_read + // CHECK: vector.transfer_write %[[IN]], {{.+}} permutation_map = #[[MAP]] + %0 = linalg.init_tensor [128, 12, 32] : tensor<128x12x32xf32> + %1 = linalg.generic {indexing_maps = [#map0, #map1], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%arg0 : tensor<12x128x32xf32>) + outs(%0 : tensor<128x12x32xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + linalg.yield %arg1 : f32 + } -> tensor<128x12x32xf32> + return %1 : tensor<128x12x32xf32> +} + +// ----- + #matmul_trait = { args_in = 2, args_out = 1, From 535604e1773de37e6fcd4f4844a9c27aa9e17383 Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Fri, 27 May 2022 10:35:05 +0800 Subject: [PATCH 666/908] [X86][AMX] Update test case with automation tool. 
--- .../CodeGen/X86/AMX/amx-fastconfig-phi.mir | 179 +++++++++++++++--- llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir | 103 ++++++++-- 2 files changed, 238 insertions(+), 44 deletions(-) diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir index 2a300199f61b43..65b4bd230ff783 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s # # This case test tile phi is nested accessed, but the its def block is @@ -19,7 +20,7 @@ # __tile1024i a = {16, 64}; # __tile1024i b = {16, 64}; # __tile1024i c = {16, 64}; -# +# # if (cond) { # __tile_zero(&c); # } else { @@ -88,9 +89,154 @@ frameInfo: maxAlignment: 1 machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $edi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_SET0_:%[0-9]+]]:vr128 = V_SET0 + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 0, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 16, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 16, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 32, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 32, align 4) + ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 48, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 48, align 4) + ; CHECK-NEXT: MOV8mi %stack.1, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.1, align 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY killed [[COPY]] + ; CHECK-NEXT: CMP32ri8 [[COPY2]], 0, implicit-def $eflags 
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags + ; CHECK-NEXT: TEST8ri [[SETCCr]], 1, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] + ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.0, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.0) + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri3]], [[MOV16ri2]], [[COPY3]], 1, killed [[MOV32ri64_]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2) + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri2]], %bb.2 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[MOV16ri3]], %bb.2 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r1]], %bb.2 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.5, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.5) + ; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]] + ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.4) + ; CHECK-NEXT: [[MOV16ri6:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri7:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def 
$tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri7]], [[MOV16ri6]] + ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.3) + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[MOV16ri8:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri9:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV %59, %60, %stack.9, 1, killed [[MOV64ri6]], 0, $noreg :: (load (s8192) from %stack.9) + ; CHECK-NEXT: PTILESTOREDV killed [[MOV16ri9]], killed [[MOV16ri8]], [[COPY3]], 1, killed [[MOV32ri64_1]], 0, $noreg, [[PTILELOADDV2]] + ; CHECK-NEXT: RET64 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32r0_]], %bb.3, %35, %bb.8 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[PHI]], %bb.3, %60, %bb.8 + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr16 = PHI [[PHI1]], %bb.3, %59, %bb.8 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.3, %58, %bb.8 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; 
CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], [[PHI6]], 1, killed [[MOV64ri7]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.8, 1, killed [[MOV64ri8]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.8) + ; CHECK-NEXT: [[MOV16ri10:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri11:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[MOV64ri9:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri7]], [[MOV16ri6]], %stack.3, 1, killed [[MOV64ri9]], 0, $noreg :: (load (s8192) from %stack.3) + ; CHECK-NEXT: [[MOV64ri10:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV5:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri5]], [[MOV16ri4]], %stack.4, 1, killed [[MOV64ri10]], 0, $noreg :: (load (s8192) from %stack.4) + ; CHECK-NEXT: [[MOV64ri11:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], %stack.8, 1, killed [[MOV64ri11]], 0, $noreg :: (load (s8192) from %stack.8) + ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV killed [[MOV16ri11]], [[MOV16ri10]], [[MOV16ri10]], [[PTILELOADDV6]], [[PTILELOADDV5]], [[PTILELOADDV4]] + ; CHECK-NEXT: TEST8ri [[SETCCr]], 1, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.7, 5, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV16ri12:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri13:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, 
implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILEZEROV3:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri13]], [[MOV16ri12]] + ; CHECK-NEXT: [[MOV64ri12:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.6, 1, killed [[MOV64ri12]], 0, $noreg, [[PTILEZEROV3]] :: (store (s8192) into %stack.6) + ; CHECK-NEXT: JMP_1 %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[MOV16ri14:%[0-9]+]]:gr16 = MOV16ri 64 + ; CHECK-NEXT: [[MOV16ri15:%[0-9]+]]:gr16 = MOV16ri 16 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.7, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri15]], [[MOV16ri14]], [[COPY3]], 1, killed [[MOV32ri64_2]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri13:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.7, 1, killed [[MOV64ri13]], 0, $noreg, [[PTILELOADDV7]] :: (store (s8192) into %stack.7) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[MOV16ri12]], %bb.6, [[MOV16ri14]], %bb.7 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr16 = PHI [[MOV16ri13]], %bb.6, [[MOV16ri15]], %bb.7 + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:gr64_nosp = PHI [[LEA64r3]], %bb.6, [[LEA64r4]], %bb.7 + ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, 
implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: [[MOV64ri14:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI8]], [[PHI7]], [[PHI9]], 1, killed [[MOV64ri14]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri15:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.9, 1, killed [[MOV64ri15]], 0, $noreg, [[PTILELOADDV8]] :: (store (s8192) into %stack.9) + ; CHECK-NEXT: [[ADD32ri8_:%[0-9]+]]:gr32 = ADD32ri8 [[PHI3]], 1, implicit-def $eflags + ; CHECK-NEXT: CMP32ri8 [[ADD32ri8_]], 10, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.4, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.5 bb.0.entry: liveins: $edi, $rsi - + %14:gr64 = COPY $rsi %12:gr32 = COPY $edi %13:gr32 = COPY killed %12 @@ -99,24 +245,20 @@ body: | %16:gr8 = SETCCr 4, implicit $eflags TEST8ri %16, 1, implicit-def $eflags JCC_1 %bb.2, 5, implicit $eflags - + bb.1: %17:gr16 = MOV16ri 64 %18:gr16 = MOV16ri 16 %1:tile = PTILEZEROV killed %18, killed %17 JMP_1 %bb.3 - + bb.2: %19:gr64_nosp = MOV32ri64 32 %20:gr16 = MOV16ri 64 %21:gr16 = MOV16ri 16 %2:tile = PTILELOADDV killed %21, killed %20, %15, 1, killed %19, 0, $noreg - + bb.3: - ; CHECK: %43:gr16 = PHI %17, %bb.1, %20, %bb.2 - ; CHECK-NEXT: %42:gr16 = PHI %18, %bb.1, %21, %bb.2 - ; CHECK-NEXT: %41:gr64_nosp = PHI %44, %bb.1, %45, %bb.2 - ; CHECK-NEXT: LDTILECFG %3:tile = PHI %1, %bb.1, %2, %bb.2 %25:gr16 = MOV16ri 64 @@ -127,20 +269,15 @@ body: | %5:tile = PTILEZEROV killed %24, killed %23 %22:gr32 = MOV32r0 implicit-def $eflags JMP_1 %bb.5 - + bb.4: %36:gr64_nosp = MOV32ri64 32 %37:gr16 = MOV16ri 64 %38:gr16 = MOV16ri 16 PTILESTOREDV killed %38, killed %37, %15, 1, killed %36, 0, $noreg, %10 RET64 - + bb.5: - ; CHECK: %6:gr32 = PHI %22, %bb.3, %35, %bb.8 - ; CHECK-NEXT: %56:gr16 = PHI %43, %bb.3, %60, %bb.8 - ; CHECK-NEXT: %55:gr16 = PHI %42, %bb.3, %59, %bb.8 - ; CHECK-NEXT: 
%54:gr64_nosp = PHI %57, %bb.3, %58, %bb.8 - ; CHECK-NEXT: LDTILECFG %6:gr32 = PHI %22, %bb.3, %35, %bb.8 %7:tile = PHI %3, %bb.3, %10, %bb.8 @@ -149,24 +286,20 @@ body: | %29:tile = PTDPBSSDV killed %28, %27, %27, %7, %4, %5 TEST8ri %16, 1, implicit-def $eflags JCC_1 %bb.7, 5, implicit $eflags - + bb.6: %30:gr16 = MOV16ri 64 %31:gr16 = MOV16ri 16 %8:tile = PTILEZEROV killed %31, killed %30 JMP_1 %bb.8 - + bb.7: %32:gr64_nosp = MOV32ri64 32 %33:gr16 = MOV16ri 64 %34:gr16 = MOV16ri 16 %9:tile = PTILELOADDV killed %34, killed %33, %15, 1, killed %32, 0, $noreg - + bb.8: - ; CHECK: %60:gr16 = PHI %30, %bb.6, %33, %bb.7 - ; CHECK-NEXT: %59:gr16 = PHI %31, %bb.6, %34, %bb.7 - ; CHECK-NEXT: %58:gr64_nosp = PHI %61, %bb.6, %62, %bb.7 - ; CHECK-NEXT: LDTILECFG %10:tile = PHI %8, %bb.6, %9, %bb.7 %35:gr32 = ADD32ri8 %6, 1, implicit-def $eflags diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir index ff0fdbe3aaf71a..1dfc45c4c0f0f9 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s --- | @@ -78,12 +79,91 @@ frameInfo: maxAlignment: 1 machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: test_api + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $edi, $esi, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 + ; CHECK-NEXT: VMOVUPSZmr %stack.3, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.3, align 4) + ; CHECK-NEXT: MOV8mi %stack.3, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.3, align 4) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY killed $edx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed 
$edi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY]].sub_16bit + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY killed [[COPY1]].sub_16bit + ; CHECK-NEXT: TEST32rr killed [[COPY2]], [[COPY2]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit killed $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf + ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 8 + ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2) + ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.1, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[COPY3]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.1, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.1) + ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[COPY3]], killed [[MOV32ri64_]], 1, killed [[MOV32ri64_1]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.0, 1, killed 
[[MOV64ri2]], 0, $noreg, [[PTILELOADDV2]] :: (store (s8192) into %stack.0) + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2 + ; CHECK-NEXT: [[MOV32ri64_3:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 + ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri1]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.6, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.6) + ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[COPY3]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.5, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILELOADDV4]] :: (store (s8192) into %stack.5) + ; CHECK-NEXT: [[LEA64r5:%[0-9]+]]:gr64_nosp = LEA64r %stack.4, 1, $noreg, 0, $noreg + ; CHECK-NEXT: [[PTILELOADDV5:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[COPY3]], killed [[MOV32ri64_2]], 1, killed [[MOV32ri64_3]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILELOADDV5]] :: (store (s8192) into %stack.4) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, 
[[MOV16ri1]], %bb.2 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r3]], %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri1]], %bb.2 + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[LEA64r4]], %bb.2 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2 + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.1, [[LEA64r5]], %bb.2 + ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri6]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri7]], 0, $noreg + ; CHECK-NEXT: [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64 + ; CHECK-NEXT: [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI7]], [[PHI6]], [[PHI8]], 1, killed [[MOV64ri8]], 0, $noreg + ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 8 + ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[COPY4]], [[COPY3]], killed [[MOV16ri2]], killed [[PTILELOADDV8]], killed [[PTILELOADDV6]], killed [[PTILELOADDV7]] + ; CHECK-NEXT: [[MOV32ri64_4:%[0-9]+]]:gr64 = MOV32ri64 @buf + ; CHECK-NEXT: [[MOV32ri64_5:%[0-9]+]]:gr64_nosp = MOV32ri64 32 + ; CHECK-NEXT: PTILESTOREDV killed [[COPY4]], killed [[COPY3]], killed [[MOV32ri64_4]], 1, killed [[MOV32ri64_5]], 0, $noreg, killed [[PTDPBSSDV]] + ; 
CHECK-NEXT: RET 0 bb.0.entry: successors: %bb.2(0x30000000), %bb.1(0x50000000) liveins: $edi, $esi, $edx - ; CHECK: {{%.*}}:vr512 = AVX512_512_SET0 - ; CHECK-NEXT: VMOVUPSZmr %stack.3, 1, $noreg, 0, $noreg, {{%.*}} %11:gr32 = COPY killed $edx %10:gr32 = COPY killed $esi @@ -98,7 +178,6 @@ body: | %14:gr64 = MOV32ri64 @buf %15:gr64_nosp = MOV32ri64 32 %16:gr16 = MOV16ri 8 - ; CHECK: LDTILECFG %0:tile = PTILELOADDV %12, %16, %14, 1, %15, 0, $noreg %1:tile = PTILELOADDV killed %16, %13, %14, 1, %15, 0, $noreg %2:tile = PTILELOADDV %12, %13, killed %14, 1, killed %15, 0, $noreg @@ -108,30 +187,12 @@ body: | %17:gr64 = MOV32ri64 @buf2 %18:gr64_nosp = MOV32ri64 32 %19:gr16 = MOV16ri 8 - ; CHECK: LDTILECFG %3:tile = PTILELOADDV %12, %19, %17, 1, %18, 0, $noreg %4:tile = PTILELOADDV killed %19, %13, %17, 1, %18, 0, $noreg %5:tile = PTILELOADDV %12, %13, killed %17, 1, killed %18, 0, $noreg bb.3.if.end: - ; CHECK: bb.3.if.end - ; CHECK-NEXT: %44:gr16 = PHI %16, %bb.1, %19, %bb.2 - ; CHECK-NEXT: %43:gr16 = PHI %12, %bb.1, %12, %bb.2 - ; CHECK-NEXT: %42:gr64_nosp = PHI %45, %bb.1, %46, %bb.2 - ; CHECK-NEXT: %38:gr16 = PHI %13, %bb.1, %13, %bb.2 - ; CHECK-NEXT: %37:gr16 = PHI %16, %bb.1, %19, %bb.2 - ; CHECK-NEXT: %36:gr64_nosp = PHI %39, %bb.1, %40, %bb.2 - ; CHECK-NEXT: %32:gr16 = PHI %13, %bb.1, %13, %bb.2 - ; CHECK-NEXT: %31:gr16 = PHI %12, %bb.1, %12, %bb.2 - ; CHECK-NEXT: %30:gr64_nosp = PHI %33, %bb.1, %34, %bb.2 - ; CHECK-NEXT: LDTILECFG - ; CHECK-NEXT: %47:gr64_nosp = MOV64ri 64 - ; CHECK-NEXT: %6:tile = PTILELOADDV %43, %44, %42, 1, killed %47, 0, $noreg - ; CHECK-NEXT: %41:gr64_nosp = MOV64ri 64 - ; CHECK-NEXT: %7:tile = PTILELOADDV %37, %38, %36, 1, killed %41, 0, $noreg - ; CHECK-NEXT: %35:gr64_nosp = MOV64ri 64 - ; CHECK-NEXT: %8:tile = PTILELOADDV %31, %32, %30, 1, killed %35, 0, $noreg %6:tile = PHI %0, %bb.1, %3, %bb.2 %7:tile = PHI %1, %bb.1, %4, %bb.2 From b709f074d2e406b2a36cd9cb50fc99a3ec9d4bd2 Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Fri, 27 
May 2022 10:50:30 +0800 Subject: [PATCH 667/908] [InstCombine] [NFC] precommit tests for bitcast-extelt --- llvm/test/Transforms/InstCombine/bitcast.ll | 54 +++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index eb30719c75f2c5..68193558280798 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -431,6 +431,60 @@ define double @bitcast_extelt4(i128 %A) { ret double %bc2 } +define <2 x i32> @bitcast_extelt5(<1 x i64> %A) { +; CHECK-LABEL: @bitcast_extelt5( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <1 x i64> [[A:%.*]], i64 0 +; CHECK-NEXT: [[BC:%.*]] = bitcast i64 [[EXT]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[BC]] +; + %ext = extractelement <1 x i64> %A, i32 0 + %bc = bitcast i64 %ext to <2 x i32> + ret <2 x i32> %bc +} + +define <2 x i32> @bitcast_extelt5_scalable( %A) { +; CHECK-LABEL: @bitcast_extelt5_scalable( +; CHECK-NEXT: [[EXT:%.*]] = extractelement [[A:%.*]], i64 0 +; CHECK-NEXT: [[BC:%.*]] = bitcast i64 [[EXT]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[BC]] +; + %ext = extractelement %A, i32 0 + %bc = bitcast i64 %ext to <2 x i32> + ret <2 x i32> %bc +} + +define <2 x i32> @bitcast_extelt6(<2 x i64> %A) { +; CHECK-LABEL: @bitcast_extelt6( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x i64> [[A:%.*]], i64 0 +; CHECK-NEXT: [[BC:%.*]] = bitcast i64 [[EXT]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[BC]] +; + %ext = extractelement <2 x i64> %A, i32 0 + %bc = bitcast i64 %ext to <2 x i32> + ret <2 x i32> %bc +} + +define double @bitcast_extelt7(<1 x i64> %A) { +; CHECK-LABEL: @bitcast_extelt7( +; CHECK-NEXT: [[BC1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <1 x double> +; CHECK-NEXT: [[BC:%.*]] = extractelement <1 x double> [[BC1]], i64 0 +; CHECK-NEXT: ret double [[BC]] +; + %ext = extractelement <1 x i64> %A, i32 0 + %bc = bitcast i64 %ext to double + ret double %bc +} + +define double 
@bitcast_extelt8(<1 x i64> %A) { +; CHECK-LABEL: @bitcast_extelt8( +; CHECK-NEXT: [[BC1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <1 x double> +; CHECK-NEXT: [[BC:%.*]] = extractelement <1 x double> [[BC1]], i64 0 +; CHECK-NEXT: ret double [[BC]] +; + %bc = bitcast <1 x i64> %A to double + ret double %bc +} + define <2 x i32> @test4(i32 %A, i32 %B){ ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 From 08cc0585187219c916a3125e79e7d7154ee1646a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 26 May 2022 19:27:05 -0700 Subject: [PATCH 668/908] Reland "[Propeller] Promote functions with propeller profiles to .text.hot." This relands commit 4d8d2580c53e130c3c3dd3877384301e3c495554. The major change here is using 'addUsedIfAvailable()` to make sure we don't change the pipeline tests. Differential Revision: https://reviews.llvm.org/D126518 --- .../CodeGen/BasicBlockSectionsProfileReader.h | 109 +++++++++++ llvm/include/llvm/CodeGen/Passes.h | 6 +- llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/CodeGen/BasicBlockSections.cpp | 169 ++---------------- .../BasicBlockSectionsProfileReader.cpp | 144 +++++++++++++++ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGenPrepare.cpp | 21 ++- llvm/lib/CodeGen/TargetPassConfig.cpp | 7 +- .../CodeGen/X86/basic-block-sections-cold.ll | 6 +- .../CodeGen/X86/basic-block-sections-list.ll | 18 +- .../X86/basic-block-sections-listbb.ll | 8 +- .../X86/basic-block-sections-source-drift.ll | 4 +- 12 files changed, 318 insertions(+), 176 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h create mode 100644 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h new file mode 100644 index 00000000000000..7ae1304cced9ad --- /dev/null +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ 
-0,0 +1,109 @@ +//===-- BasicBlockSectionsProfileReader.h - BB sections profile reader pass ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass creates the basic block cluster info by reading the basic block +// sections profile. The cluster info will be used by the basic-block-sections +// pass to arrange basic blocks in their sections. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H +#define LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +namespace llvm { + +// The cluster information for a machine basic block. +struct BBClusterInfo { + // MachineBasicBlock ID. + unsigned MBBNumber; + // Cluster ID this basic block belongs to. + unsigned ClusterID; + // Position of basic block within the cluster. 
+ unsigned PositionInCluster; +}; + +using ProgramBBClusterInfoMapTy = StringMap>; + +class BasicBlockSectionsProfileReader : public ImmutablePass { +public: + static char ID; + + BasicBlockSectionsProfileReader(const MemoryBuffer *Buf) + : ImmutablePass(ID), MBuf(Buf) { + initializeBasicBlockSectionsProfileReaderPass( + *PassRegistry::getPassRegistry()); + }; + + BasicBlockSectionsProfileReader() : ImmutablePass(ID) { + initializeBasicBlockSectionsProfileReaderPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Basic Block Sections Profile Reader"; + } + + // Returns true if basic block sections profile exist for function \p + // FuncName. + bool isFunctionHot(StringRef FuncName) const; + + // Returns a pair with first element representing whether basic block sections + // profile exist for the function \p FuncName, and the second element + // representing the basic block sections profile (cluster info) for this + // function. If the first element is true and the second element is empty, it + // means unique basic block sections are desired for all basic blocks of the + // function. + std::pair> + getBBClusterInfoForFunction(StringRef FuncName) const; + + /// Read profiles of basic blocks if available here. + void initializePass() override; + +private: + StringRef getAliasName(StringRef FuncName) const { + auto R = FuncAliasMap.find(FuncName); + return R == FuncAliasMap.end() ? FuncName : R->second; + } + + // This contains the basic-block-sections profile. + const MemoryBuffer *MBuf = nullptr; + + // This encapsulates the BB cluster information for the whole program. + // + // For every function name, it contains the cluster information for (all or + // some of) its basic blocks. The cluster information for every basic block + // includes its cluster ID along with the position of the basic block in that + // cluster. + ProgramBBClusterInfoMapTy ProgramBBClusterInfo; + + // Some functions have alias names. 
We use this map to find the main alias + // name for which we have mapping in ProgramBBClusterInfo. + StringMap FuncAliasMap; +}; + +// Creates a BasicBlockSectionsProfileReader pass to parse the basic block +// sections profile. \p Buf is a memory buffer that contains the list of +// functions and basic block ids to selectively enable basic block sections. +ImmutablePass * +createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf); + +} // namespace llvm +#endif // LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index bfd774f578ac88..6e37d42f0d2944 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -51,10 +51,8 @@ namespace llvm { FunctionPass *createUnreachableBlockEliminationPass(); /// createBasicBlockSections Pass - This pass assigns sections to machine - /// basic blocks and is enabled with -fbasic-block-sections. Buf is a memory - /// buffer that contains the list of functions and basic block ids to - /// selectively enable basic block sections. - MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf); + /// basic blocks and is enabled with -fbasic-block-sections. + MachineFunctionPass *createBasicBlockSectionsPass(); /// createMachineFunctionSplitterPass - This pass splits machine functions /// using profile information. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 41e1d99b536136..ece2f20bd4b86c 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -76,6 +76,7 @@ void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeAttributorLegacyPassPass(PassRegistry&); void initializeAttributorCGSCCLegacyPassPass(PassRegistry &); +void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 295d557cd72713..38ba651d126faf 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -69,25 +69,17 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetMachine.h" -using llvm::SmallSet; -using llvm::SmallVector; -using llvm::StringMap; -using llvm::StringRef; using namespace llvm; // Placing the cold clusters in a separate section mitigates against poor @@ -107,41 +99,11 @@ cl::opt BBSectionsDetectSourceDrift( namespace { -// This struct represents the cluster information for a machine basic block. 
-struct BBClusterInfo { - // MachineBasicBlock ID. - unsigned MBBNumber; - // Cluster ID this basic block belongs to. - unsigned ClusterID; - // Position of basic block within the cluster. - unsigned PositionInCluster; -}; - -using ProgramBBClusterInfoMapTy = StringMap>; - class BasicBlockSections : public MachineFunctionPass { public: static char ID; - // This contains the basic-block-sections profile. - const MemoryBuffer *MBuf = nullptr; - - // This encapsulates the BB cluster information for the whole program. - // - // For every function name, it contains the cluster information for (all or - // some of) its basic blocks. The cluster information for every basic block - // includes its cluster ID along with the position of the basic block in that - // cluster. - ProgramBBClusterInfoMapTy ProgramBBClusterInfo; - - // Some functions have alias names. We use this map to find the main alias - // name for which we have mapping in ProgramBBClusterInfo. - StringMap FuncAliasMap; - - BasicBlockSections(const MemoryBuffer *Buf) - : MachineFunctionPass(ID), MBuf(Buf) { - initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); - }; + BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); @@ -153,9 +115,6 @@ class BasicBlockSections : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override; - /// Read profiles of basic blocks if available here. - bool doInitialization(Module &M) override; - /// Identify basic blocks that need separate sections and prepare to emit them /// accordingly. bool runOnMachineFunction(MachineFunction &MF) override; @@ -205,21 +164,18 @@ static void updateBranches( // This function provides the BBCluster information associated with a function. // Returns true if a valid association exists and false otherwise. 
-static bool getBBClusterInfoForFunction( - const MachineFunction &MF, const StringMap FuncAliasMap, - const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, +bool getBBClusterInfoForFunction( + const MachineFunction &MF, + BasicBlockSectionsProfileReader *BBSectionsProfileReader, std::vector> &V) { - // Get the main alias name for the function. - auto FuncName = MF.getName(); - auto R = FuncAliasMap.find(FuncName); - StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second; // Find the assoicated cluster information. - auto P = ProgramBBClusterInfo.find(AliasName); - if (P == ProgramBBClusterInfo.end()) + std::pair> P = + BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName()); + if (!P.first) return false; - if (P->second.empty()) { + if (P.second.empty()) { // This indicates that sections are desired for all basic blocks of this // function. We clear the BBClusterInfo vector to denote this. V.clear(); @@ -227,7 +183,7 @@ static bool getBBClusterInfoForFunction( } V.resize(MF.getNumBlockIDs()); - for (auto bbClusterInfo : P->second) { + for (auto bbClusterInfo : P.second) { // Bail out if the cluster information contains invalid MBB numbers. if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) return false; @@ -376,9 +332,11 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } + BBSectionsProfileReader = &getAnalysis(); + std::vector> FuncBBClusterInfo; if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, + !getBBClusterInfoForFunction(MF, BBSectionsProfileReader, FuncBBClusterInfo)) return true; MF.setBBSectionsType(BBSectionsType); @@ -426,107 +384,12 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } -// Basic Block Sections can be enabled for a subset of machine basic blocks. -// This is done by passing a file containing names of functions for which basic -// block sections are desired. 
Additionally, machine basic block ids of the -// functions can also be specified for a finer granularity. Moreover, a cluster -// of basic blocks could be assigned to the same section. -// A file with basic block sections for all of function main and three blocks -// for function foo (of which 1 and 2 are placed in a cluster) looks like this: -// ---------------------------- -// list.txt: -// !main -// !foo -// !!1 2 -// !!4 -static Error getBBClusterInfo(const MemoryBuffer *MBuf, - ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, - StringMap &FuncAliasMap) { - assert(MBuf); - line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); - - auto invalidProfileError = [&](auto Message) { - return make_error( - Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + - Twine(LineIt.line_number()) + ": " + Message), - inconvertibleErrorCode()); - }; - - auto FI = ProgramBBClusterInfo.end(); - - // Current cluster ID corresponding to this function. - unsigned CurrentCluster = 0; - // Current position in the current cluster. - unsigned CurrentPosition = 0; - - // Temporary set to ensure every basic block ID appears once in the clusters - // of a function. - SmallSet FuncBBIDs; - - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef S(*LineIt); - if (S[0] == '@') - continue; - // Check for the leading "!" - if (!S.consume_front("!") || S.empty()) - break; - // Check for second "!" which indicates a cluster of basic blocks. - if (S.consume_front("!")) { - if (FI == ProgramBBClusterInfo.end()) - return invalidProfileError( - "Cluster list does not follow a function name specifier."); - SmallVector BBIndexes; - S.split(BBIndexes, ' '); - // Reset current cluster position. 
- CurrentPosition = 0; - for (auto BBIndexStr : BBIndexes) { - unsigned long long BBIndex; - if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) - return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIndexStr + "'."); - if (!FuncBBIDs.insert(BBIndex).second) - return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIndexStr + "'."); - if (!BBIndex && CurrentPosition) - return invalidProfileError("Entry BB (0) does not begin a cluster."); - - FI->second.emplace_back(BBClusterInfo{ - ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); - } - CurrentCluster++; - } else { // This is a function name specifier. - // Function aliases are separated using '/'. We use the first function - // name for the cluster info mapping and delegate all other aliases to - // this one. - SmallVector Aliases; - S.split(Aliases, '/'); - for (size_t i = 1; i < Aliases.size(); ++i) - FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); - - // Prepare for parsing clusters of this function name. - // Start a new cluster map for this function name. 
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; - CurrentCluster = 0; - FuncBBIDs.clear(); - } - } - return Error::success(); -} - -bool BasicBlockSections::doInitialization(Module &M) { - if (!MBuf) - return false; - if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) - report_fatal_error(std::move(Err)); - return false; -} - void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } -MachineFunctionPass * -llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) { - return new BasicBlockSections(Buf); +MachineFunctionPass *llvm::createBasicBlockSectionsPass() { + return new BasicBlockSections(); } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp new file mode 100644 index 00000000000000..c2acf115998b63 --- /dev/null +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -0,0 +1,144 @@ +//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the basic block sections profile reader pass. It parses +// and stores the basic block sections profile file (which is specified via the +// `-basic-block-sections` flag). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +char BasicBlockSectionsProfileReader::ID = 0; +INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", + "Reads and parses a basic block sections profile.", false, + false) + +bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { + return getBBClusterInfoForFunction(FuncName).first; +} + +std::pair> +BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( + StringRef FuncName) const { + std::pair> cluster_info(false, {}); + auto R = ProgramBBClusterInfo.find(getAliasName(FuncName)); + if (R != ProgramBBClusterInfo.end()) { + cluster_info.second = R->second; + cluster_info.first = true; + } + return cluster_info; +} + +// Basic Block Sections can be enabled for a subset of machine basic blocks. +// This is done by passing a file containing names of functions for which basic +// block sections are desired. Additionally, machine basic block ids of the +// functions can also be specified for a finer granularity. Moreover, a cluster +// of basic blocks could be assigned to the same section. 
+// A file with basic block sections for all of function main and three blocks +// for function foo (of which 1 and 2 are placed in a cluster) looks like this: +// ---------------------------- +// list.txt: +// !main +// !foo +// !!1 2 +// !!4 +static Error getBBClusterInfo(const MemoryBuffer *MBuf, + ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, + StringMap &FuncAliasMap) { + assert(MBuf); + line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); + + auto invalidProfileError = [&](auto Message) { + return make_error( + Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + }; + + auto FI = ProgramBBClusterInfo.end(); + + // Current cluster ID corresponding to this function. + unsigned CurrentCluster = 0; + // Current position in the current cluster. + unsigned CurrentPosition = 0; + + // Temporary set to ensure every basic block ID appears once in the clusters + // of a function. + SmallSet FuncBBIDs; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef S(*LineIt); + if (S[0] == '@') + continue; + // Check for the leading "!" + if (!S.consume_front("!") || S.empty()) + break; + // Check for second "!" which indicates a cluster of basic blocks. + if (S.consume_front("!")) { + if (FI == ProgramBBClusterInfo.end()) + return invalidProfileError( + "Cluster list does not follow a function name specifier."); + SmallVector BBIndexes; + S.split(BBIndexes, ' '); + // Reset current cluster position. 
+ CurrentPosition = 0; + for (auto BBIndexStr : BBIndexes) { + unsigned long long BBIndex; + if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + return invalidProfileError(Twine("Unsigned integer expected: '") + + BBIndexStr + "'."); + if (!FuncBBIDs.insert(BBIndex).second) + return invalidProfileError(Twine("Duplicate basic block id found '") + + BBIndexStr + "'."); + if (!BBIndex && CurrentPosition) + return invalidProfileError("Entry BB (0) does not begin a cluster."); + + FI->second.emplace_back(BBClusterInfo{ + ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + } else { // This is a function name specifier. + // Function aliases are separated using '/'. We use the first function + // name for the cluster info mapping and delegate all other aliases to + // this one. + SmallVector Aliases; + S.split(Aliases, '/'); + for (size_t i = 1; i < Aliases.size(); ++i) + FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); + + // Prepare for parsing clusters of this function name. + // Start a new cluster map for this function name. 
+ FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; + CurrentCluster = 0; + FuncBBIDs.clear(); + } + } + return Error::success(); +} + +void BasicBlockSectionsProfileReader::initializePass() { + if (!MBuf) + return; + if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) + report_fatal_error(std::move(Err)); +} + +ImmutablePass * +llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { + return new BasicBlockSectionsProfileReader(Buf); +} diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 6fb6322dfb9832..2ef8380d578eda 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_component_library(LLVMCodeGen BranchRelaxation.cpp BreakFalseDeps.cpp BasicBlockSections.cpp + BasicBlockSectionsProfileReader.cpp CalcSpillWeights.cpp CallingConvLower.cpp CFGuardLongjmp.cpp diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index c71ca729391c80..da3d40bc38ad4b 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -186,6 +187,15 @@ static cl::opt ProfileUnknownInSpecialSection( "to handle it in a different way than .text section, to save " "RAM for example. ")); +static cl::opt BBSectionsGuidedSectionPrefix( + "bbsections-guided-section-prefix", cl::Hidden, cl::init(true), + cl::desc("Use the basic-block-sections profile to determine the text " + "section prefix for hot functions. Functions with " + "basic-block-sections profile will be placed in `.text.hot` " + "regardless of their FDO profile info. 
Other functions won't be " + "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " + "profiles.")); + static cl::opt FreqRatioToSkipMerge( "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " @@ -272,6 +282,7 @@ class TypePromotionTransaction; const TargetLowering *TLI = nullptr; const TargetRegisterInfo *TRI; const TargetTransformInfo *TTI = nullptr; + const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; std::unique_ptr BFI; @@ -347,6 +358,7 @@ class TypePromotionTransaction; AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addUsedIfAvailable(); } private: @@ -442,6 +454,7 @@ char CodeGenPrepare::ID = 0; INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -473,8 +486,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); PSI = &getAnalysis().getPSI(); + BBSectionsProfileReader = + getAnalysisIfAvailable(); OptSize = F.hasOptSize(); - if (ProfileGuidedSectionPrefix) { + // Use the basic-block-sections profile to promote hot functions to .text.hot if requested. + if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && + BBSectionsProfileReader->isFunctionHot(F.getName())) { + F.setSectionPrefix("hot"); + } else if (ProfileGuidedSectionPrefix) { // The hot attribute overwrites profile count based hotness while profile // counts based hotness overwrite the cold attribute. // This is a conservative behabvior. 
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index be255c83a95975..d659a44c183743 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePassRegistry.h" @@ -1284,7 +1285,11 @@ void TargetPassConfig::addMachinePasses() { // FIXME: In principle, BasicBlockSection::Labels and splitting can used // together. Update this check once we have addressed any issues. if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { - addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); + if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { + addPass(llvm::createBasicBlockSectionsProfileReaderPass( + TM->getBBSectionsFuncListBuf())); + } + addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter) { addPass(createMachineFunctionSplitterPass()); diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll index be016df7061b52..c635b73a45b986 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll @@ -24,14 +24,14 @@ declare i32 @_Z3barv() #1 declare i32 @_Z3foov() #1 -; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. 
-; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.1,"ax",@progbits,unique +; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3bazb.1,"ax",@progbits,unique ; Check that a single cold section is started here and id 1 and 2 blocks are placed here. ; LINUX-SECTIONS: .section .text.split._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb.cold: -; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.2,"ax",@progbits,unique +; LINUX-SECTIONS-NOT: .section .text.hot._Z3bazb._Z3bazb.2,"ax",@progbits,unique ; LINUX-SECTIONS: .LBB0_2: ; LINUX-SECTIONS: .size _Z3bazb, .Lfunc_end{{[0-9]}}-_Z3bazb diff --git a/llvm/test/CodeGen/X86/basic-block-sections-list.ll b/llvm/test/CodeGen/X86/basic-block-sections-list.ll index d2408a0b3e4b38..f9f7f8892d194d 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-list.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-list.ll @@ -1,7 +1,8 @@ ; Check the basic block sections list option. ; RUN: echo '!_Z3foob' > %t ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-FUNCTION-SECTION -; llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names --bbsections-guided-section-prefix=false | FileCheck %s -check-prefix=LINUX-SECTIONS-NO-GUIDED-PREFIX define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -58,17 +59,18 @@ define i32 @_Z3zipb(i1 zeroext %0) nounwind { ret i32 %14 } -; LINUX-SECTIONS: .section .text._Z3foob,"ax",@progbits +; LINUX-SECTIONS-NO-GUIDED-PREFIX: 
.section .text._Z3foob,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3foob,"ax",@progbits ; LINUX-SECTIONS: _Z3foob: -; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.1,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.1,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.1: -; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.2,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.2,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.2: -; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.__part.3,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3foob._Z3foob.__part.3,"ax",@progbits ; LINUX-SECTIONS: _Z3foob.__part.3: -; LINUX-SECTIONS-FUNCTION-SECTION: .section .text._Z3zipb,"ax",@progbits -; LINUX-SECIONS-NO-FUNCTION-SECTION-NOT: .section .text._Z3zipb,"ax",@progbits +; LINUX-SECTIONS-FUNCTION-SECTION: .section .text._Z3zipb,"ax",@progbits +; LINUX-SECTIONS-NO-FUNCTION-SECTION-NOT: .section .text{{.*}}._Z3zipb,"ax",@progbits ; LINUX-SECTIONS: _Z3zipb: -; LINUX-SECTIONS-NOT: .section .text._Z3zipb._Z3zipb.__part.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3zipb.__part.{{[0-9]+}},"ax",@progbits ; LINUX-SECTIONS-NOT: _Z3zipb.__part.{{[0-9]+}}: diff --git a/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll index fd6522236987d9..32288095361fef 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll @@ -30,17 +30,17 @@ declare i32 @_Z3foov() #1 ; Check that the correct block is found using the call insts for foo and bar. ; -; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. 
-; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.__part.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-NOT: .section .text{{.*}}._Z3bazb.__part.{{[0-9]+}},"ax",@progbits ; LINUX-SECTIONS-LABEL: # %bb.1: ; LINUX-SECTIONS-NEXT: callq _Z3barv -; LINUX-SECTIONS: .section .text._Z3bazb.[[SECTION_LABEL:_Z3bazb.__part.[0-9]+]],"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb.[[SECTION_LABEL:_Z3bazb.__part.[0-9]+]],"ax",@progbits ; LINUX-SECTIONS-NEXT: [[SECTION_LABEL]]: ; LINUX-SECTIONS-NEXT: callq _Z3foov ; LINUX-SECTIONS: .LBB_END0_2: ; LINUX-SECTIONS-NEXT: .size [[SECTION_LABEL]], .LBB_END0_2-[[SECTION_LABEL]] -; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits +; LINUX-SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: .Lfunc_end0: ; LINUX-SECTIONS-NEXT: .size _Z3bazb, .Lfunc_end0-_Z3bazb diff --git a/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll b/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll index 7c5bddedf3ec5c..d481b147662dce 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-source-drift.ll @@ -15,5 +15,5 @@ define dso_local i32 @foo(i1 zeroext %0, i1 zeroext %1) !annotation !1 { !1 = !{!"instr_prof_hash_mismatch"} -; SOURCE-DRIFT-NOT: .section .text -; HASH-CHECK-DISABLED: .section .text +; SOURCE-DRIFT-NOT: .section .text{{.*}}.foo +; HASH-CHECK-DISABLED: .section .text.hot.foo From 827fa2c419617b40b639995ccfd43896cefd4a3c Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 27 May 2022 02:54:55 +0000 Subject: [PATCH 669/908] [gn build] Port 08cc05851872 --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 42a221989352fa..f278e3b9977b37 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -22,6 +22,7 
@@ static_library("CodeGen") { "Analysis.cpp", "AtomicExpandPass.cpp", "BasicBlockSections.cpp", + "BasicBlockSectionsProfileReader.cpp", "BasicTargetTransformInfo.cpp", "BranchFolding.cpp", "BranchRelaxation.cpp", From 9b8ca3c1f18c6c906c8ba8d5ac7a082f2d3b5bd4 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Tue, 24 May 2022 08:13:11 +0000 Subject: [PATCH 670/908] [libc] Add global stdout and stderr objects. They are added as entrypoint object targets. The header-gen infrastructure has been extended to enable handling standard required global objects. The libc-api-test has also been extended to verify the global object declarations. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D126329 --- libc/config/linux/api.td | 4 ++++ libc/config/linux/x86_64/entrypoints.txt | 2 ++ libc/config/public_api.td | 1 + libc/spec/spec.td | 9 +++++++- libc/spec/stdc.td | 15 +++++++++++- libc/src/__support/File/file.h | 3 +++ libc/src/__support/File/linux_file.cpp | 14 +++++++++++ libc/src/stdio/CMakeLists.txt | 23 +++++++++++++++++++ libc/src/stdio/stderr.cpp | 5 ++++ libc/src/stdio/stderr.h | 9 ++++++++ libc/src/stdio/stdout.cpp | 5 ++++ libc/src/stdio/stdout.h | 9 ++++++++ libc/test/src/CMakeLists.txt | 2 ++ .../src/__support/File/platform_file_test.cpp | 5 ++++ .../PrototypeTestGen/PrototypeTestGen.cpp | 22 ++++++++++++++++++ libc/utils/HdrGen/PublicAPICommand.cpp | 9 ++++++++ libc/utils/LibcTableGenUtil/APIIndexer.cpp | 11 +++++++++ libc/utils/LibcTableGenUtil/APIIndexer.h | 3 +++ 18 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 libc/src/stdio/stderr.cpp create mode 100644 libc/src/stdio/stderr.h create mode 100644 libc/src/stdio/stdout.cpp create mode 100644 libc/src/stdio/stdout.h diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 6862139d66c31f..f8ca32133eea85 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -147,6 +147,10 @@ def StringAPI : PublicAPI<"string.h"> { } def StdIOAPI : 
PublicAPI<"stdio.h"> { + let Macros = [ + SimpleMacroDef<"stderr", "stderr">, + SimpleMacroDef<"stdout", "stdout">, + ]; let Types = ["size_t", "FILE", "cookie_io_functions_t"]; } diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 99937c7ebe5927..300d94761f7012 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -276,6 +276,8 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.fwrite_unlocked libc.src.stdio.sprintf libc.src.stdio.snprintf + libc.src.stdio.stderr + libc.src.stdio.stdout # signal.h entrypoints # TODO: Enable signal.h entrypoints after fixing signal.h diff --git a/libc/config/public_api.td b/libc/config/public_api.td index 6d2f534f5a593a..1b34506c643c32 100644 --- a/libc/config/public_api.td +++ b/libc/config/public_api.td @@ -22,4 +22,5 @@ class PublicAPI { list Enumerations = []; list Structs = []; list Functions = []; + list Objects = []; } diff --git a/libc/spec/spec.td b/libc/spec/spec.td index fe5b2005650d42..d609f8e04ea3e7 100644 --- a/libc/spec/spec.td +++ b/libc/spec/spec.td @@ -138,16 +138,23 @@ class FunctionSpec args> { list Args = args; } +class ObjectSpec { + string Name = name; + string Type = type; +} + class HeaderSpec macros = [], list types = [], list enumerations = [], - list functions = []> { + list functions = [], + list objects = []> { string Name = name; list Functions = functions; list Types = types; list Macros = macros; list Enumerations = enumerations; + list Objects = objects; } class StandardSpec { diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index aa09bc36a25da6..f173e4b54eeab4 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -471,7 +471,10 @@ def StdC : StandardSpec<"stdc"> { HeaderSpec StdIO = HeaderSpec< "stdio.h", - [], // Macros + [ + Macro<"stderr">, + Macro<"stdout">, + ], // Macros [ // Types SizeTType, FILE, @@ -560,6 +563,16 @@ def StdC : StandardSpec<"stdc"> { ArgSpec, ArgSpec] >, + ], + [ + 
ObjectSpec< + "stdout", + "FILE *" + >, + ObjectSpec< + "stderr", + "FILE *" + >, ] >; diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index a62aa7f2e72d3a..49a5779c2f3220 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -223,6 +223,9 @@ class File { // library. File *openfile(const char *path, const char *mode); +extern File *stdout; +extern File *stderr; + } // namespace __llvm_libc #endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_FILE_H diff --git a/libc/src/__support/File/linux_file.cpp b/libc/src/__support/File/linux_file.cpp index b1a58d94e7207a..fc706c2e7ac115 100644 --- a/libc/src/__support/File/linux_file.cpp +++ b/libc/src/__support/File/linux_file.cpp @@ -165,4 +165,18 @@ File *openfile(const char *path, const char *mode) { return file; } +// TODO: Use the appropriate buffering modes for the standard streams below +// the different buffering modes are available. +constexpr size_t STDOUT_BUFFER_SIZE = 1024; +char stdout_buffer[STDOUT_BUFFER_SIZE]; +static LinuxFile StdOut(1, stdout_buffer, STDOUT_BUFFER_SIZE, 0, false, + File::ModeFlags(File::OpenMode::APPEND)); +File *stdout = &StdOut; + +constexpr size_t STDERR_BUFFER_SIZE = 1024; +char stderr_buffer[STDERR_BUFFER_SIZE]; +static LinuxFile StdErr(2, stderr_buffer, STDERR_BUFFER_SIZE, 0, false, + File::ModeFlags(File::OpenMode::APPEND)); +File *stderr = &StdErr; + } // namespace __llvm_libc diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 2abb5091b1cb14..f4684c6b371f4e 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -203,6 +203,29 @@ add_entrypoint_object( libc.src.__support.File.file ) +add_entrypoint_object( + stdout + SRCS + stdout.cpp + HDRS + stdout.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + stderr + SRCS + stderr.cpp + HDRS + stderr.h + DEPENDS + libc.include.stdio + 
libc.src.__support.File.file + libc.src.__support.File.platform_file +) add_entrypoint_object( sprintf diff --git a/libc/src/stdio/stderr.cpp b/libc/src/stdio/stderr.cpp new file mode 100644 index 00000000000000..60e9d94d7ecd47 --- /dev/null +++ b/libc/src/stdio/stderr.cpp @@ -0,0 +1,5 @@ +#include "src/__support/File/file.h" + +#include + +extern FILE *stderr = reinterpret_cast(__llvm_libc::stderr); diff --git a/libc/src/stdio/stderr.h b/libc/src/stdio/stderr.h new file mode 100644 index 00000000000000..ee4e6bfb6a2b06 --- /dev/null +++ b/libc/src/stdio/stderr.h @@ -0,0 +1,9 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#error "Do not include this file. Instead include __support/File/file.h." diff --git a/libc/src/stdio/stdout.cpp b/libc/src/stdio/stdout.cpp new file mode 100644 index 00000000000000..66fed5d0c35d29 --- /dev/null +++ b/libc/src/stdio/stdout.cpp @@ -0,0 +1,5 @@ +#include "src/__support/File/file.h" + +#include + +extern FILE *stdout = reinterpret_cast(__llvm_libc::stdout); diff --git a/libc/src/stdio/stdout.h b/libc/src/stdio/stdout.h new file mode 100644 index 00000000000000..ee4e6bfb6a2b06 --- /dev/null +++ b/libc/src/stdio/stdout.h @@ -0,0 +1,9 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#error "Do not include this file. Instead include __support/File/file.h." 
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index 49f7bc0774dcf8..50ccc9e68b633f 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -96,6 +96,8 @@ set_target_properties( PROPERTIES INCLUDE_DIRECTORIES "" ) +target_link_libraries(libc-api-test llvmlibc) + # Only include we need is the include for cpp::IsSame and our generated # public headers. target_include_directories( diff --git a/libc/test/src/__support/File/platform_file_test.cpp b/libc/test/src/__support/File/platform_file_test.cpp index 0ad0efc19cf593..fe04b6f643e5a9 100644 --- a/libc/test/src/__support/File/platform_file_test.cpp +++ b/libc/test/src/__support/File/platform_file_test.cpp @@ -195,3 +195,8 @@ TEST(LlvmLibcPlatformFileTest, IncorrectOperation) { ASSERT_TRUE(file->error()); ASSERT_EQ(file->close(), 0); } + +TEST(LlvmLibcPlatformFileTest, StdOutStdErrSmokeTest) { + EXPECT_FALSE(__llvm_libc::stdout == nullptr); + EXPECT_FALSE(__llvm_libc::stderr == nullptr); +} diff --git a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp index e6b4353ca8d24b..ca35dde7237d19 100644 --- a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp +++ b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp @@ -28,6 +28,12 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { for (const auto &entrypoint : EntrypointNamesOption) { auto match = G.FunctionToHeaderMap.find(entrypoint); if (match == G.FunctionToHeaderMap.end()) { + auto objectMatch = G.ObjectToHeaderMap.find(entrypoint); + if (objectMatch != G.ObjectToHeaderMap.end()) { + headerFileSet.insert(objectMatch->second); + continue; + } + llvm::errs() << "ERROR: entrypoint '" << entrypoint << "' could not be found in spec in any public header\n"; return true; @@ -43,6 +49,17 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { for (const auto &entrypoint : EntrypointNamesOption) { auto match 
= G.FunctionSpecMap.find(entrypoint); if (match == G.FunctionSpecMap.end()) { + auto objectMatch = G.ObjectSpecMap.find(entrypoint); + if (objectMatch != G.ObjectSpecMap.end()) { + auto entrypointPtr = entrypoint + "_ptr"; + llvm::Record *objectSpec = G.ObjectSpecMap[entrypoint]; + auto objectType = objectSpec->getValueAsString("Type"); + // We just make sure that the global object is present. + OS << " " << objectType << " *" << entrypointPtr << " = &" + << entrypoint << ";\n"; + OS << " ++" << entrypointPtr << ";\n"; // To avoid unused var warning. + continue; + } llvm::errs() << "ERROR: entrypoint '" << entrypoint << "' could not be found in spec in any public header\n"; return true; @@ -74,6 +91,11 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { OS << " return 0;\n"; OS << "}\n\n"; + // We provide dummy malloc and free implementations to support the case + // when LLVM libc does to include them. + OS << "void *malloc(size_t) { return nullptr; }\n"; + OS << "void free(void *) {}\n"; + return false; } diff --git a/libc/utils/HdrGen/PublicAPICommand.cpp b/libc/utils/HdrGen/PublicAPICommand.cpp index 8f036f7a6d56e5..e1e8f4ee42b9ec 100644 --- a/libc/utils/HdrGen/PublicAPICommand.cpp +++ b/libc/utils/HdrGen/PublicAPICommand.cpp @@ -114,6 +114,15 @@ void writeAPIFromIndex(APIIndexer &G, OS << ");\n\n"; } + + // Make another pass over entrypoints to emit object declarations. 
+ for (const auto &Name : EntrypointNameList) { + if (G.ObjectSpecMap.find(Name) == G.ObjectSpecMap.end()) + continue; + llvm::Record *ObjectSpec = G.ObjectSpecMap[Name]; + auto Type = ObjectSpec->getValueAsString("Type"); + OS << "extern " << Type << " " << Name << ";\n"; + } OS << "__END_C_DECLS\n"; } diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.cpp b/libc/utils/LibcTableGenUtil/APIIndexer.cpp index fd3f53c4150fce..2630797517f77a 100644 --- a/libc/utils/LibcTableGenUtil/APIIndexer.cpp +++ b/libc/utils/LibcTableGenUtil/APIIndexer.cpp @@ -108,6 +108,13 @@ void APIIndexer::indexStandardSpecDef(llvm::Record *StandardSpec) { EnumerationSpecMap[std::string( EnumerationSpec->getValueAsString("Name"))] = EnumerationSpec; } + + auto ObjectSpecList = HeaderSpec->getValueAsListOfDefs("Objects"); + for (llvm::Record *ObjectSpec : ObjectSpecList) { + auto ObjectName = std::string(ObjectSpec->getValueAsString("Name")); + ObjectSpecMap[ObjectName] = ObjectSpec; + ObjectToHeaderMap[ObjectName] = std::string(Header); + } } } } @@ -135,6 +142,10 @@ void APIIndexer::indexPublicAPIDef(llvm::Record *PublicAPI) { auto EnumerationList = PublicAPI->getValueAsListOfStrings("Enumerations"); for (llvm::StringRef EnumerationName : EnumerationList) Enumerations.insert(std::string(EnumerationName)); + + auto ObjectList = PublicAPI->getValueAsListOfStrings("Objects"); + for (llvm::StringRef ObjectName : ObjectList) + Objects.insert(std::string(ObjectName)); } void APIIndexer::index(llvm::RecordKeeper &Records) { diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.h b/libc/utils/LibcTableGenUtil/APIIndexer.h index cbb7ac2e66dcfc..26c813075a7ba7 100644 --- a/libc/utils/LibcTableGenUtil/APIIndexer.h +++ b/libc/utils/LibcTableGenUtil/APIIndexer.h @@ -63,13 +63,16 @@ class APIIndexer { NameToRecordMapping EnumerationSpecMap; NameToRecordMapping FunctionSpecMap; NameToRecordMapping MacroDefsMap; + NameToRecordMapping ObjectSpecMap; std::unordered_map FunctionToHeaderMap; + std::unordered_map 
ObjectToHeaderMap; NameSet RequiredTypes; NameSet Structs; NameSet Enumerations; NameSet Functions; + NameSet Objects; NameSet PublicHeaders; std::string getTypeAsString(llvm::Record *TypeRecord); From 121689a62e5b86cce0c5474f902cb24ff6dd57c5 Mon Sep 17 00:00:00 2001 From: Ping Deng Date: Fri, 27 May 2022 05:45:17 +0000 Subject: [PATCH 671/908] [SelectionDAG][NFC] Simplify integer promotion in setcc/vp.setcc Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D126516 --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8a04b7f61e58a0..686de3933c5f9b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1949,10 +1949,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast(N->getOperand(2))->get()); // The CC (#2) is always legal. - if (N->getNumOperands() == 3) + if (N->getOpcode() == ISD::SETCC) return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); - assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), From 842e48bd65770f0368ee0d1ca7ee34dbd8f0ff03 Mon Sep 17 00:00:00 2001 From: Piggy NL Date: Thu, 26 May 2022 22:59:49 +0800 Subject: [PATCH 672/908] [demangler][RISCV] Fix for long double Summary: The size of long double in RISCV (both RV32 and RV64) is 16 bytes, thus the mangled_size shouble be 32. 
This patch will fix test case "_ZN5test01hIfEEvRAcvjplstT_Le4001a000000000000000E_c" in test_demangle.pass.cpp, which is expected to be invalid but demangler returned "void test0::h(char (&) [(unsigned int)((sizeof (float)) + (0x0.000000004001ap-16382L))])" in RISCV environment without this patch. Reviewed By: urnathan Differential Revision: https://reviews.llvm.org/D126480 --- libcxxabi/src/demangle/ItaniumDemangle.h | 2 +- llvm/include/llvm/Demangle/ItaniumDemangle.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index beece93b21d41b..e3f0c6dfecdc29 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -5099,7 +5099,7 @@ template <> struct FloatData { #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) + defined(__wasm__) || defined(__riscv) static const size_t mangled_size = 32; #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) static const size_t mangled_size = 16; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 10210b950d2f9d..959632f13e1e23 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -5099,7 +5099,7 @@ template <> struct FloatData { #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) + defined(__wasm__) || defined(__riscv) static const size_t mangled_size = 32; #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) static const size_t mangled_size = 16; From e251fb4cdeb723eff5507be1efeb3fc69f7a2ce7 Mon Sep 17 00:00:00 2001 From: Pavel Samolysov Date: Thu, 26 May 2022 17:03:08 +0300 Subject: [PATCH 673/908] [libunwind][CMake] Fix name of LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG The CMake variable LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG has been renamed into 
C_SUPPORTS_NODEFAULTLIBS_FLAG because the last one is used in the confix-ix.cmake file while the variable with the original name is not used at al. Differential Revision: https://reviews.llvm.org/D126466 --- libunwind/cmake/config-ix.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake index f716532fdf1b5a..c9b65b3cefc082 100644 --- a/libunwind/cmake/config-ix.cmake +++ b/libunwind/cmake/config-ix.cmake @@ -35,7 +35,7 @@ llvm_check_compiler_linker_flag(C "-nostdlib++" CXX_SUPPORTS_NOSTDLIBXX_FLAG) if (CXX_SUPPORTS_NOSTDLIBXX_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++") else() - llvm_check_compiler_linker_flag(C "-nodefaultlibs" LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG) + llvm_check_compiler_linker_flag(C "-nodefaultlibs" C_SUPPORTS_NODEFAULTLIBS_FLAG) if (C_SUPPORTS_NODEFAULTLIBS_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs") endif() From a84026821bf65b312c6752e6d4318419ab3a4f31 Mon Sep 17 00:00:00 2001 From: "Liqin.Weng" Date: Fri, 27 May 2022 06:27:26 +0000 Subject: [PATCH 674/908] [RISCV] Add test for experimental.vector.reverse ``` void vector_reverse_i64(int *A, int *B, int n) { #pragma clang loop vectorize_width(4, scalable) for (int i = n-1; i >= 0; i--) A[i] = B[i] + 1; } ``` When option: scalable-vectorization is on (or set #pragma clang loop vectorize_width(elements, scalable)), Reverse Iterators can't loop vectorization as Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D125866 --- .../RISCV/riscv-vector-reverse.ll | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll new file mode 100644 index 00000000000000..e01ecd5632e3d1 --- /dev/null +++ 
b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -0,0 +1,94 @@ +; This is the loop in c++ being vectorize in this file with +;experimental.vector.reverse +; #pragma clang loop vectorize_width(4, scalable) +; for (int i = N-1; i >= 0; --i) +; a[i] = b[i] + 1.0; + +; REQUIRES: asserts +; RUN: opt -loop-vectorize -dce -instcombine -mtriple riscv64-linux-gnu \ +; RUN: -mattr=+v -debug-only=loop-vectorize -scalable-vectorization=on \ +; RUN: -riscv-v-vector-bits-min=128 -S < %s 2>&1 | FileCheck %s + +; CHECK-LABEL: vector_reverse_i64 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %{{.*}} = load i32, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: store i32 %{{.*}}, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %{{.*}} = load i32, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: store i32 %{{.*}}, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %{{.*}} = load i32, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store i32 %{{.*}}, ptr %{{.*}}, align 4 +; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load %1 = load i32, ptr %arrayidx, align 4 +; CHECK: remark: :0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load +; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store store i32 %add9, ptr %arrayidx3, align 4 +; CHECK: remark: :0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store +; CHECK: LV: Selecting VF: 4. 
+define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, i32 noundef signext %n) { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %0 = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] + %i.0 = add nsw i32 %i.0.in8, -1 + %idxprom = zext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + %add9 = add i32 %1, 1 + %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom + store i32 %add9, ptr %arrayidx3, align 4 + %cmp = icmp ugt i64 %indvars.iv, 1 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0 +} + +; CHECK-LABEL: vector_reverse_f32 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %{{.*}} = load float, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: store float %{{.*}}, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %{{.*}} = load float, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: store float %{{.*}}, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %{{.*}} = load float, ptr %{{.*}}, align 4 +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store float %{{.*}}, ptr %{{.*}}, align 4 +; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load %1 = load float, ptr %arrayidx, 
align 4 +; CHECK: remark: :0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load +; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store store float %conv1, ptr %arrayidx3, align 4 +; CHECK: remark: :0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store +; CHECK: LV: Selecting VF: 4. +define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, i32 noundef signext %n) { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %0 = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] + %i.0 = add nsw i32 %i.0.in8, -1 + %idxprom = zext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom + %1 = load float, ptr %arrayidx, align 4 + %conv1 = fadd float %1, 1.000000e+00 + %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom + store float %conv1, ptr %arrayidx3, align 4 + %cmp = icmp ugt i64 %indvars.iv, 1 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0 +} + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = !{!"llvm.loop.mustprogress"} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} From 0252357b3e1e8f6d3bc51ac6d7ac075842b2c956 Mon Sep 17 00:00:00 2001 From: Alexander Batashev Date: Fri, 27 May 2022 09:23:27 +0300 Subject: [PATCH 675/908] [mlir][LLVM] Add support for Calling Convention in LLVMFuncOp This patch adds 
support for Calling Convention attribute in LLVM dialect, including enums, custom syntax and import from LLVM IR. Additionally fix import of dso_local attribute. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126161 --- .../mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 9 ++ .../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 1 + .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 8 ++ mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 130 ++++++++++++++++++ mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp | 8 +- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 3 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 49 ++++++- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 9 +- mlir/test/Dialect/LLVMIR/func.mlir | 30 ++++ mlir/test/Target/LLVMIR/Import/basic.ll | 7 +- .../tools/mlir-tblgen/LLVMIRConversionGen.cpp | 91 ++++++++++++ 11 files changed, 333 insertions(+), 12 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index f5b97002b5a15c..4a095817ab0ceb 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -35,6 +35,15 @@ def LinkageAttr : LLVM_Attr<"Linkage"> { let hasCustomAssemblyFormat = 1; } +// Attribute definition for the LLVM Linkage enum. +def CConvAttr : LLVM_Attr<"CConv"> { + let mnemonic = "cconv"; + let parameters = (ins + "CConv":$CConv + ); + let hasCustomAssemblyFormat = 1; +} + def LoopOptionsAttr : LLVM_Attr<"LoopOptions"> { let mnemonic = "loopopts"; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h index 67709646a22279..d97a267f4aba3c 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -39,6 +39,7 @@ namespace LLVM { // attribute definition itself. // TODO: this shouldn't be needed after we unify the attribute generation, i.e. // --gen-attr-* and --gen-attrdef-*. 
+using cconv::CConv; using linkage::Linkage; } // namespace LLVM } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 1b986968f928a4..2fe1130c6bb734 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -234,6 +234,14 @@ class LLVM_EnumAttr cases> : + I64EnumAttr { + string llvmClassName = llvmNS; +} + // For every value in the list, substitutes the value in the place of "$0" in // "pattern" and stores the list of strings as "lst". class ListIntSubst values> { diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 039294a6e200b3..9b77699ee7d8ba 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -67,6 +67,134 @@ def LoopOptionCase : I32EnumAttr< let cppNamespace = "::mlir::LLVM"; } +// These values must match llvm::CallingConv ones. +// See https://llvm.org/doxygen/namespacellvm_1_1CallingConv.html for full list +// of supported calling conventions. 
+def CConvC : LLVM_EnumAttrCase<"C", "ccc", "C", 0>; +def CConvFast : LLVM_EnumAttrCase<"Fast", "fastcc", "Fast", 8>; +def CConvCold : LLVM_EnumAttrCase<"Cold", "coldcc", "Cold", 9>; +def CConvGHC : LLVM_EnumAttrCase<"GHC", "cc_10", "GHC", 10>; +def CConvHiPE : LLVM_EnumAttrCase<"HiPE", "cc_11", "HiPE", 11>; +def CConvWebKitJS : LLVM_EnumAttrCase<"WebKit_JS", "webkit_jscc", + "WebKit_JS", 12>; +def CConvAnyReg : LLVM_EnumAttrCase<"AnyReg", "anyregcc", "AnyReg", 13>; +def CConvPreserveMost : LLVM_EnumAttrCase<"PreserveMost", "preserve_mostcc", + "PreserveMost", 14>; +def CConvPreserveAll : LLVM_EnumAttrCase<"PreserveAll", "preserve_allcc", + "PreserveAll", 15>; +def CConvSwift : LLVM_EnumAttrCase<"Swift", "swiftcc", "Swift", 16>; +def CConvCXXFastTLS : LLVM_EnumAttrCase<"CXX_FAST_TLS", "cxx_fast_tlscc", + "CXX_FAST_TLS", 17>; +def CConvTail : LLVM_EnumAttrCase<"Tail", "tailcc", "Tail", 18>; +def CConvCFGuard_Check : LLVM_EnumAttrCase<"CFGuard_Check", + "cfguard_checkcc", + "CFGuard_Check", 19>; +def CConvSwiftTail : LLVM_EnumAttrCase<"SwiftTail", "swifttailcc", + "SwiftTail", 20>; +def CConvX86_StdCall : LLVM_EnumAttrCase<"X86_StdCall", "x86_stdcallcc", + "X86_StdCall", 64>; +def CConvX86_FastCall : LLVM_EnumAttrCase<"X86_FastCall", "x86_fastcallcc", + "X86_FastCall", 65>; +def CConvARM_APCS : LLVM_EnumAttrCase<"ARM_APCS", "arm_apcscc", "ARM_APCS", 66>; +def CConvARM_AAPCS : LLVM_EnumAttrCase<"ARM_AAPCS", "arm_aapcscc", "ARM_AAPCS", + 67>; +def CConvARM_AAPCS_VFP : LLVM_EnumAttrCase<"ARM_AAPCS_VFP", "arm_aapcs_vfpcc", + "ARM_AAPCS_VFP", 68>; +def CConvMSP430_INTR : LLVM_EnumAttrCase<"MSP430_INTR", "msp430_intrcc", + "MSP430_INTR", 69>; +def CConvX86_ThisCall : LLVM_EnumAttrCase<"X86_ThisCall", "x86_thiscallcc", + "X86_ThisCall", 70>; +def CConvPTX_Kernel : LLVM_EnumAttrCase<"PTX_Kernel", "ptx_kernelcc", + "PTX_Kernel", 71>; +def CConvPTX_Device : LLVM_EnumAttrCase<"PTX_Device", "ptx_devicecc", + "PTX_Device", 72>; +def CConvSPIR_FUNC : LLVM_EnumAttrCase<"SPIR_FUNC", 
"spir_funccc", + "SPIR_FUNC", 75>; +def CConvSPIR_KERNEL : LLVM_EnumAttrCase<"SPIR_KERNEL", "spir_kernelcc", + "SPIR_KERNEL", 76>; +def CConvIntel_OCL_BI : LLVM_EnumAttrCase<"Intel_OCL_BI", "intel_ocl_bicc", + "Intel_OCL_BI", 77>; +def CConvX86_64_SysV : LLVM_EnumAttrCase<"X86_64_SysV", "x86_64_sysvcc", + "X86_64_SysV", 78>; +def CConvWin64 : LLVM_EnumAttrCase<"Win64", "win64cc", "Win64", 79>; +def CConvX86_VectorCall : LLVM_EnumAttrCase<"X86_VectorCall", + "x86_vectorcallcc", + "X86_VectorCall", 80>; +def CConvHHVM : LLVM_EnumAttrCase<"HHVM", "hhvmcc", "HHVM", 81>; +def CConvHHVM_C : LLVM_EnumAttrCase<"HHVM_C", "hhvm_ccc", "HHVM_C", 82>; +def CConvX86_INTR : LLVM_EnumAttrCase<"X86_INTR", "x86_intrcc", "X86_INTR", 83>; +def CConvAVR_INTR : LLVM_EnumAttrCase<"AVR_INTR", "avr_intrcc", "AVR_INTR", 84>; +def CConvAVR_SIGNAL : LLVM_EnumAttrCase<"AVR_SIGNAL", "avr_signalcc", + "AVR_SIGNAL", 85>; +def CConvAVR_BUILTIN : LLVM_EnumAttrCase<"AVR_BUILTIN", "avr_builtincc", + "AVR_BUILTIN", 86>; +def CConvAMDGPU_VS : LLVM_EnumAttrCase<"AMDGPU_VS", "amdgpu_vscc", "AMDGPU_VS", + 87>; +def CConvAMDGPU_GS : LLVM_EnumAttrCase<"AMDGPU_GS", "amdgpu_gscc", "AMDGPU_GS", + 88>; +def CConvAMDGPU_PS : LLVM_EnumAttrCase<"AMDGPU_PS", "amdgpu_pscc", "AMDGPU_PS", + 89>; +def CConvAMDGPU_CS : LLVM_EnumAttrCase<"AMDGPU_CS", "amdgpu_cscc", "AMDGPU_CS", + 90>; +def CConvAMDGPU_KERNEL : LLVM_EnumAttrCase<"AMDGPU_KERNEL", "amdgpu_kernelcc", + "AMDGPU_KERNEL", 91>; +def CConvX86_RegCall : LLVM_EnumAttrCase<"X86_RegCall", "x86_regcallcc", + "X86_RegCall", 92>; +def CConvAMDGPU_HS : LLVM_EnumAttrCase<"AMDGPU_HS", "amdgpu_hscc", "AMDGPU_HS", + 93>; +def CConvMSP430_BUILTIN : LLVM_EnumAttrCase<"MSP430_BUILTIN", + "msp430_builtincc", + "MSP430_BUILTIN", 94>; +def CConvAMDGPU_LS : LLVM_EnumAttrCase<"AMDGPU_LS", "amdgpu_lscc", "AMDGPU_LS", + 95>; +def CConvAMDGPU_ES : LLVM_EnumAttrCase<"AMDGPU_ES", "amdgpu_escc", "AMDGPU_ES", + 96>; +def CConvAArch64_VectorCall : LLVM_EnumAttrCase<"AArch64_VectorCall", + 
"aarch64_vectorcallcc", + "AArch64_VectorCall", 97>; +def CConvAArch64_SVE_VectorCall : LLVM_EnumAttrCase<"AArch64_SVE_VectorCall", + "aarch64_sve_vectorcallcc", + "AArch64_SVE_VectorCall", + 98>; +def CConvWASM_EmscriptenInvoke : LLVM_EnumAttrCase<"WASM_EmscriptenInvoke", + "wasm_emscripten_invokecc", + "WASM_EmscriptenInvoke", 99>; +def CConvAMDGPU_Gfx : LLVM_EnumAttrCase<"AMDGPU_Gfx", "amdgpu_gfxcc", + "AMDGPU_Gfx", 100>; +def CConvM68k_INTR : LLVM_EnumAttrCase<"M68k_INTR", "m68k_intrcc", "M68k_INTR", + 101>; + +def CConvEnum : LLVM_CEnumAttr< + "CConv", + "::llvm::CallingConv", + "Calling Conventions", + [CConvC, CConvFast, CConvCold, CConvGHC, CConvHiPE, CConvWebKitJS, + CConvAnyReg, CConvPreserveMost, CConvPreserveAll, CConvSwift, + CConvCXXFastTLS, CConvTail, CConvCFGuard_Check, CConvSwiftTail, + CConvX86_StdCall, CConvX86_FastCall, CConvARM_APCS, + CConvARM_AAPCS, CConvARM_AAPCS_VFP, CConvMSP430_INTR, CConvX86_ThisCall, + CConvPTX_Kernel, CConvPTX_Device, CConvSPIR_FUNC, CConvSPIR_KERNEL, + CConvIntel_OCL_BI, CConvX86_64_SysV, CConvWin64, CConvX86_VectorCall, + CConvHHVM, CConvHHVM_C, CConvX86_INTR, CConvAVR_INTR, CConvAVR_BUILTIN, + CConvAMDGPU_VS, CConvAMDGPU_GS, CConvAMDGPU_CS, CConvAMDGPU_KERNEL, + CConvX86_RegCall, CConvAMDGPU_HS, CConvMSP430_BUILTIN, CConvAMDGPU_LS, + CConvAMDGPU_ES, CConvAArch64_VectorCall, CConvAArch64_SVE_VectorCall, + CConvWASM_EmscriptenInvoke, CConvAMDGPU_Gfx, CConvM68k_INTR + ]> { + let cppNamespace = "::mlir::LLVM::cconv"; +} + +def CConv : DialectAttr< + LLVM_Dialect, + CPred<"$_self.isa<::mlir::LLVM::CConvAttr>()">, + "LLVM Calling Convention specification"> { + let storageType = "::mlir::LLVM::CConvAttr"; + let returnType = "::mlir::LLVM::cconv::CConv"; + let convertFromStorage = "$_self.getCConv()"; + let constBuilderCall = + "::mlir::LLVM::CConvAttr::get($_builder.getContext(), $0)"; +} + class LLVM_Builder { string llvmBuilder = builder; } @@ -1233,6 +1361,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [ 
TypeAttrOf:$function_type, DefaultValuedAttr:$linkage, UnitAttr:$dso_local, + DefaultValuedAttr:$CConv, OptionalAttr:$personality, OptionalAttr:$garbageCollector, OptionalAttr:$passthrough @@ -1246,6 +1375,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [ OpBuilder<(ins "StringRef":$name, "Type":$type, CArg<"Linkage", "Linkage::External">:$linkage, CArg<"bool", "false">:$dsoLocal, + CArg<"CConv", "CConv::C">:$cconv, CArg<"ArrayRef", "{}">:$attrs, CArg<"ArrayRef", "{}">:$argAttrs)> ]; diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp index 3ffd52604fd780..f162f779922f8a 100644 --- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp +++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp @@ -139,7 +139,8 @@ static void wrapForExternalCallers(OpBuilder &rewriter, Location loc, prependResAttrsToArgAttrs(rewriter, attributes, funcOp.getNumArguments()); auto wrapperFuncOp = rewriter.create( loc, llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str(), - wrapperFuncType, LLVM::Linkage::External, /*dsoLocal*/ false, attributes); + wrapperFuncType, LLVM::Linkage::External, /*dsoLocal*/ false, + /*cconv*/ LLVM::CConv::C, attributes); OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(wrapperFuncOp.addEntryBlock()); @@ -206,7 +207,8 @@ static void wrapExternalFunction(OpBuilder &builder, Location loc, // Create the auxiliary function. 
auto wrapperFunc = builder.create( loc, llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str(), - wrapperType, LLVM::Linkage::External, /*dsoLocal*/ false, attributes); + wrapperType, LLVM::Linkage::External, /*dsoLocal*/ false, + /*cconv*/ LLVM::CConv::C, attributes); builder.setInsertionPointToStart(newFuncOp.addEntryBlock()); @@ -345,7 +347,7 @@ struct FuncOpConversionBase : public ConvertOpToLLVMPattern { } auto newFuncOp = rewriter.create( funcOp.getLoc(), funcOp.getName(), llvmType, linkage, - /*dsoLocal*/ false, attributes); + /*dsoLocal*/ false, /*cconv*/ LLVM::CConv::C, attributes); rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(), newFuncOp.end()); if (failed(rewriter.convertRegionTypes(&newFuncOp.getBody(), *typeConverter, diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 6cb5a5bdf2e024..85d1a5234b8f28 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -68,7 +68,8 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr()); auto llvmFuncOp = rewriter.create( gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, - LLVM::Linkage::External, /*dsoLocal*/ false, attributes); + LLVM::Linkage::External, /*dsoLocal*/ false, /*cconv*/ LLVM::CConv::C, + attributes); { // Insert operations that correspond to converted workgroup and private diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 958142b03d2456..41d0521b981c2f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -37,6 +37,7 @@ using namespace mlir; using namespace mlir::LLVM; +using mlir::LLVM::cconv::getMaxEnumValForCConv; using mlir::LLVM::linkage::getMaxEnumValForLinkage; #include "mlir/Dialect/LLVMIR/LLVMOpsDialect.cpp.inc" @@ -1821,6 +1822,7 @@ 
struct EnumTraits {}; REGISTER_ENUM_TYPE(Linkage); REGISTER_ENUM_TYPE(UnnamedAddr); +REGISTER_ENUM_TYPE(CConv); } // namespace /// Parse an enum from the keyword, or default to the provided default value. @@ -2124,7 +2126,8 @@ Block *LLVMFuncOp::addEntryBlock() { void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, StringRef name, Type type, LLVM::Linkage linkage, - bool dsoLocal, ArrayRef attrs, + bool dsoLocal, CConv cconv, + ArrayRef attrs, ArrayRef argAttrs) { result.addRegion(); result.addAttribute(SymbolTable::getSymbolAttrName(), @@ -2133,6 +2136,8 @@ void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, TypeAttr::get(type)); result.addAttribute(getLinkageAttrName(result.name), LinkageAttr::get(builder.getContext(), linkage)); + result.addAttribute(getCConvAttrName(result.name), + CConvAttr::get(builder.getContext(), cconv)); result.attributes.append(attrs.begin(), attrs.end()); if (dsoLocal) result.addAttribute("dso_local", builder.getUnitAttr()); @@ -2185,7 +2190,8 @@ buildLLVMFunctionType(OpAsmParser &parser, SMLoc loc, ArrayRef inputs, // Parses an LLVM function. // -// operation ::= `llvm.func` linkage? function-signature function-attributes? +// operation ::= `llvm.func` linkage? cconv? function-signature +// function-attributes? // function-body // ParseResult LLVMFuncOp::parse(OpAsmParser &parser, OperationState &result) { @@ -2196,6 +2202,12 @@ ParseResult LLVMFuncOp::parse(OpAsmParser &parser, OperationState &result) { parseOptionalLLVMKeyword( parser, result, LLVM::Linkage::External))); + // Default to C Calling Convention if no keyword is provided. 
+ result.addAttribute( + getCConvAttrName(result.name), + CConvAttr::get(parser.getContext(), parseOptionalLLVMKeyword( + parser, result, LLVM::CConv::C))); + StringAttr nameAttr; SmallVector entryArgs; SmallVector resultAttrs; @@ -2239,6 +2251,9 @@ void LLVMFuncOp::print(OpAsmPrinter &p) { p << ' '; if (getLinkage() != LLVM::Linkage::External) p << stringifyLinkage(getLinkage()) << ' '; + if (getCConv() != LLVM::CConv::C) + p << stringifyCConv(getCConv()) << ' '; + p.printSymbolName(getName()); LLVMFunctionType fnType = getFunctionType(); @@ -2255,7 +2270,8 @@ void LLVMFuncOp::print(OpAsmPrinter &p) { function_interface_impl::printFunctionSignature(p, *this, argTypes, isVarArg(), resTypes); function_interface_impl::printFunctionAttributes( - p, *this, argTypes.size(), resTypes.size(), {getLinkageAttrName()}); + p, *this, argTypes.size(), resTypes.size(), + {getLinkageAttrName(), getCConvAttrName()}); // Print the body if this is not an external function. Region &body = getBody(); @@ -2645,7 +2661,7 @@ OpFoldResult LLVM::GEPOp::fold(ArrayRef operands) { //===----------------------------------------------------------------------===// void LLVMDialect::initialize() { - addAttributes(); + addAttributes(); // clang-format off addTypes(getCConv()) <= cconv::getMaxEnumValForCConv()) + printer << stringifyEnum(getCConv()); + else + printer << "INVALID_cc_" << static_cast(getCConv()); + printer << ">"; +} + +Attribute CConvAttr::parse(AsmParser &parser, Type type) { + StringRef convName; + + if (parser.parseLess() || parser.parseKeyword(&convName) || + parser.parseGreater()) + return {}; + auto cconv = cconv::symbolizeCConv(convName); + if (!cconv) { + parser.emitError(parser.getNameLoc(), "unknown calling convention: ") + << convName; + return {}; + } + CConv cconvVal = *cconv; + return CConvAttr::get(parser.getContext(), cconvVal); +} + LoopOptionsAttrBuilder::LoopOptionsAttrBuilder(LoopOptionsAttr attr) : options(attr.getOptions().begin(), attr.getOptions().end()) {} 
diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index ab69c6a0027513..eced5896a167ec 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -1139,10 +1139,13 @@ LogicalResult Importer::processFunction(llvm::Function *f) { if (!functionType) return failure(); + bool dsoLocal = f->hasLocalLinkage(); + CConv cconv = convertCConvFromLLVM(f->getCallingConv()); + b.setInsertionPoint(module.getBody(), getFuncInsertPt()); - LLVMFuncOp fop = - b.create(UnknownLoc::get(context), f->getName(), functionType, - convertLinkageFromLLVM(f->getLinkage())); + LLVMFuncOp fop = b.create( + UnknownLoc::get(context), f->getName(), functionType, + convertLinkageFromLLVM(f->getLinkage()), dsoLocal, cconv); if (FlatSymbolRefAttr personality = getPersonalityAsAttr(f)) fop->setAttr(b.getStringAttr("personality"), personality); diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index 810622da275ee9..852ff1f6191f63 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -144,6 +144,21 @@ module { -> (!llvm.struct<(i32)> {llvm.struct_attrs = [{llvm.noalias}]}) { llvm.return %arg0 : !llvm.struct<(i32)> } + + // CHECK: llvm.func @cconv1 + llvm.func ccc @cconv1() { + llvm.return + } + + // CHECK: llvm.func weak @cconv2 + llvm.func weak ccc @cconv2() { + llvm.return + } + + // CHECK: llvm.func weak fastcc @cconv3 + llvm.func weak fastcc @cconv3() { + llvm.return + } } // ----- @@ -251,3 +266,18 @@ module { // expected-error@+1 {{functions cannot have 'common' linkage}} llvm.func common @common_linkage_func() } + +// ----- + +module { + // expected-error@+1 {{custom op 'llvm.func' expected valid '@'-identifier for symbol name}} + llvm.func cc_12 @unknown_calling_convention() +} + +// ----- + +module { + // expected-error@+2 {{unknown calling convention: cc_12}} + "llvm.func"() ({ + }) {sym_name = 
"generic_unknown_calling_convention", CConv = #llvm.cconv, function_type = !llvm.func} : () -> () +} diff --git a/mlir/test/Target/LLVMIR/Import/basic.ll b/mlir/test/Target/LLVMIR/Import/basic.ll index 6b74e7da00c6cf..3691448eb6ee1f 100644 --- a/mlir/test/Target/LLVMIR/Import/basic.ll +++ b/mlir/test/Target/LLVMIR/Import/basic.ll @@ -122,8 +122,13 @@ define internal void @func_internal() { ; CHECK: llvm.func @fe(i32) -> f32 declare float @fe(i32) +; CHECK: llvm.func internal spir_funccc @spir_func_internal() +define internal spir_func void @spir_func_internal() { + ret void +} + ; FIXME: function attributes. -; CHECK-LABEL: llvm.func internal @f1(%arg0: i64) -> i32 { +; CHECK-LABEL: llvm.func internal @f1(%arg0: i64) -> i32 attributes {dso_local} { ; CHECK-DAG: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 ; CHECK-DAG: %[[c42:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 ; CHECK-DAG: %[[c1:[0-9]+]] = llvm.mlir.constant(true) : i1 diff --git a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp index e098e89da39d8c..50eb2e7978af5a 100644 --- a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp @@ -210,6 +210,27 @@ class LLVMEnumAttr : public tblgen::EnumAttr { return cases; } }; + +// Wrapper class around a Tablegen definition of a C-style LLVM enum attribute. +class LLVMCEnumAttr : public tblgen::EnumAttr { +public: + using tblgen::EnumAttr::EnumAttr; + + // Returns the C++ enum name for the LLVM API. + StringRef getLLVMClassName() const { + return def->getValueAsString("llvmClassName"); + } + + // Returns all associated cases viewed as LLVM-specific enum cases. 
+ std::vector getAllCases() const { + std::vector cases; + + for (auto &c : tblgen::EnumAttr::getAllCases()) + cases.emplace_back(c); + + return cases; + } +}; } // namespace // Emits conversion function "LLVMClass convertEnumToLLVM(Enum)" and containing @@ -242,6 +263,37 @@ static void emitOneEnumToConversion(const llvm::Record *record, os << "}\n\n"; } +// Emits conversion function "int64_t convertEnumToLLVM(Enum)" and containing +// switch-based logic to convert from the MLIR LLVM dialect enum attribute case +// (Enum) to the corresponding LLVM API C-style enumerant. +static void emitOneCEnumToConversion(const llvm::Record *record, + raw_ostream &os) { + LLVMCEnumAttr enumAttr(record); + StringRef llvmClass = enumAttr.getLLVMClassName(); + StringRef cppClassName = enumAttr.getEnumClassName(); + StringRef cppNamespace = enumAttr.getCppNamespace(); + + // Emit the function converting the enum attribute to its LLVM counterpart. + os << formatv("static LLVM_ATTRIBUTE_UNUSED int64_t " + "convert{0}ToLLVM({1}::{0} value) {{\n", + cppClassName, cppNamespace); + os << " switch (value) {\n"; + + for (const auto &enumerant : enumAttr.getAllCases()) { + StringRef llvmEnumerant = enumerant.getLLVMEnumerant(); + StringRef cppEnumerant = enumerant.getSymbol(); + os << formatv(" case {0}::{1}::{2}:\n", cppNamespace, cppClassName, + cppEnumerant); + os << formatv(" return static_cast({0}::{1});\n", llvmClass, + llvmEnumerant); + } + + os << " }\n"; + os << formatv(" llvm_unreachable(\"unknown {0} type\");\n", + enumAttr.getEnumClassName()); + os << "}\n\n"; +} + // Emits conversion function "Enum convertEnumFromLLVM(LLVMClass)" and // containing switch-based logic to convert from the LLVM API enumerant to MLIR // LLVM dialect enum attribute (Enum). 
@@ -272,6 +324,38 @@ static void emitOneEnumFromConversion(const llvm::Record *record, os << "}\n\n"; } +// Emits conversion function "Enum convertEnumFromLLVM(LLVMEnum)" and +// containing switch-based logic to convert from the LLVM API C-style enumerant +// to MLIR LLVM dialect enum attribute (Enum). +static void emitOneCEnumFromConversion(const llvm::Record *record, + raw_ostream &os) { + LLVMCEnumAttr enumAttr(record); + StringRef llvmClass = enumAttr.getLLVMClassName(); + StringRef cppClassName = enumAttr.getEnumClassName(); + StringRef cppNamespace = enumAttr.getCppNamespace(); + + // Emit the function converting the enum attribute from its LLVM counterpart. + os << formatv( + "inline LLVM_ATTRIBUTE_UNUSED {0}::{1} convert{1}FromLLVM(int64_t " + "value) {{\n", + cppNamespace, cppClassName, llvmClass); + os << " switch (value) {\n"; + + for (const auto &enumerant : enumAttr.getAllCases()) { + StringRef llvmEnumerant = enumerant.getLLVMEnumerant(); + StringRef cppEnumerant = enumerant.getSymbol(); + os << formatv(" case static_cast({0}::{1}):\n", llvmClass, + llvmEnumerant); + os << formatv(" return {0}::{1}::{2};\n", cppNamespace, cppClassName, + cppEnumerant); + } + + os << " }\n"; + os << formatv(" llvm_unreachable(\"unknown {0} type\");", + enumAttr.getLLVMClassName()); + os << "}\n\n"; +} + // Emits conversion functions between MLIR enum attribute case and corresponding // LLVM API enumerants for all registered LLVM dialect enum attributes. 
template @@ -283,6 +367,13 @@ static bool emitEnumConversionDefs(const RecordKeeper &recordKeeper, else emitOneEnumFromConversion(def, os); + for (const auto *def : + recordKeeper.getAllDerivedDefinitions("LLVM_CEnumAttr")) + if (ConvertTo) + emitOneCEnumToConversion(def, os); + else + emitOneCEnumFromConversion(def, os); + return false; } From bdd0093f4d57790893d679ad590cd40c1dc3978c Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Fri, 11 Mar 2022 14:40:44 +0700 Subject: [PATCH 676/908] [GlobalISel] Add G_IS_FPCLASS Add a generic opcode to represent `llvm.is_fpclass` intrinsic. Differential Revision: https://reviews.llvm.org/D121454 --- llvm/docs/GlobalISel/GenericOpcode.rst | 10 +++++ llvm/include/llvm/ADT/APFloat.h | 3 +- llvm/include/llvm/Support/TargetOpcodes.def | 3 ++ llvm/include/llvm/Target/GenericOpcodes.td | 7 ++++ llvm/lib/CodeGen/MachineVerifier.cpp | 38 ++++++++++++++++++ .../GlobalISel/legalizer-info-validation.mir | 3 ++ .../MachineVerifier/test_g_is_fpclass.mir | 40 +++++++++++++++++++ 7 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MachineVerifier/test_g_is_fpclass.mir diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index a1dac644c031c1..3d251c53e98595 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -480,6 +480,16 @@ G_FCANONICALIZE See :ref:`i_intr_llvm_canonicalize`. +G_IS_FPCLASS +^^^^^^^^^^^^ + +Tests if the first operand, which must be floating-point scalar or vector, has +floating-point class specified by the second operand. The third operand +specifies floating-point semantics of the tested value. Returns non-zero (true) +or zero (false). It's target specific whether a true value is 1, ~0U, or some +other non-zero value. If the first operand is a vector, the returned value is a +vector of the same length. 
+ G_FMINNUM ^^^^^^^^^ diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 17b57de7b0aa7e..cdedb6ece99208 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -155,7 +155,8 @@ struct APFloatBase { S_IEEEdouble, S_x87DoubleExtended, S_IEEEquad, - S_PPCDoubleDouble + S_PPCDoubleDouble, + S_MaxSemantics = S_PPCDoubleDouble }; static const llvm::fltSemantics &EnumToSemantics(Semantics S); diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index ca8876f51fae96..8df7ced0029d80 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -620,6 +620,9 @@ HANDLE_TARGET_OPCODE(G_FABS) /// f64) is allowed. HANDLE_TARGET_OPCODE(G_FCOPYSIGN) +/// Generic test for floating-point class. +HANDLE_TARGET_OPCODE(G_IS_FPCLASS) + /// Generic FP canonicalize value. HANDLE_TARGET_OPCODE(G_FCANONICALIZE) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 8ec12a9bc5d09c..3e2f18b57d1e0c 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -745,6 +745,13 @@ def G_FCANONICALIZE : GenericInstruction { let hasSideEffects = false; } +// Generic opcode equivalent to the llvm.is_fpclass intrinsic. +def G_IS_FPCLASS: GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src, unknown:$test, unknown:$fpsem); + let hasSideEffects = false; +} + // FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two // values. 
// diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index eebdfe0e40bd18..084f85800ab8df 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeCalc.h" @@ -1650,6 +1651,43 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { verifyAllRegOpsScalar(*MI, *MRI); break; } + case TargetOpcode::G_IS_FPCLASS: { + LLT DestTy = MRI->getType(MI->getOperand(0).getReg()); + LLT DestEltTy = DestTy.getScalarType(); + if (!DestEltTy.isScalar()) { + report("Destination must be a scalar or vector of scalars", MI); + break; + } + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT SrcEltTy = SrcTy.getScalarType(); + if (!SrcEltTy.isScalar()) { + report("Source must be a scalar or vector of scalars", MI); + break; + } + if (!verifyVectorElementMatch(DestTy, SrcTy, MI)) + break; + const MachineOperand &TestMO = MI->getOperand(2); + if (!TestMO.isImm()) { + report("floating-point class set (operand 2) must be an immediate", MI); + break; + } + int64_t Test = TestMO.getImm(); + if (Test < 0 || Test > fcAllFlags) { + report("Incorrect floating-point class set (operand 2)", MI); + break; + } + const MachineOperand &SemanticsMO = MI->getOperand(3); + if (!SemanticsMO.isImm()) { + report("floating-point semantics (operand 3) must be an immediate", MI); + break; + } + int64_t Semantics = SemanticsMO.getImm(); + if (Semantics < 0 || Semantics > APFloat::S_MaxSemantics) { + report("Incorrect floating-point semantics (operand 3)", MI); + break; + } + break; + } default: break; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 2127229d6a08da..881b7f88834fd8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -484,6 +484,9 @@ # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FCANONICALIZE (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined diff --git a/llvm/test/MachineVerifier/test_g_is_fpclass.mir b/llvm/test/MachineVerifier/test_g_is_fpclass.mir new file mode 100644 index 00000000000000..a749a7923d684d --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_is_fpclass.mir @@ -0,0 +1,40 @@ +# RUN: not --crash llc -o - -march=aarch64 -global-isel -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target + +--- +name: test_fcmp +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: +body: | + bb.0: + liveins: $x0, $w0, $q0 + %s32:_(s32) = COPY $w0 + %ptr:_(p0) = COPY $x0 + %vector:_(<4 x s32>) = COPY $q0 + + %val1:_(s1) = G_IS_FPCLASS %s32, 1 + ; CHECK: *** Bad machine code: Too few operands *** + ; CHECK: 4 operands expected, but 3 given. 
+ + %val2:_(p0) = G_IS_FPCLASS %s32, 3, 2 + ; CHECK: *** Bad machine code: Destination must be a scalar or vector of scalars *** + + %val3:_(s1) = G_IS_FPCLASS %s32, 1, 66 + ; CHECK: *** Bad machine code: Incorrect floating-point semantics (operand 3) *** + + %val4:_(s1) = G_IS_FPCLASS %s32, 7777, 2 + ; CHECK: *** Bad machine code: Incorrect floating-point class set (operand 2) *** + + %val5:_(s1) = G_IS_FPCLASS %ptr:_(p0), 3, 2 + ; CHECK: *** Bad machine code: Source must be a scalar or vector of scalars *** + + %var6:_(s1) = G_IS_FPCLASS %vector:_(<4 x s32>), 1, 2 + ; CHECK: *** Bad machine code: operand types must be all-vector or all-scalar *** + + %var7:_(<2 x s1>) = G_IS_FPCLASS %vector:_(<4 x s32>), 1, 2 + ; CHECK: *** Bad machine code: operand types must preserve number of vector elements *** + +... From d6708b741936870a1d214eccc2fe86f25f796a37 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Mon, 16 May 2022 16:52:49 -0700 Subject: [PATCH 677/908] [mlir-vscode] Add support for highlighting pdll and tablegen markdown code blocks This essentially just piggy backs off of the existing mlir support. 
Differential Revision: https://reviews.llvm.org/D125734 --- mlir/utils/vscode/markdown-grammar.json | 72 +++++++++++++++++++++++++ mlir/utils/vscode/package.json | 4 +- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/mlir/utils/vscode/markdown-grammar.json b/mlir/utils/vscode/markdown-grammar.json index 2f1f68d42cbe42..2a285a0705bcb0 100644 --- a/mlir/utils/vscode/markdown-grammar.json +++ b/mlir/utils/vscode/markdown-grammar.json @@ -4,6 +4,12 @@ "patterns": [ { "include": "#mlir-code-block" + }, + { + "include": "#pdll-code-block" + }, + { + "include": "#tablegen-code-block" } ], "repository": { @@ -39,6 +45,72 @@ ] } ] + }, + "pdll-code-block": { + "begin": "(^|\\G)(\\s*)(\\`{3,}|~{3,})\\s*(?i:(pdll)(\\s+[^`~]*)?$)", + "name": "markup.fenced_code.block.markdown", + "end": "(^|\\G)(\\2|\\s{0,3})(\\3)\\s*$", + "beginCaptures": { + "3": { + "name": "punctuation.definition.markdown" + }, + "4": { + "name": "fenced_code.block.language.markdown" + }, + "5": { + "name": "fenced_code.block.language.attributes.markdown" + } + }, + "endCaptures": { + "3": { + "name": "punctuation.definition.markdown" + } + }, + "patterns": [ + { + "begin": "(^|\\G)(\\s*)(.*)", + "while": "(^|\\G)(?!\\s*([`~]{3,})\\s*$)", + "contentName": "meta.embedded.block.pdll", + "patterns": [ + { + "include": "source.pdll" + } + ] + } + ] + }, + "tablegen-code-block": { + "begin": "(^|\\G)(\\s*)(\\`{3,}|~{3,})\\s*(?i:(tablegen)(\\s+[^`~]*)?$)", + "name": "markup.fenced_code.block.markdown", + "end": "(^|\\G)(\\2|\\s{0,3})(\\3)\\s*$", + "beginCaptures": { + "3": { + "name": "punctuation.definition.markdown" + }, + "4": { + "name": "fenced_code.block.language.markdown" + }, + "5": { + "name": "fenced_code.block.language.attributes.markdown" + } + }, + "endCaptures": { + "3": { + "name": "punctuation.definition.markdown" + } + }, + "patterns": [ + { + "begin": "(^|\\G)(\\s*)(.*)", + "while": "(^|\\G)(?!\\s*([`~]{3,})\\s*$)", + "contentName": "meta.embedded.block.tablegen", + "patterns": [ 
+ { + "include": "source.tablegen" + } + ] + } + ] } }, "scopeName": "markdown.mlir.codeblock" diff --git a/mlir/utils/vscode/package.json b/mlir/utils/vscode/package.json index 793ea00e18fe07..55033c3304bac3 100644 --- a/mlir/utils/vscode/package.json +++ b/mlir/utils/vscode/package.json @@ -106,7 +106,9 @@ "text.html.markdown" ], "embeddedLanguages": { - "meta.embedded.block.mlir": "mlir" + "meta.embedded.block.mlir": "mlir", + "meta.embedded.block.pdll": "pdll", + "meta.embedded.block.tablegen": "tablegen" } }, { From 7a2d6dea73b594a42b83ed48bd19bd232c29f2b8 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:07:06 +0200 Subject: [PATCH 678/908] [analyzer][NFC] Inline ExprEngine::evalComplement Reviewed By: martong Differential Revision: https://reviews.llvm.org/D126124 --- .../clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h | 4 ---- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index fe558fc21bc892..c79cd7716aa7ff 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -590,10 +590,6 @@ class ExprEngine { return X.isValid() ? svalBuilder.evalMinus(X.castAs()) : X; } - SVal evalComplement(SVal X) { - return X.isValid() ? 
svalBuilder.evalComplement(X.castAs()) : X; - } - ProgramStateRef handleLValueBitCast(ProgramStateRef state, const Expr *Ex, const LocationContext *LCtx, QualType T, QualType ExTy, const CastExpr *CastE, diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 21fd46e308f3f1..470a56cfab479a 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -1029,7 +1029,8 @@ void ExprEngine::VisitUnaryOperator(const UnaryOperator* U, ExplodedNode *Pred, llvm_unreachable("Invalid Opcode."); case UO_Not: // FIXME: Do we need to handle promotions? - state = state->BindExpr(U, LCtx, evalComplement(V.castAs())); + state = state->BindExpr( + U, LCtx, svalBuilder.evalComplement(V.castAs())); break; case UO_Minus: // FIXME: Do we need to handle promotions? From ee8987d585e799ec056da364220e81d97258ddec Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:07:06 +0200 Subject: [PATCH 679/908] [analyzer][NFC] Inline ExprEngine::evalMinus Reviewed By: martong Differential Revision: https://reviews.llvm.org/D126125 --- .../clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h | 4 ---- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 8 +++++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index c79cd7716aa7ff..6be33d488a9e3c 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -586,10 +586,6 @@ class ExprEngine { static std::pair geteagerlyAssumeBinOpBifurcationTags(); - SVal evalMinus(SVal X) { - return X.isValid() ? 
svalBuilder.evalMinus(X.castAs()) : X; - } - ProgramStateRef handleLValueBitCast(ProgramStateRef state, const Expr *Ex, const LocationContext *LCtx, QualType T, QualType ExTy, const CastExpr *CastE, diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 470a56cfab479a..45e2b34b106a2a 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -271,8 +271,9 @@ ProgramStateRef ExprEngine::handleLValueBitCast( SVal OrigV = state->getSVal(Ex, LCtx); SVal V = svalBuilder.evalCast(OrigV, T, ExTy); // Negate the result if we're treating the boolean as a signed i1 - if (CastE->getCastKind() == CK_BooleanToSignedIntegral) - V = evalMinus(V); + if (CastE->getCastKind() == CK_BooleanToSignedIntegral && V.isValid()) + V = svalBuilder.evalMinus(V.castAs()); + state = state->BindExpr(CastE, LCtx, V); if (V.isUnknown() && !OrigV.isUnknown()) { state = escapeValues(state, OrigV, PSK_EscapeOther); @@ -1034,7 +1035,8 @@ void ExprEngine::VisitUnaryOperator(const UnaryOperator* U, ExplodedNode *Pred, break; case UO_Minus: // FIXME: Do we need to handle promotions? - state = state->BindExpr(U, LCtx, evalMinus(V.castAs())); + state = state->BindExpr(U, LCtx, + svalBuilder.evalMinus(V.castAs())); break; case UO_LNot: // C99 6.5.3.3: "The expression !E is equivalent to (0==E)." From f6eab4376432fffe99b6699e6b604a38a8906bfe Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:07:06 +0200 Subject: [PATCH 680/908] [analyzer][NFC] Inline loc::ConcreteInt::evalBinOp This patch also refactored some of the enclosing parts. 
Reviewed By: martong Differential Revision: https://reviews.llvm.org/D126128 --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 4 ---- clang/lib/StaticAnalyzer/Core/SVals.cpp | 17 ----------------- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 14 +++++++------- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 60a59cbeecf9d9..29840e7cafd998 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -645,10 +645,6 @@ class ConcreteInt : public Loc { return *static_cast(Data); } - // Transfer functions for binary/unary operations on ConcreteInts. - SVal evalBinOp(BasicValueFactory& BasicVals, BinaryOperator::Opcode Op, - const ConcreteInt& R) const; - private: friend class SVal; diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 510d0d2fb6276e..cf4927400df92e 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -298,23 +298,6 @@ nonloc::ConcreteInt::evalMinus(SValBuilder &svalBuilder) const { return svalBuilder.makeIntVal(-getValue()); } -//===----------------------------------------------------------------------===// -// Transfer function dispatch for Locs. -//===----------------------------------------------------------------------===// - -SVal loc::ConcreteInt::evalBinOp(BasicValueFactory& BasicVals, - BinaryOperator::Opcode Op, - const loc::ConcreteInt& R) const { - assert(BinaryOperator::isComparisonOp(Op) || Op == BO_Sub); - - const llvm::APSInt *X = BasicVals.evalAPSInt(Op, getValue(), R.getValue()); - - if (X) - return nonloc::ConcreteInt(*X); - else - return UndefinedVal(); -} - //===----------------------------------------------------------------------===// // Pretty-Printing. 
//===----------------------------------------------------------------------===// diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index cfc8816a0ecbaf..1b0c377aedc2cf 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -851,6 +851,8 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, return UnknownVal(); case loc::ConcreteIntKind: { + auto L = lhs.castAs(); + // If one of the operands is a symbol and the other is a constant, // build an expression for use by the constraint manager. if (SymbolRef rSym = rhs.getAsLocSymbol()) { @@ -859,19 +861,17 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp) return UnknownVal(); - const llvm::APSInt &lVal = lhs.castAs().getValue(); op = BinaryOperator::reverseComparisonOp(op); - return makeNonLoc(rSym, op, lVal, resultTy); + return makeNonLoc(rSym, op, L.getValue(), resultTy); } // If both operands are constants, just perform the operation. 
if (Optional rInt = rhs.getAs()) { - SVal ResultVal = - lhs.castAs().evalBinOp(BasicVals, op, *rInt); - if (Optional Result = ResultVal.getAs()) - return evalCast(*Result, resultTy, QualType{}); + assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub); - assert(!ResultVal.getAs() && "Loc-Loc ops should not produce Locs"); + if (const auto *ResultInt = + BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue())) + return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{}); return UnknownVal(); } From 81066603a8b91ec3edf59622d153ba40f0b4a169 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:07:06 +0200 Subject: [PATCH 681/908] [analyzer][NFC] Remove unused nonloc::ConcreteInt::evalBinOp Reviewed By: martong Differential Revision: https://reviews.llvm.org/D126129 --- .../clang/StaticAnalyzer/Core/PathSensitive/SVals.h | 5 +---- clang/lib/StaticAnalyzer/Core/SVals.cpp | 12 ------------ 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 29840e7cafd998..30fe5f78a82871 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -390,10 +390,7 @@ class ConcreteInt : public NonLoc { return *static_cast(Data); } - // Transfer functions for binary/unary operations on ConcreteInts. - SVal evalBinOp(SValBuilder &svalBuilder, BinaryOperator::Opcode Op, - const ConcreteInt& R) const; - + // Transfer functions for unary operations on ConcreteInts. 
ConcreteInt evalComplement(SValBuilder &svalBuilder) const; ConcreteInt evalMinus(SValBuilder &svalBuilder) const; diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index cf4927400df92e..96bc2bbed98b75 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -276,18 +276,6 @@ bool SVal::isZeroConstant() const { // Transfer function dispatch for Non-Locs. //===----------------------------------------------------------------------===// -SVal nonloc::ConcreteInt::evalBinOp(SValBuilder &svalBuilder, - BinaryOperator::Opcode Op, - const nonloc::ConcreteInt& R) const { - const llvm::APSInt* X = - svalBuilder.getBasicValueFactory().evalAPSInt(Op, getValue(), R.getValue()); - - if (X) - return nonloc::ConcreteInt(*X); - else - return UndefinedVal(); -} - nonloc::ConcreteInt nonloc::ConcreteInt::evalComplement(SValBuilder &svalBuilder) const { return svalBuilder.makeIntVal(~getValue()); From 813acb12972eceabd2b5b06f2c11d3a1f34579bd Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:07:06 +0200 Subject: [PATCH 682/908] [analyzer][NFC] Remove unused SVal::hasConjuredSymbol Reviewed By: martong Differential Revision: https://reviews.llvm.org/D126130 --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 3 --- clang/lib/StaticAnalyzer/Core/SVals.cpp | 19 ------------------- 2 files changed, 22 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 30fe5f78a82871..f153e97d2d4417 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -154,9 +154,6 @@ class SVal { bool isZeroConstant() const; - /// hasConjuredSymbol - If this SVal wraps a conjured symbol, return true; - bool hasConjuredSymbol() const; - /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a /// 
CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl. /// Otherwise return 0. diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 96bc2bbed98b75..963861c136c300 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -43,25 +43,6 @@ using namespace ento; // Utility methods. //===----------------------------------------------------------------------===// -bool SVal::hasConjuredSymbol() const { - if (Optional SV = getAs()) { - SymbolRef sym = SV->getSymbol(); - if (isa(sym)) - return true; - } - - if (Optional RV = getAs()) { - const MemRegion *R = RV->getRegion(); - if (const auto *SR = dyn_cast(R)) { - SymbolRef sym = SR->getSymbol(); - if (isa(sym)) - return true; - } - } - - return false; -} - const FunctionDecl *SVal::getAsFunctionDecl() const { if (Optional X = getAs()) { const MemRegion* R = X->getRegion(); From 3a666dd37a06fd5ff74b9e8fea15e31a80398233 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:07:06 +0200 Subject: [PATCH 683/908] [analyzer][NFC] Use MemRegion::getRegion()'s return value unconditionally Reviewed By: martong Differential Revision: https://reviews.llvm.org/D126123 --- clang/lib/StaticAnalyzer/Core/SVals.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 963861c136c300..1429d36107e9e6 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -177,8 +177,7 @@ QualType SVal::getType(const ASTContext &Context) const { } const MemRegion *loc::MemRegionVal::stripCasts(bool StripBaseCasts) const { - const MemRegion *R = getRegion(); - return R ? 
R->StripCasts(StripBaseCasts) : nullptr; + return getRegion()->StripCasts(StripBaseCasts); } const void *nonloc::LazyCompoundVal::getStore() const { From 3a7a465def4a7faae41467893276910b1faa6152 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:07:06 +0200 Subject: [PATCH 684/908] [analyzer][docs] Fix typo in checker name Fixes #55720 --- clang/docs/analyzer/checkers.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 2fff6e0a63942e..6a73e66b0daaab 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -947,9 +947,9 @@ Check the size argument passed into C string functions for common erroneous patt // warn: potential buffer overflow } -.. _unix-cstrisng-NullArg: +.. _unix-cstring-NullArg: -unix.cstrisng.NullArg (C) +unix.cstring.NullArg (C) """"""""""""""""""""""""" Check for null pointers being passed as arguments to C string functions: ``strlen, strnlen, strcpy, strncpy, strcat, strncat, strcmp, strncmp, strcasecmp, strncasecmp``. 
From 11e3ad299feee35d2a8a1291d9155b68636362e6 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 26 May 2022 16:08:55 +0200 Subject: [PATCH 685/908] [libc++] Implement ranges::is_sorted{, _until} Reviewed By: Mordante, var-const, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D125608 --- libcxx/docs/Status/RangesAlgorithms.csv | 4 +- libcxx/include/CMakeLists.txt | 2 + libcxx/include/__algorithm/ranges_is_sorted.h | 61 ++++++ .../__algorithm/ranges_is_sorted_until.h | 76 ++++++++ libcxx/include/algorithm | 18 ++ libcxx/include/module.modulemap | 2 + ...obust_against_copying_comparators.pass.cpp | 8 +- ...obust_against_copying_projections.pass.cpp | 8 +- libcxx/test/libcxx/private_headers.verify.cpp | 2 + .../is.sorted/ranges.is_sorted.pass.cpp | 178 +++++++++++++++++ .../is.sorted/ranges.is_sorted_until.pass.cpp | 180 ++++++++++++++++++ .../niebloid.compile.pass.cpp | 4 +- libcxx/test/support/almost_satisfies_types.h | 43 +++++ 13 files changed, 574 insertions(+), 12 deletions(-) create mode 100644 libcxx/include/__algorithm/ranges_is_sorted.h create mode 100644 libcxx/include/__algorithm/ranges_is_sorted_until.h create mode 100644 libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted.pass.cpp create mode 100644 libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted_until.pass.cpp diff --git a/libcxx/docs/Status/RangesAlgorithms.csv b/libcxx/docs/Status/RangesAlgorithms.csv index 283470af0f11c4..c18f1d6d3c71ea 100644 --- a/libcxx/docs/Status/RangesAlgorithms.csv +++ b/libcxx/docs/Status/RangesAlgorithms.csv @@ -27,8 +27,8 @@ Search,search,Not assigned,n/a,Not started Search,search_n,Not assigned,n/a,Not started Search,find_end,Not assigned,n/a,Not started Read-only,is_partitioned,Nikolas Klauser,`D124440 `_,✅ -Read-only,is_sorted,Not assigned,n/a,Not started -Read-only,is_sorted_unitl,Not assigned,n/a,Not started +Read-only,is_sorted,Nikolas Klauser,`D125608 `_,✅ 
+Read-only,is_sorted_unitl,Nikolas Klauser,`D125608 `_,✅ Read-only,includes,Not assigned,n/a,Not started Read-only,is_heap,Not assigned,n/a,Not started Read-only,is_heap_until,Not assigned,n/a,Not started diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 80261bf2a4537c..f1ff3111f6ee8a 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -83,6 +83,8 @@ set(files __algorithm/ranges_for_each.h __algorithm/ranges_for_each_n.h __algorithm/ranges_is_partitioned.h + __algorithm/ranges_is_sorted.h + __algorithm/ranges_is_sorted_until.h __algorithm/ranges_max.h __algorithm/ranges_max_element.h __algorithm/ranges_min.h diff --git a/libcxx/include/__algorithm/ranges_is_sorted.h b/libcxx/include/__algorithm/ranges_is_sorted.h new file mode 100644 index 00000000000000..938e69afba8d69 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_is_sorted.h @@ -0,0 +1,61 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP__ALGORITHM_RANGES_IS_SORTED_H +#define _LIBCPP__ALGORITHM_RANGES_IS_SORTED_H + +#include <__algorithm/ranges_is_sorted_until.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __is_sorted { +struct __fn { + template _Sent, + class _Proj = identity, + indirect_strict_weak_order> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + return ranges::__is_sorted_until_impl(std::move(__first), __last, __comp, __proj) == __last; + } + + template , _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { + auto __last = ranges::end(__range); + return ranges::__is_sorted_until_impl(ranges::begin(__range), __last, __comp, __proj) == __last; + } +}; +} // namespace __is_sorted + +inline namespace __cpo { + inline constexpr auto is_sorted = __is_sorted::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP__ALGORITHM_RANGES_IS_SORTED_H diff --git a/libcxx/include/__algorithm/ranges_is_sorted_until.h b/libcxx/include/__algorithm/ranges_is_sorted_until.h new file mode 100644 index 00000000000000..c8f0608e12555e --- /dev/null +++ 
b/libcxx/include/__algorithm/ranges_is_sorted_until.h @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP__ALGORITHM_RANGES_IS_SORTED_UNTIL_H +#define _LIBCPP__ALGORITHM_RANGES_IS_SORTED_UNTIL_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template +_LIBCPP_HIDE_FROM_ABI constexpr +_Iter __is_sorted_until_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { + if (__first == __last) + return __first; + auto __i = __first; + while (++__i != __last) { + if (std::invoke(__comp, std::invoke(__proj, *__i), std::invoke(__proj, *__first))) + return __i; + __first = __i; + } + return __i; +} + +namespace __is_sorted_until { +struct __fn { + template _Sent, + class _Proj = identity, + indirect_strict_weak_order> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + return ranges::__is_sorted_until_impl(std::move(__first), std::move(__last), __comp, __proj); + } + + template , _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, 
_Proj __proj = {}) const { + return ranges::__is_sorted_until_impl(ranges::begin(__range), ranges::end(__range), __comp, __proj); + } +}; +} // namespace __is_sorted_until + +inline namespace __cpo { + inline constexpr auto is_sorted_until = __is_sorted_until::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP__ALGORITHM_RANGES_IS_SORTED_UNTIL_H diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 30f647c416f88b..864ff1cdcaf63a 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -329,6 +329,22 @@ namespace ranges { indirect_unary_predicate, Proj>> Pred> constexpr bool ranges::none_of(R&& r, Pred pred, Proj proj = {}); // since C++20 + template S, class Proj = identity, + indirect_strict_weak_order> Comp = ranges::less> + constexpr bool ranges::is_sorted(I first, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template, Proj>> Comp = ranges::less> + constexpr bool ranges::is_sorted(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + + template S, class Proj = identity, + indirect_strict_weak_order> Comp = ranges::less> + constexpr I ranges::is_sorted_until(I first, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template, Proj>> Comp = ranges::less> + constexpr borrowed_iterator_t + ranges::is_sorted_until(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 } constexpr bool // constexpr in C++20 @@ -1062,6 +1078,8 @@ template #include <__algorithm/ranges_for_each.h> #include <__algorithm/ranges_for_each_n.h> #include <__algorithm/ranges_is_partitioned.h> +#include <__algorithm/ranges_is_sorted.h> +#include <__algorithm/ranges_is_sorted_until.h> #include <__algorithm/ranges_max.h> #include <__algorithm/ranges_max_element.h> #include <__algorithm/ranges_min.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 
0e43ca4e44b1d2..d186ebe1c34576 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -315,6 +315,8 @@ module std [system] { module ranges_for_each { private header "__algorithm/ranges_for_each.h" } module ranges_for_each_n { private header "__algorithm/ranges_for_each_n.h" } module ranges_is_partitioned { private header "__algorithm/ranges_is_partitioned.h" } + module ranges_is_sorted { private header "__algorithm/ranges_is_sorted.h" } + module ranges_is_sorted_until { private header "__algorithm/ranges_is_sorted_until.h" } module ranges_max { private header "__algorithm/ranges_max.h" } module ranges_max_element { private header "__algorithm/ranges_max_element.h" } module ranges_min { private header "__algorithm/ranges_min.h" } diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp index 95f1ed7248df0e..b59d9867acdab4 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp @@ -134,10 +134,10 @@ constexpr bool all_the_algorithms() //(void)std::ranges::is_partitioned(a, UnaryTrue(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(first, last, first2, last2, Equal(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(a, b, Equal(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted(first, last, Less(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted(a, Less(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted_until(first, last, Less(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted_until(a, Less(&copies)); assert(copies == 0); + (void)std::ranges::is_sorted(first, last, Less(&copies)); assert(copies == 0); + (void)std::ranges::is_sorted(a, Less(&copies)); assert(copies == 0); + 
(void)std::ranges::is_sorted_until(first, last, Less(&copies)); assert(copies == 0); + (void)std::ranges::is_sorted_until(a, Less(&copies)); assert(copies == 0); //if (!std::is_constant_evaluated()) { (void)std::ranges::inplace_merge(first, mid, last, Less(&copies)); assert(copies == 0); } //if (!std::is_constant_evaluated()) { (void)std::ranges::inplace_merge(a, mid, Less(&copies)); assert(copies == 0); } //(void)std::ranges::lexicographical_compare(first, last, first2, last2, Less(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp index 9efb19d6e3ee02..333e263aafdf93 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp @@ -117,10 +117,10 @@ constexpr bool all_the_algorithms() //(void)std::ranges::is_partitioned(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(a, b, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted(first, last, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted(a, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted_until(first, last, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::is_sorted_until(a, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::is_sorted(first, last, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::is_sorted(a, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::is_sorted_until(first, last, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::is_sorted_until(a, Less(), Proj(&copies)); assert(copies == 0); //if 
(!std::is_constant_evaluated()) { (void)std::ranges::inplace_merge(first, mid, last, Less(), Proj(&copies)); assert(copies == 0); } //if (!std::is_constant_evaluated()) { (void)std::ranges::inplace_merge(a, mid, Less(), Proj(&copies)); assert(copies == 0); } //(void)std::ranges::lexicographical_compare(first, last, first2, last2, Less(), Proj(&copies), Proj(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index d7889f7bb2b8c6..c0debf7b8b2257 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -120,6 +120,8 @@ END-SCRIPT #include <__algorithm/ranges_for_each.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_for_each.h'}} #include <__algorithm/ranges_for_each_n.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_for_each_n.h'}} #include <__algorithm/ranges_is_partitioned.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_is_partitioned.h'}} +#include <__algorithm/ranges_is_sorted.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_is_sorted.h'}} +#include <__algorithm/ranges_is_sorted_until.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_is_sorted_until.h'}} #include <__algorithm/ranges_max.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_max.h'}} #include <__algorithm/ranges_max_element.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_max_element.h'}} #include <__algorithm/ranges_min.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_min.h'}} diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted.pass.cpp 
b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted.pass.cpp new file mode 100644 index 00000000000000..767b98c75488c9 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted.pass.cpp @@ -0,0 +1,178 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// + +// template S, class Proj = identity, +// indirect_strict_weak_order> Comp = ranges::less> +// constexpr bool ranges::is_sorted(I first, S last, Comp comp = {}, Proj proj = {}); +// template, Proj>> Comp = ranges::less> +// constexpr bool ranges::is_sorted(R&& r, Comp comp = {}, Proj proj = {}); + +#include +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +template +concept HasIsSortedIt = requires(Iter iter, Sent sent) { std::ranges::is_sorted(iter, sent); }; + +struct HasNoComparator {}; + +static_assert(HasIsSortedIt); +static_assert(!HasIsSortedIt); +static_assert(!HasIsSortedIt); +static_assert(!HasIsSortedIt); +static_assert(!HasIsSortedIt); +static_assert(!HasIsSortedIt); + +template +concept HasIsSortedR = requires(Range range) { std::ranges::is_sorted(range); }; + +static_assert(HasIsSortedR>); +static_assert(!HasIsSortedR); +static_assert(!HasIsSortedR); +static_assert(!HasIsSortedR); +static_assert(!HasIsSortedR); +static_assert(!HasIsSortedR>); + +template +constexpr void test_iterators() { + { // simple test + { + int a[] = {1, 2, 3, 4, 3}; + std::same_as decltype(auto) ret = std::ranges::is_sorted(Iter(a), Sent(Iter(a + 5))); + assert(!ret); + } + { 
+ int a[] = {1, 2, 3, 4, 3}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + std::same_as decltype(auto) ret = std::ranges::is_sorted(range); + assert(!ret); + } + } + + { // second element isn't sorted + { + int a[] = {1, 0, 3, 4, 5}; + auto ret = std::ranges::is_sorted(Iter(a), Sent(Iter(a + 5))); + assert(!ret); + } + { + int a[] = {1, 0, 3, 4, 5}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + auto ret = std::ranges::is_sorted(range); + assert(!ret); + } + } + + { // all elements are sorted + { + int a[] = {1, 2, 3, 4, 5}; + auto ret = std::ranges::is_sorted(Iter(a), Sent(Iter(a + 5))); + assert(ret); + } + { + int a[] = {1, 2, 3, 4, 5}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + auto ret = std::ranges::is_sorted(range); + assert(ret); + } + } + + { // check that the comparator is used + { + int a[] = {5, 4, 3, 2}; + auto ret = std::ranges::is_sorted(Iter(a), Sent(Iter(a + 4)), std::ranges::greater{}); + assert(ret); + } + { + int a[] = {5, 4, 3, 2}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 4))); + auto ret = std::ranges::is_sorted(range, std::ranges::greater{}); + assert(ret); + } + } + + { // check that an empty range works + { + int a[] = {}; + auto ret = std::ranges::is_sorted(Iter(a), Sent(Iter(a))); + assert(ret); + } + { + int a[] = {}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a))); + auto ret = std::ranges::is_sorted(range); + assert(ret); + } + } + + { // check that a range with a single element works + { + int a[] = {32}; + auto ret = std::ranges::is_sorted(Iter(a), Sent(Iter(a + 1))); + assert(ret); + } + { + int a[] = {32}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 1))); + auto ret = std::ranges::is_sorted(range); + assert(ret); + } + } +} + +constexpr bool test() { + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + test_iterators(); + + { 
// check that the projection is used + struct S { + int i; + constexpr S(int i_) : i(i_) {} + }; + { + S a[] = {1, 2, 3}; + auto ret = std::ranges::is_sorted(a, a + 3, {}, &S::i); + assert(ret); + } + { + S a[] = {1, 2, 3}; + auto ret = std::ranges::is_sorted(a, {}, &S::i); + assert(ret); + } + } + + { // check that a dangling range works + assert(std::ranges::is_sorted(std::array{1, 2, 3, 4})); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted_until.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted_until.pass.cpp new file mode 100644 index 00000000000000..4174cb341d897d --- /dev/null +++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/ranges.is_sorted_until.pass.cpp @@ -0,0 +1,180 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// + +// template S, class Proj = identity, +// indirect_strict_weak_order> Comp = ranges::less> +// constexpr I ranges::is_sorted_until(I first, S last, Comp comp = {}, Proj proj = {}); +// template, Proj>> Comp = ranges::less> +// constexpr borrowed_iterator_t +// ranges::is_sorted_until(R&& r, Comp comp = {}, Proj proj = {}); + +#include +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +template +concept HasIsSortedUntilIt = requires(Iter iter, Sent sent) { std::ranges::is_sorted_until(iter, sent); }; + +struct HasNoComparator {}; + +static_assert(HasIsSortedUntilIt); +static_assert(!HasIsSortedUntilIt); +static_assert(!HasIsSortedUntilIt); +static_assert(!HasIsSortedUntilIt); +static_assert(!HasIsSortedUntilIt); +static_assert(!HasIsSortedUntilIt); + +template +concept HasIsSortedUntilR = requires(Range range) { std::ranges::is_sorted_until(range); }; + +static_assert(HasIsSortedUntilR>); +static_assert(!HasIsSortedUntilR); +static_assert(!HasIsSortedUntilR); +static_assert(!HasIsSortedUntilR); +static_assert(!HasIsSortedUntilR); +static_assert(!HasIsSortedUntilR>); + +template +constexpr void test_iterators() { + { // simple test + { + int a[] = {1, 2, 3, 4, 3}; + std::same_as auto ret = std::ranges::is_sorted_until(Iter(a), Sent(Iter(a + 5))); + assert(base(ret) == a + 4); + } + { + int a[] = {1, 2, 3, 4, 3}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + std::same_as auto ret = std::ranges::is_sorted_until(range); + assert(base(ret) == a + 4); + } + } + + { // second element isn't sorted + { + int a[] = {1, 0, 3, 4, 5}; + auto ret = std::ranges::is_sorted_until(Iter(a), Sent(Iter(a + 5))); + assert(base(ret) == a + 1); + } + { + 
int a[] = {1, 0, 3, 4, 5}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + auto ret = std::ranges::is_sorted_until(range); + assert(base(ret) == a + 1); + } + } + + { // all elements are sorted + { + int a[] = {1, 2, 3, 4, 5}; + auto ret = std::ranges::is_sorted_until(Iter(a), Sent(Iter(a + 5))); + assert(base(ret) == a + 5); + } + { + int a[] = {1, 2, 3, 4, 5}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 5))); + auto ret = std::ranges::is_sorted_until(range); + assert(base(ret) == a + 5); + } + } + + { // check that the comparator is used + { + int a[] = {5, 4, 3, 2}; + auto ret = std::ranges::is_sorted_until(Iter(a), Sent(Iter(a + 4)), std::ranges::greater{}); + assert(base(ret) == a + 4); + } + { + int a[] = {5, 4, 3, 2}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 4))); + auto ret = std::ranges::is_sorted_until(range, std::ranges::greater{}); + assert(base(ret) == a + 4); + } + } + + { // check that an empty range works + { + int a[] = {}; + auto ret = std::ranges::is_sorted_until(Iter(a), Sent(Iter(a))); + assert(base(ret) == a); + } + { + int a[] = {}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a))); + auto ret = std::ranges::is_sorted_until(range); + assert(base(ret) == a); + } + } + + { // check that a range with a single element works + { + int a[] = {32}; + auto ret = std::ranges::is_sorted_until(Iter(a), Sent(Iter(a + 1))); + assert(base(ret) == a + 1); + } + { + int a[] = {32}; + auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 1))); + auto ret = std::ranges::is_sorted_until(range); + assert(base(ret) == a + 1); + } + } +} + +constexpr bool test() { + test_iterators, sentinel_wrapper>>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators(); + test_iterators(); + + { // check that the projection is used + struct S { + int i; + constexpr S(int i_) : i(i_) {} + }; + { + S a[] = {1, 2, 3}; + auto ret = std::ranges::is_sorted_until(a, 
a + 3, {}, &S::i); + assert(base(ret) == a + 3); + } + { + S a[] = {1, 2, 3}; + auto ret = std::ranges::is_sorted_until(a, {}, &S::i); + assert(base(ret) == a + 3); + } + } + + { // check that std::ranges::dangling is returned + [[maybe_unused]] std::same_as decltype(auto) ret = + std::ranges::is_sorted_until(std::array{1, 2, 3}); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp index 47707c091c475c..75c0905c3a0e81 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp @@ -91,8 +91,8 @@ static_assert(test(std::ranges::find_if_not, a, odd)); //static_assert(test(std::ranges::is_heap_until, a)); //static_assert(test(std::ranges::is_partitioned, a, odd)); //static_assert(test(std::ranges::is_permutation, a, a)); -//static_assert(test(std::ranges::is_sorted, a)); -//static_assert(test(std::ranges::is_sorted_until, a)); +static_assert(test(std::ranges::is_sorted, a)); +static_assert(test(std::ranges::is_sorted_until, a)); //static_assert(test(std::ranges::lexicographical_compare, a, a)); //static_assert(test(std::ranges::lower_bound, a, 42)); //static_assert(test(std::ranges::make_heap, a)); diff --git a/libcxx/test/support/almost_satisfies_types.h b/libcxx/test/support/almost_satisfies_types.h index 99ea852d2ff9a5..eaab3e4693f41c 100644 --- a/libcxx/test/support/almost_satisfies_types.h +++ b/libcxx/test/support/almost_satisfies_types.h @@ -142,6 +142,49 @@ class WeaklyIncrementableNotMovable { static_assert(!std::movable); static_assert(!std::weakly_incrementable); +// almost a forward_iterator +class ForwardIteratorNotDerivedFrom { 
+public: + using difference_type = long; + using value_type = int; + using iterator_category = std::input_iterator_tag; + + ForwardIteratorNotDerivedFrom& operator++(); + ForwardIteratorNotDerivedFrom operator++(int); + const int& operator*() const; + bool operator==(const ForwardIteratorNotDerivedFrom&) const = default; +}; + +using ForwardRangeNotDerivedFrom = UncheckedRange; + +static_assert(std::input_iterator); +static_assert(std::incrementable); +static_assert(std::sentinel_for); +static_assert(!std::forward_iterator); + +class ForwardIteratorNotIncrementable { +public: + using difference_type = long; + using value_type = int; + using iterator_category = std::forward_iterator_tag; + + ForwardIteratorNotIncrementable& operator++(); + int operator++(int); + const int& operator*() const; + bool operator==(const ForwardIteratorNotIncrementable&) const = default; +}; + +using ForwardRangeNotIncrementable = UncheckedRange; + +static_assert(std::input_iterator); +static_assert(!std::incrementable); +static_assert(std::sentinel_for); +static_assert(!std::forward_iterator); + +using ForwardRangeNotSentinelSemiregular = UncheckedRange, SentinelForNotSemiregular>; +using ForwardRangeNotSentinelEqualityComparableWith = + UncheckedRange, SentinelForNotWeaklyEqualityComparableWith>; + class BidirectionalIteratorNotDerivedFrom { public: using difference_type = long; From cde101d0225f4ef32254561c1c8d201d05336471 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 27 May 2022 08:29:03 +0000 Subject: [PATCH 686/908] [gn build] Port 11e3ad299fee --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b6eba6a91626a3..9f63c5a87396e9 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -139,6 +139,8 @@ if (current_toolchain == default_toolchain) { 
"__algorithm/ranges_for_each.h", "__algorithm/ranges_for_each_n.h", "__algorithm/ranges_is_partitioned.h", + "__algorithm/ranges_is_sorted.h", + "__algorithm/ranges_is_sorted_until.h", "__algorithm/ranges_max.h", "__algorithm/ranges_max_element.h", "__algorithm/ranges_min.h", From aaaf9cede774a3a82770e88a8096b8c605bdb2c0 Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Fri, 27 May 2022 10:54:37 +0800 Subject: [PATCH 687/908] [X86][AMX] Replace LDTILECFG with PLDTILECFGV on auto-config. There is intrinsic `@llvm.x86.ldtilecfg` which is lowered to LDTILECFG. This intrinsic is open for user to configure tile registers by themselves. There is a chance that `@llvm.x86.ldtilecfg` would be mixed with the new AMX intrinsics which depend on compiler to configure tile registers. Separate pusedo instruction PLDTILECFGV would avoid unexpected behavious when `@llvm.x86.ldtilecfg` is mixed with new AMX intrinsics. Though user should not mix the two programming model, compiler should avoid crash or UB when they are mixed. 
Differential Revision: https://reviews.llvm.org/D126519 --- llvm/lib/Target/X86/X86FastPreTileConfig.cpp | 2 +- llvm/lib/Target/X86/X86FastTileConfig.cpp | 6 ++--- llvm/lib/Target/X86/X86InstrAMX.td | 6 ++--- llvm/lib/Target/X86/X86InstrInfo.cpp | 2 +- llvm/lib/Target/X86/X86PreTileConfig.cpp | 2 +- llvm/lib/Target/X86/X86TileConfig.cpp | 4 ++-- .../CodeGen/X86/AMX/amx-fastconfig-phi.mir | 22 +++++++++---------- .../CodeGen/X86/AMX/amx-fastconfig-phi2.mir | 8 +++---- .../CodeGen/X86/AMX/amx-fastconfig-phi4.mir | 10 ++++----- .../CodeGen/X86/AMX/amx-fastconfig-spill.mir | 8 +++---- llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir | 6 ++--- .../CodeGen/X86/AMX/amx-fastpreconfig.mir | 2 +- .../CodeGen/X86/AMX/amx-tile-intrinsics.ll | 1 + 13 files changed, 40 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp index da10964d51ecbd..754baf2973d5e7 100644 --- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp @@ -515,7 +515,7 @@ bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) { CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(), ST->getTileConfigAlignment(), false); LastTileCfg = addFrameReference( - BuildMI(MBB, Before, DebugLoc(), TII->get(X86::LDTILECFG)), CfgSS); + BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS); LastShapeMI = nullptr; Change = true; }; diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp index 2949bd048ee00d..2a20cd13791deb 100644 --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -110,15 +110,15 @@ bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { bool Change = false; SmallVector, 6> ShapeInfos; for (MachineInstr &MI : reverse(MBB)) { - if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::LDTILECFG) + if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV) continue; 
// AMX instructions that define tile register. - if (MI.getOpcode() != X86::LDTILECFG) { + if (MI.getOpcode() != X86::PLDTILECFGV) { MachineOperand &Row = MI.getOperand(1); MachineOperand &Col = MI.getOperand(2); unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0; ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)}); - } else { // LDTILECFG + } else { // PLDTILECFGV // Rewrite the shape information to memory. Stack slot should have // been initialized to zero in pre config. int SS = MI.getOperand(0).getIndex(); // tile config stack slot. diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index df8c096204e052..5da06bc87b060d 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -48,9 +48,9 @@ let Predicates = [HasAMXTILE, In64BitMode] in { VEX, T8XD; // Pseduo instruction for RA. - let isPseudo = true, mayLoad = 1 in - def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), - [(int_x86_ldtilecfg_internal addr:$src)]>; + let isPseudo = true, mayLoad = 1, hasSideEffects = 1, + Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in + def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>; let isPseudo = true, mayLoad = 1 in def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 831142a66d6bb8..6763fe57aeef1b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7360,7 +7360,7 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI, // ENDBR instructions should not be scheduled around. 
unsigned Opcode = MI.getOpcode(); if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 || - Opcode == X86::LDTILECFG) + Opcode == X86::PLDTILECFGV) return true; return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp index ed0f5a5a36624f..479db8585ca030 100644 --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -369,7 +369,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) { // multi insert. if (VisitedOrInserted.insert(I).second) { auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin(); - addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)), + addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)), SS); } } diff --git a/llvm/lib/Target/X86/X86TileConfig.cpp b/llvm/lib/Target/X86/X86TileConfig.cpp index 490b1ad12b2679..c126aa33f96059 100644 --- a/llvm/lib/Target/X86/X86TileConfig.cpp +++ b/llvm/lib/Target/X86/X86TileConfig.cpp @@ -90,7 +90,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) { int SS = INT_MAX; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - if (MI.getOpcode() == X86::LDTILECFG) { + if (MI.getOpcode() == X86::PLDTILECFGV) { SS = MI.getOperand(0).getIndex(); break; } @@ -98,7 +98,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) { if (SS != INT_MAX) break; } - // Didn't find LDTILECFG, just return false; + // Didn't find PLDTILECFGV, just return false; if (SS == INT_MAX) return false; diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir index 65b4bd230ff783..fa79fe27f4d5e3 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir @@ -114,7 +114,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 - 
; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -127,7 +127,7 @@ body: | ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri3]], [[MOV16ri2]], [[COPY3]], 1, killed [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -139,7 +139,7 @@ body: | ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri2]], %bb.2 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[MOV16ri3]], %bb.2 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r1]], 
%bb.2 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg @@ -147,13 +147,13 @@ body: | ; CHECK-NEXT: TILESTORED %stack.5, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.5) ; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]] ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.4) ; CHECK-NEXT: [[MOV16ri6:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri7:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, 
implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri7]], [[MOV16ri6]] ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.3) @@ -161,7 +161,7 @@ body: | ; CHECK-NEXT: JMP_1 %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri8:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri9:%[0-9]+]]:gr16 = MOV16ri 16 @@ -177,14 +177,14 @@ body: | ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[PHI]], %bb.3, %60, %bb.8 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr16 = PHI [[PHI1]], %bb.3, %59, %bb.8 ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.3, %58, %bb.8 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on 
%stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], [[PHI6]], 1, killed [[MOV64ri7]], 0, $noreg ; CHECK-NEXT: [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.8, 1, killed [[MOV64ri8]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.8) ; CHECK-NEXT: [[MOV16ri10:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri11:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri9:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri7]], [[MOV16ri6]], %stack.3, 1, killed [[MOV64ri9]], 0, $noreg :: (load (s8192) from %stack.3) ; CHECK-NEXT: [[MOV64ri10:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -200,7 +200,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri12:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri13:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 
1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV3:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri13]], [[MOV16ri12]] ; CHECK-NEXT: [[MOV64ri12:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -213,7 +213,7 @@ body: | ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri14:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri15:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.7, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri15]], [[MOV16ri14]], [[COPY3]], 1, killed [[MOV32ri64_2]], 0, $noreg ; CHECK-NEXT: [[MOV64ri13:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -225,7 +225,7 @@ body: | ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[MOV16ri12]], %bb.6, [[MOV16ri14]], %bb.7 ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr16 = PHI [[MOV16ri13]], %bb.6, [[MOV16ri15]], %bb.7 ; CHECK-NEXT: [[PHI9:%[0-9]+]]:gr64_nosp = PHI [[LEA64r3]], %bb.6, [[LEA64r4]], %bb.7 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; 
CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri14:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI8]], [[PHI7]], [[PHI9]], 1, killed [[MOV64ri14]], 0, $noreg ; CHECK-NEXT: [[MOV64ri15:%[0-9]+]]:gr64_nosp = MOV64ri 64 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir index 4cea456e7e9170..97c44ffab66d9f 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir @@ -51,7 +51,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]] ; CHECK-NEXT: %r0:gr16 = MOV16ri 64 ; CHECK-NEXT: %c0:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: %t0:tile = PTILEZEROV %r0, %c0 ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -66,7 +66,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 
:: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -79,7 +79,7 @@ body: | ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %24, %bb.3 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %23, %bb.3 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %22, %bb.3 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -92,7 +92,7 @@ body: | ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[PHI]], %bb.2 ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[PHI1]], %bb.2 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[PHI2]], %bb.2 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: 
(load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri4]], 0, $noreg ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir index 14ff3b2996d622..6f0976b3a4894f 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir @@ -52,7 +52,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]] ; CHECK-NEXT: %r0:gr16 = MOV16ri 64 ; CHECK-NEXT: %c0:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: %t0:tile = PTILEZEROV %r0, %c0 @@ -68,7 +68,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, 
implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV killed [[MOV16ri1]], killed [[MOV16ri]] ; CHECK-NEXT: JMP_1 %bb.3 ; CHECK-NEXT: {{ $}} @@ -81,14 +81,14 @@ body: | ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %11, %bb.3, %17, %bb.2 ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %12, %bb.3, %18, %bb.2 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %24, %bb.3, %25, %bb.2 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri1]], 0, $noreg ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri2]], 0, $noreg ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: 
(load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri3]], [[MOV16ri2]] @@ -102,7 +102,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[LEA64r5:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]] diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir index c797ce764c2c9c..98744bbe8e1473 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir @@ -34,7 +34,7 @@ body: | ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: 
LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3) @@ -48,7 +48,7 @@ body: | ; CHECK-NEXT: JMP_1 %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV %row, %col, [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2) @@ -111,7 +111,7 @@ body: | ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, 
implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3) @@ -123,7 +123,7 @@ body: | ; CHECK-NEXT: JMP_1 %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: %t:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2) diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir index 1dfc45c4c0f0f9..8da2b78d7944d7 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir @@ -102,7 +102,7 @@ body: | ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf ; 
CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -123,7 +123,7 @@ body: | ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2 ; CHECK-NEXT: [[MOV32ri64_3:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4) ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri1]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -147,7 +147,7 @@ body: | ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2 
; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2 ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.1, [[LEA64r5]], %bb.2 - ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4) ; CHECK-NEXT: [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri6]], 0, $noreg ; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir index 53877b5da84a30..8807cc029ccd70 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir @@ -34,7 +34,7 @@ body: | ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.2, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.2, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.2, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.2, align 4) ; CHECK-NEXT: $tmm0 = TILELOADD [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: 
[[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll index 4d469c23328e41..e5fb227329d0df 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -verify-machineinstrs | FileCheck %s define void @test_amx(i8* %pointer, i8* %base, i64 %stride) { ; CHECK-LABEL: test_amx: From 3606da5fbad042e2b74a35404797af20f65b437b Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 26 May 2022 17:10:53 +0200 Subject: [PATCH 688/908] [libc++] Enable ranges_robust_against* and niebloid tests for implemented ranges algorithms Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D126477 --- libcxx/docs/Status/RangesAlgorithms.csv | 6 +-- ...obust_against_copying_comparators.pass.cpp | 38 +++++++++---------- ...obust_against_copying_projections.pass.cpp | 38 +++++++++---------- .../niebloid.compile.pass.cpp | 26 ++++++------- 4 files changed, 54 insertions(+), 54 deletions(-) diff --git a/libcxx/docs/Status/RangesAlgorithms.csv b/libcxx/docs/Status/RangesAlgorithms.csv index c18f1d6d3c71ea..05e522d916b528 100644 --- a/libcxx/docs/Status/RangesAlgorithms.csv +++ b/libcxx/docs/Status/RangesAlgorithms.csv @@ -8,7 +8,7 @@ Search,find_if_not,Nikolas Klauser,`D121248 `_ Search,find_first_of,Not assigned,n/a,Not started Search,adjacent_find,Not assigned,n/a,Not started Search,mismatch,Nikolas Klauser,`D117817 `_,✅ -Search,equal,Nikolas Klauser,n/a,✅ 
+Search,equal,Nikolas Klauser,`D123681 `,✅ Search,lexicographical_compare,Not assigned,n/a,Not started Search,partition_point,Christopher Di Bella,`D105794 `_,Under review Search,lower_bound,Christopher Di Bella,`D105795 `_,Under review @@ -34,8 +34,8 @@ Read-only,is_heap,Not assigned,n/a,Not started Read-only,is_heap_until,Not assigned,n/a,Not started Read-only,clamp,Not assigned,n/a,Not started Read-only,is_permutation,Not assigned,n/a,Not started -Read-only,for_each,Nikolas Klauser,n/a,✅ -Read-only,for_each_n,Nikolas Klauser,n/a,✅ +Read-only,for_each,Nikolas Klauser,`D124332 `_,✅ +Read-only,for_each_n,Nikolas Klauser,`D124332 `_,✅ Write,copy,Nikolas Klauser,`D122982 `_,✅ Write,copy_if,Nikolas Klauser,`D122982 `_,✅ Write,copy_n,Nikolas Klauser,`D122982 `_,✅ diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp index b59d9867acdab4..e801f6b2013434 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp @@ -81,13 +81,13 @@ constexpr bool all_the_algorithms() void *b[10] = {}; //void *half[5] = {}; void **first = a; - //void **mid = a+5; + void **mid = a+5; void **last = a+10; void **first2 = b; //void **mid2 = b+5; void **last2 = b+10; void *value = nullptr; - //int count = 1; + int count = 1; int copies = 0; //(void)std::ranges::adjacent_find(first, last, Equal(&copies)); assert(copies == 0); @@ -101,8 +101,8 @@ constexpr bool all_the_algorithms() //(void)std::ranges::clamp(value, value, value, Less(&copies)); assert(copies == 0); (void)std::ranges::count_if(first, last, UnaryTrue(&copies)); assert(copies == 0); (void)std::ranges::count_if(a, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::copy_if(first, last, first2, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::copy_if(a, first2, 
UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::copy_if(first, last, first2, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::copy_if(a, first2, UnaryTrue(&copies)); assert(copies == 0); #if TEST_STD_VER > 20 //(void)std::ranges::ends_with(first, last, first2, last2, Equal(&copies)); assert(copies == 0); #endif @@ -118,9 +118,9 @@ constexpr bool all_the_algorithms() (void)std::ranges::find_if(a, UnaryTrue(&copies)); assert(copies == 0); (void)std::ranges::find_if_not(first, last, UnaryTrue(&copies)); assert(copies == 0); (void)std::ranges::find_if_not(a, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::for_each(first, last, UnaryVoid(&copies)); assert(copies == 1); copies = 0; - //(void)std::ranges::for_each(a, UnaryVoid(&copies)); assert(copies == 1); copies = 0; - //(void)std::ranges::for_each_n(first, count, UnaryVoid(&copies)); assert(copies == 0); + (void)std::ranges::for_each(first, last, UnaryVoid(&copies)); assert(copies == 1); copies = 0; + (void)std::ranges::for_each(a, UnaryVoid(&copies)); assert(copies == 1); copies = 0; + (void)std::ranges::for_each_n(first, count, UnaryVoid(&copies)); assert(copies == 1); copies = 0; //(void)std::ranges::generate(first, last, NullaryValue(&copies)); assert(copies == 0); //(void)std::ranges::generate(a, NullaryValue(&copies)); assert(copies == 0); //(void)std::ranges::generate_n(first, count, NullaryValue(&copies)); assert(copies == 0); @@ -130,8 +130,8 @@ constexpr bool all_the_algorithms() //(void)std::ranges::is_heap(a, Less(&copies)); assert(copies == 0); //(void)std::ranges::is_heap_until(first, last, Less(&copies)); assert(copies == 0); //(void)std::ranges::is_heap_until(a, Less(&copies)); assert(copies == 0); - //(void)std::ranges::is_partitioned(first, last, UnaryTrue(&copies)); assert(copies == 0); - //(void)std::ranges::is_partitioned(a, UnaryTrue(&copies)); assert(copies == 0); + (void)std::ranges::is_partitioned(first, last, UnaryTrue(&copies)); assert(copies == 
0); + (void)std::ranges::is_partitioned(a, UnaryTrue(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(first, last, first2, last2, Equal(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(a, b, Equal(&copies)); assert(copies == 0); (void)std::ranges::is_sorted(first, last, Less(&copies)); assert(copies == 0); @@ -146,16 +146,16 @@ constexpr bool all_the_algorithms() //(void)std::ranges::lower_bound(a, value, Less(&copies)); assert(copies == 0); //(void)std::ranges::make_heap(first, last, Less(&copies)); assert(copies == 0); //(void)std::ranges::make_heap(a, Less(&copies)); assert(copies == 0); - //(void)std::ranges::max(value, value, Less(&copies)); assert(copies == 0); - //(void)std::ranges::max({ value, value }, Less(&copies)); assert(copies == 0); - //(void)std::ranges::max(a, Less(&copies)); assert(copies == 0); + (void)std::ranges::max(value, value, Less(&copies)); assert(copies == 0); + (void)std::ranges::max({ value, value }, Less(&copies)); assert(copies == 0); + (void)std::ranges::max(a, Less(&copies)); assert(copies == 0); (void)std::ranges::max_element(first, last, Less(&copies)); assert(copies == 0); (void)std::ranges::max_element(a, Less(&copies)); assert(copies == 0); //(void)std::ranges::merge(first, mid, mid, last, first2, Less(&copies)); assert(copies == 0); //(void)std::ranges::merge(half, half, b, Less(&copies)); assert(copies == 0); - //(void)std::ranges::min(value, value, Less(&copies)); assert(copies == 0); - //(void)std::ranges::min({ value, value }, Less(&copies)); assert(copies == 0); - //(void)std::ranges::min(a, Less(&copies)); assert(copies == 0); + (void)std::ranges::min(value, value, Less(&copies)); assert(copies == 0); + (void)std::ranges::min({ value, value }, Less(&copies)); assert(copies == 0); + (void)std::ranges::min(a, Less(&copies)); assert(copies == 0); (void)std::ranges::min_element(first, last, Less(&copies)); assert(copies == 0); (void)std::ranges::min_element(a, Less(&copies)); 
assert(copies == 0); (void)std::ranges::minmax(value, value, Less(&copies)); assert(copies == 0); @@ -218,10 +218,10 @@ constexpr bool all_the_algorithms() #if TEST_STD_VER > 20 //(void)std::ranges::starts_with(first, last, first2, last2, Equal(&copies)); assert(copies == 0); #endif - //(void)std::ranges::transform(first, last, first2, UnaryTransform(&copies)); assert(copies == 0); - //(void)std::ranges::transform(a, first2, UnaryTransform(&copies)); assert(copies == 0); - //(void)std::ranges::transform(first, mid, mid, last, first2, BinaryTransform(&copies)); assert(copies == 0); - //(void)std::ranges::transform(a, b, first2, BinaryTransform(&copies)); assert(copies == 0); + (void)std::ranges::transform(first, last, first2, UnaryTransform(&copies)); assert(copies == 0); + (void)std::ranges::transform(a, first2, UnaryTransform(&copies)); assert(copies == 0); + (void)std::ranges::transform(first, mid, mid, last, first2, BinaryTransform(&copies)); assert(copies == 0); + (void)std::ranges::transform(a, b, first2, BinaryTransform(&copies)); assert(copies == 0); //(void)std::ranges::unique(first, last, Equal(&copies)); assert(copies == 0); //(void)std::ranges::unique(a, Equal(&copies)); assert(copies == 0); //(void)std::ranges::unique_copy(first, last, first2, Equal(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp index 333e263aafdf93..27678fb7b43b3c 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp @@ -63,13 +63,13 @@ constexpr bool all_the_algorithms() T b[10] = {}; //T half[5] = {}; T *first = a; - //T *mid = a+5; + T *mid = a+5; T *last = a+10; T *first2 = b; //T *mid2 = b+5; T *last2 = b+10; void *value = nullptr; - //int count = 1; + int count = 1; int copies = 0; 
//(void)std::ranges::adjacent_find(first, last, Equal(), Proj(&copies)); assert(copies == 0); @@ -85,8 +85,8 @@ constexpr bool all_the_algorithms() (void)std::ranges::count(a, value, Proj(&copies)); assert(copies == 0); (void)std::ranges::count_if(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); (void)std::ranges::count_if(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::copy_if(first, last, first2, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::copy_if(a, first2, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::copy_if(first, last, first2, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::copy_if(a, first2, UnaryTrue(), Proj(&copies)); assert(copies == 0); #if TEST_STD_VER > 20 //(void)std::ranges::ends_with(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); #endif @@ -104,17 +104,17 @@ constexpr bool all_the_algorithms() (void)std::ranges::find_if(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); (void)std::ranges::find_if_not(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); (void)std::ranges::find_if_not(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::for_each(first, last, UnaryVoid(), Proj(&copies)); assert(copies == 1); copies = 0; - //(void)std::ranges::for_each(a, UnaryVoid(), Proj(&copies)); assert(copies == 1); copies = 0; - //(void)std::ranges::for_each_n(first, count, UnaryVoid(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::for_each(first, last, UnaryVoid(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::for_each(a, UnaryVoid(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::for_each_n(first, count, UnaryVoid(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::includes(first, last, first2, last2, Less(), Proj(&copies), Proj(&copies)); assert(copies == 0); //(void)std::ranges::includes(a, b, Less(), Proj(&copies), 
Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_heap(first, last, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_heap(a, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_heap_until(first, last, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_heap_until(a, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::is_partitioned(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::is_partitioned(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::is_partitioned(first, last, UnaryTrue(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::is_partitioned(a, UnaryTrue(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); //(void)std::ranges::is_permutation(a, b, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); (void)std::ranges::is_sorted(first, last, Less(), Proj(&copies)); assert(copies == 0); @@ -129,16 +129,16 @@ constexpr bool all_the_algorithms() //(void)std::ranges::lower_bound(a, value, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::make_heap(first, last, Less(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::make_heap(a, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::max(T(), T(), Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::max({ T(), T() }, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::max(a, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::max(T(), T(), Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::max({ T(), T() }, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::max(a, Less(), Proj(&copies)); assert(copies == 0); (void)std::ranges::max_element(first, last, Less(), Proj(&copies)); assert(copies == 0); (void)std::ranges::max_element(a, Less(), 
Proj(&copies)); assert(copies == 0); //(void)std::ranges::merge(first, mid, mid, last, first2, Less(), Proj(&copies), Proj(&copies)); assert(copies == 0); //(void)std::ranges::merge(half, half, b, Less(), Proj(&copies), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::min(T(), T(), Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::min({ T(), T() }, Less(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::min(a, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::min(T(), T(), Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::min({ T(), T() }, Less(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::min(a, Less(), Proj(&copies)); assert(copies == 0); (void)std::ranges::min_element(first, last, Less(), Proj(&copies)); assert(copies == 0); (void)std::ranges::min_element(a, Less(), Proj(&copies)); assert(copies == 0); (void)std::ranges::minmax(T(), T(), Less(), Proj(&copies)); assert(copies == 0); @@ -209,10 +209,10 @@ constexpr bool all_the_algorithms() #if TEST_STD_VER > 20 //(void)std::ranges::starts_with(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); assert(copies == 0); #endif - //(void)std::ranges::transform(first, last, first2, UnaryTransform(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::transform(a, first2, UnaryTransform(), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::transform(first, mid, mid, last, first2, BinaryTransform(), Proj(&copies), Proj(&copies)); assert(copies == 0); - //(void)std::ranges::transform(a, b, first2, BinaryTransform(), Proj(&copies), Proj(&copies)); assert(copies == 0); + (void)std::ranges::transform(first, last, first2, UnaryTransform(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::transform(a, first2, UnaryTransform(), Proj(&copies)); assert(copies == 0); + (void)std::ranges::transform(first, mid, mid, last, first2, BinaryTransform(), Proj(&copies), Proj(&copies)); assert(copies == 0); + 
(void)std::ranges::transform(a, b, first2, BinaryTransform(), Proj(&copies), Proj(&copies)); assert(copies == 0); //(void)std::ranges::unique(first, last, Equal(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::unique(a, Equal(), Proj(&copies)); assert(copies == 0); //(void)std::ranges::unique_copy(first, last, first2, Equal(), Proj(&copies)); assert(copies == 0); diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp index 75c0905c3a0e81..81137859e1e668 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp @@ -54,7 +54,7 @@ constexpr bool test(CPO& o, Args&&...) { int *p; int a[10]; auto odd = [](int x) { return x % 2 != 0; }; -//auto triple = [](int x) { return 3*x; }; +auto triple = [](int x) { return 3*x; }; //auto plus = [](int x, int y) { return x == y; }; //std::mt19937 g; @@ -65,38 +65,38 @@ static_assert(test(std::ranges::all_of, a, odd)); static_assert(test(std::ranges::any_of, a, odd)); //static_assert(test(std::ranges::binary_search, a, 42)); //static_assert(test(std::ranges::clamp, 42, 42, 42)); -//static_assert(test(std::ranges::copy, a, a)); -//static_assert(test(std::ranges::copy_backward, a, a)); -//static_assert(test(std::ranges::copy_if, a, a, odd)); -//static_assert(test(std::ranges::copy_n, a, 10, a)); +static_assert(test(std::ranges::copy, a, a)); +static_assert(test(std::ranges::copy_backward, a, a)); +static_assert(test(std::ranges::copy_if, a, a, odd)); +static_assert(test(std::ranges::copy_n, a, 10, a)); static_assert(test(std::ranges::count, a, 42)); static_assert(test(std::ranges::count_if, a, odd)); //static_assert(test(std::ranges::ends_with, a, a)); -//static_assert(test(std::ranges::equal, a, a)); 
+static_assert(test(std::ranges::equal, a, a)); //static_assert(test(std::ranges::equal_range, a, 42)); -//static_assert(test(std::ranges::fill, a, 42)); -//static_assert(test(std::ranges::fill_n, a, 10, 42)); +static_assert(test(std::ranges::fill, a, 42)); +static_assert(test(std::ranges::fill_n, a, 10, 42)); static_assert(test(std::ranges::find, a, 42)); //static_assert(test(std::ranges::find_end, a, a)); //static_assert(test(std::ranges::find_first_of, a, a)); static_assert(test(std::ranges::find_if, a, odd)); static_assert(test(std::ranges::find_if_not, a, odd)); -//static_assert(test(std::ranges::for_each, a, odd)); -//static_assert(test(std::ranges::for_each_n, a, 10, odd)); +static_assert(test(std::ranges::for_each, a, odd)); +static_assert(test(std::ranges::for_each_n, a, 10, odd)); //static_assert(test(std::ranges::generate, a, 42)); //static_assert(test(std::ranges::generate_n, a, 10, 42)); //static_assert(test(std::ranges::includes, a, a)); //static_assert(test(std::ranges::inplace_merge, a, a+5)); //static_assert(test(std::ranges::is_heap, a)); //static_assert(test(std::ranges::is_heap_until, a)); -//static_assert(test(std::ranges::is_partitioned, a, odd)); +static_assert(test(std::ranges::is_partitioned, a, odd)); //static_assert(test(std::ranges::is_permutation, a, a)); static_assert(test(std::ranges::is_sorted, a)); static_assert(test(std::ranges::is_sorted_until, a)); //static_assert(test(std::ranges::lexicographical_compare, a, a)); //static_assert(test(std::ranges::lower_bound, a, 42)); //static_assert(test(std::ranges::make_heap, a)); -//static_assert(test(std::ranges::max, a)); +static_assert(test(std::ranges::max, a)); static_assert(test(std::ranges::max_element, a)); //static_assert(test(std::ranges::merge, a, a, a)); static_assert(test(std::ranges::min, a)); @@ -143,7 +143,7 @@ static_assert(test(std::ranges::reverse, a)); //static_assert(test(std::ranges::stable_sort, a)); //static_assert(test(std::ranges::starts_with, a, a)); 
static_assert(test(std::ranges::swap_ranges, a, a)); -//static_assert(test(std::ranges::transform, a, a, triple)); +static_assert(test(std::ranges::transform, a, a, triple)); //static_assert(test(std::ranges::unique, a)); //static_assert(test(std::ranges::unique_copy, a, a)); //static_assert(test(std::ranges::upper_bound, a, 42)); From e37b287998a72b028fbcf623f06685ed2b6b48e2 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:54:25 +0200 Subject: [PATCH 689/908] [analyzer][NFC] Use idiomatic classof instead of isKind - Rename `isKind()` to `classof()` to follow the llvm style RTTI. - Take SVal by-value instead of reference. - Mark `classof` public. Reviewed By: martong, xazax.hun Differential Revision: https://reviews.llvm.org/D125706 --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 194 +++++++----------- 1 file changed, 74 insertions(+), 120 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index f153e97d2d4417..16d7d54bf81586 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -100,7 +100,7 @@ class SVal { /// the desired type. template T castAs() const { - assert(T::isKind(*this)); + assert(T::classof(*this)); return *static_cast(this); } @@ -108,7 +108,7 @@ class SVal { /// not of the desired type. 
template Optional getAs() const { - if (!T::isKind(*this)) + if (!T::classof(*this)) return None; return *static_cast(this); } @@ -124,13 +124,11 @@ class SVal { ID.AddPointer(Data); } - bool operator==(const SVal &R) const { + bool operator==(SVal R) const { return getRawKind() == R.getRawKind() && Data == R.Data; } - bool operator!=(const SVal &R) const { - return !(*this == R); - } + bool operator!=(SVal R) const { return !(*this == R); } bool isUnknown() const { return getRawKind() == UnknownValKind; @@ -221,12 +219,10 @@ class UndefinedVal : public SVal { public: UndefinedVal() : SVal(UndefinedValKind) {} + static bool classof(SVal V) { return V.getBaseKind() == UndefinedValKind; } + private: friend class SVal; - - static bool isKind(const SVal& V) { - return V.getBaseKind() == UndefinedValKind; - } }; class DefinedOrUnknownSVal : public SVal { @@ -236,6 +232,8 @@ class DefinedOrUnknownSVal : public SVal { bool isUndef() const = delete; bool isValid() const = delete; + static bool classof(SVal V) { return !V.isUndef(); } + protected: DefinedOrUnknownSVal() = default; explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind) @@ -244,22 +242,16 @@ class DefinedOrUnknownSVal : public SVal { private: friend class SVal; - - static bool isKind(const SVal& V) { - return !V.isUndef(); - } }; class UnknownVal : public DefinedOrUnknownSVal { public: explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {} + static bool classof(SVal V) { return V.getBaseKind() == UnknownValKind; } + private: friend class SVal; - - static bool isKind(const SVal &V) { - return V.getBaseKind() == UnknownValKind; - } }; class DefinedSVal : public DefinedOrUnknownSVal { @@ -270,6 +262,8 @@ class DefinedSVal : public DefinedOrUnknownSVal { bool isUnknownOrUndef() const = delete; bool isValid() const = delete; + static bool classof(SVal V) { return !V.isUnknownOrUndef(); } + protected: DefinedSVal() = default; explicit DefinedSVal(const void *d, bool isLoc, unsigned 
ValKind) @@ -277,25 +271,17 @@ class DefinedSVal : public DefinedOrUnknownSVal { private: friend class SVal; - - static bool isKind(const SVal& V) { - return !V.isUnknownOrUndef(); - } }; /// Represents an SVal that is guaranteed to not be UnknownVal. class KnownSVal : public SVal { friend class SVal; - KnownSVal() = default; - static bool isKind(const SVal &V) { - return !V.isUnknown(); - } - public: KnownSVal(const DefinedSVal &V) : SVal(V) {} KnownSVal(const UndefinedVal &V) : SVal(V) {} + static bool classof(SVal V) { return !V.isUnknown(); } }; class NonLoc : public DefinedSVal { @@ -312,12 +298,10 @@ class NonLoc : public DefinedSVal { T->isAnyComplexType() || T->isVectorType(); } + static bool classof(SVal V) { return V.getBaseKind() == NonLocKind; } + private: friend class SVal; - - static bool isKind(const SVal& V) { - return V.getBaseKind() == NonLocKind; - } }; class Loc : public DefinedSVal { @@ -334,12 +318,10 @@ class Loc : public DefinedSVal { T->isReferenceType() || T->isNullPtrType(); } + static bool classof(SVal V) { return V.getBaseKind() == LocKind; } + private: friend class SVal; - - static bool isKind(const SVal& V) { - return V.getBaseKind() == LocKind; - } }; //==------------------------------------------------------------------------==// @@ -365,17 +347,14 @@ class SymbolVal : public NonLoc { return !isa(getSymbol()); } -private: - friend class SVal; - - static bool isKind(const SVal& V) { - return V.getBaseKind() == NonLocKind && - V.getSubKind() == SymbolValKind; + static bool classof(SVal V) { + return V.getBaseKind() == NonLocKind && V.getSubKind() == SymbolValKind; } - static bool isKind(const NonLoc& V) { - return V.getSubKind() == SymbolValKind; - } + static bool classof(NonLoc V) { return V.getSubKind() == SymbolValKind; } + +private: + friend class SVal; }; /// Value representing integer constant. 
@@ -392,19 +371,15 @@ class ConcreteInt : public NonLoc { ConcreteInt evalMinus(SValBuilder &svalBuilder) const; + static bool classof(SVal V) { + return V.getBaseKind() == NonLocKind && V.getSubKind() == ConcreteIntKind; + } + + static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; } + private: friend class SVal; - ConcreteInt() = default; - - static bool isKind(const SVal& V) { - return V.getBaseKind() == NonLocKind && - V.getSubKind() == ConcreteIntKind; - } - - static bool isKind(const NonLoc& V) { - return V.getSubKind() == ConcreteIntKind; - } }; class LocAsInteger : public NonLoc { @@ -432,19 +407,15 @@ class LocAsInteger : public NonLoc { return D->second; } + static bool classof(SVal V) { + return V.getBaseKind() == NonLocKind && V.getSubKind() == LocAsIntegerKind; + } + + static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; } + private: friend class SVal; - LocAsInteger() = default; - - static bool isKind(const SVal& V) { - return V.getBaseKind() == NonLocKind && - V.getSubKind() == LocAsIntegerKind; - } - - static bool isKind(const NonLoc& V) { - return V.getSubKind() == LocAsIntegerKind; - } }; class CompoundVal : public NonLoc { @@ -462,18 +433,15 @@ class CompoundVal : public NonLoc { iterator begin() const; iterator end() const; -private: - friend class SVal; - - CompoundVal() = default; - - static bool isKind(const SVal& V) { + static bool classof(SVal V) { return V.getBaseKind() == NonLocKind && V.getSubKind() == CompoundValKind; } - static bool isKind(const NonLoc& V) { - return V.getSubKind() == CompoundValKind; - } + static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; } + +private: + friend class SVal; + CompoundVal() = default; }; class LazyCompoundVal : public NonLoc { @@ -490,19 +458,18 @@ class LazyCompoundVal : public NonLoc { const void *getStore() const; const TypedValueRegion *getRegion() const; -private: - friend class SVal; - - LazyCompoundVal() = default; - - static bool 
isKind(const SVal& V) { + static bool classof(SVal V) { return V.getBaseKind() == NonLocKind && V.getSubKind() == LazyCompoundValKind; } - static bool isKind(const NonLoc& V) { + static bool classof(NonLoc V) { return V.getSubKind() == LazyCompoundValKind; } + +private: + friend class SVal; + LazyCompoundVal() = default; }; /// Value representing pointer-to-member. @@ -540,21 +507,21 @@ class PointerToMember : public NonLoc { iterator begin() const; iterator end() const; -private: - friend class SVal; - - PointerToMember() = default; - explicit PointerToMember(const PTMDataType D) - : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {} - - static bool isKind(const SVal& V) { + static bool classof(SVal V) { return V.getBaseKind() == NonLocKind && V.getSubKind() == PointerToMemberKind; } - static bool isKind(const NonLoc& V) { + static bool classof(NonLoc V) { return V.getSubKind() == PointerToMemberKind; } + +private: + friend class SVal; + + PointerToMember() = default; + explicit PointerToMember(const PTMDataType D) + : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {} }; } // namespace nonloc @@ -575,18 +542,15 @@ class GotoLabel : public Loc { return static_cast(Data); } -private: - friend class SVal; - - GotoLabel() = default; - - static bool isKind(const SVal& V) { + static bool classof(SVal V) { return V.getBaseKind() == LocKind && V.getSubKind() == GotoLabelKind; } - static bool isKind(const Loc& V) { - return V.getSubKind() == GotoLabelKind; - } + static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; } + +private: + friend class SVal; + GotoLabel() = default; }; class MemRegionVal : public Loc { @@ -616,19 +580,15 @@ class MemRegionVal : public Loc { return getRegion() != R.getRegion(); } + static bool classof(SVal V) { + return V.getBaseKind() == LocKind && V.getSubKind() == MemRegionValKind; + } + + static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; } + private: friend class SVal; - MemRegionVal() = default; - - static 
bool isKind(const SVal& V) { - return V.getBaseKind() == LocKind && - V.getSubKind() == MemRegionValKind; - } - - static bool isKind(const Loc& V) { - return V.getSubKind() == MemRegionValKind; - } }; class ConcreteInt : public Loc { @@ -639,25 +599,19 @@ class ConcreteInt : public Loc { return *static_cast(Data); } + static bool classof(SVal V) { + return V.getBaseKind() == LocKind && V.getSubKind() == ConcreteIntKind; + } + + static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; } + private: friend class SVal; - ConcreteInt() = default; - - static bool isKind(const SVal& V) { - return V.getBaseKind() == LocKind && - V.getSubKind() == ConcreteIntKind; - } - - static bool isKind(const Loc& V) { - return V.getSubKind() == ConcreteIntKind; - } }; } // namespace loc - } // namespace ento - } // namespace clang #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H From d8def22f7be120ab159220a9e6d450b26bf50bca Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:54:25 +0200 Subject: [PATCH 690/908] [analyzer][NFC] Remove unused friend SVal declarations Reviewed By: martong, xazax.hun Differential Revision: https://reviews.llvm.org/D125707 --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 16d7d54bf81586..7086b889e6d1ee 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -218,11 +218,7 @@ inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) { class UndefinedVal : public SVal { public: UndefinedVal() : SVal(UndefinedValKind) {} - static bool classof(SVal V) { return V.getBaseKind() == UndefinedValKind; } - -private: - friend class SVal; }; class DefinedOrUnknownSVal : public SVal { @@ -239,9 +235,6 @@ class 
DefinedOrUnknownSVal : public SVal { explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind) : SVal(d, isLoc, ValKind) {} explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {} - -private: - friend class SVal; }; class UnknownVal : public DefinedOrUnknownSVal { @@ -249,9 +242,6 @@ class UnknownVal : public DefinedOrUnknownSVal { explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {} static bool classof(SVal V) { return V.getBaseKind() == UnknownValKind; } - -private: - friend class SVal; }; class DefinedSVal : public DefinedOrUnknownSVal { @@ -268,14 +258,10 @@ class DefinedSVal : public DefinedOrUnknownSVal { DefinedSVal() = default; explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind) : DefinedOrUnknownSVal(d, isLoc, ValKind) {} - -private: - friend class SVal; }; /// Represents an SVal that is guaranteed to not be UnknownVal. class KnownSVal : public SVal { - friend class SVal; KnownSVal() = default; public: @@ -299,9 +285,6 @@ class NonLoc : public DefinedSVal { } static bool classof(SVal V) { return V.getBaseKind() == NonLocKind; } - -private: - friend class SVal; }; class Loc : public DefinedSVal { @@ -319,9 +302,6 @@ class Loc : public DefinedSVal { } static bool classof(SVal V) { return V.getBaseKind() == LocKind; } - -private: - friend class SVal; }; //==------------------------------------------------------------------------==// @@ -352,9 +332,6 @@ class SymbolVal : public NonLoc { } static bool classof(NonLoc V) { return V.getSubKind() == SymbolValKind; } - -private: - friend class SVal; }; /// Value representing integer constant. 
@@ -378,7 +355,6 @@ class ConcreteInt : public NonLoc { static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; } private: - friend class SVal; ConcreteInt() = default; }; @@ -414,7 +390,6 @@ class LocAsInteger : public NonLoc { static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; } private: - friend class SVal; LocAsInteger() = default; }; @@ -440,7 +415,6 @@ class CompoundVal : public NonLoc { static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; } private: - friend class SVal; CompoundVal() = default; }; @@ -468,7 +442,6 @@ class LazyCompoundVal : public NonLoc { } private: - friend class SVal; LazyCompoundVal() = default; }; @@ -517,8 +490,6 @@ class PointerToMember : public NonLoc { } private: - friend class SVal; - PointerToMember() = default; explicit PointerToMember(const PTMDataType D) : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {} @@ -549,7 +520,6 @@ class GotoLabel : public Loc { static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; } private: - friend class SVal; GotoLabel() = default; }; @@ -587,7 +557,6 @@ class MemRegionVal : public Loc { static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; } private: - friend class SVal; MemRegionVal() = default; }; @@ -606,7 +575,6 @@ class ConcreteInt : public Loc { static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; } private: - friend class SVal; ConcreteInt() = default; }; From 7727d2abece6855ec6486ca0600998bfa35882a1 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 10:54:25 +0200 Subject: [PATCH 691/908] [analyzer][NFC] Remove unused default SVal constructors Reviewed By: martong, xazax.hun Differential Revision: https://reviews.llvm.org/D125708 --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h 
b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 7086b889e6d1ee..5865ccea76c0af 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -231,7 +231,6 @@ class DefinedOrUnknownSVal : public SVal { static bool classof(SVal V) { return !V.isUndef(); } protected: - DefinedOrUnknownSVal() = default; explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind) : SVal(d, isLoc, ValKind) {} explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {} @@ -255,15 +254,12 @@ class DefinedSVal : public DefinedOrUnknownSVal { static bool classof(SVal V) { return !V.isUnknownOrUndef(); } protected: - DefinedSVal() = default; explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind) : DefinedOrUnknownSVal(d, isLoc, ValKind) {} }; /// Represents an SVal that is guaranteed to not be UnknownVal. class KnownSVal : public SVal { - KnownSVal() = default; - public: KnownSVal(const DefinedSVal &V) : SVal(V) {} KnownSVal(const UndefinedVal &V) : SVal(V) {} @@ -272,7 +268,6 @@ class KnownSVal : public SVal { class NonLoc : public DefinedSVal { protected: - NonLoc() = default; explicit NonLoc(unsigned SubKind, const void *d) : DefinedSVal(d, false, SubKind) {} @@ -289,7 +284,6 @@ class NonLoc : public DefinedSVal { class Loc : public DefinedSVal { protected: - Loc() = default; explicit Loc(unsigned SubKind, const void *D) : DefinedSVal(const_cast(D), true, SubKind) {} @@ -353,9 +347,6 @@ class ConcreteInt : public NonLoc { } static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; } - -private: - ConcreteInt() = default; }; class LocAsInteger : public NonLoc { @@ -388,9 +379,6 @@ class LocAsInteger : public NonLoc { } static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; } - -private: - LocAsInteger() = default; }; class CompoundVal : public NonLoc { @@ -413,9 +401,6 @@ class CompoundVal : public NonLoc { } 
static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; } - -private: - CompoundVal() = default; }; class LazyCompoundVal : public NonLoc { @@ -440,9 +425,6 @@ class LazyCompoundVal : public NonLoc { static bool classof(NonLoc V) { return V.getSubKind() == LazyCompoundValKind; } - -private: - LazyCompoundVal() = default; }; /// Value representing pointer-to-member. @@ -490,7 +472,6 @@ class PointerToMember : public NonLoc { } private: - PointerToMember() = default; explicit PointerToMember(const PTMDataType D) : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {} }; @@ -518,9 +499,6 @@ class GotoLabel : public Loc { } static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; } - -private: - GotoLabel() = default; }; class MemRegionVal : public Loc { @@ -555,9 +533,6 @@ class MemRegionVal : public Loc { } static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; } - -private: - MemRegionVal() = default; }; class ConcreteInt : public Loc { @@ -573,9 +548,6 @@ class ConcreteInt : public Loc { } static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; } - -private: - ConcreteInt() = default; }; } // namespace loc From 6ab69efe61f2db50d72416a59fafa427249cb91b Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Fri, 27 May 2022 11:02:11 +0200 Subject: [PATCH 692/908] [analyzer][NFC] Rename GREngine->CoreEngine, GRExprEngine->ExprEngine in comments and txt files fixes #115 --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 2 +- clang/lib/StaticAnalyzer/README.txt | 6 +++--- clang/test/Analysis/PR3991.m | 2 +- clang/test/Analysis/misc-ps-eager-assume.m | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index a8bffd3c5e66f7..55920c1a7566ef 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -7,7 +7,7 @@ 
//===----------------------------------------------------------------------===// // // This file defines a meta-engine for path-sensitive dataflow analysis that -// is built on GREngine, but provides the boilerplate to execute transfer +// is built on CoreEngine, but provides the boilerplate to execute transfer // functions and build the ExplodedGraph at the expression level. // //===----------------------------------------------------------------------===// diff --git a/clang/lib/StaticAnalyzer/README.txt b/clang/lib/StaticAnalyzer/README.txt index 79a16ec7673d23..75f20315a7ae81 100644 --- a/clang/lib/StaticAnalyzer/README.txt +++ b/clang/lib/StaticAnalyzer/README.txt @@ -5,7 +5,7 @@ = Library Structure = The analyzer library has two layers: a (low-level) static analysis -engine (GRExprEngine.cpp and friends), and some static checkers +engine (ExprEngine.cpp and friends), and some static checkers (*Checker.cpp). The latter are built on top of the former via the Checker and CheckerVisitor interfaces (Checker.h and CheckerVisitor.h). The Checker interface is designed to be minimal @@ -58,7 +58,7 @@ ImmutableMaps) which share data between instances. Finally, individual Checkers work by also manipulating the analysis state. The analyzer engine talks to them via a visitor interface. -For example, the PreVisitCallExpr() method is called by GRExprEngine +For example, the PreVisitCallExpr() method is called by ExprEngine to tell the Checker that we are about to analyze a CallExpr, and the checker is asked to check for any preconditions that might not be satisfied. The checker can do nothing, or it can generate a new @@ -92,7 +92,7 @@ method call. = Working on the Analyzer = If you are interested in bringing up support for C++ expressions, the -best place to look is the visitation logic in GRExprEngine, which +best place to look is the visitation logic in ExprEngine, which handles the simulation of individual expressions. 
There are plenty of examples there of how other expressions are handled. diff --git a/clang/test/Analysis/PR3991.m b/clang/test/Analysis/PR3991.m index ffdb7b4a6e44db..5d76443a0fbbad 100644 --- a/clang/test/Analysis/PR3991.m +++ b/clang/test/Analysis/PR3991.m @@ -50,7 +50,7 @@ @implementation IHGoogleDocsAdapter - (id)initWithUsername:(NSString *)inUser // Actual test case: // // The analyzer currently doesn't reason about ObjCKVCRefExpr. Have both -// GRExprEngine::Visit and GRExprEngine::VisitLValue have such expressions +// ExprEngine::Visit and ExprEngine::VisitLValue have such expressions // evaluate to UnknownVal. //===----------------------------------------------------------------------===// diff --git a/clang/test/Analysis/misc-ps-eager-assume.m b/clang/test/Analysis/misc-ps-eager-assume.m index 112dd3672d937f..6c24dfa5708c4d 100644 --- a/clang/test/Analysis/misc-ps-eager-assume.m +++ b/clang/test/Analysis/misc-ps-eager-assume.m @@ -49,7 +49,7 @@ void handle_assign_of_condition(int x) { // a symbolic value for this variable, but in the branch condition it is // promoted to 'int'. Currently the analyzer doesn't reason well about // promotions of symbolic values, so this test case tests the logic in -// 'recoverCastedSymbol()' (GRExprEngine.cpp) to test that we recover +// 'recoverCastedSymbol()' (ExprEngine.cpp) to test that we recover // path-sensitivity and use the symbol for 'needsAnArray' in the branch // condition. // @@ -128,7 +128,7 @@ void rdar7342806(void) { // This test case depends on using -analyzer-config eagerly-assume=true and // -analyzer-store=region. 
The 'eagerly-assume=true' causes the path // to bifurcate when evaluating the function call argument, and a state -// caching bug in GRExprEngine::CheckerVisit (and friends) caused the store +// caching bug in ExprEngine::CheckerVisit (and friends) caused the store // to 'p' to not be evaluated along one path, but then an autotransition caused // the path to keep on propagating with 'p' still set to an undefined value. // We would then get a bogus report of returning uninitialized memory. From f13050eca3564ebccc04ea39eb34a312f063c4e9 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 11:05:50 +0200 Subject: [PATCH 693/908] [analyzer][NFCi] Annotate major nonnull returning functions This patch annotates the most important analyzer function APIs. Also adds a couple of assertions for uncovering any potential issues earlier in the constructor; in those cases, the member functions were already dereferencing the members unconditionally anyway. Measurements showed no performance impact, nor crashes. 
Reviewed By: martong Differential Revision: https://reviews.llvm.org/D126198 --- .../clang/Analysis/AnalysisDeclContext.h | 6 ++- .../Core/PathSensitive/BasicValueFactory.h | 10 +++- .../Core/PathSensitive/CheckerContext.h | 1 + .../Core/PathSensitive/ConstraintManager.h | 1 + .../Core/PathSensitive/DynamicType.h | 1 + .../Core/PathSensitive/MemRegion.h | 52 +++++++++++++++--- .../Core/PathSensitive/ProgramState.h | 1 + .../StaticAnalyzer/Core/PathSensitive/SVals.h | 14 ++++- .../Core/PathSensitive/SymExpr.h | 1 + .../Core/PathSensitive/SymbolManager.h | 54 ++++++++++++------- clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 4 +- 11 files changed, 114 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/Analysis/AnalysisDeclContext.h b/clang/include/clang/Analysis/AnalysisDeclContext.h index 102970a1d55e21..ce60ad56af4ed5 100644 --- a/clang/include/clang/Analysis/AnalysisDeclContext.h +++ b/clang/include/clang/Analysis/AnalysisDeclContext.h @@ -229,7 +229,9 @@ class LocationContext : public llvm::FoldingSetNode { protected: LocationContext(ContextKind k, AnalysisDeclContext *ctx, const LocationContext *parent, int64_t ID) - : Kind(k), Ctx(ctx), Parent(parent), ID(ID) {} + : Kind(k), Ctx(ctx), Parent(parent), ID(ID) { + assert(ctx); + } public: virtual ~LocationContext(); @@ -238,8 +240,10 @@ class LocationContext : public llvm::FoldingSetNode { int64_t getID() const { return ID; } + LLVM_ATTRIBUTE_RETURNS_NONNULL AnalysisDeclContext *getAnalysisDeclContext() const { return Ctx; } + /// It might return null. 
const LocationContext *getParent() const { return Parent; } bool isParentOf(const LocationContext *LC) const; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h index e7d64313ea42c7..457247bcdf5537 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h @@ -66,10 +66,14 @@ class LazyCompoundValData : public llvm::FoldingSetNode { public: LazyCompoundValData(const StoreRef &st, const TypedValueRegion *r) : store(st), region(r) { + assert(r); assert(NonLoc::isCompoundType(r->getValueType())); } + /// It might return null. const void *getStore() const { return store.getStore(); } + + LLVM_ATTRIBUTE_RETURNS_NONNULL const TypedValueRegion *getRegion() const { return region; } static void Profile(llvm::FoldingSetNodeID& ID, @@ -86,7 +90,9 @@ class PointerToMemberData : public llvm::FoldingSetNode { public: PointerToMemberData(const NamedDecl *D, llvm::ImmutableList L) - : D(D), L(L) {} + : D(D), L(L) { + assert(D); + } using iterator = llvm::ImmutableList::iterator; @@ -97,6 +103,8 @@ class PointerToMemberData : public llvm::FoldingSetNode { llvm::ImmutableList L); void Profile(llvm::FoldingSetNodeID &ID) { Profile(ID, D, L); } + + LLVM_ATTRIBUTE_RETURNS_NONNULL const NamedDecl *getDeclaratorDecl() const { return D; } llvm::ImmutableList getCXXBaseList() const { diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h index 94fa0d9ecdc31d..960131cfc396a8 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h @@ -256,6 +256,7 @@ class CheckerContext { /// @param IsPrunable Whether the note is prunable. 
It allows BugReporter /// to omit the note from the report if it would make the displayed /// bug path significantly shorter. + LLVM_ATTRIBUTE_RETURNS_NONNULL const NoteTag *getNoteTag(NoteTag::Callback &&Cb, bool IsPrunable = false) { return Eng.getDataTags().make(std::move(Cb), IsPrunable); } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h index 755371f93b9c04..d1e06297252b31 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h @@ -112,6 +112,7 @@ class ConstraintManager { /// /// Note that a ConstraintManager is not obligated to return a concretized /// value for a symbol, even if it is perfectly constrained. + /// It might return null. virtual const llvm::APSInt* getSymVal(ProgramStateRef state, SymbolRef sym) const { return nullptr; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h index ffe1fe846be1c1..e85029b327415e 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h @@ -32,6 +32,7 @@ namespace ento { DynamicTypeInfo getDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR); /// Get raw dynamic type information for the region \p MR. +/// It might return null. 
const DynamicTypeInfo *getRawDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index 0db6d92c631318..a61dd0cd9a3792 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -74,6 +74,7 @@ class RegionOffset { RegionOffset() = default; RegionOffset(const MemRegion *r, int64_t off) : R(r), Offset(off) {} + /// It might return null. const MemRegion *getRegion() const { return R; } bool hasSymbolicOffset() const { return Offset == Symbolic; } @@ -114,22 +115,25 @@ class MemRegion : public llvm::FoldingSetNode { virtual MemRegionManager &getMemRegionManager() const = 0; - const MemSpaceRegion *getMemorySpace() const; + LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion *getMemorySpace() const; - const MemRegion *getBaseRegion() const; + LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion *getBaseRegion() const; /// Recursively retrieve the region of the most derived class instance of /// regions of C++ base class instances. + LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion *getMostDerivedObjectRegion() const; /// Check if the region is a subregion of the given region. /// Each region is a subregion of itself. virtual bool isSubRegionOf(const MemRegion *R) const; + LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion *StripCasts(bool StripBaseAndDerivedCasts = true) const; /// If this is a symbolic region, returns the region. Otherwise, /// goes up the base chain looking for the first symbolic base region. + /// It might return null. 
const SymbolicRegion *getSymbolicBase() const; bool hasGlobalsOrParametersStorage() const; @@ -169,7 +173,8 @@ class MemRegion : public llvm::FoldingSetNode { Kind getKind() const { return kind; } template const RegionTy* getAs() const; - template const RegionTy* castAs() const; + template + LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *castAs() const; virtual bool isBoundable() const { return false; } @@ -268,6 +273,7 @@ class StaticGlobalSpaceRegion : public GlobalsSpaceRegion { void dumpToStream(raw_ostream &os) const override; + LLVM_ATTRIBUTE_RETURNS_NONNULL const CodeTextRegion *getCodeRegion() const { return CR; } static bool classof(const MemRegion *R) { @@ -391,6 +397,7 @@ class StackSpaceRegion : public MemSpaceRegion { } public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const StackFrameContext *getStackFrame() const { return SFC; } void Profile(llvm::FoldingSetNodeID &ID) const override; @@ -444,6 +451,7 @@ class SubRegion : public MemRegion { } public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion* getSuperRegion() const { return superRegion; } @@ -481,6 +489,7 @@ class AllocaRegion : public SubRegion { unsigned Cnt, const MemRegion *superRegion); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const Expr *getExpr() const { return Ex; } bool isBoundable() const override { return true; } @@ -639,10 +648,12 @@ class BlockCodeRegion : public CodeTextRegion { return locTy; } + LLVM_ATTRIBUTE_RETURNS_NONNULL const BlockDecl *getDecl() const { return BD; } + LLVM_ATTRIBUTE_RETURNS_NONNULL AnalysisDeclContext *getAnalysisDeclContext() const { return AC; } void dumpToStream(raw_ostream &os) const override; @@ -674,6 +685,7 @@ class BlockDataRegion : public TypedRegion { : TypedRegion(sreg, BlockDataRegionKind), BC(bc), LC(lc), BlockCount(count) { assert(bc); + assert(bc->getDecl()); assert(lc); assert(isa(sreg) || isa(sreg) || @@ -685,8 +697,10 @@ class BlockDataRegion : public TypedRegion { const MemRegion *); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const BlockCodeRegion 
*getCodeRegion() const { return BC; } + LLVM_ATTRIBUTE_RETURNS_NONNULL const BlockDecl *getDecl() const { return BC->getDecl(); } QualType getLocationType() const override { return BC->getLocationType(); } @@ -700,10 +714,12 @@ class BlockDataRegion : public TypedRegion { const MemRegion * const *originalR) : R(r), OriginalR(originalR) {} + LLVM_ATTRIBUTE_RETURNS_NONNULL const VarRegion *getCapturedRegion() const { return cast(*R); } + LLVM_ATTRIBUTE_RETURNS_NONNULL const VarRegion *getOriginalRegion() const { return cast(*OriginalR); } @@ -726,7 +742,7 @@ class BlockDataRegion : public TypedRegion { }; /// Return the original region for a captured region, if - /// one exists. + /// one exists. It might return null. const VarRegion *getOriginalRegion(const VarRegion *VR) const; referenced_vars_iterator referenced_vars_begin() const; @@ -769,6 +785,7 @@ class SymbolicRegion : public SubRegion { } public: + /// It might return null. SymbolRef getSymbol() const { return sym; } bool isBoundable() const override { return true; } @@ -802,6 +819,7 @@ class StringRegion : public TypedValueRegion { const MemRegion *superRegion); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const StringLiteral *getStringLiteral() const { return Str; } QualType getValueType() const override { return Str->getType(); } @@ -836,6 +854,7 @@ class ObjCStringRegion : public TypedValueRegion { const MemRegion *superRegion); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const ObjCStringLiteral *getObjCStringLiteral() const { return Str; } QualType getValueType() const override { return Str->getType(); } @@ -882,6 +901,7 @@ class CompoundLiteralRegion : public TypedValueRegion { void dumpToStream(raw_ostream &os) const override; + LLVM_ATTRIBUTE_RETURNS_NONNULL const CompoundLiteralExpr *getLiteralExpr() const { return CL; } static bool classof(const MemRegion* R) { @@ -896,6 +916,7 @@ class DeclRegion : public TypedValueRegion { } public: + // TODO what does this return? 
virtual const ValueDecl *getDecl() const = 0; static bool classof(const MemRegion* R) { @@ -919,8 +940,10 @@ class VarRegion : public DeclRegion { } public: + // TODO what does this return? const VarDecl *getDecl() const override = 0; + /// It might return null. const StackFrameContext *getStackFrame() const; QualType getValueType() const override { @@ -948,6 +971,7 @@ class NonParamVarRegion : public VarRegion { // which, unlike everything else on this list, are not memory spaces. assert(isa(sReg) || isa(sReg) || isa(sReg) || isa(sReg)); + assert(vd); } static void ProfileRegion(llvm::FoldingSetNodeID &ID, const VarDecl *VD, @@ -956,6 +980,7 @@ class NonParamVarRegion : public VarRegion { public: void Profile(llvm::FoldingSetNodeID &ID) const override; + LLVM_ATTRIBUTE_RETURNS_NONNULL const VarDecl *getDecl() const override { return VD; } QualType getValueType() const override { @@ -993,12 +1018,14 @@ class ParamVarRegion : public VarRegion { ParamVarRegion(const Expr *OE, unsigned Idx, const MemRegion *SReg) : VarRegion(SReg, ParamVarRegionKind), OriginExpr(OE), Index(Idx) { assert(!cast(SReg)->getStackFrame()->inTopFrame()); + assert(OriginExpr); } static void ProfileRegion(llvm::FoldingSetNodeID &ID, const Expr *OE, unsigned Idx, const MemRegion *SReg); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const Expr *getOriginExpr() const { return OriginExpr; } unsigned getIndex() const { return Index; } @@ -1007,6 +1034,8 @@ class ParamVarRegion : public VarRegion { void dumpToStream(raw_ostream &os) const override; QualType getValueType() const override; + + /// TODO: What does this return? 
const ParmVarDecl *getDecl() const override; bool canPrintPrettyAsExpr() const override; @@ -1058,7 +1087,9 @@ class FieldRegion : public DeclRegion { const FieldDecl *FD; FieldRegion(const FieldDecl *fd, const SubRegion *sReg) - : DeclRegion(sReg, FieldRegionKind), FD(fd) {} + : DeclRegion(sReg, FieldRegionKind), FD(fd) { + assert(FD); + } static void ProfileRegion(llvm::FoldingSetNodeID &ID, const FieldDecl *FD, const MemRegion* superRegion) { @@ -1068,6 +1099,7 @@ class FieldRegion : public DeclRegion { } public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const FieldDecl *getDecl() const override { return FD; } void Profile(llvm::FoldingSetNodeID &ID) const override; @@ -1100,6 +1132,7 @@ class ObjCIvarRegion : public DeclRegion { const MemRegion* superRegion); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const ObjCIvarDecl *getDecl() const override; void Profile(llvm::FoldingSetNodeID& ID) const override; @@ -1132,6 +1165,8 @@ class RegionRawOffset { public: // FIXME: Eventually support symbolic offsets. CharUnits getOffset() const { return Offset; } + + // It might return null. 
const MemRegion *getRegion() const { return Region; } void dumpToStream(raw_ostream &os) const; @@ -1194,6 +1229,7 @@ class CXXTempObjectRegion : public TypedValueRegion { Expr const *E, const MemRegion *sReg); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const Expr *getExpr() const { return Ex; } QualType getValueType() const override { return Ex->getType(); } @@ -1224,6 +1260,7 @@ class CXXBaseObjectRegion : public TypedValueRegion { bool IsVirtual, const MemRegion *SReg); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const CXXRecordDecl *getDecl() const { return Data.getPointer(); } bool isVirtual() const { return Data.getInt(); } @@ -1266,6 +1303,7 @@ class CXXDerivedObjectRegion : public TypedValueRegion { const MemRegion *SReg); public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const CXXRecordDecl *getDecl() const { return DerivedD; } QualType getValueType() const override; @@ -1291,8 +1329,8 @@ const RegionTy* MemRegion::getAs() const { return nullptr; } -template -const RegionTy* MemRegion::castAs() const { +template +LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *MemRegion::castAs() const { return cast(this); } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h index f36ba70babc71f..78abd0a0e42d73 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -271,6 +271,7 @@ class ProgramState : public llvm::FoldingSetNode { ConditionTruthVal areEqual(SVal Lhs, SVal Rhs) const; /// Utility method for getting regions. 
+ LLVM_ATTRIBUTE_RETURNS_NONNULL const VarRegion* getRegion(const VarDecl *D, const LocationContext *LC) const; //==---------------------------------------------------------------------==// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 5865ccea76c0af..ec7f05333f7b2f 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -313,6 +313,7 @@ class SymbolVal : public NonLoc { assert(!Loc::isLocType(sym->getType())); } + LLVM_ATTRIBUTE_RETURNS_NONNULL SymbolRef getSymbol() const { return (const SymExpr *) Data; } @@ -384,9 +385,12 @@ class LocAsInteger : public NonLoc { class CompoundVal : public NonLoc { friend class ento::SValBuilder; - explicit CompoundVal(const CompoundValData* D) : NonLoc(CompoundValKind, D) {} + explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) { + assert(D); + } public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const CompoundValData* getValue() const { return static_cast(Data); } @@ -407,14 +411,20 @@ class LazyCompoundVal : public NonLoc { friend class ento::SValBuilder; explicit LazyCompoundVal(const LazyCompoundValData *D) - : NonLoc(LazyCompoundValKind, D) {} + : NonLoc(LazyCompoundValKind, D) { + assert(D); + } public: + LLVM_ATTRIBUTE_RETURNS_NONNULL const LazyCompoundValData *getCVData() const { return static_cast(Data); } + LLVM_ATTRIBUTE_RETURNS_NONNULL const void *getStore() const; + + LLVM_ATTRIBUTE_RETURNS_NONNULL const TypedValueRegion *getRegion() const; static bool classof(SVal V) { diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h index 2f4ac6ba5f975d..abd05fe34f54a8 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h @@ -98,6 +98,7 @@ class 
SymExpr : public llvm::FoldingSetNode { /// the beginning of the analysis, and SymbolDerived which denotes the value /// of a certain memory region after its super region (a memory space or /// a larger record region) is default-bound with a certain symbol. + /// It might return null. virtual const MemRegion *getOriginRegion() const { return nullptr; } }; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 3ba3aa5c01566e..e46f97b7fafcdf 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -48,6 +48,7 @@ class SymbolRegionValue : public SymbolData { assert(isValidTypeForSymbol(r->getValueType())); } + LLVM_ATTRIBUTE_RETURNS_NONNULL const TypedValueRegion* getRegion() const { return R; } static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { @@ -95,8 +96,10 @@ class SymbolConjured : public SymbolData { assert(isValidTypeForSymbol(t)); } + /// It might return null. const Stmt *getStmt() const { return S; } unsigned getCount() const { return Count; } + /// It might return null. 
const void *getTag() const { return SymbolTag; } QualType getType() const override; @@ -140,7 +143,9 @@ class SymbolDerived : public SymbolData { assert(isValidTypeForSymbol(r->getValueType())); } + LLVM_ATTRIBUTE_RETURNS_NONNULL SymbolRef getParentSymbol() const { return parentSymbol; } + LLVM_ATTRIBUTE_RETURNS_NONNULL const TypedValueRegion *getRegion() const { return R; } QualType getType() const override; @@ -179,6 +184,7 @@ class SymbolExtent : public SymbolData { assert(r); } + LLVM_ATTRIBUTE_RETURNS_NONNULL const SubRegion *getRegion() const { return R; } QualType getType() const override; @@ -226,29 +232,37 @@ class SymbolMetadata : public SymbolData { assert(tag); } - const MemRegion *getRegion() const { return R; } - const Stmt *getStmt() const { return S; } - const LocationContext *getLocationContext() const { return LCtx; } - unsigned getCount() const { return Count; } - const void *getTag() const { return Tag; } + LLVM_ATTRIBUTE_RETURNS_NONNULL + const MemRegion *getRegion() const { return R; } - QualType getType() const override; + LLVM_ATTRIBUTE_RETURNS_NONNULL + const Stmt *getStmt() const { return S; } - StringRef getKindStr() const override; + LLVM_ATTRIBUTE_RETURNS_NONNULL + const LocationContext *getLocationContext() const { return LCtx; } - void dumpToStream(raw_ostream &os) const override; + unsigned getCount() const { return Count; } - static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, - const Stmt *S, QualType T, const LocationContext *LCtx, - unsigned Count, const void *Tag) { - profile.AddInteger((unsigned) SymbolMetadataKind); - profile.AddPointer(R); - profile.AddPointer(S); - profile.Add(T); - profile.AddPointer(LCtx); - profile.AddInteger(Count); - profile.AddPointer(Tag); - } + LLVM_ATTRIBUTE_RETURNS_NONNULL + const void *getTag() const { return Tag; } + + QualType getType() const override; + + StringRef getKindStr() const override; + + void dumpToStream(raw_ostream &os) const override; + + static void 
Profile(llvm::FoldingSetNodeID &profile, const MemRegion *R, + const Stmt *S, QualType T, const LocationContext *LCtx, + unsigned Count, const void *Tag) { + profile.AddInteger((unsigned)SymbolMetadataKind); + profile.AddPointer(R); + profile.AddPointer(S); + profile.Add(T); + profile.AddPointer(LCtx); + profile.AddInteger(Count); + profile.AddPointer(Tag); + } void Profile(llvm::FoldingSetNodeID& profile) override { Profile(profile, R, S, T, LCtx, Count, Tag); @@ -287,6 +301,7 @@ class SymbolCast : public SymExpr { QualType getType() const override { return ToTy; } + LLVM_ATTRIBUTE_RETURNS_NONNULL const SymExpr *getOperand() const { return Operand; } void dumpToStream(raw_ostream &os) const override; @@ -587,6 +602,7 @@ class SymbolReaper { SymbolManager &symmgr, StoreManager &storeMgr) : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {} + /// It might return null. const LocationContext *getLocationContext() const { return LCtx; } bool isLive(SymbolRef sym); diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index c969f2ded104f2..7c9babf894c7b4 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -162,7 +162,9 @@ const StackFrameContext *VarRegion::getStackFrame() const { } ObjCIvarRegion::ObjCIvarRegion(const ObjCIvarDecl *ivd, const SubRegion *sReg) - : DeclRegion(sReg, ObjCIvarRegionKind), IVD(ivd) {} + : DeclRegion(sReg, ObjCIvarRegionKind), IVD(ivd) { + assert(IVD); +} const ObjCIvarDecl *ObjCIvarRegion::getDecl() const { return IVD; } From 3988bd13988aad72ec979beb2361e8738584926b Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 11:15:23 +0200 Subject: [PATCH 694/908] [llvm][clang][bolt][NFC] Use llvm::less_first() when applicable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One could reuse this functor instead of rolling out your own version. 
There were a couple other cases where the code was similar, but not quite the same, such as it might have an assertion in the lambda or other constructs. Thus, I've not touched any of those, as it might change the behavior in some way. As per https://discourse.llvm.org/t/submitting-simple-nfc-patches/62640/3?u=steakhal Chris Lattner > LLVM intentionally has a “yes, you can apply common sense judgement to > things” policy when it comes to code review. If you are doing mechanical > patches (e.g. adopting less_first) that apply to the entire monorepo, > then you don’t need everyone in the monorepo to sign off on it. Having > some +1 validation from someone is useful, but you don’t need everyone > whose code you touch to weigh in. Differential Revision: https://reviews.llvm.org/D126068 --- bolt/lib/Passes/LongJmp.cpp | 28 ++++++------------- clang/lib/Frontend/FrontendAction.cpp | 6 +--- clang/lib/Sema/SemaDeclCXX.cpp | 3 +- clang/lib/Sema/SemaStmtAsm.cpp | 4 +-- .../Frontend/AnalyzerHelpFlags.cpp | 5 +--- .../Introspection/IntrospectionTest.cpp | 4 +-- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 6 +--- llvm/lib/IR/Attributes.cpp | 12 ++------ llvm/lib/ObjCopy/MachO/MachOWriter.cpp | 4 +-- llvm/tools/dsymutil/DebugMap.cpp | 4 +-- .../llvm-reduce/deltas/ReduceAttributes.cpp | 6 ++-- 11 files changed, 20 insertions(+), 62 deletions(-) diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp index 24139f2b9ee165..b0cc441023eb16 100644 --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -91,15 +91,10 @@ LongJmpPass::createNewStub(BinaryBasicBlock &SourceBB, const MCSymbol *TgtSym, // Register this in stubs maps auto registerInMap = [&](StubGroupsTy &Map) { StubGroupTy &StubGroup = Map[TgtSym]; - StubGroup.insert( - std::lower_bound( - StubGroup.begin(), StubGroup.end(), - std::make_pair(AtAddress, nullptr), - [&](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }), - std::make_pair(AtAddress, 
StubBB.get())); + StubGroup.insert(std::lower_bound(StubGroup.begin(), StubGroup.end(), + std::make_pair(AtAddress, nullptr), + llvm::less_first()), + std::make_pair(AtAddress, StubBB.get())); }; Stubs[&Func].insert(StubBB.get()); @@ -129,12 +124,9 @@ BinaryBasicBlock *LongJmpPass::lookupStubFromGroup( const StubGroupTy &Candidates = CandidatesIter->second; if (Candidates.empty()) return nullptr; - auto Cand = std::lower_bound( - Candidates.begin(), Candidates.end(), std::make_pair(DotAddress, nullptr), - [&](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }); + auto Cand = + std::lower_bound(Candidates.begin(), Candidates.end(), + std::make_pair(DotAddress, nullptr), llvm::less_first()); if (Cand == Candidates.end()) return nullptr; if (Cand != Candidates.begin()) { @@ -259,11 +251,7 @@ void LongJmpPass::updateStubGroups() { for (auto &KeyVal : StubGroups) { for (StubTy &Elem : KeyVal.second) Elem.first = BBAddresses[Elem.second]; - std::sort(KeyVal.second.begin(), KeyVal.second.end(), - [&](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }); + std::sort(KeyVal.second.begin(), KeyVal.second.end(), llvm::less_first()); } }; diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 68c70dda12b9ea..6b1f6364b13c39 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -413,11 +413,7 @@ static std::error_code collectModuleHeaderIncludes( // Sort header paths and make the header inclusion order deterministic // across different OSs and filesystems. - llvm::sort(Headers.begin(), Headers.end(), []( - const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }); + llvm::sort(Headers.begin(), Headers.end(), llvm::less_first()); for (auto &H : Headers) { // Include this header as part of the umbrella directory. 
Module->addTopHeader(H.second); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index c470a46f584773..569b226da92334 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -5430,8 +5430,7 @@ static void DiagnoseBaseOrMemInitializerOrder( return; // Sort based on the ideal order, first in the pair. - llvm::sort(CorrelatedInitOrder, - [](auto &LHS, auto &RHS) { return LHS.first < RHS.first; }); + llvm::sort(CorrelatedInitOrder, llvm::less_first()); // Introduce a new scope as SemaDiagnosticBuilder needs to be destroyed to // emit the diagnostic before we can try adding notes. diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp index fbca36b1216a88..f3c4b4fd583048 100644 --- a/clang/lib/Sema/SemaStmtAsm.cpp +++ b/clang/lib/Sema/SemaStmtAsm.cpp @@ -717,9 +717,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple, std::make_pair(Names[i]->getName(), Exprs[i])); // Sort NamedOperandList. std::stable_sort(NamedOperandList.begin(), NamedOperandList.end(), - [](const NamedOperand &LHS, const NamedOperand &RHS) { - return LHS.first < RHS.first; - }); + llvm::less_first()); // Find adjacent duplicate operand. 
SmallVector::iterator Found = std::adjacent_find(begin(NamedOperandList), end(NamedOperandList), diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp index eb6014a0629df6..7cd15f0f65954a 100644 --- a/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp +++ b/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp @@ -101,10 +101,7 @@ USAGE: -analyzer-config #undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE }; - llvm::sort(PrintableOptions, [](const OptionAndDescriptionTy &LHS, - const OptionAndDescriptionTy &RHS) { - return LHS.first < RHS.first; - }); + llvm::sort(PrintableOptions, llvm::less_first()); for (const auto &Pair : PrintableOptions) { AnalyzerOptions::printFormattedEntry(out, Pair, /*InitialPad*/ 2, diff --git a/clang/unittests/Introspection/IntrospectionTest.cpp b/clang/unittests/Introspection/IntrospectionTest.cpp index 69e461609e7ddf..9f76568c8ec351 100644 --- a/clang/unittests/Introspection/IntrospectionTest.cpp +++ b/clang/unittests/Introspection/IntrospectionTest.cpp @@ -299,9 +299,7 @@ STRING_LOCATION_STDPAIR(MethodDecl, getTypeSpecStartLoc()) auto ExpectedRanges = FormatExpected(Result.RangeAccessors); - llvm::sort(ExpectedRanges, [](const auto &LHS, const auto &RHS) { - return LHS.first < RHS.first; - }); + llvm::sort(ExpectedRanges, llvm::less_first()); // clang-format off EXPECT_EQ( diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index a51776880cde26..df2826b50784da 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -339,11 +339,7 @@ class SymbolLookupSet { /// Sort the lookup set by pointer value. This sort is fast but sensitive to /// allocation order and so should not be used where a consistent order is /// required. 
- void sortByAddress() { - llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) { - return LHS.first < RHS.first; - }); - } + void sortByAddress() { llvm::sort(Symbols, llvm::less_first()); } /// Sort the lookup set lexicographically. This sort is slow but the order /// is unaffected by allocation order. diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 1aa2d44308c476..31141cfa3efb49 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1018,11 +1018,7 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return {}; - assert(llvm::is_sorted(Attrs, - [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }) && + assert(llvm::is_sorted(Attrs, llvm::less_first()) && "Misordered Attributes list!"); assert(llvm::all_of(Attrs, [](const std::pair &Pair) { @@ -1055,11 +1051,7 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return {}; - assert(llvm::is_sorted(Attrs, - [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; - }) && + assert(llvm::is_sorted(Attrs, llvm::less_first()) && "Misordered Attributes list!"); assert(llvm::none_of(Attrs, [](const std::pair &Pair) { diff --git a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp index a94764a9a03494..9b1fd58cc482a3 100644 --- a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp @@ -634,9 +634,7 @@ void MachOWriter::writeTail() { } } - llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { - return LHS.first < RHS.first; - }); + llvm::sort(Queue, llvm::less_first()); for (auto WriteOp : Queue) (this->*WriteOp.second)(); diff --git a/llvm/tools/dsymutil/DebugMap.cpp b/llvm/tools/dsymutil/DebugMap.cpp index 605c1317b9c87a..79b6cb2c80a213 100644 --- a/llvm/tools/dsymutil/DebugMap.cpp +++ b/llvm/tools/dsymutil/DebugMap.cpp @@ -62,9 +62,7 @@ void DebugMapObject::print(raw_ostream &OS) const { 
Entries.reserve(Symbols.getNumItems()); for (const auto &Sym : Symbols) Entries.push_back(std::make_pair(Sym.getKey(), Sym.getValue())); - llvm::sort(Entries, [](const Entry &LHS, const Entry &RHS) { - return LHS.first < RHS.first; - }); + llvm::sort(Entries, llvm::less_first()); for (const auto &Sym : Entries) { if (Sym.second.ObjectAddress) OS << format("\t%016" PRIx64, uint64_t(*Sym.second.ObjectAddress)); diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp index 3a2397464be76e..e4570dd32aca18 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp @@ -158,10 +158,8 @@ AttributeList convertAttributeRefVecToAttributeList( V.first, convertAttributeRefToAttributeSet(C, V.second)); }); - sort(SetVec, [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.first < RHS.first; // All values are unique. - }); + // All values are unique. + sort(SetVec, llvm::less_first()); return AttributeList::get(C, SetVec); } From a73b50ad0649d635433547ff51cd73d2ce9f085b Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 11:18:05 +0200 Subject: [PATCH 695/908] Revert "[llvm][clang][bolt][NFC] Use llvm::less_first() when applicable" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 3988bd13988aad72ec979beb2361e8738584926b. 
Did not build on this bot: https://lab.llvm.org/buildbot#builders/215/builds/6372 /usr/include/c++/9/bits/predefined_ops.h:177:11: error: no match for call to ‘(llvm::less_first) (std::pair&, const std::pair&)’ 177 | { return bool(_M_comp(*__it, __val)); } --- bolt/lib/Passes/LongJmp.cpp | 28 +++++++++++++------ clang/lib/Frontend/FrontendAction.cpp | 6 +++- clang/lib/Sema/SemaDeclCXX.cpp | 3 +- clang/lib/Sema/SemaStmtAsm.cpp | 4 ++- .../Frontend/AnalyzerHelpFlags.cpp | 5 +++- .../Introspection/IntrospectionTest.cpp | 4 ++- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 6 +++- llvm/lib/IR/Attributes.cpp | 12 ++++++-- llvm/lib/ObjCopy/MachO/MachOWriter.cpp | 4 ++- llvm/tools/dsymutil/DebugMap.cpp | 4 ++- .../llvm-reduce/deltas/ReduceAttributes.cpp | 6 ++-- 11 files changed, 62 insertions(+), 20 deletions(-) diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp index b0cc441023eb16..24139f2b9ee165 100644 --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -91,10 +91,15 @@ LongJmpPass::createNewStub(BinaryBasicBlock &SourceBB, const MCSymbol *TgtSym, // Register this in stubs maps auto registerInMap = [&](StubGroupsTy &Map) { StubGroupTy &StubGroup = Map[TgtSym]; - StubGroup.insert(std::lower_bound(StubGroup.begin(), StubGroup.end(), - std::make_pair(AtAddress, nullptr), - llvm::less_first()), - std::make_pair(AtAddress, StubBB.get())); + StubGroup.insert( + std::lower_bound( + StubGroup.begin(), StubGroup.end(), + std::make_pair(AtAddress, nullptr), + [&](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }), + std::make_pair(AtAddress, StubBB.get())); }; Stubs[&Func].insert(StubBB.get()); @@ -124,9 +129,12 @@ BinaryBasicBlock *LongJmpPass::lookupStubFromGroup( const StubGroupTy &Candidates = CandidatesIter->second; if (Candidates.empty()) return nullptr; - auto Cand = - std::lower_bound(Candidates.begin(), Candidates.end(), - std::make_pair(DotAddress, nullptr), llvm::less_first()); + auto Cand = 
std::lower_bound( + Candidates.begin(), Candidates.end(), std::make_pair(DotAddress, nullptr), + [&](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }); if (Cand == Candidates.end()) return nullptr; if (Cand != Candidates.begin()) { @@ -251,7 +259,11 @@ void LongJmpPass::updateStubGroups() { for (auto &KeyVal : StubGroups) { for (StubTy &Elem : KeyVal.second) Elem.first = BBAddresses[Elem.second]; - std::sort(KeyVal.second.begin(), KeyVal.second.end(), llvm::less_first()); + std::sort(KeyVal.second.begin(), KeyVal.second.end(), + [&](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }); } }; diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 6b1f6364b13c39..68c70dda12b9ea 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -413,7 +413,11 @@ static std::error_code collectModuleHeaderIncludes( // Sort header paths and make the header inclusion order deterministic // across different OSs and filesystems. - llvm::sort(Headers.begin(), Headers.end(), llvm::less_first()); + llvm::sort(Headers.begin(), Headers.end(), []( + const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }); for (auto &H : Headers) { // Include this header as part of the umbrella directory. Module->addTopHeader(H.second); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 569b226da92334..c470a46f584773 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -5430,7 +5430,8 @@ static void DiagnoseBaseOrMemInitializerOrder( return; // Sort based on the ideal order, first in the pair. - llvm::sort(CorrelatedInitOrder, llvm::less_first()); + llvm::sort(CorrelatedInitOrder, + [](auto &LHS, auto &RHS) { return LHS.first < RHS.first; }); // Introduce a new scope as SemaDiagnosticBuilder needs to be destroyed to // emit the diagnostic before we can try adding notes. 
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp index f3c4b4fd583048..fbca36b1216a88 100644 --- a/clang/lib/Sema/SemaStmtAsm.cpp +++ b/clang/lib/Sema/SemaStmtAsm.cpp @@ -717,7 +717,9 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple, std::make_pair(Names[i]->getName(), Exprs[i])); // Sort NamedOperandList. std::stable_sort(NamedOperandList.begin(), NamedOperandList.end(), - llvm::less_first()); + [](const NamedOperand &LHS, const NamedOperand &RHS) { + return LHS.first < RHS.first; + }); // Find adjacent duplicate operand. SmallVector::iterator Found = std::adjacent_find(begin(NamedOperandList), end(NamedOperandList), diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp index 7cd15f0f65954a..eb6014a0629df6 100644 --- a/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp +++ b/clang/lib/StaticAnalyzer/Frontend/AnalyzerHelpFlags.cpp @@ -101,7 +101,10 @@ USAGE: -analyzer-config #undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE }; - llvm::sort(PrintableOptions, llvm::less_first()); + llvm::sort(PrintableOptions, [](const OptionAndDescriptionTy &LHS, + const OptionAndDescriptionTy &RHS) { + return LHS.first < RHS.first; + }); for (const auto &Pair : PrintableOptions) { AnalyzerOptions::printFormattedEntry(out, Pair, /*InitialPad*/ 2, diff --git a/clang/unittests/Introspection/IntrospectionTest.cpp b/clang/unittests/Introspection/IntrospectionTest.cpp index 9f76568c8ec351..69e461609e7ddf 100644 --- a/clang/unittests/Introspection/IntrospectionTest.cpp +++ b/clang/unittests/Introspection/IntrospectionTest.cpp @@ -299,7 +299,9 @@ STRING_LOCATION_STDPAIR(MethodDecl, getTypeSpecStartLoc()) auto ExpectedRanges = FormatExpected(Result.RangeAccessors); - llvm::sort(ExpectedRanges, llvm::less_first()); + llvm::sort(ExpectedRanges, [](const auto &LHS, const auto &RHS) { + return LHS.first < RHS.first; + }); // clang-format off EXPECT_EQ( diff --git 
a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index df2826b50784da..a51776880cde26 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -339,7 +339,11 @@ class SymbolLookupSet { /// Sort the lookup set by pointer value. This sort is fast but sensitive to /// allocation order and so should not be used where a consistent order is /// required. - void sortByAddress() { llvm::sort(Symbols, llvm::less_first()); } + void sortByAddress() { + llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) { + return LHS.first < RHS.first; + }); + } /// Sort the lookup set lexicographically. This sort is slow but the order /// is unaffected by allocation order. diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 31141cfa3efb49..1aa2d44308c476 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1018,7 +1018,11 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return {}; - assert(llvm::is_sorted(Attrs, llvm::less_first()) && + assert(llvm::is_sorted(Attrs, + [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }) && "Misordered Attributes list!"); assert(llvm::all_of(Attrs, [](const std::pair &Pair) { @@ -1051,7 +1055,11 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return {}; - assert(llvm::is_sorted(Attrs, llvm::less_first()) && + assert(llvm::is_sorted(Attrs, + [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }) && "Misordered Attributes list!"); assert(llvm::none_of(Attrs, [](const std::pair &Pair) { diff --git a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp index 9b1fd58cc482a3..a94764a9a03494 100644 --- a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp @@ -634,7 +634,9 @@ void MachOWriter::writeTail() { } } - llvm::sort(Queue, llvm::less_first()); + 
llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { + return LHS.first < RHS.first; + }); for (auto WriteOp : Queue) (this->*WriteOp.second)(); diff --git a/llvm/tools/dsymutil/DebugMap.cpp b/llvm/tools/dsymutil/DebugMap.cpp index 79b6cb2c80a213..605c1317b9c87a 100644 --- a/llvm/tools/dsymutil/DebugMap.cpp +++ b/llvm/tools/dsymutil/DebugMap.cpp @@ -62,7 +62,9 @@ void DebugMapObject::print(raw_ostream &OS) const { Entries.reserve(Symbols.getNumItems()); for (const auto &Sym : Symbols) Entries.push_back(std::make_pair(Sym.getKey(), Sym.getValue())); - llvm::sort(Entries, llvm::less_first()); + llvm::sort(Entries, [](const Entry &LHS, const Entry &RHS) { + return LHS.first < RHS.first; + }); for (const auto &Sym : Entries) { if (Sym.second.ObjectAddress) OS << format("\t%016" PRIx64, uint64_t(*Sym.second.ObjectAddress)); diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp index e4570dd32aca18..3a2397464be76e 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp @@ -158,8 +158,10 @@ AttributeList convertAttributeRefVecToAttributeList( V.first, convertAttributeRefToAttributeSet(C, V.second)); }); - // All values are unique. - sort(SetVec, llvm::less_first()); + sort(SetVec, [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; // All values are unique. 
+ }); return AttributeList::get(C, SetVec); } From 4290ef54e18a61103f21f59c1db66fb67999c375 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Wed, 25 May 2022 19:18:18 +0100 Subject: [PATCH 696/908] [Support] Reduce allocations in parallelForEach with move Differential Revision: https://reviews.llvm.org/D126458 --- llvm/lib/Support/Parallel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 4977c188f934f8..85426847c28c96 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -89,7 +89,7 @@ class ThreadPoolExecutor : public Executor { void add(std::function F) override { { std::lock_guard Lock(Mutex); - WorkStack.push(F); + WorkStack.push(std::move(F)); } Cond.notify_one(); } @@ -102,7 +102,7 @@ class ThreadPoolExecutor : public Executor { Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); if (Stop) break; - auto Task = WorkStack.top(); + auto Task = std::move(WorkStack.top()); WorkStack.pop(); Lock.unlock(); Task(); @@ -161,7 +161,7 @@ TaskGroup::~TaskGroup() { void TaskGroup::spawn(std::function F) { if (Parallel) { L.inc(); - Executor::getDefaultExecutor()->add([&, F] { + Executor::getDefaultExecutor()->add([&, F = std::move(F)] { F(); L.dec(); }); From 786c687810a5e3db4c64312018de25c65527c40c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 27 May 2022 10:37:02 +0100 Subject: [PATCH 697/908] [AArch64] Add support for FMA intrinsics to shouldSinkOperands. If the fma operates on a legal vector type, the indexed variants can be used, if the second operand is a splat of a valid index. 
Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D126234 --- .../Target/AArch64/AArch64ISelLowering.cpp | 6 +- .../AArch64/sink-free-instructions.ll | 127 +++++++++++------- 2 files changed, 81 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e31a58da083137..d31008496ea4f2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12545,6 +12545,11 @@ bool AArch64TargetLowering::shouldSinkOperands( } LLVM_FALLTHROUGH; + case Intrinsic::fma: + if (cast(I->getType())->getElementType()->isHalfTy() && + !Subtarget->hasFullFP16()) + return false; + LLVM_FALLTHROUGH; case Intrinsic::aarch64_neon_sqdmull: case Intrinsic::aarch64_neon_sqdmulh: case Intrinsic::aarch64_neon_sqrdmulh: @@ -12568,7 +12573,6 @@ bool AArch64TargetLowering::shouldSinkOperands( Ops.push_back(&II->getArgOperandUse(0)); Ops.push_back(&II->getArgOperandUse(1)); return true; - default: return false; } diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll index 5d7a26f6578455..fc60b119225c4a 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -codegenprepare -S | FileCheck %s +; RUN: opt < %s -codegenprepare -S | FileCheck --check-prefixes=CHECK,NOFP16 %s +; RUN: opt < %s -codegenprepare -S -mattr=+fullfp16 | FileCheck --check-prefixes=CHECK,FULLFP16 %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown" @@ -498,29 +499,53 @@ if.else: declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) define <8 x half> @sink_shufflevector_fma_v8f16(i1 %c, <8 x 
half> %a, <8 x half> %b) { -; CHECK-LABEL: @sink_shufflevector_fma_v8f16( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> -; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> -; CHECK-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> -; CHECK-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> -; CHECK-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> -; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]]) -; CHECK-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]]) -; CHECK-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]]) -; CHECK-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]]) -; CHECK-NEXT: ret <8 x half> [[R_3]] -; CHECK: if.else: -; CHECK-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]]) -; CHECK-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]]) -; CHECK-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]]) -; CHECK-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> 
[[B]]) -; CHECK-NEXT: ret <8 x half> [[R_7]] +; NOFP16-LABEL: @sink_shufflevector_fma_v8f16( +; NOFP16-NEXT: entry: +; NOFP16-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; NOFP16-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; NOFP16-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; NOFP16-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; NOFP16-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; NOFP16-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; NOFP16-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; NOFP16-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; NOFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NOFP16: if.then: +; NOFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]]) +; NOFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]]) +; NOFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]]) +; NOFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]]) +; NOFP16-NEXT: ret <8 x half> [[R_3]] +; NOFP16: if.else: +; NOFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]]) +; NOFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]]) +; NOFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]]) +; NOFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> 
[[R_6]], <8 x half> [[S7]], <8 x half> [[B]]) +; NOFP16-NEXT: ret <8 x half> [[R_7]] +; +; FULLFP16-LABEL: @sink_shufflevector_fma_v8f16( +; FULLFP16-NEXT: entry: +; FULLFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; FULLFP16: if.then: +; FULLFP16-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; FULLFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[B]]) +; FULLFP16-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; FULLFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[TMP1]], <8 x half> [[B]]) +; FULLFP16-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; FULLFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[TMP2]], <8 x half> [[B]]) +; FULLFP16-NEXT: [[TMP3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; FULLFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[TMP3]], <8 x half> [[B]]) +; FULLFP16-NEXT: ret <8 x half> [[R_3]] +; FULLFP16: if.else: +; FULLFP16-NEXT: [[TMP4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; FULLFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[TMP4]], <8 x half> [[B]]) +; FULLFP16-NEXT: [[TMP5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; FULLFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[TMP5]], <8 x half> [[B]]) +; FULLFP16-NEXT: [[TMP6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> +; FULLFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[TMP6]], <8 x half> [[B]]) +; FULLFP16-NEXT: [[TMP7:%.*]] = shufflevector <8 x half> 
[[A]], <8 x half> poison, <8 x i32> +; FULLFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[TMP7]], <8 x half> [[B]]) +; FULLFP16-NEXT: ret <8 x half> [[R_7]] ; entry: %s0 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer @@ -553,18 +578,18 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) define <4 x float> @sink_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sink_shufflevector_fma_v4f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S0]], <4 x float> [[B]]) -; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[S1]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[TMP1]], <4 x float> [[B]]) ; CHECK-NEXT: ret <4 x float> [[R_1]] ; CHECK: if.else: -; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[S2]], <4 x float> [[B]]) -; CHECK-NEXT: [[R_3:%.*]] = tail call 
fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[S3]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[TMP2]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[TMP3]], <4 x float> [[B]]) ; CHECK-NEXT: ret <4 x float> [[R_3]] ; entry: @@ -588,18 +613,18 @@ if.else: define <4 x float> @sink_shufflevector_first_arg_fma_v4f3(i1 %c, <8 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sink_shufflevector_first_arg_fma_v4f3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S0]], <4 x float> [[B:%.*]], <4 x float> [[B]]) -; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S1]], <4 x float> [[R_0]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP0]], <4 x float> [[B:%.*]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> [[R_0]], 
<4 x float> [[B]]) ; CHECK-NEXT: ret <4 x float> [[R_1]] ; CHECK: if.else: -; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S2]], <4 x float> [[B]], <4 x float> [[B]]) -; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S3]], <4 x float> [[R_2]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP2]], <4 x float> [[B]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP3]], <4 x float> [[R_2]], <4 x float> [[B]]) ; CHECK-NEXT: ret <4 x float> [[R_3]] ; entry: @@ -627,14 +652,14 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x double> @sink_shufflevector_fma_v2f64(i1 %c, <2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @sink_shufflevector_fma_v2f64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[S0]], <2 x double> [[B]]) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[TMP0]], <2 x double> [[B]]) ; CHECK-NEXT: ret <2 x double> [[R_0]] ; CHECK: if.else: -; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[S1]], <2 x double> [[B]]) +; CHECK-NEXT: 
[[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[TMP1]], <2 x double> [[B]]) ; CHECK-NEXT: ret <2 x double> [[R_1]] ; entry: @@ -654,10 +679,10 @@ if.else: define <4 x float> @do_not_sink_out_of_range_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fma_v4f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S4]], <4 x float> [[B]]) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]]) ; CHECK-NEXT: ret <4 x float> [[R]] ; CHECK: if.else: ; CHECK-NEXT: ret <4 x float> zeroinitializer @@ -679,20 +704,20 @@ declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>) define <5 x float> @sink_shufflevector_fma_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) { ; CHECK-LABEL: @sink_shufflevector_fma_v5f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> zeroinitializer -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> ; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> ; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> -; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> ; CHECK-NEXT: br i1 [[C:%.*]], 
label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[S0]], <5 x float> [[B]]) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> zeroinitializer +; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[TMP0]], <5 x float> [[B]]) ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]]) ; CHECK-NEXT: ret <5 x float> [[R_1]] ; CHECK: if.else: ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]]) ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]]) -; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[S4]], <5 x float> [[B]]) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> +; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[TMP1]], <5 x float> [[B]]) ; CHECK-NEXT: ret <5 x float> [[R_4]] ; entry: From dc9fb65c4ffedc983c29bb0cddebf6fc4e9146e6 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Mon, 9 May 2022 15:05:03 -0700 Subject: [PATCH 698/908] [mlir][Tablegen-LSP] Add support for a compilation database This provides a format for externally specifying the include directories for a source file. The format of the tablegen database is exactly the same as that for PDLL, namely it includes the absolute source file name and the set of include directories. The database format is shared to simplify the infra, and also because the format itself is general enough to share. 
Even if we desire to expand in the future to contain the actual compilation command, nothing there is specific enough that we would need two different formats. As with PDLL, support for generating the database is added to our mlir_tablegen cmake command. Differential Revision: https://reviews.llvm.org/D125441 --- mlir/cmake/modules/AddMLIR.cmake | 26 ++++++++++ .../Tools/lsp-server-support/CMakeLists.txt | 1 + .../CompilationDatabase.cpp | 47 +++++++++++++---- .../CompilationDatabase.h | 30 +++++++---- .../Tools/mlir-pdll-lsp-server/CMakeLists.txt | 1 - .../Tools/mlir-pdll-lsp-server/PDLLServer.cpp | 7 +-- .../TableGenLspServerMain.cpp | 10 +++- .../tblgen-lsp-server/TableGenServer.cpp | 37 +++++++++++--- .../Tools/tblgen-lsp-server/TableGenServer.h | 15 +++++- .../compilation_database.test | 21 ++++++++ .../tblgen-lsp-server/include/included.td | 3 ++ mlir/test/tblgen-lsp-server/lit.local.cfg | 1 + mlir/utils/vscode/package.json | 5 ++ mlir/utils/vscode/src/config.ts | 6 ++- mlir/utils/vscode/src/mlirContext.ts | 51 +++++++++++++++---- 15 files changed, 216 insertions(+), 45 deletions(-) rename mlir/lib/Tools/{mlir-pdll-lsp-server => lsp-server-support}/CompilationDatabase.cpp (61%) rename mlir/lib/Tools/{mlir-pdll-lsp-server => lsp-server-support}/CompilationDatabase.h (61%) create mode 100644 mlir/test/tblgen-lsp-server/compilation_database.test create mode 100644 mlir/test/tblgen-lsp-server/include/included.td create mode 100644 mlir/test/tblgen-lsp-server/lit.local.cfg diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake index a372832e39d3d3..c6eebfaac40ce6 100644 --- a/mlir/cmake/modules/AddMLIR.cmake +++ b/mlir/cmake/modules/AddMLIR.cmake @@ -1,10 +1,36 @@ include(GNUInstallDirs) include(LLVMDistributionSupport) +# Clear out any pre-existing compile_commands file before processing. This +# allows for generating a clean compile_commands on each configure. 
+file(REMOVE ${CMAKE_BINARY_DIR}/tablegen_compile_commands.yml) + function(mlir_tablegen ofn) tablegen(MLIR ${ARGV}) set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${ofn} PARENT_SCOPE) + + # Get the current set of include paths for this td file. + cmake_parse_arguments(ARG "" "" "DEPENDS;EXTRA_INCLUDES" ${ARGN}) + get_directory_property(tblgen_includes INCLUDE_DIRECTORIES) + list(APPEND tblgen_includes ${ARG_EXTRA_INCLUDES}) + # Filter out any empty include items. + list(REMOVE_ITEM tblgen_includes "") + + # Build the absolute path for the current input file. + if (IS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) + set(LLVM_TARGET_DEFINITIONS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) + else() + set(LLVM_TARGET_DEFINITIONS_ABSOLUTE ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS}) + endif() + + # Append the includes used for this file to the tablegen_compile_commands + # file. + file(APPEND ${CMAKE_BINARY_DIR}/tablegen_compile_commands.yml + "--- !FileInfo:\n" + " filepath: \"${LLVM_TARGET_DEFINITIONS_ABSOLUTE}\"\n" + " includes: \"${CMAKE_CURRENT_SOURCE_DIR};${tblgen_includes}\"\n" + ) endfunction() # Clear out any pre-existing compile_commands file before processing. 
This diff --git a/mlir/lib/Tools/lsp-server-support/CMakeLists.txt b/mlir/lib/Tools/lsp-server-support/CMakeLists.txt index b4c09a4e4f092e..d71ecfaa6b5214 100644 --- a/mlir/lib/Tools/lsp-server-support/CMakeLists.txt +++ b/mlir/lib/Tools/lsp-server-support/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_library(MLIRLspServerSupportLib + CompilationDatabase.cpp Logging.cpp Protocol.cpp SourceMgrUtils.cpp diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/CompilationDatabase.cpp b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp similarity index 61% rename from mlir/lib/Tools/mlir-pdll-lsp-server/CompilationDatabase.cpp rename to mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp index 678d7dcf458e2c..31a6a308252929 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/CompilationDatabase.cpp +++ b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp @@ -1,4 +1,4 @@ -//===- CompilationDatabase.cpp - PDLL Compilation Database ----------------===// +//===- CompilationDatabase.cpp - LSP Compilation Database -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,23 +10,37 @@ #include "../lsp-server-support/Logging.h" #include "../lsp-server-support/Protocol.h" #include "mlir/Support/FileUtilities.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/YAMLTraits.h" using namespace mlir; using namespace mlir::lsp; +//===----------------------------------------------------------------------===// +// YamlFileInfo +//===----------------------------------------------------------------------===// + +namespace { +struct YamlFileInfo { + /// The absolute path to the file. + std::string filename; + /// The include directories available for the file. 
+ std::vector includeDirs; +}; +} // namespace + //===----------------------------------------------------------------------===// // CompilationDatabase //===----------------------------------------------------------------------===// -LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(CompilationDatabase::FileInfo) +LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(YamlFileInfo) namespace llvm { namespace yaml { template <> -struct MappingTraits { - static void mapping(IO &io, CompilationDatabase::FileInfo &info) { +struct MappingTraits { + static void mapping(IO &io, YamlFileInfo &info) { // Parse the filename and normalize it to the form we will expect from // incoming URIs. io.mapRequired("filepath", info.filename); @@ -54,10 +68,10 @@ CompilationDatabase::CompilationDatabase(ArrayRef databases) { loadDatabase(filename); } -const CompilationDatabase::FileInfo * +const CompilationDatabase::FileInfo & CompilationDatabase::getFileInfo(StringRef filename) const { auto it = files.find(filename); - return it == files.end() ? nullptr : &it->second; + return it == files.end() ? defaultFileInfo : it->second; } void CompilationDatabase::loadDatabase(StringRef filename) { @@ -75,15 +89,30 @@ void CompilationDatabase::loadDatabase(StringRef filename) { llvm::yaml::Input yaml(inputFile->getBuffer()); // Parse the yaml description and add any new files to the database. - std::vector parsedFiles; + std::vector parsedFiles; yaml >> parsedFiles; + + SetVector knownIncludes; for (auto &file : parsedFiles) { - auto it = files.try_emplace(file.filename, std::move(file)); + auto it = files.try_emplace(file.filename, std::move(file.includeDirs)); // If we encounter a duplicate file, log a warning and ignore it. if (!it.second) { - Logger::info("Duplicate .pdll file in compilation database: {0}", + Logger::info("Duplicate file in compilation database: {0}", file.filename); + continue; } + + // Track the includes for the file. 
+ for (StringRef include : it.first->second.includeDirs) + knownIncludes.insert(include); } + + // Add all of the known includes to the default file info. We don't know any + // information about how to treat these files, but these may be project files + // that we just don't yet have information for. In these cases, providing some + // heuristic information provides a better user experience, and generally + // shouldn't lead to any negative side effects. + for (StringRef include : knownIncludes) + defaultFileInfo.includeDirs.push_back(include.str()); } diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/CompilationDatabase.h b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.h similarity index 61% rename from mlir/lib/Tools/mlir-pdll-lsp-server/CompilationDatabase.h rename to mlir/lib/Tools/lsp-server-support/CompilationDatabase.h index 13a5f13c977f65..56a3d5126739b6 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/CompilationDatabase.h +++ b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.h @@ -1,13 +1,20 @@ -//===- CompilationDatabase.h - PDLL Compilation Database --------*- C++ -*-===// +//===- CompilationDatabase.h - LSP Compilation Database ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// This file contains a definition of a generic compilation database that can be +// used to provide information about the compilation of a given source file. It +// contains generic components, leaving more complex interpretation to the +// specific language servers that consume it. 
+// +//===----------------------------------------------------------------------===// -#ifndef LIB_MLIR_TOOLS_MLIRPDLLSPSERVER_COMPILATIONDATABASE_H_ -#define LIB_MLIR_TOOLS_MLIRPDLLSPSERVER_COMPILATIONDATABASE_H_ +#ifndef LIB_MLIR_TOOLS_LSPSERVERSUPPORT_COMPILATIONDATABASE_H_ +#define LIB_MLIR_TOOLS_LSPSERVERSUPPORT_COMPILATIONDATABASE_H_ #include "mlir/Support/LLVM.h" #include "llvm/ADT/StringMap.h" @@ -30,8 +37,10 @@ class CompilationDatabase { public: /// Compilation information for a specific file within the database. struct FileInfo { - /// The absolute path to the file. - std::string filename; + FileInfo() = default; + FileInfo(std::vector &&includeDirs) + : includeDirs(std::move(includeDirs)) {} + /// The include directories available for the file. std::vector includeDirs; }; @@ -40,9 +49,8 @@ class CompilationDatabase { /// descriptions of the database. CompilationDatabase(ArrayRef databases); - /// Get the compilation information for the provided file, or nullptr if the - /// database doesn't include information for `filename`. - const FileInfo *getFileInfo(StringRef filename) const; + /// Get the compilation information for the provided file. + const FileInfo &getFileInfo(StringRef filename) const; private: /// Load the given database file into this database. @@ -51,8 +59,12 @@ class CompilationDatabase { /// A map of filename to file information for each known file within the /// databases. llvm::StringMap files; + + /// A default file info that contains basic information for use by files that + /// weren't explicitly in the database. 
+ FileInfo defaultFileInfo; }; } // namespace lsp } // namespace mlir -#endif // LIB_MLIR_TOOLS_MLIRPDLLSPSERVER_COMPILATIONDATABASE_H_ +#endif // LIB_MLIR_TOOLS_LSPSERVERSUPPORT_COMPILATIONDATABASE_H_ diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt b/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt index 125b62325fedb8..f9ff5470eca0ea 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt @@ -1,5 +1,4 @@ llvm_add_library(MLIRPdllLspServerLib - CompilationDatabase.cpp LSPServer.cpp PDLLServer.cpp MlirPdllLspServerMain.cpp diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp index 7ff7f8d8264d92..dca2af0580b085 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp @@ -8,9 +8,9 @@ #include "PDLLServer.h" +#include "../lsp-server-support/CompilationDatabase.h" #include "../lsp-server-support/Logging.h" #include "../lsp-server-support/Protocol.h" -#include "CompilationDatabase.h" #include "mlir/Tools/PDLL/AST/Context.h" #include "mlir/Tools/PDLL/AST/Nodes.h" #include "mlir/Tools/PDLL/AST/Types.h" @@ -1422,9 +1422,10 @@ lsp::PDLLServer::~PDLLServer() = default; void lsp::PDLLServer::addOrUpdateDocument( const URIForFile &uri, StringRef contents, int64_t version, std::vector &diagnostics) { + // Build the set of additional include directories. 
std::vector additionalIncludeDirs = impl->options.extraDirs; - if (auto *fileInfo = impl->compilationDatabase.getFileInfo(uri.file())) - llvm::append_range(additionalIncludeDirs, fileInfo->includeDirs); + const auto &fileInfo = impl->compilationDatabase.getFileInfo(uri.file()); + llvm::append_range(additionalIncludeDirs, fileInfo.includeDirs); impl->files[uri.file()] = std::make_unique( uri, contents, version, additionalIncludeDirs, diagnostics); diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp index 30f39eb83475b3..9f47a791f3ff48 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenLspServerMain.cpp @@ -51,6 +51,13 @@ LogicalResult mlir::TableGenLspServerMain(int argc, char **argv) { llvm::cl::desc("Pretty-print JSON output"), llvm::cl::init(false), }; + llvm::cl::list extraIncludeDirs( + "tablegen-extra-dir", llvm::cl::desc("Extra directory of include files"), + llvm::cl::value_desc("directory"), llvm::cl::Prefix); + llvm::cl::list compilationDatabases( + "tablegen-compilation-database", + llvm::cl::desc("Compilation YAML databases containing additional " + "compilation information for .td files")); llvm::cl::ParseCommandLineOptions(argc, argv, "TableGen LSP Language Server"); @@ -68,6 +75,7 @@ LogicalResult mlir::TableGenLspServerMain(int argc, char **argv) { JSONTransport transport(stdin, llvm::outs(), inputStyle, prettyPrint); // Configure the servers and start the main language server. 
- TableGenServer server; + TableGenServer::Options options(compilationDatabases, extraIncludeDirs); + TableGenServer server(options); return runTableGenLSPServer(server, transport); } diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp index de0508feca6865..973097e0c8474b 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp @@ -8,6 +8,7 @@ #include "TableGenServer.h" +#include "../lsp-server-support/CompilationDatabase.h" #include "../lsp-server-support/Logging.h" #include "../lsp-server-support/Protocol.h" #include "../lsp-server-support/SourceMgrUtils.h" @@ -95,7 +96,9 @@ namespace { class TableGenTextFile { public: TableGenTextFile(const lsp::URIForFile &uri, StringRef fileContents, - int64_t version, std::vector &diagnostics); + int64_t version, + const std::vector &extraIncludeDirs, + std::vector &diagnostics); /// Return the current version of this text file. int64_t getVersion() const { return version; } @@ -118,9 +121,10 @@ class TableGenTextFile { }; } // namespace -TableGenTextFile::TableGenTextFile(const lsp::URIForFile &uri, - StringRef fileContents, int64_t version, - std::vector &diagnostics) +TableGenTextFile::TableGenTextFile( + const lsp::URIForFile &uri, StringRef fileContents, int64_t version, + const std::vector &extraIncludeDirs, + std::vector &diagnostics) : contents(fileContents.str()), version(version) { auto memBuffer = llvm::MemoryBuffer::getMemBufferCopy(contents, uri.file()); if (!memBuffer) { @@ -129,10 +133,11 @@ TableGenTextFile::TableGenTextFile(const lsp::URIForFile &uri, } // Build the set of include directories for this file. - // TODO: Setup external include directories. 
llvm::SmallString<32> uriDirectory(uri.file()); llvm::sys::path::remove_filename(uriDirectory); includeDirs.push_back(uriDirectory.str().str()); + includeDirs.insert(includeDirs.end(), extraIncludeDirs.begin(), + extraIncludeDirs.end()); sourceMgr.setIncludeDirs(includeDirs); sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc()); @@ -161,6 +166,16 @@ TableGenTextFile::TableGenTextFile(const lsp::URIForFile &uri, //===----------------------------------------------------------------------===// struct lsp::TableGenServer::Impl { + explicit Impl(const Options &options) + : options(options), compilationDatabase(options.compilationDatabases) {} + + /// TableGen LSP options. + const Options &options; + + /// The compilation database containing additional information for files + /// passed to the server. + lsp::CompilationDatabase compilationDatabase; + /// The files held by the server, mapped by their URI file name. llvm::StringMap> files; }; @@ -169,14 +184,20 @@ struct lsp::TableGenServer::Impl { // TableGenServer //===----------------------------------------------------------------------===// -lsp::TableGenServer::TableGenServer() : impl(std::make_unique()) {} +lsp::TableGenServer::TableGenServer(const Options &options) + : impl(std::make_unique(options)) {} lsp::TableGenServer::~TableGenServer() = default; void lsp::TableGenServer::addOrUpdateDocument( const URIForFile &uri, StringRef contents, int64_t version, std::vector &diagnostics) { - impl->files[uri.file()] = - std::make_unique(uri, contents, version, diagnostics); + // Build the set of additional include directories. 
+ std::vector additionalIncludeDirs = impl->options.extraDirs; + const auto &fileInfo = impl->compilationDatabase.getFileInfo(uri.file()); + llvm::append_range(additionalIncludeDirs, fileInfo.includeDirs); + + impl->files[uri.file()] = std::make_unique( + uri, contents, version, additionalIncludeDirs, diagnostics); } Optional lsp::TableGenServer::removeDocument(const URIForFile &uri) { diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h index 925e450784f08b..d7a88fd47dbde1 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h @@ -24,7 +24,20 @@ class URIForFile; /// logic separate from the logic that involves LSP server/client communication. class TableGenServer { public: - TableGenServer(); + struct Options { + Options(const std::vector &compilationDatabases, + const std::vector &extraDirs) + : compilationDatabases(compilationDatabases), extraDirs(extraDirs) {} + + /// The filenames for databases containing compilation commands for TableGen + /// files passed to the server. + const std::vector &compilationDatabases; + + /// Additional list of include directories to search. + const std::vector &extraDirs; + }; + + TableGenServer(const Options &options); ~TableGenServer(); /// Add or update the document, with the provided `version`, at the given URI. 
diff --git a/mlir/test/tblgen-lsp-server/compilation_database.test b/mlir/test/tblgen-lsp-server/compilation_database.test new file mode 100644 index 00000000000000..d1a9412af61c7d --- /dev/null +++ b/mlir/test/tblgen-lsp-server/compilation_database.test @@ -0,0 +1,21 @@ +// RUN: echo -e '--- !FileInfo:\n filepath: "/foo.td"\n includes: "%/S;%/S/../../include"' > %t.yml +// RUN: tblgen-lsp-server -tablegen-compilation-database=%t.yml -lit-test < %s | FileCheck %s +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"tablegen","capabilities":{},"trace":"off"}} +// ----- +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ + "uri":"test:///foo.td", + "languageId":"tablegen", + "version":1, + "text":"include \"include/included.td\"" +}}} +// Check that we can properly process the includes without errors. +// CHECK: "method": "textDocument/publishDiagnostics", +// CHECK-NEXT: "params": { +// CHECK-NEXT: "diagnostics": [], +// CHECK-NEXT: "uri": "test:///foo.td", +// CHECK-NEXT: "version": 1 +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":7,"method":"shutdown"} +// ----- +{"jsonrpc":"2.0","method":"exit"} diff --git a/mlir/test/tblgen-lsp-server/include/included.td b/mlir/test/tblgen-lsp-server/include/included.td new file mode 100644 index 00000000000000..0507300c7bc3b6 --- /dev/null +++ b/mlir/test/tblgen-lsp-server/include/included.td @@ -0,0 +1,3 @@ + +// This file is merely to test the processing of includes, it has +// no other purpose or contents. 
diff --git a/mlir/test/tblgen-lsp-server/lit.local.cfg b/mlir/test/tblgen-lsp-server/lit.local.cfg new file mode 100644 index 00000000000000..25d08c7aba3060 --- /dev/null +++ b/mlir/test/tblgen-lsp-server/lit.local.cfg @@ -0,0 +1 @@ +config.excludes = ['include'] diff --git a/mlir/utils/vscode/package.json b/mlir/utils/vscode/package.json index 55033c3304bac3..cca2ba1ace033d 100644 --- a/mlir/utils/vscode/package.json +++ b/mlir/utils/vscode/package.json @@ -156,6 +156,11 @@ "type": "string", "description": "The file path of the tblgen-lsp-server executable." }, + "mlir.tablegen_compilation_databases": { + "scope": "resource", + "type": "array", + "description": "A list of `tablegen_compile_commands.yml` database files containing information about .td files processed by the server." + }, "mlir.onSettingsChanged": { "type": "string", "default": "prompt", diff --git a/mlir/utils/vscode/src/config.ts b/mlir/utils/vscode/src/config.ts index d7dd5bfb5454ec..04ff8f53c9e586 100644 --- a/mlir/utils/vscode/src/config.ts +++ b/mlir/utils/vscode/src/config.ts @@ -4,8 +4,10 @@ import * as vscode from 'vscode'; * Gets the config value `mlir.`, with an optional workspace folder. */ export function get(key: string, - workspaceFolder: vscode.WorkspaceFolder = null): T { - return vscode.workspace.getConfiguration('mlir', workspaceFolder).get(key); + workspaceFolder: vscode.WorkspaceFolder = null, + defaultValue: T = undefined): T { + return vscode.workspace.getConfiguration('mlir', workspaceFolder) + .get(key, defaultValue); } /** diff --git a/mlir/utils/vscode/src/mlirContext.ts b/mlir/utils/vscode/src/mlirContext.ts index c8d4f78818a054..c0c2b53edbaf30 100644 --- a/mlir/utils/vscode/src/mlirContext.ts +++ b/mlir/utils/vscode/src/mlirContext.ts @@ -90,23 +90,22 @@ export class MLIRContext implements vscode.Disposable { } /** - * Prepare the server options for a PDLL server, e.g. populating any - * accessible compilation databases. 
+ * Prepare a compilation database option for a server. */ - async preparePDLLServerOptions(workspaceFolder: vscode.WorkspaceFolder, - configsToWatch: string[], - pathsToWatch: string[], - additionalServerArgs: string[]) { + async prepareCompilationDatabaseServerOptions( + languageName: string, workspaceFolder: vscode.WorkspaceFolder, + configsToWatch: string[], pathsToWatch: string[], + additionalServerArgs: string[]) { // Process the compilation databases attached for the workspace folder. - let databases = - config.get('pdll_compilation_databases', workspaceFolder); + let databases = config.get( + `${languageName}_compilation_databases`, workspaceFolder, []); // If no databases were explicitly specified, default to a database in the // 'build' directory within the current workspace. if (databases.length === 0) { if (workspaceFolder) { databases.push(workspaceFolder.uri.fsPath + - '/build/pdll_compile_commands.yml'); + `/build/${languageName}_compile_commands.yml`); } // Otherwise, try to resolve each of the paths. @@ -116,14 +115,40 @@ export class MLIRContext implements vscode.Disposable { } } - configsToWatch.push('pdll_compilation_databases'); + configsToWatch.push(`${languageName}_compilation_databases`); pathsToWatch.push(...databases); // Setup the compilation databases as additional arguments to pass to the // server. databases.filter(database => database !== ''); additionalServerArgs.push(...databases.map( - (database) => `--pdll-compilation-database=${database}`)); + (database) => `--${languageName}-compilation-database=${database}`)); + } + + /** + * Prepare the server options for a PDLL server, e.g. populating any + * accessible compilation databases. 
+ */ + async preparePDLLServerOptions(workspaceFolder: vscode.WorkspaceFolder, + configsToWatch: string[], + pathsToWatch: string[], + additionalServerArgs: string[]) { + await this.prepareCompilationDatabaseServerOptions( + 'pdll', workspaceFolder, configsToWatch, pathsToWatch, + additionalServerArgs); + } + + /** + * Prepare the server options for a TableGen server, e.g. populating any + * accessible compilation databases. + */ + async prepareTableGenServerOptions(workspaceFolder: vscode.WorkspaceFolder, + configsToWatch: string[], + pathsToWatch: string[], + additionalServerArgs: string[]) { + await this.prepareCompilationDatabaseServerOptions( + 'tablegen', workspaceFolder, configsToWatch, pathsToWatch, + additionalServerArgs); } /** @@ -143,6 +168,10 @@ export class MLIRContext implements vscode.Disposable { await this.preparePDLLServerOptions(workspaceFolder, configsToWatch, filepathsToWatch, additionalServerArgs); + } else if (languageName == 'tablegen') { + await this.prepareTableGenServerOptions(workspaceFolder, configsToWatch, + filepathsToWatch, + additionalServerArgs); } // Try to activate the language client. From 682ca00e218d60fef49936a38f174e54e85886cb Mon Sep 17 00:00:00 2001 From: River Riddle Date: Mon, 9 May 2022 15:22:12 -0700 Subject: [PATCH 699/908] [mlir][Tablegen-LSP] Add support for include file link and hover This allows for following links to include files. This support is effectively identical to the logic in the PDLL language server, and code is shared as much as possible. 
Differential Revision: https://reviews.llvm.org/D125442 --- .../lsp-server-support/SourceMgrUtils.cpp | 48 +++++++++++++ .../Tools/lsp-server-support/SourceMgrUtils.h | 27 ++++++++ .../Tools/mlir-pdll-lsp-server/PDLLServer.cpp | 68 ++----------------- .../lib/Tools/tblgen-lsp-server/LSPServer.cpp | 42 ++++++++++++ .../tblgen-lsp-server/TableGenServer.cpp | 62 ++++++++++++++++- .../Tools/tblgen-lsp-server/TableGenServer.h | 11 +++ .../tblgen-lsp-server/document-links.test | 34 ++++++++++ mlir/test/tblgen-lsp-server/hover.test | 37 ++++++++++ .../tblgen-lsp-server/initialize-params.test | 4 ++ 9 files changed, 270 insertions(+), 63 deletions(-) create mode 100644 mlir/test/tblgen-lsp-server/document-links.test create mode 100644 mlir/test/tblgen-lsp-server/hover.test diff --git a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp index 918f097abd21cd..2e8d2abd0b3beb 100644 --- a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp +++ b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp @@ -7,10 +7,15 @@ //===----------------------------------------------------------------------===// #include "SourceMgrUtils.h" +#include "llvm/Support/Path.h" using namespace mlir; using namespace mlir::lsp; +//===----------------------------------------------------------------------===// +// Utils +//===----------------------------------------------------------------------===// + /// Find the end of a string whose contents start at the given `curPtr`. Returns /// the position at the end of the string, after a terminal or invalid character /// (e.g. `"` or `\0`). 
@@ -59,3 +64,46 @@ SMRange lsp::convertTokenLocToRange(SMLoc loc) { return SMRange(loc, SMLoc::getFromPointer(curPtr)); } + +//===----------------------------------------------------------------------===// +// SourceMgrInclude +//===----------------------------------------------------------------------===// + +Hover SourceMgrInclude::buildHover() const { + Hover hover(range); + { + llvm::raw_string_ostream hoverOS(hover.contents.value); + hoverOS << "`" << llvm::sys::path::filename(uri.file()) << "`\n***\n" + << uri.file(); + } + return hover; +} + +void lsp::gatherIncludeFiles(llvm::SourceMgr &sourceMgr, + SmallVectorImpl &includes) { + for (unsigned i = 1, e = sourceMgr.getNumBuffers(); i < e; ++i) { + // Check to see if this file was included by the main file. + SMLoc includeLoc = sourceMgr.getBufferInfo(i + 1).IncludeLoc; + if (!includeLoc.isValid() || sourceMgr.FindBufferContainingLoc( + includeLoc) != sourceMgr.getMainFileID()) + continue; + + // Try to build a URI for this file path. + auto *buffer = sourceMgr.getMemoryBuffer(i + 1); + llvm::SmallString<256> path(buffer->getBufferIdentifier()); + llvm::sys::path::remove_dots(path, /*remove_dot_dot=*/true); + + llvm::Expected includedFileURI = URIForFile::fromFile(path); + if (!includedFileURI) + continue; + + // Find the start of the include token. + const char *includeStart = includeLoc.getPointer() - 2; + while (*(--includeStart) != '\"') + continue; + + // Push this include. 
+ SMRange includeRange(SMLoc::getFromPointer(includeStart), includeLoc); + includes.emplace_back(*includedFileURI, Range(sourceMgr, includeRange)); + } +} diff --git a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.h b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.h index ba3a7b0a18c817..bad0dc7a7e58fb 100644 --- a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.h +++ b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.h @@ -19,12 +19,39 @@ namespace mlir { namespace lsp { +//===----------------------------------------------------------------------===// +// Utils +//===----------------------------------------------------------------------===// /// Returns the range of a lexical token given a SMLoc corresponding to the /// start of an token location. The range is computed heuristically, and /// supports identifier-like tokens, strings, etc. SMRange convertTokenLocToRange(SMLoc loc); +//===----------------------------------------------------------------------===// +// SourceMgrInclude +//===----------------------------------------------------------------------===// + +/// This class represents a single include within a root file. +struct SourceMgrInclude { + SourceMgrInclude(const lsp::URIForFile &uri, const lsp::Range &range) + : uri(uri), range(range) {} + + /// Build a hover for the current include file. + Hover buildHover() const; + + /// The URI of the file that is included. + lsp::URIForFile uri; + + /// The range of the include directive. + lsp::Range range; +}; + +/// Given a source manager, gather all of the processed include files. These are +/// assumed to be all of the files other than the main root file. 
+void gatherIncludeFiles(llvm::SourceMgr &sourceMgr, + SmallVectorImpl &includes); + } // namespace lsp } // namespace mlir diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp index dca2af0580b085..cbfd421c722241 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp @@ -11,6 +11,7 @@ #include "../lsp-server-support/CompilationDatabase.h" #include "../lsp-server-support/Logging.h" #include "../lsp-server-support/Protocol.h" +#include "../lsp-server-support/SourceMgrUtils.h" #include "mlir/Tools/PDLL/AST/Context.h" #include "mlir/Tools/PDLL/AST/Nodes.h" #include "mlir/Tools/PDLL/AST/Types.h" @@ -106,24 +107,6 @@ getLspDiagnoticFromDiag(llvm::SourceMgr &sourceMgr, const ast::Diagnostic &diag, return lspDiag; } -//===----------------------------------------------------------------------===// -// PDLLInclude -//===----------------------------------------------------------------------===// - -namespace { -/// This class represents a single include within a root file. -struct PDLLInclude { - PDLLInclude(const lsp::URIForFile &uri, const lsp::Range &range) - : uri(uri), range(range) {} - - /// The URI of the file that is included. - lsp::URIForFile uri; - - /// The range of the include directive. 
- lsp::Range range; -}; -} // namespace - //===----------------------------------------------------------------------===// // PDLIndex //===----------------------------------------------------------------------===// @@ -288,7 +271,6 @@ struct PDLDocument { const lsp::Position &hoverPos); Optional findHover(const ast::Decl *decl, const SMRange &hoverRange); - lsp::Hover buildHoverForInclude(const PDLLInclude &include); lsp::Hover buildHoverForOpName(const ods::Operation *op, const SMRange &hoverRange); lsp::Hover buildHoverForVariable(const ast::VariableDecl *varDecl, @@ -343,7 +325,7 @@ struct PDLDocument { PDLIndex index; /// The set of includes of the parsed module. - std::vector parsedIncludes; + SmallVector parsedIncludes; }; } // namespace @@ -373,33 +355,7 @@ PDLDocument::PDLDocument(const lsp::URIForFile &uri, StringRef contents, astModule = parsePDLAST(astContext, sourceMgr); // Initialize the set of parsed includes. - for (unsigned i = 1, e = sourceMgr.getNumBuffers(); i < e; ++i) { - // Check to see if this file was included by the main file. - SMLoc includeLoc = sourceMgr.getBufferInfo(i + 1).IncludeLoc; - if (!includeLoc.isValid() || sourceMgr.FindBufferContainingLoc( - includeLoc) != sourceMgr.getMainFileID()) - continue; - - // Try to build a URI for this file path. - auto *buffer = sourceMgr.getMemoryBuffer(i + 1); - llvm::SmallString<256> path(buffer->getBufferIdentifier()); - llvm::sys::path::remove_dots(path, /*remove_dot_dot=*/true); - - llvm::Expected includedFileURI = - lsp::URIForFile::fromFile(path); - if (!includedFileURI) - continue; - - // Find the end of the include token. - const char *includeStart = includeLoc.getPointer() - 2; - while (*(--includeStart) != '\"') - continue; - - // Push this include. 
- SMRange includeRange(SMLoc::getFromPointer(includeStart), includeLoc); - parsedIncludes.emplace_back(*includedFileURI, - lsp::Range(sourceMgr, includeRange)); - } + lsp::gatherIncludeFiles(sourceMgr, parsedIncludes); // If we failed to parse the module, there is nothing left to initialize. if (failed(astModule)) @@ -443,7 +399,7 @@ void PDLDocument::findReferencesOf(const lsp::URIForFile &uri, void PDLDocument::getDocumentLinks(const lsp::URIForFile &uri, std::vector &links) { - for (const PDLLInclude &include : parsedIncludes) + for (const lsp::SourceMgrInclude &include : parsedIncludes) links.emplace_back(include.range, include.uri); } @@ -456,10 +412,9 @@ Optional PDLDocument::findHover(const lsp::URIForFile &uri, SMLoc posLoc = hoverPos.getAsSMLoc(sourceMgr); // Check for a reference to an include. - for (const PDLLInclude &include : parsedIncludes) { + for (const lsp::SourceMgrInclude &include : parsedIncludes) if (include.range.contains(hoverPos)) - return buildHoverForInclude(include); - } + return include.buildHover(); // Find the symbol at the given location. 
SMRange hoverRange; @@ -499,17 +454,6 @@ Optional PDLDocument::findHover(const ast::Decl *decl, return llvm::None; } -lsp::Hover PDLDocument::buildHoverForInclude(const PDLLInclude &include) { - lsp::Hover hover(include.range); - { - llvm::raw_string_ostream hoverOS(hover.contents.value); - hoverOS << "`" << llvm::sys::path::filename(include.uri.file()) - << "`\n***\n" - << include.uri.file(); - } - return hover; -} - lsp::Hover PDLDocument::buildHoverForOpName(const ods::Operation *op, const SMRange &hoverRange) { lsp::Hover hover(lsp::Range(sourceMgr, hoverRange)); diff --git a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp index b9aa76aa016d8b..83133efcf4c5a5 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp @@ -42,6 +42,18 @@ struct LSPServer { void onDocumentDidClose(const DidCloseTextDocumentParams &params); void onDocumentDidChange(const DidChangeTextDocumentParams &params); + //===----------------------------------------------------------------------===// + // DocumentLink + + void onDocumentLink(const DocumentLinkParams &params, + Callback> reply); + + //===--------------------------------------------------------------------===// + // Hover + + void onHover(const TextDocumentPositionParams &params, + Callback> reply); + //===--------------------------------------------------------------------===// // Fields //===--------------------------------------------------------------------===// @@ -72,6 +84,11 @@ void LSPServer::onInitialize(const InitializeParams &params, {"change", (int)TextDocumentSyncKind::Full}, {"save", true}, }}, + {"documentLinkProvider", + llvm::json::Object{ + {"resolveProvider", false}, + }}, + {"hoverProvider", true}, }; llvm::json::Object result{ @@ -125,6 +142,24 @@ void LSPServer::onDocumentDidChange(const DidChangeTextDocumentParams &params) { publishDiagnostics(diagParams); } 
+//===----------------------------------------------------------------------===// +// DocumentLink + +void LSPServer::onDocumentLink(const DocumentLinkParams &params, + Callback> reply) { + std::vector links; + server.getDocumentLinks(params.textDocument.uri, links); + reply(std::move(links)); +} + +//===----------------------------------------------------------------------===// +// Hover + +void LSPServer::onHover(const TextDocumentPositionParams &params, + Callback> reply) { + reply(server.findHover(params.textDocument.uri, params.position)); +} + //===----------------------------------------------------------------------===// // Entry Point //===----------------------------------------------------------------------===// @@ -148,6 +183,13 @@ LogicalResult mlir::lsp::runTableGenLSPServer(TableGenServer &server, messageHandler.notification("textDocument/didChange", &lspServer, &LSPServer::onDocumentDidChange); + // Document Link + messageHandler.method("textDocument/documentLink", &lspServer, + &LSPServer::onDocumentLink); + + // Hover + messageHandler.method("textDocument/hover", &lspServer, &LSPServer::onHover); + // Diagnostics lspServer.publishDiagnostics = messageHandler.outgoingNotification( diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp index 973097e0c8474b..17c1e13fcc4e6d 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp @@ -103,6 +103,20 @@ class TableGenTextFile { /// Return the current version of this text file. 
int64_t getVersion() const { return version; } + //===--------------------------------------------------------------------===// + // Document Links + //===--------------------------------------------------------------------===// + + void getDocumentLinks(const lsp::URIForFile &uri, + std::vector &links); + + //===--------------------------------------------------------------------===// + // Hover + //===--------------------------------------------------------------------===// + + Optional findHover(const lsp::URIForFile &uri, + const lsp::Position &hoverPos); + private: /// The full string contents of the file. std::string contents; @@ -118,6 +132,9 @@ class TableGenTextFile { /// The record keeper containing the parsed tablegen constructs. llvm::RecordKeeper recordKeeper; + + /// The set of includes of the parsed file. + SmallVector parsedIncludes; }; } // namespace @@ -157,10 +174,38 @@ TableGenTextFile::TableGenTextFile( ctx->diagnostics.push_back(*lspDiag); }, &handlerContext); - if (llvm::TableGenParseFile(sourceMgr, recordKeeper)) + bool failedToParse = llvm::TableGenParseFile(sourceMgr, recordKeeper); + + // Process all of the include files. + lsp::gatherIncludeFiles(sourceMgr, parsedIncludes); + if (failedToParse) return; } +//===--------------------------------------------------------------------===// +// TableGenTextFile: Document Links +//===--------------------------------------------------------------------===// + +void TableGenTextFile::getDocumentLinks(const lsp::URIForFile &uri, + std::vector &links) { + for (const lsp::SourceMgrInclude &include : parsedIncludes) + links.emplace_back(include.range, include.uri); +} + +//===----------------------------------------------------------------------===// +// TableGenTextFile: Hover +//===----------------------------------------------------------------------===// + +Optional +TableGenTextFile::findHover(const lsp::URIForFile &uri, + const lsp::Position &hoverPos) { + // Check for a reference to an include. 
+ for (const lsp::SourceMgrInclude &include : parsedIncludes) + if (include.range.contains(hoverPos)) + return include.buildHover(); + return llvm::None; +} + //===----------------------------------------------------------------------===// // TableGenServer::Impl //===----------------------------------------------------------------------===// @@ -209,3 +254,18 @@ Optional lsp::TableGenServer::removeDocument(const URIForFile &uri) { impl->files.erase(it); return version; } + +void lsp::TableGenServer::getDocumentLinks( + const URIForFile &uri, std::vector &documentLinks) { + auto fileIt = impl->files.find(uri.file()); + if (fileIt != impl->files.end()) + return fileIt->second->getDocumentLinks(uri, documentLinks); +} + +Optional lsp::TableGenServer::findHover(const URIForFile &uri, + const Position &hoverPos) { + auto fileIt = impl->files.find(uri.file()); + if (fileIt != impl->files.end()) + return fileIt->second->findHover(uri, hoverPos); + return llvm::None; +} diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h index d7a88fd47dbde1..68162533ecc375 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h @@ -17,6 +17,9 @@ namespace mlir { namespace lsp { struct Diagnostic; +struct DocumentLink; +struct Hover; +struct Position; class URIForFile; /// This class implements all of the TableGen related functionality necessary @@ -52,6 +55,14 @@ class TableGenServer { /// the server. Optional removeDocument(const URIForFile &uri); + /// Return the document links referenced by the given file. + void getDocumentLinks(const URIForFile &uri, + std::vector &documentLinks); + + /// Find a hover description for the given hover position, or None if one + /// couldn't be found. 
+ Optional findHover(const URIForFile &uri, const Position &hoverPos); + private: struct Impl; std::unique_ptr impl; diff --git a/mlir/test/tblgen-lsp-server/document-links.test b/mlir/test/tblgen-lsp-server/document-links.test new file mode 100644 index 00000000000000..1dc0e0c0fa55ed --- /dev/null +++ b/mlir/test/tblgen-lsp-server/document-links.test @@ -0,0 +1,34 @@ +// RUN: tblgen-lsp-server -tablegen-extra-dir %S -tablegen-extra-dir %S/../../include -lit-test < %s | FileCheck %s +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"tablegen","capabilities":{},"trace":"off"}} +// ----- +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ + "uri":"test:///foo.td", + "languageId":"tablegen", + "version":1, + "text":"include \"include/included.td\"" +}}} +// ----- +{"jsonrpc":"2.0","id":1,"method":"textDocument/documentLink","params":{ + "textDocument":{"uri":"test:///foo.td"} +}} +// CHECK: "id": 1, +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": [ +// CHECK-NEXT: { +// CHECK-NEXT: "range": { +// CHECK-NEXT: "end": { +// CHECK-NEXT: "character": 29, +// CHECK-NEXT: "line": 0 +// CHECK-NEXT: }, +// CHECK-NEXT: "start": { +// CHECK-NEXT: "character": 8, +// CHECK-NEXT: "line": 0 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "target": "file:{{.*}}included.td" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// ----- +{"jsonrpc":"2.0","id":7,"method":"shutdown"} +// ----- +{"jsonrpc":"2.0","method":"exit"} diff --git a/mlir/test/tblgen-lsp-server/hover.test b/mlir/test/tblgen-lsp-server/hover.test new file mode 100644 index 00000000000000..3518f4e0e973d6 --- /dev/null +++ b/mlir/test/tblgen-lsp-server/hover.test @@ -0,0 +1,37 @@ +// RUN: tblgen-lsp-server -tablegen-extra-dir %S -tablegen-extra-dir %S/../../include -lit-test < %s | FileCheck %s +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"tablegen","capabilities":{},"trace":"off"}} +// ----- 
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ + "uri":"test:///foo.td", + "languageId":"tablegen", + "version":1, + "text":"include \"include/included.td\"" +}}} +// ----- +// Hover on an include file. +{"jsonrpc":"2.0","id":1,"method":"textDocument/hover","params":{ + "textDocument":{"uri":"test:///foo.td"}, + "position":{"line":0,"character":15} +}} +// CHECK: "id": 1, +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": { +// CHECK-NEXT: "contents": { +// CHECK-NEXT: "kind": "markdown", +// CHECK-NEXT: "value": "`included.td`\n***\n{{.*}}included.td" +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "end": { +// CHECK-NEXT: "character": 29, +// CHECK-NEXT: "line": 0 +// CHECK-NEXT: }, +// CHECK-NEXT: "start": { +// CHECK-NEXT: "character": 8, +// CHECK-NEXT: "line": 0 +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":7,"method":"shutdown"} +// ----- +{"jsonrpc":"2.0","method":"exit"} diff --git a/mlir/test/tblgen-lsp-server/initialize-params.test b/mlir/test/tblgen-lsp-server/initialize-params.test index ef7e5407201400..584722c6dc32c3 100644 --- a/mlir/test/tblgen-lsp-server/initialize-params.test +++ b/mlir/test/tblgen-lsp-server/initialize-params.test @@ -5,6 +5,10 @@ // CHECK-NEXT: "jsonrpc": "2.0", // CHECK-NEXT: "result": { // CHECK-NEXT: "capabilities": { +// CHECK-NEXT: "documentLinkProvider": { +// CHECK-NEXT: "resolveProvider": false +// CHECK-NEXT: }, +// CHECK-NEXT: "hoverProvider": true, // CHECK-NEXT: "textDocumentSync": { // CHECK-NEXT: "change": 1, // CHECK-NEXT: "openClose": true, From 8d021670c31dcb760ad3d301eb5fdfdf38733324 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Tue, 10 May 2022 10:23:57 -0700 Subject: [PATCH 700/908] [mlir][Tablegen-LSP] Add support for tracking definitions and references This essentially builds an index for the parsed records and record values (fields). 
This covers quite a few cases, but is limited by the currently lackluster location tracking in tablegen. A followup will work on plumbing more locations through tablegen, which should greatly improve what we can do here. Differential Revision: https://reviews.llvm.org/D125443 --- .../lib/Tools/tblgen-lsp-server/LSPServer.cpp | 33 +++ .../tblgen-lsp-server/TableGenServer.cpp | 204 +++++++++++++++++- .../Tools/tblgen-lsp-server/TableGenServer.h | 9 + mlir/test/tblgen-lsp-server/definition.test | 55 +++++ .../tblgen-lsp-server/initialize-params.test | 4 +- mlir/test/tblgen-lsp-server/references.test | 49 +++++ 6 files changed, 351 insertions(+), 3 deletions(-) create mode 100644 mlir/test/tblgen-lsp-server/definition.test create mode 100644 mlir/test/tblgen-lsp-server/references.test diff --git a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp index 83133efcf4c5a5..4b480319d290da 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/LSPServer.cpp @@ -42,6 +42,14 @@ struct LSPServer { void onDocumentDidClose(const DidCloseTextDocumentParams &params); void onDocumentDidChange(const DidChangeTextDocumentParams &params); + //===--------------------------------------------------------------------===// + // Definitions and References + + void onGoToDefinition(const TextDocumentPositionParams &params, + Callback> reply); + void onReference(const ReferenceParams &params, + Callback> reply); + //===----------------------------------------------------------------------===// // DocumentLink @@ -84,6 +92,8 @@ void LSPServer::onInitialize(const InitializeParams &params, {"change", (int)TextDocumentSyncKind::Full}, {"save", true}, }}, + {"definitionProvider", true}, + {"referencesProvider", true}, {"documentLinkProvider", llvm::json::Object{ {"resolveProvider", false}, @@ -142,6 +152,23 @@ void LSPServer::onDocumentDidChange(const DidChangeTextDocumentParams &params) { publishDiagnostics(diagParams); } 
+//===----------------------------------------------------------------------===// +// Definitions and References + +void LSPServer::onGoToDefinition(const TextDocumentPositionParams &params, + Callback> reply) { + std::vector locations; + server.getLocationsOf(params.textDocument.uri, params.position, locations); + reply(std::move(locations)); +} + +void LSPServer::onReference(const ReferenceParams &params, + Callback> reply) { + std::vector locations; + server.findReferencesOf(params.textDocument.uri, params.position, locations); + reply(std::move(locations)); +} + //===----------------------------------------------------------------------===// // DocumentLink @@ -183,6 +210,12 @@ LogicalResult mlir::lsp::runTableGenLSPServer(TableGenServer &server, messageHandler.notification("textDocument/didChange", &lspServer, &LSPServer::onDocumentDidChange); + // Definitions and References + messageHandler.method("textDocument/definition", &lspServer, + &LSPServer::onGoToDefinition); + messageHandler.method("textDocument/references", &lspServer, + &LSPServer::onReference); + // Document Link messageHandler.method("textDocument/documentLink", &lspServer, &LSPServer::onDocumentLink); diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp index 17c1e13fcc4e6d..81cf65070e44ba 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp @@ -13,6 +13,7 @@ #include "../lsp-server-support/Protocol.h" #include "../lsp-server-support/SourceMgrUtils.h" #include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/TypeSwitch.h" @@ -40,10 +41,14 @@ static lsp::URIForFile getURIFromLoc(const llvm::SourceMgr &mgr, SMLoc loc, } /// Returns a language server location from the given source range. 
+static lsp::Location getLocationFromLoc(llvm::SourceMgr &mgr, SMRange loc, + const lsp::URIForFile &uri) { + return lsp::Location(getURIFromLoc(mgr, loc.Start, uri), + lsp::Range(mgr, loc)); +} static lsp::Location getLocationFromLoc(llvm::SourceMgr &mgr, SMLoc loc, const lsp::URIForFile &uri) { - return lsp::Location(getURIFromLoc(mgr, loc, uri), - lsp::Range(mgr, lsp::convertTokenLocToRange(loc))); + return getLocationFromLoc(mgr, lsp::convertTokenLocToRange(loc), uri); } /// Convert the given TableGen diagnostic to the LSP form. @@ -87,6 +92,142 @@ getLspDiagnoticFromDiag(const llvm::SMDiagnostic &diag, return lspDiag; } +//===----------------------------------------------------------------------===// +// TableGenIndex +//===----------------------------------------------------------------------===// + +namespace { +/// This class represents a single symbol definition within a TableGen index. It +/// contains the definition of the symbol, the location of the symbol, and any +/// recorded references. +struct TableGenIndexSymbol { + TableGenIndexSymbol(const llvm::Record *record) + : definition(record), + defLoc(lsp::convertTokenLocToRange(record->getLoc().front())) {} + TableGenIndexSymbol(const llvm::RecordVal *value) + : definition(value), + defLoc(lsp::convertTokenLocToRange(value->getLoc())) {} + + /// The main definition of the symbol. + PointerUnion definition; + + /// The source location of the definition. + SMRange defLoc; + + /// The source location of the references of the definition. + SmallVector references; +}; + +/// This class provides an index for definitions/uses within a TableGen +/// document. It provides efficient lookup of a definition given an input source +/// range. +class TableGenIndex { +public: + TableGenIndex() : intervalMap(allocator) {} + + /// Initialize the index with the given RecordKeeper. + void initialize(const llvm::RecordKeeper &records); + + /// Lookup a symbol for the given location. 
Returns nullptr if no symbol could + /// be found. If provided, `overlappedRange` is set to the range that the + /// provided `loc` overlapped with. + const TableGenIndexSymbol *lookup(SMLoc loc, + SMRange *overlappedRange = nullptr) const; + +private: + /// The type of interval map used to store source references. SMRange is + /// half-open, so we also need to use a half-open interval map. + using MapT = llvm::IntervalMap< + const char *, const TableGenIndexSymbol *, + llvm::IntervalMapImpl::NodeSizer::LeafSize, + llvm::IntervalMapHalfOpenInfo>; + + /// An allocator for the interval map. + MapT::Allocator allocator; + + /// An interval map containing a corresponding definition mapped to a source + /// interval. + MapT intervalMap; + + /// A mapping between definitions and their corresponding symbol. + DenseMap> defToSymbol; +}; +} // namespace + +void TableGenIndex::initialize(const llvm::RecordKeeper &records) { + auto getOrInsertDef = [&](const auto *def) -> TableGenIndexSymbol * { + auto it = defToSymbol.try_emplace(def, nullptr); + if (it.second) + it.first->second = std::make_unique(def); + return &*it.first->second; + }; + auto insertRef = [&](TableGenIndexSymbol *sym, SMRange refLoc, + bool isDef = false) { + const char *startLoc = refLoc.Start.getPointer(); + const char *endLoc = refLoc.End.getPointer(); + + // If the location we got was empty, try to lex a token from the start + // location. + if (startLoc == endLoc) { + refLoc = lsp::convertTokenLocToRange(SMLoc::getFromPointer(startLoc)); + startLoc = refLoc.Start.getPointer(); + endLoc = refLoc.End.getPointer(); + + // If the location is still empty, bail on trying to use this reference + // location. + if (startLoc == endLoc) + return; + } + + // Check to see if a symbol is already attached to this location. + // IntervalMap doesn't allow overlapping inserts, and we don't really + // want multiple symbols attached to a source location anyways. 
This + // shouldn't really happen in practice, but we should handle it gracefully. + if (!intervalMap.overlaps(startLoc, endLoc)) + intervalMap.insert(startLoc, endLoc, sym); + + if (!isDef) + sym->references.push_back(refLoc); + }; + auto classes = + llvm::make_pointee_range(llvm::make_second_range(records.getClasses())); + auto defs = + llvm::make_pointee_range(llvm::make_second_range(records.getDefs())); + for (const llvm::Record &def : llvm::concat(classes, defs)) { + auto *sym = getOrInsertDef(&def); + insertRef(sym, sym->defLoc, /*isDef=*/true); + + // Add references to the definition. + for (SMLoc loc : def.getLoc().drop_front()) + insertRef(sym, lsp::convertTokenLocToRange(loc)); + + // Add references to any super classes. + for (auto &it : def.getSuperClasses()) + insertRef(getOrInsertDef(it.first), + lsp::convertTokenLocToRange(it.second.Start)); + + // Add definitions for any values. + for (const llvm::RecordVal &value : def.getValues()) { + auto *sym = getOrInsertDef(&value); + insertRef(sym, sym->defLoc, /*isDef=*/true); + } + } +} + +const TableGenIndexSymbol * +TableGenIndex::lookup(SMLoc loc, SMRange *overlappedRange) const { + auto it = intervalMap.find(loc.getPointer()); + if (!it.valid() || loc.getPointer() < it.start()) + return nullptr; + + if (overlappedRange) { + *overlappedRange = SMRange(SMLoc::getFromPointer(it.start()), + SMLoc::getFromPointer(it.stop())); + } + return it.value(); +} + //===----------------------------------------------------------------------===// // TableGenTextFile //===----------------------------------------------------------------------===// @@ -103,6 +244,15 @@ class TableGenTextFile { /// Return the current version of this text file. 
int64_t getVersion() const { return version; } + //===--------------------------------------------------------------------===// + // Definitions and References + //===--------------------------------------------------------------------===// + + void getLocationsOf(const lsp::URIForFile &uri, const lsp::Position &defPos, + std::vector &locations); + void findReferencesOf(const lsp::URIForFile &uri, const lsp::Position &pos, + std::vector &references); + //===--------------------------------------------------------------------===// // Document Links //===--------------------------------------------------------------------===// @@ -133,6 +283,9 @@ class TableGenTextFile { /// The record keeper containing the parsed tablegen constructs. llvm::RecordKeeper recordKeeper; + /// The index of the parsed file. + TableGenIndex index; + /// The set of includes of the parsed file. SmallVector parsedIncludes; }; @@ -180,6 +333,37 @@ TableGenTextFile::TableGenTextFile( lsp::gatherIncludeFiles(sourceMgr, parsedIncludes); if (failedToParse) return; + + // If we successfully parsed the file, we can now build the index. 
+ index.initialize(recordKeeper); +} + +//===----------------------------------------------------------------------===// +// TableGenTextFile: Definitions and References +//===----------------------------------------------------------------------===// + +void TableGenTextFile::getLocationsOf(const lsp::URIForFile &uri, + const lsp::Position &defPos, + std::vector &locations) { + SMLoc posLoc = defPos.getAsSMLoc(sourceMgr); + const TableGenIndexSymbol *symbol = index.lookup(posLoc); + if (!symbol) + return; + + locations.push_back(getLocationFromLoc(sourceMgr, symbol->defLoc, uri)); +} + +void TableGenTextFile::findReferencesOf( + const lsp::URIForFile &uri, const lsp::Position &pos, + std::vector &references) { + SMLoc posLoc = pos.getAsSMLoc(sourceMgr); + const TableGenIndexSymbol *symbol = index.lookup(posLoc); + if (!symbol) + return; + + references.push_back(getLocationFromLoc(sourceMgr, symbol->defLoc, uri)); + for (SMRange refLoc : symbol->references) + references.push_back(getLocationFromLoc(sourceMgr, refLoc, uri)); } //===--------------------------------------------------------------------===// @@ -255,6 +439,22 @@ Optional lsp::TableGenServer::removeDocument(const URIForFile &uri) { return version; } +void lsp::TableGenServer::getLocationsOf(const URIForFile &uri, + const Position &defPos, + std::vector &locations) { + auto fileIt = impl->files.find(uri.file()); + if (fileIt != impl->files.end()) + fileIt->second->getLocationsOf(uri, defPos, locations); +} + +void lsp::TableGenServer::findReferencesOf(const URIForFile &uri, + const Position &pos, + std::vector &references) { + auto fileIt = impl->files.find(uri.file()); + if (fileIt != impl->files.end()) + fileIt->second->findReferencesOf(uri, pos, references); +} + void lsp::TableGenServer::getDocumentLinks( const URIForFile &uri, std::vector &documentLinks) { auto fileIt = impl->files.find(uri.file()); diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h 
b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h index 68162533ecc375..82ac0d59d6ae55 100644 --- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h +++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.h @@ -19,6 +19,7 @@ namespace lsp { struct Diagnostic; struct DocumentLink; struct Hover; +struct Location; struct Position; class URIForFile; @@ -55,6 +56,14 @@ class TableGenServer { /// the server. Optional removeDocument(const URIForFile &uri); + /// Return the locations of the object pointed at by the given position. + void getLocationsOf(const URIForFile &uri, const Position &defPos, + std::vector &locations); + + /// Find all references of the object pointed at by the given position. + void findReferencesOf(const URIForFile &uri, const Position &pos, + std::vector &references); + /// Return the document links referenced by the given file. void getDocumentLinks(const URIForFile &uri, std::vector &documentLinks); diff --git a/mlir/test/tblgen-lsp-server/definition.test b/mlir/test/tblgen-lsp-server/definition.test new file mode 100644 index 00000000000000..d2f26a2247628f --- /dev/null +++ b/mlir/test/tblgen-lsp-server/definition.test @@ -0,0 +1,55 @@ +// RUN: tblgen-lsp-server -lit-test < %s | FileCheck %s +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"tablegen","capabilities":{},"trace":"off"}} +// ----- +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ + "uri":"test:///foo.td", + "languageId":"tablegen", + "version":1, + "text":"class Foo {\n int field1 = ?;\n}\ndef FooDerived : Foo {\n let field1 = 10;\n }\n" +}}} +// ----- +{"jsonrpc":"2.0","id":1,"method":"textDocument/definition","params":{ + "textDocument":{"uri":"test:///foo.td"}, + "position":{"line":3,"character":19} +}} +// CHECK: "id": 1 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": [ +// CHECK-NEXT: { +// CHECK-NEXT: "range": { +// CHECK-NEXT: "end": { +// CHECK-NEXT: "character": 9, +// CHECK-NEXT: "line": 0 +// 
CHECK-NEXT: }, +// CHECK-NEXT: "start": { +// CHECK-NEXT: "character": 6, +// CHECK-NEXT: "line": 0 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "uri": "{{.*}}/foo.td" +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":2,"method":"textDocument/definition","params":{ + "textDocument":{"uri":"test:///foo.td"}, + "position":{"line":4,"character":9} +}} +// CHECK: "id": 2 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": [ +// CHECK-NEXT: { +// CHECK-NEXT: "range": { +// CHECK-NEXT: "end": { +// CHECK-NEXT: "character": 12, +// CHECK-NEXT: "line": 4 +// CHECK-NEXT: }, +// CHECK-NEXT: "start": { +// CHECK-NEXT: "character": 6, +// CHECK-NEXT: "line": 4 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "uri": "{{.*}}/foo.td" +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":3,"method":"shutdown"} +// ----- +{"jsonrpc":"2.0","method":"exit"} diff --git a/mlir/test/tblgen-lsp-server/initialize-params.test b/mlir/test/tblgen-lsp-server/initialize-params.test index 584722c6dc32c3..77aeca03d1fd44 100644 --- a/mlir/test/tblgen-lsp-server/initialize-params.test +++ b/mlir/test/tblgen-lsp-server/initialize-params.test @@ -5,11 +5,13 @@ // CHECK-NEXT: "jsonrpc": "2.0", // CHECK-NEXT: "result": { // CHECK-NEXT: "capabilities": { +// CHECK-NEXT: "definitionProvider": true, // CHECK-NEXT: "documentLinkProvider": { // CHECK-NEXT: "resolveProvider": false // CHECK-NEXT: }, // CHECK-NEXT: "hoverProvider": true, -// CHECK-NEXT: "textDocumentSync": { +// CHECK-NEXT: "referencesProvider": true, +// CHECK-NEXT: "textDocumentSync": { // CHECK-NEXT: "change": 1, // CHECK-NEXT: "openClose": true, // CHECK-NEXT: "save": true diff --git a/mlir/test/tblgen-lsp-server/references.test b/mlir/test/tblgen-lsp-server/references.test new file mode 100644 index 00000000000000..55aa96e8ed8b93 --- /dev/null +++ b/mlir/test/tblgen-lsp-server/references.test @@ -0,0 +1,49 @@ +// RUN: tblgen-lsp-server -lit-test < %s | FileCheck -strict-whitespace %s 
+{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"tablegen","capabilities":{},"trace":"off"}} +// ----- +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ + "uri":"test:///foo.td", + "languageId":"tablegen", + "version":1, + "text":"class Foo;\ndef FooDerived : Foo;\n" +}}} +// ----- +{"jsonrpc":"2.0","id":1,"method":"textDocument/references","params":{ + "textDocument":{"uri":"test:///foo.td"}, + "position":{"line":0,"character":7}, + "context":{"includeDeclaration": false} +}} +// CHECK: "id": 1 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": [ +// CHECK-NEXT: { +// CHECK-NEXT: "range": { +// CHECK-NEXT: "end": { +// CHECK-NEXT: "character": 9, +// CHECK-NEXT: "line": 0 +// CHECK-NEXT: }, +// CHECK-NEXT: "start": { +// CHECK-NEXT: "character": 6, +// CHECK-NEXT: "line": 0 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "uri": "{{.*}}/foo.td" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "range": { +// CHECK-NEXT: "end": { +// CHECK-NEXT: "character": 20, +// CHECK-NEXT: "line": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "start": { +// CHECK-NEXT: "character": 17, +// CHECK-NEXT: "line": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "uri": "{{.*}}/foo.td" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// ----- +{"jsonrpc":"2.0","id":3,"method":"shutdown"} +// ----- +{"jsonrpc":"2.0","method":"exit"} From c78c00dc16bbbb3b48705190f7e6de09458a1f34 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Thu, 26 May 2022 16:03:09 +0100 Subject: [PATCH 701/908] [LLD][ELF] Drop the string null terminator from the hash in splitStrings Differential Revision: https://reviews.llvm.org/D126484 --- lld/ELF/InputSection.cpp | 8 ++++---- lld/test/ELF/comment-gc.s | 2 +- lld/test/ELF/merge-string.s | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 02d1080cae2cd5..056119924a8291 100644 --- a/lld/ELF/InputSection.cpp +++ 
b/lld/ELF/InputSection.cpp @@ -1376,15 +1376,15 @@ void MergeInputSection::splitStrings(StringRef s, size_t entSize) { if (entSize == 1) { // Optimize the common case. do { - size_t size = strlen(p) + 1; + size_t size = strlen(p); pieces.emplace_back(p - s.begin(), xxHash64(StringRef(p, size)), live); - p += size; + p += size + 1; } while (p != end); } else { do { - size_t size = findNull(StringRef(p, end - p), entSize) + entSize; + size_t size = findNull(StringRef(p, end - p), entSize); pieces.emplace_back(p - s.begin(), xxHash64(StringRef(p, size)), live); - p += size; + p += size + entSize; } while (p != end); } } diff --git a/lld/test/ELF/comment-gc.s b/lld/test/ELF/comment-gc.s index 44c08fe9cef9ac..077bdb9021a6d2 100644 --- a/lld/test/ELF/comment-gc.s +++ b/lld/test/ELF/comment-gc.s @@ -5,7 +5,7 @@ # RUN: llvm-objdump -s %t1 | FileCheck %s # CHECK: Contents of section .comment: -# CHECK-NEXT: foo..LLD 1.0.bar +# CHECK-NEXT: .LLD 1.0.foo.bar .ident "foo" diff --git a/lld/test/ELF/merge-string.s b/lld/test/ELF/merge-string.s index 0922cd8f135e74..549195d5cf8058 100644 --- a/lld/test/ELF/merge-string.s +++ b/lld/test/ELF/merge-string.s @@ -54,7 +54,7 @@ zed: // NOTAIL-NEXT: AddressAlignment: 1 // NOTAIL-NEXT: EntrySize: 1 // NOTAIL-NEXT: SectionData ( -// NOTAIL-NEXT: 0000: 62630061 626300 |bc.abc.| +// NOTAIL-NEXT: 0000: 61626300 626300 |abc.bc.| // NOTAIL-NEXT: ) // NOMERGE: Name: .rodata1 From c913c5598b026f0968f3c25db14e232c5b52277e Mon Sep 17 00:00:00 2001 From: Ben Dunbobbin Date: Fri, 27 May 2022 11:02:04 +0100 Subject: [PATCH 702/908] [llvm-ar][test] add special case of replace converting a regular to a thin archive Add a regression test for: https://github.com/llvm/llvm-project/issues/55527 Differential Revision: https://reviews.llvm.org/D125785 --- .../llvm-ar/regular-to-thin-archive-special.test | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 llvm/test/tools/llvm-ar/regular-to-thin-archive-special.test diff --git 
a/llvm/test/tools/llvm-ar/regular-to-thin-archive-special.test b/llvm/test/tools/llvm-ar/regular-to-thin-archive-special.test new file mode 100644 index 00000000000000..f7b037d3f43e1e --- /dev/null +++ b/llvm/test/tools/llvm-ar/regular-to-thin-archive-special.test @@ -0,0 +1,15 @@ +## Test regular archives do not convert to thin archives +## in the special case of replacing a single file in an +## archive with itself from the current directory. + +# RUN: rm -rf %t && mkdir -p %t && cd %t +# RUN: echo "a" > a.txt +# RUN: llvm-ar --format=gnu cr foo.a a.txt 2>&1 + +## Check that an error is issued. +# RUN: not llvm-ar --format=gnu cr --thin foo.a a.txt 2>&1 | FileCheck %s +# CHECK: error: cannot convert a regular archive to a thin one + +## Check that the archive is still the correct flavor. +# RUN: FileCheck --input-file=foo.a %s --check-prefix=REGULAR +# REGULAR: !<arch> From 7df25978ef7882a3d793eecee854ec5d961c02d2 Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Fri, 27 May 2022 11:12:44 +0100 Subject: [PATCH 703/908] [Doc][OpenCL] Misc wording improvements for SPIR-V --- clang/docs/OpenCLSupport.rst | 2 +- llvm/docs/SPIRVUsage.rst | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/clang/docs/OpenCLSupport.rst b/clang/docs/OpenCLSupport.rst index bcd55f6cc84134..3fa0c774f5bc91 100644 --- a/clang/docs/OpenCLSupport.rst +++ b/clang/docs/OpenCLSupport.rst @@ -25,7 +25,7 @@ Clang also supports :ref:`the C++ for OpenCL kernel language ` available. - +Details about usage of clang for OpenCL can be found in :doc:`UsersManual`. Missing features or with limited support ======================================== diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 7529b5d3b586c9..0f75e001e661e9 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -36,17 +36,17 @@ to specify the target triple: .. 
table:: SPIR-V Subarchitectures - ============ ============================================================== - Architecture Description - ============ ============================================================== - *<empty>* SPIR-V version deduced by tools based on the compiled input. - ``v1.0`` SPIR-V version 1.0. - ``v1.1`` SPIR-V version 1.1. - ``v1.2`` SPIR-V version 1.2. - ``v1.3`` SPIR-V version 1.3. - ``v1.4`` SPIR-V version 1.4. - ``v1.5`` SPIR-V version 1.5. - ============ ============================================================== + =============== ============================================================== + Subarchitecture Description + =============== ============================================================== + *<empty>* SPIR-V version deduced by tools based on the compiled input. + ``v1.0`` SPIR-V version 1.0. + ``v1.1`` SPIR-V version 1.1. + ``v1.2`` SPIR-V version 1.2. + ``v1.3`` SPIR-V version 1.3. + ``v1.4`` SPIR-V version 1.4. + ``v1.5`` SPIR-V version 1.5. + =============== ============================================================== .. table:: SPIR-V Vendors @@ -71,3 +71,7 @@ to specify the target triple: ===================== ============================================================== *<empty>*/``unknown`` Defaults to the OpenCL environment. ===================== ============================================================== + +Example: + +``-target spirv64v1.0`` can be used to compile for SPIR-V version 1.0 with 64-bit pointer width. From f168a65943f934bcb161e01a600b9bb4e4340c24 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Fri, 27 May 2022 15:51:40 +0530 Subject: [PATCH 704/908] [MLIR][Presburger] Add intersectDomain/Range to IntegerRelation This patch adds support for intersecting a set with a relation. 
Reviewed By: arjunp Differential Revision: https://reviews.llvm.org/D126328 --- .../Analysis/Presburger/IntegerRelation.h | 14 +++++++- .../Analysis/Presburger/IntegerRelation.cpp | 29 ++++++++++++++++ .../Presburger/IntegerRelationTest.cpp | 34 +++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index b74413b9ff0116..3481d71f6d5c4a 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -505,7 +505,19 @@ class IntegerRelation { /// Return a set corresponding to all points in the range of the relation. IntegerPolyhedron getRangeSet() const; - /// Invert the relation i.e., swap it's domain and range. + /// Intersect the given `poly` with the domain in-place. + /// + /// Formally, let the relation `this` be R: A -> B and poly is C, then this + /// operation modifies R to be (A intersection C) -> B. + void intersectDomain(const IntegerPolyhedron &poly); + + /// Intersect the given `poly` with the range in-place. + /// + /// Formally, let the relation `this` be R: A -> B and poly is C, then this + /// operation modifies R to be A -> (B intersection C). + void intersectRange(const IntegerPolyhedron &poly); + + /// Invert the relation i.e., swap its domain and range. /// /// Formally, let the relation `this` be R: A -> B, then this operation /// modifies R to be B -> A. 
diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index 87132728103a7e..d6337cbac8fea0 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -2102,6 +2102,35 @@ IntegerPolyhedron IntegerRelation::getRangeSet() const { return IntegerPolyhedron(std::move(copyRel)); } +void IntegerRelation::intersectDomain(const IntegerPolyhedron &poly) { + assert(getDomainSet().getSpace().isCompatible(poly.getSpace()) && + "Domain set is not compatible with poly"); + + // Treating the poly as a relation, convert it from `0 -> R` to `R -> 0`. + IntegerRelation rel = poly; + rel.inverse(); + + // Append dummy range variables to make the spaces compatible. + rel.appendId(IdKind::Range, getNumRangeIds()); + + // Intersect in place. + mergeLocalIds(rel); + append(rel); +} + +void IntegerRelation::intersectRange(const IntegerPolyhedron &poly) { + assert(getRangeSet().getSpace().isCompatible(poly.getSpace()) && + "Range set is not compatible with poly"); + + IntegerRelation rel = poly; + + // Append dummy domain variables to make the spaces compatible. 
+ rel.appendId(IdKind::Domain, getNumDomainIds()); + + mergeLocalIds(rel); + append(rel); +} + void IntegerRelation::inverse() { unsigned numRangeIds = getNumIdKind(IdKind::Range); convertIdKind(IdKind::Domain, 0, getIdKindEnd(IdKind::Domain), IdKind::Range); diff --git a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp index ee922584f564eb..d23900b38f54a3 100644 --- a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp +++ b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp @@ -57,3 +57,37 @@ TEST(IntegerRelationTest, inverse) { EXPECT_TRUE(rel.isEqual(inverseRel)); } + +TEST(IntegerRelationTest, intersectDomainAndRange) { + IntegerRelation rel = parseRelationFromSet( + "(x, y, z)[N, M]: (y floordiv 2 - N >= 0, z floordiv 5 - M" + ">= 0, x + y + z floordiv 7 == 0)", + 1); + + { + IntegerPolyhedron poly = parsePoly("(x)[N, M] : (x >= 0, M - x - 1 >= 0)"); + + IntegerRelation expectedRel = parseRelationFromSet( + "(x, y, z)[N, M]: (y floordiv 2 - N >= 0, z floordiv 5 - M" + ">= 0, x + y + z floordiv 7 == 0, x >= 0, M - x - 1 >= 0)", + 1); + + IntegerRelation copyRel = rel; + copyRel.intersectDomain(poly); + EXPECT_TRUE(copyRel.isEqual(expectedRel)); + } + + { + IntegerPolyhedron poly = + parsePoly("(y, z)[N, M] : (y >= 0, M - y - 1 >= 0, y + z == 0)"); + + IntegerRelation expectedRel = parseRelationFromSet( + "(x, y, z)[N, M]: (y floordiv 2 - N >= 0, z floordiv 5 - M" + ">= 0, x + y + z floordiv 7 == 0, y >= 0, M - y - 1 >= 0, y + z == 0)", + 1); + + IntegerRelation copyRel = rel; + copyRel.intersectRange(poly); + EXPECT_TRUE(copyRel.isEqual(expectedRel)); + } +} From 3e450d9cbbc00b78eebe122cdbec70bd315167e3 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 26 May 2022 15:11:02 +0100 Subject: [PATCH 705/908] [RISCV][NFC] Unify compatibility checks under one function Split off from D125021. We were duplicating logic across different phases. 
Since we want to ensure a consistency of logic across phases for correctness, this patch combines our multiple compatibility checks into one function to better convey this. Several methods were made const too. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D126472 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 31 ++++++++++++-------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index afd80754fa0ca7..6497fb2de3dcca 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -453,8 +453,12 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; } private: - bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo); - bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB); + bool needVSETVLI(const VSETVLIInfo &Require, + const VSETVLIInfo &CurInfo) const; + bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require, + const VSETVLIInfo &CurInfo) const; + bool needVSETVLIPHI(const VSETVLIInfo &Require, + const MachineBasicBlock &MBB) const; void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); void insertVSETVLI(MachineBasicBlock &MBB, @@ -715,7 +719,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { } bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require, - const VSETVLIInfo &CurInfo) { + const VSETVLIInfo &CurInfo) const { if (CurInfo.isCompatible(Require)) return false; @@ -934,6 +938,15 @@ bool canSkipVSETVLIForLoadStore(const MachineInstr &MI, return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require); } +bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require, + const VSETVLIInfo &CurInfo) const { + if (!needVSETVLI(Require, CurInfo)) + 
return false; + // If this is a unit-stride or strided load/store, we may be able to use the + // EMUL=(EEW/SEW)*LMUL relationship to avoid changing VTYPE. + return !canSkipVSETVLIForLoadStore(MI, Require, CurInfo); +} + bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { bool HadVectorOp = false; @@ -958,13 +971,10 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { } else { // If this instruction isn't compatible with the previous VL/VTYPE // we need to insert a VSETVLI. - // If this is a unit-stride or strided load/store, we may be able to use - // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype. // NOTE: We only do this if the vtype we're comparing against was // created in this block. We need the first and third phase to treat // the store the same way. - if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) && - needVSETVLI(NewInfo, BBInfo.Change)) + if (needVSETVLI(MI, NewInfo, BBInfo.Change)) BBInfo.Change = NewInfo; } } @@ -1033,7 +1043,7 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { // be/ unneeded if the AVL is a phi node where all incoming values are VL // outputs from the last VSETVLI in their respective basic blocks. bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, - const MachineBasicBlock &MBB) { + const MachineBasicBlock &MBB) const { if (DisableInsertVSETVLPHIOpt) return true; @@ -1128,13 +1138,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { } else { // If this instruction isn't compatible with the previous VL/VTYPE // we need to insert a VSETVLI. - // If this is a unit-stride or strided load/store, we may be able to use - // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype. // NOTE: We can't use predecessor information for the store. We must // treat it the same as the first phase so that we produce the correct // vl/vtype for succesor blocks. 
- if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) && - needVSETVLI(NewInfo, CurInfo)) { + if (needVSETVLI(MI, NewInfo, CurInfo)) { insertVSETVLI(MBB, MI, NewInfo, CurInfo); CurInfo = NewInfo; } From 889c7c8e9260932dce5367df3f7ed7ecc32dbe8e Mon Sep 17 00:00:00 2001 From: Daniil Dudkin Date: Fri, 27 May 2022 13:45:15 +0300 Subject: [PATCH 706/908] [flang] Support correct continuations for compiler directives If a line is over 72 characters long, flang's preprocessor cuts it there and continues on the next line. For this purpose it uses the standard way of continuing line with & on each line. However, it doesn't work with long compiler directives, like OpenMP or OpenACC ones. The line that continues the directive also has to contain the corresponding sentinel at the beginning. This change implements the described functionality. Also, some code was refactored in order to simplify and reuse existing code. Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D126301 --- flang/lib/Parser/parsing.cpp | 51 ++++++++++++++++++++++-------- flang/test/Preprocessing/pp132.f90 | 18 +++++++++++ flang/test/Preprocessing/pp133.f90 | 14 ++++++++ 3 files changed, 69 insertions(+), 14 deletions(-) create mode 100644 flang/test/Preprocessing/pp132.f90 create mode 100644 flang/test/Preprocessing/pp133.f90 diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index e29d5aff2496b9..6b0119008dbf42 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -103,7 +103,13 @@ void Parsing::EmitPreprocessedSource( int column{1}; bool inDirective{false}; bool inContinuation{false}; + bool lineWasBlankBefore{true}; const AllSources &allSources{allCooked().allSources()}; + // All directives that flang support are known to have a length of 3 chars + constexpr int directiveNameLength{3}; + // We need to know the current directive in order to provide correct + // continuation for the directive + std::string directive; for (const char 
&atChar : cooked().AsCharBlock()) { char ch{atChar}; if (ch == '\n') { @@ -111,15 +117,36 @@ void Parsing::EmitPreprocessedSource( column = 1; inDirective = false; inContinuation = false; + lineWasBlankBefore = true; ++sourceLine; + directive.clear(); } else { - if (ch == '!') { + auto provenance{cooked().GetProvenanceRange(CharBlock{&atChar, 1})}; + + // Preserves original case of the character + const auto getOriginalChar{[&](char ch) { + if (IsLetter(ch) && provenance && provenance->size() == 1) { + if (const char *orig{allSources.GetSource(*provenance)}) { + const char upper{ToUpperCaseLetter(ch)}; + if (*orig == upper) { + return upper; + } + } + } + return ch; + }}; + + if (ch == '!' && lineWasBlankBefore) { // Other comment markers (C, *, D) in original fixed form source // input card column 1 will have been deleted or normalized to !, // which signifies a comment (directive) in both source forms. inDirective = true; } - auto provenance{cooked().GetProvenanceRange(CharBlock{&atChar, 1})}; + if (inDirective && directive.size() < directiveNameLength && + IsLetter(ch)) { + directive += getOriginalChar(ch); + } + std::optional position{provenance ? allSources.GetSourcePosition(provenance->start()) : std::nullopt}; @@ -145,14 +172,18 @@ void Parsing::EmitPreprocessedSource( } if (column > 72) { // Wrap long lines in a portable fashion that works in both - // of the Fortran source forms. The first free-form continuation + // of the Fortran source forms. The first free-form continuation // marker ("&") lands in column 73, which begins the card commentary // field of fixed form, and the second one is put in column 6, // where it signifies fixed form line continuation. // The standard Fortran fixed form column limit (72) is used // for output, even if the input was parsed with a nonstandard // column limit override option. - out << "&\n &"; + // OpenMP and OpenACC directives' continuations should have the + // corresponding sentinel at the next line. 
+ const auto continuation{ + inDirective ? "&\n!$" + directive + "&" : "&\n &"s}; + out << continuation; column = 7; // start of fixed form source field ++sourceLine; inContinuation = true; @@ -169,16 +200,8 @@ void Parsing::EmitPreprocessedSource( out << ' '; } } - if (ch >= 'a' && ch <= 'z' && provenance && provenance->size() == 1) { - // Preserve original case - if (const char *orig{allSources.GetSource(*provenance)}) { - auto upper{static_cast(ch + 'A' - 'a')}; - if (*orig == upper) { - ch = upper; - } - } - } - out << ch; + out << getOriginalChar(ch); + lineWasBlankBefore = ch == ' ' && lineWasBlankBefore; ++column; } } diff --git a/flang/test/Preprocessing/pp132.f90 b/flang/test/Preprocessing/pp132.f90 new file mode 100644 index 00000000000000..418defb2d553ce --- /dev/null +++ b/flang/test/Preprocessing/pp132.f90 @@ -0,0 +1,18 @@ +! RUN: %flang -E -fopenmp -fopenacc %s 2>&1 | FileCheck %s +! CHECK: !$OMP parallel default(shared) private(super_very_long_name_for_the_va& +! CHECK: !$OMP&riable) +! CHECK: !$acc data copyin(super_very_long_name_for_the_variable, another_super& +! CHECK: !$acc&_wordy_variable_to_test) +! Test correct continuations in compiler directives +subroutine foo + integer :: super_very_long_name_for_the_variable + integer :: another_super_wordy_variable_to_test + + super_very_long_name_for_the_variable = 42 + another_super_wordy_variable_to_test = super_very_long_name_for_the_variable * 2 + !$OMP parallel default(shared) private(super_very_long_name_for_the_variable) + !$omp end parallel + + !$acc data copyin(super_very_long_name_for_the_variable, another_super_wordy_variable_to_test) + !$acc end data +end subroutine foo diff --git a/flang/test/Preprocessing/pp133.f90 b/flang/test/Preprocessing/pp133.f90 new file mode 100644 index 00000000000000..639bc0ad5b76b7 --- /dev/null +++ b/flang/test/Preprocessing/pp133.f90 @@ -0,0 +1,14 @@ +! RUN: %flang -E %s 2>&1 | FileCheck %s +! 
CHECK: character(*), parameter :: simple_literal = "!!!!!!!!!!!!!!!!!!!!!& +! CHECK: &!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!& +! CHECK: &!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!& +! CHECK: &!!!!!!!!!!!!!" +! CHECK: character(*), parameter :: hollerith_literal = 166H!!!!!!!!!!!!!!!& +! CHECK: &!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!& +! CHECK: &!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!& +! CHECK: &!!!!!!!!!!!!!!!!!!! +! Test correct continuations in string literals +subroutine foo + character(*), parameter :: simple_literal = "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + character(*), parameter :: hollerith_literal = 166H!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +end subroutine foo From a94d454390c6c351cea64cff4d4229df16d01217 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Fri, 27 May 2022 12:02:36 +0100 Subject: [PATCH 707/908] [LLD][test] Update `zlib` tests for LLD commit c78c00dc16 Updates for these tests were missed because I didn't have zlib-dev installed and thus the tests were unsupported and not run. 
--- lld/test/ELF/compress-debug-sections.s | 2 +- lld/test/ELF/compressed-debug-input.s | 8 ++++---- lld/test/ELF/relocatable-compressed-input.s | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lld/test/ELF/compress-debug-sections.s b/lld/test/ELF/compress-debug-sections.s index 93e834660cf800..00d08a95d6faac 100644 --- a/lld/test/ELF/compress-debug-sections.s +++ b/lld/test/ELF/compress-debug-sections.s @@ -18,8 +18,8 @@ # RUN: llvm-dwarfdump %t1 -debug-str | \ # RUN: FileCheck %s --check-prefix=DEBUGSTR # DEBUGSTR: .debug_str contents: -# DEBUGSTR-NEXT: AAAAAAAAAAAAAAAAAAAAAAAAAAA # DEBUGSTR-NEXT: BBBBBBBBBBBBBBBBBBBBBBBBBBB +# DEBUGSTR-NEXT: AAAAAAAAAAAAAAAAAAAAAAAAAAA ## Test alias. # RUN: ld.lld %t.o -o %t2 --compress-debug-sections zlib diff --git a/lld/test/ELF/compressed-debug-input.s b/lld/test/ELF/compressed-debug-input.s index af81d6fff3115e..70eb9d2a77b728 100644 --- a/lld/test/ELF/compressed-debug-input.s +++ b/lld/test/ELF/compressed-debug-input.s @@ -69,10 +69,10 @@ # DATA-NEXT: AddressAlignment: 1 # DATA-NEXT: EntrySize: 1 # DATA-NEXT: SectionData ( -# DATA-NEXT: 0000: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in| -# DATA-NEXT: 0010: 7400756E 7369676E 65642063 68617200 |t.unsigned char.| -# DATA-NEXT: 0020: 756E7369 676E6564 20696E74 00636861 |unsigned int.cha| -# DATA-NEXT: 0030: 72007368 6F727420 756E7369 676E6564 |r.short unsigned| +# DATA-NEXT: 0000: 756E7369 676E6564 20696E74 00636861 |unsigned int.cha| +# DATA-NEXT: 0010: 7200756E 7369676E 65642063 68617200 |r.unsigned char.| +# DATA-NEXT: 0020: 73686F72 7420756E 7369676E 65642069 |short unsigned i| +# DATA-NEXT: 0030: 6E74006C 6F6E6720 756E7369 676E6564 |nt.long unsigned| # DATA-NEXT: 0040: 20696E74 00 | int.| # DATA-NEXT: ) # DATA-NEXT: } diff --git a/lld/test/ELF/relocatable-compressed-input.s b/lld/test/ELF/relocatable-compressed-input.s index a98f93d6619f2c..c276a242c6f059 100644 --- a/lld/test/ELF/relocatable-compressed-input.s +++ 
b/lld/test/ELF/relocatable-compressed-input.s @@ -24,10 +24,10 @@ # CHECK-NEXT: AddressAlignment: 1 # CHECK-NEXT: EntrySize: 1 # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: {{.*}} |long unsigned in| -# CHECK-NEXT: 0010: {{.*}} |t.unsigned char.| -# CHECK-NEXT: 0020: {{.*}} |unsigned int.cha| -# CHECK-NEXT: 0030: {{.*}} |r.short unsigned| +# CHECK-NEXT: 0000: {{.*}} |unsigned int.cha| +# CHECK-NEXT: 0010: {{.*}} |r.unsigned char.| +# CHECK-NEXT: 0020: {{.*}} |short unsigned i| +# CHECK-NEXT: 0030: {{.*}} |nt.long unsigned| # CHECK-NEXT: 0040: {{.*}} | int.| # CHECK-NEXT: ) # CHECK-NEXT: } From 52d79b04b2628d21a328f331428f980a5ef3e2f0 Mon Sep 17 00:00:00 2001 From: Daniil Dudkin Date: Fri, 27 May 2022 15:33:02 +0300 Subject: [PATCH 708/908] [mlir][llvm] Fix compiler error on GCC 9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following compiler error: error: declaration of ‘mlir::LLVM::cconv::CConv mlir::LLVM::detail::CConvAttrStorage::CConv’ changes meaning of ‘CConv’ [-fpermissive] CConv as a member variable name was shadowing CConv as an enumeration, hence the compiler error. 
Reviewed By: ftynse, alexbatashev Differential Revision: https://reviews.llvm.org/D126530 --- mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 2 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 2 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 4a095817ab0ceb..2de2f7a7cb6a45 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -39,7 +39,7 @@ def LinkageAttr : LLVM_Attr<"Linkage"> { def CConvAttr : LLVM_Attr<"CConv"> { let mnemonic = "cconv"; let parameters = (ins - "CConv":$CConv + "CConv":$CallingConv ); let hasCustomAssemblyFormat = 1; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 9b77699ee7d8ba..6f4da42908fb09 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -190,7 +190,7 @@ def CConv : DialectAttr< "LLVM Calling Convention specification"> { let storageType = "::mlir::LLVM::CConvAttr"; let returnType = "::mlir::LLVM::cconv::CConv"; - let convertFromStorage = "$_self.getCConv()"; + let convertFromStorage = "$_self.getCallingConv()"; let constBuilderCall = "::mlir::LLVM::CConvAttr::get($_builder.getContext(), $0)"; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 41d0521b981c2f..164e966d1e1d45 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -2958,10 +2958,10 @@ Attribute LinkageAttr::parse(AsmParser &parser, Type type) { void CConvAttr::print(AsmPrinter &printer) const { printer << "<"; - if (static_cast(getCConv()) <= cconv::getMaxEnumValForCConv()) - printer << stringifyEnum(getCConv()); + if (static_cast(getCallingConv()) <= cconv::getMaxEnumValForCConv()) + printer << 
stringifyEnum(getCallingConv()); else - printer << "INVALID_cc_" << static_cast(getCConv()); + printer << "INVALID_cc_" << static_cast(getCallingConv()); printer << ">"; } From 8c0eb32d2aa0bc73c176d7b25f47bdf37f967d3b Mon Sep 17 00:00:00 2001 From: Vassil Vassilev Date: Fri, 27 May 2022 11:16:24 +0000 Subject: [PATCH 709/908] Also remove the empty StoredDeclsList entry from the lookup table In case where we have removed all declarations for a given declaration name entry we should remove the whole StoredDeclsMap entry. This patch improves consistency in the lookup tables and helps cling/clang-repl error recovery. Differential revision: https://reviews.llvm.org/D119675 --- clang/lib/AST/DeclBase.cpp | 6 +++++- clang/unittests/AST/ASTImporterTest.cpp | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index d5ad6368105444..13dd6da3f24fe2 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1537,7 +1537,11 @@ void DeclContext::removeDecl(Decl *D) { if (Map) { StoredDeclsMap::iterator Pos = Map->find(ND->getDeclName()); assert(Pos != Map->end() && "no lookup entry for decl"); - Pos->second.remove(ND); + StoredDeclsList &List = Pos->second; + List.remove(ND); + // Clean up the entry if there are no more decls. + if (List.isNull()) + Map->erase(Pos); } } while (DC->isTransparentContext() && (DC = DC->getParent())); } diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 896e3cb7a956cf..f1965235c9a6d5 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -4279,6 +4279,10 @@ TEST_P(DeclContextTest, // This asserts in the old implementation. DC->removeDecl(A0); EXPECT_FALSE(DC->containsDecl(A0)); + + // Make sure we do not leave a StoredDeclsList with no entries. 
+ DC->removeDecl(A1); + ASSERT_EQ(Map->find(A1->getDeclName()), Map->end()); } struct ImportFunctionTemplateSpecializations From dae2c24eb2385d5bf34e7199bae26080a5eb17b3 Mon Sep 17 00:00:00 2001 From: Shraiysh Vaishay Date: Fri, 27 May 2022 18:19:14 +0530 Subject: [PATCH 710/908] [flang][OpenMP][NFC] Cleanup the sections tests This patch cleans up the sections tests as per the recent effort to separate integration tests from unit tests. Reviewed By: kiranchandramohan, peixin Differential Revision: https://reviews.llvm.org/D126368 --- .../Fir/convert-to-llvm-openmp-and-fir.fir | 67 ++++++ flang/test/Lower/OpenMP/sections.f90 | 220 ++++++------------ 2 files changed, 138 insertions(+), 149 deletions(-) diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 601cf70815cf0d..b6da69a20a5e59 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -104,3 +104,70 @@ func.func @_QPsb(%arr: !fir.box> {fir.bindc_name = "arr"}) { // CHECK: omp.yield // CHECK: omp.terminator // CHECK: llvm.return + +// ----- + +func.func private @foo() +func.func private @bar() + +func.func @sections_no_data() { + omp.sections { + omp.section { + fir.call @foo() : () -> () + omp.terminator + } + omp.section { + fir.call @bar() : () -> () + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: llvm.func @sections_no_data +// CHECK: omp.sections { +// CHECK: omp.section { +// CHECK: llvm.call @foo() : () -> () +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.section { +// CHECK: llvm.call @bar() : () -> () +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.terminator +// CHECK: } + +// ----- + +func.func private @foo(!fir.ref) +func.func private @bar(!fir.ref, !fir.ref) + +func.func @sections_data_without_clauses(%arg0: !fir.ref {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "b"}) { + omp.sections { + omp.section { + fir.call 
@foo(%arg0) : (!fir.ref) -> () + omp.terminator + } + omp.section { + fir.call @bar(%arg0, %arg1) : (!fir.ref, !fir.ref) -> () + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: llvm.func @sections_data_without_clauses +// CHECK-SAME: (%[[ARG0:.+]]: !llvm.ptr {fir.bindc_name = "a"}, %[[ARG1:.+]]: !llvm.ptr {fir.bindc_name = "b"}) +// CHECK: omp.sections { +// CHECK: omp.section { +// CHECK: llvm.call @foo(%arg0) : (!llvm.ptr) -> () +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.section { +// CHECK: llvm.call @bar(%[[ARG0]], %[[ARG1]]) : (!llvm.ptr, !llvm.ptr) -> () +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.terminator +// CHECK: } diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90 index f18babcd31a05b..a882308479aedb 100644 --- a/flang/test/Lower/OpenMP/sections.f90 +++ b/flang/test/Lower/OpenMP/sections.f90 @@ -1,116 +1,58 @@ ! This test checks the lowering of OpenMP sections construct with several clauses present -! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefix="FIRDialect" -! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefix="LLVMDialect" -! TODO before (%flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | tco | FileCheck %s --check-prefix="LLVMIR"): -! ensure allocate clause lowering +! 
RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s -!FIRDialect: func @_QQmain() { -!FIRDialect: %[[COUNT:.*]] = fir.address_of(@_QFEcount) : !fir.ref -!FIRDialect: %[[DOUBLE_COUNT:.*]] = fir.address_of(@_QFEdouble_count) : !fir.ref -!FIRDialect: %[[ETA:.*]] = fir.alloca f32 {bindc_name = "eta", uniq_name = "_QFEeta"} -!FIRDialect: %[[CONST_1:.*]] = arith.constant 1 : i32 -!FIRDialect: omp.sections allocate(%[[CONST_1]] : i32 -> %0 : !fir.ref) { -!FIRDialect: omp.section { -!FIRDialect: {{.*}} = arith.constant 5 : i32 -!FIRDialect: fir.store {{.*}} to {{.*}} : !fir.ref -!FIRDialect: {{.*}} = fir.load %[[COUNT]] : !fir.ref -!FIRDialect: {{.*}} = fir.load %[[DOUBLE_COUNT]] : !fir.ref -!FIRDialect: {{.*}} = arith.muli {{.*}}, {{.*}} : i32 -!FIRDialect: {{.*}} = fir.convert {{.*}} : (i32) -> f32 -!FIRDialect: fir.store {{.*}} to %[[ETA]] : !fir.ref -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: omp.section { -!FIRDialect: {{.*}} = fir.load %[[DOUBLE_COUNT]] : !fir.ref -!FIRDialect: {{.*}} = arith.constant 1 : i32 -!FIRDialect: {{.*}} = arith.addi {{.*}} : i32 -!FIRDialect: fir.store {{.*}} to %[[DOUBLE_COUNT]] : !fir.ref -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: omp.section { -!FIRDialect: {{.*}} = fir.load %[[ETA]] : !fir.ref -!FIRDialect: {{.*}} = arith.constant 7.000000e+00 : f32 -!FIRDialect: {{.*}} = arith.subf {{.*}} : f32 -!FIRDialect: fir.store {{.*}} to %[[ETA]] : !fir.ref -!FIRDialect: {{.*}} = fir.load %[[COUNT]] : !fir.ref -!FIRDialect: {{.*}} = fir.convert {{.*}} : (i32) -> f32 -!FIRDialect: {{.*}} = fir.load %[[ETA]] : !fir.ref -!FIRDialect: {{.*}} = arith.mulf {{.*}}, {{.*}} : f32 -!FIRDialect: {{.*}} = fir.convert {{.*}} : (f32) -> i32 -!FIRDialect: fir.store {{.*}} to %[[COUNT]] : !fir.ref -!FIRDialect: {{.*}} = fir.load %[[COUNT]] : !fir.ref -!FIRDialect: {{.*}} = fir.convert {{.*}} : (i32) -> f32 -!FIRDialect: {{.*}} = fir.load %[[ETA]] : !fir.ref -!FIRDialect: {{.*}} = arith.subf {{.*}}, {{.*}} : f32 -!FIRDialect: 
{{.*}} = fir.convert {{.*}} : (f32) -> i32 -!FIRDialect: fir.store {{.*}} to %[[DOUBLE_COUNT]] : !fir.ref -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: omp.sections nowait { -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: return -!FIRDialect: } +!CHECK: func @_QQmain() { +!CHECK: %[[COUNT:.*]] = fir.address_of(@_QFEcount) : !fir.ref +!CHECK: %[[DOUBLE_COUNT:.*]] = fir.address_of(@_QFEdouble_count) : !fir.ref +!CHECK: %[[ETA:.*]] = fir.alloca f32 {bindc_name = "eta", uniq_name = "_QFEeta"} +!CHECK: %[[CONST_1:.*]] = arith.constant 1 : i32 +!CHECK: omp.sections allocate(%[[CONST_1]] : i32 -> %0 : !fir.ref) { +!CHECK: omp.section { +!CHECK: {{.*}} = arith.constant 5 : i32 +!CHECK: fir.store {{.*}} to {{.*}} : !fir.ref +!CHECK: {{.*}} = fir.load %[[COUNT]] : !fir.ref +!CHECK: {{.*}} = fir.load %[[DOUBLE_COUNT]] : !fir.ref +!CHECK: {{.*}} = arith.muli {{.*}}, {{.*}} : i32 +!CHECK: {{.*}} = fir.convert {{.*}} : (i32) -> f32 +!CHECK: fir.store {{.*}} to %[[ETA]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.section { +!CHECK: {{.*}} = fir.load %[[DOUBLE_COUNT]] : !fir.ref +!CHECK: {{.*}} = arith.constant 1 : i32 +!CHECK: {{.*}} = arith.addi {{.*}} : i32 +!CHECK: fir.store {{.*}} to %[[DOUBLE_COUNT]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.section { +!CHECK: {{.*}} = fir.load %[[ETA]] : !fir.ref +!CHECK: {{.*}} = arith.constant 7.000000e+00 : f32 +!CHECK: {{.*}} = arith.subf {{.*}} : f32 +!CHECK: fir.store {{.*}} to %[[ETA]] : !fir.ref +!CHECK: {{.*}} = fir.load %[[COUNT]] : !fir.ref +!CHECK: {{.*}} = fir.convert {{.*}} : (i32) -> f32 +!CHECK: {{.*}} = fir.load %[[ETA]] : !fir.ref +!CHECK: {{.*}} = arith.mulf {{.*}}, {{.*}} : f32 +!CHECK: {{.*}} = fir.convert {{.*}} : (f32) -> i32 +!CHECK: fir.store {{.*}} to %[[COUNT]] : !fir.ref +!CHECK: {{.*}} = fir.load %[[COUNT]] : !fir.ref +!CHECK: {{.*}} = fir.convert {{.*}} : (i32) -> f32 +!CHECK: {{.*}} = fir.load %[[ETA]] : 
!fir.ref +!CHECK: {{.*}} = arith.subf {{.*}}, {{.*}} : f32 +!CHECK: {{.*}} = fir.convert {{.*}} : (f32) -> i32 +!CHECK: fir.store {{.*}} to %[[DOUBLE_COUNT]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.sections nowait { +!CHECK: omp.terminator +!CHECK: } +!CHECK: return +!CHECK: } -!LLVMDialect: llvm.func @_QQmain() { -!LLVMDialect: %[[COUNT:.*]] = llvm.mlir.addressof @_QFEcount : !llvm.ptr -!LLVMDialect: {{.*}} = builtin.unrealized_conversion_cast %[[COUNT]] : !llvm.ptr to !fir.ref -!LLVMDialect: %[[DOUBLE_COUNT:.*]] = llvm.mlir.addressof @_QFEdouble_count : !llvm.ptr -!LLVMDialect: %[[ALLOCATOR:.*]] = llvm.mlir.constant(1 : i64) : i64 -!LLVMDialect: %[[ETA:.*]] = llvm.alloca %[[ALLOCATOR]] x f32 {bindc_name = "eta", in_type = f32, operand_segment_sizes = dense<0> : vector<2xi32>, uniq_name = "_QFEeta"} : (i64) -> !llvm.ptr -!LLVMDialect: %[[CONSTANT:.*]] = llvm.mlir.constant(1 : i32) : i32 -!LLVMDialect: omp.sections allocate(%[[CONSTANT]] : i32 -> %1 : !fir.ref) { -!LLVMDialect: omp.section { -!LLVMDialect: {{.*}} = llvm.mlir.constant(5 : i32) : i32 -!LLVMDialect: llvm.store {{.*}}, %[[COUNT]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.load %[[COUNT]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.load %[[DOUBLE_COUNT]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mul {{.*}}, {{.*}} : i32 -!LLVMDialect: {{.*}} = llvm.sitofp {{.*}} : i32 to f32 -!LLVMDialect: llvm.store {{.*}}, %[[ETA]] : !llvm.ptr -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.section { -!LLVMDialect: {{.*}} = llvm.load %[[DOUBLE_COUNT]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mlir.constant(1 : i32) : i32 -!LLVMDialect: {{.*}} = llvm.add {{.*}}, {{.*}} : i32 -!LLVMDialect: llvm.store {{.*}}, %[[DOUBLE_COUNT]] : !llvm.ptr -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.section { -!LLVMDialect: {{.*}} = llvm.load %[[ETA]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mlir.constant(7.000000e+00 : f32) : f32 -!LLVMDialect: {{.*}} 
= llvm.fsub {{.*}}, {{.*}} : f32 -!LLVMDialect: llvm.store {{.*}}, %[[ETA]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.load %[[COUNT]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.sitofp {{.*}} : i32 to f32 -!LLVMDialect: {{.*}} = llvm.load %[[ETA]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.fmul {{.*}}, {{.*}} : f32 -!LLVMDialect: {{.*}} = llvm.fptosi {{.*}} : f32 to i32 -!LLVMDialect: llvm.store {{.*}}, %[[COUNT]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.load %[[COUNT]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.sitofp {{.*}} : i32 to f32 -!LLVMDialect: {{.*}} = llvm.load %[[ETA]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.fsub {{.*}}, {{.*}} : f32 -!LLVMDialect: {{.*}} = llvm.fptosi {{.*}} : f32 to i32 -!LLVMDialect: llvm.store {{.*}}, %[[DOUBLE_COUNT]] : !llvm.ptr -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.sections nowait { -!LLVMDialect: omp.section { -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: llvm.return -!LLVMDialect: } - -program sample +program sample use omp_lib integer :: count = 0, double_count = 1 !$omp sections private (eta, double_count) allocate(omp_high_bw_mem_alloc: count) @@ -129,48 +71,28 @@ program sample !$omp end sections nowait end program sample -!FIRDialect: func @_QPfirstprivate(%[[ARG:.*]]: !fir.ref {fir.bindc_name = "alpha"}) { -!FIRDialect: omp.sections { -!FIRDialect: omp.section { -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: omp.sections { -!FIRDialect: omp.section { -!FIRDialect: %[[PRIVATE_VAR:.*]] = fir.load %[[ARG]] : !fir.ref -!FIRDialect: %[[CONSTANT:.*]] = arith.constant 5.000000e+00 : f32 -!FIRDialect: %[[PRIVATE_VAR_2:.*]] = arith.mulf %[[PRIVATE_VAR]], %[[CONSTANT]] : f32 -!FIRDialect: fir.store %[[PRIVATE_VAR_2]] to %[[ARG]] : !fir.ref -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: omp.terminator -!FIRDialect: } -!FIRDialect: 
return -!FIRDialect: } - -!LLVMDialect: llvm.func @_QPfirstprivate(%[[ARG:.*]]: !llvm.ptr {fir.bindc_name = "alpha"}) { -!LLVMDialect: omp.sections { -!LLVMDialect: omp.section { -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.sections { -!LLVMDialect: omp.section { -!LLVMDialect: {{.*}} = llvm.load %[[ARG]] : !llvm.ptr -!LLVMDialect: {{.*}} = llvm.mlir.constant(5.000000e+00 : f32) : f32 -!LLVMDialect: {{.*}} = llvm.fmul {{.*}}, {{.*}} : f32 -!LLVMDialect: llvm.store {{.*}}, %[[ARG]] : !llvm.ptr -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: omp.terminator -!LLVMDialect: } -!LLVMDialect: llvm.return -!LLVMDialect: } +!CHECK: func @_QPfirstprivate(%[[ARG:.*]]: !fir.ref {fir.bindc_name = "alpha"}) { +!CHECK: omp.sections { +!CHECK: omp.section { +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.sections { +!CHECK: omp.section { +!CHECK: %[[PRIVATE_VAR:.*]] = fir.load %[[ARG]] : !fir.ref +!CHECK: %[[CONSTANT:.*]] = arith.constant 5.000000e+00 : f32 +!CHECK: %[[PRIVATE_VAR_2:.*]] = arith.mulf %[[PRIVATE_VAR]], %[[CONSTANT]] : f32 +!CHECK: fir.store %[[PRIVATE_VAR_2]] to %[[ARG]] : !fir.ref +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.terminator +!CHECK: } +!CHECK: return +!CHECK: } subroutine firstprivate(alpha) - real :: alpha + real :: alpha !$omp sections firstprivate(alpha) !$omp end sections From 67136d0e8fb57251dece4be0907414fdbe081f7a Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Wed, 25 May 2022 17:57:27 +0000 Subject: [PATCH 711/908] [clang][dataflow] Remove private-field filtering from `StorageLocation` creation. The API for `AggregateStorageLocation` does not allow for missing fields (it asserts). Therefore, it is incorrect to filter out any fields at location-creation time which may be accessed by the code. Currently, we limit filtering to private, base-class fields on the assumption that those can never be accessed.
However, `friend` declarations can invalidate that assumption, thereby breaking our invariants. This patch removes said field filtering to avoid violating the invariant of "no missing fields" for `AggregateStorageLocation`. Differential Revision: https://reviews.llvm.org/D126420 --- .../FlowSensitive/DataflowEnvironment.cpp | 40 +++----- .../Analysis/FlowSensitive/TransferTest.cpp | 91 ++++++++++++------- 2 files changed, 69 insertions(+), 62 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index fe573bbf9f379a..13fef0d4286cf3 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -148,39 +148,26 @@ static void initGlobalVars(const Stmt &S, Environment &Env) { } } +// FIXME: Does not precisely handle non-virtual diamond inheritance. A single +// field decl will be modeled for all instances of the inherited field. static void -getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields, +getFieldsFromClassHierarchy(QualType Type, llvm::DenseSet &Fields) { if (Type->isIncompleteType() || Type->isDependentType() || !Type->isRecordType()) return; - for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) { - if (IgnorePrivateFields && - (Field->getAccess() == AS_private || - (Field->getAccess() == AS_none && Type->getAsRecordDecl()->isClass()))) - continue; + for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) Fields.insert(Field); - } - if (auto *CXXRecord = Type->getAsCXXRecordDecl()) { - for (const CXXBaseSpecifier &Base : CXXRecord->bases()) { - // Ignore private fields (including default access in C++ classes) in - // base classes, because they are not visible in derived classes. 
- getFieldsFromClassHierarchy(Base.getType(), /*IgnorePrivateFields=*/true, - Fields); - } - } + if (auto *CXXRecord = Type->getAsCXXRecordDecl()) + for (const CXXBaseSpecifier &Base : CXXRecord->bases()) + getFieldsFromClassHierarchy(Base.getType(), Fields); } -/// Gets the set of all fields accesible from the type. -/// -/// FIXME: Does not precisely handle non-virtual diamond inheritance. A single -/// field decl will be modeled for all instances of the inherited field. -static llvm::DenseSet -getAccessibleObjectFields(QualType Type) { +/// Gets the set of all fields in the type. +static llvm::DenseSet getObjectFields(QualType Type) { llvm::DenseSet Fields; - // Don't ignore private fields for the class itself, only its super classes. - getFieldsFromClassHierarchy(Type, /*IgnorePrivateFields=*/false, Fields); + getFieldsFromClassHierarchy(Type, Fields); return Fields; } @@ -324,9 +311,8 @@ StorageLocation &Environment::createStorageLocation(QualType Type) { // FIXME: Explore options to avoid eager initialization of fields as some of // them might not be needed for a particular analysis. 
llvm::DenseMap FieldLocs; - for (const FieldDecl *Field : getAccessibleObjectFields(Type)) { + for (const FieldDecl *Field : getObjectFields(Type)) FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); - } return takeOwnership( std::make_unique(Type, std::move(FieldLocs))); } @@ -392,7 +378,7 @@ void Environment::setValue(const StorageLocation &Loc, Value &Val) { const QualType Type = AggregateLoc.getType(); assert(Type->isStructureOrClassType()); - for (const FieldDecl *Field : getAccessibleObjectFields(Type)) { + for (const FieldDecl *Field : getObjectFields(Type)) { assert(Field != nullptr); StorageLocation &FieldLoc = AggregateLoc.getChild(*Field); MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field); @@ -508,7 +494,7 @@ Value *Environment::createValueUnlessSelfReferential( // FIXME: Initialize only fields that are accessed in the context that is // being analyzed. llvm::DenseMap FieldValues; - for (const FieldDecl *Field : getAccessibleObjectFields(Type)) { + for (const FieldDecl *Field : getObjectFields(Type)) { assert(Field != nullptr); QualType FieldType = Field->getType(); diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index bed6b975974ccc..dbf59bf695560c 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1154,8 +1154,8 @@ TEST_F(TransferTest, DerivedBaseMemberClass) { // two. // Base-class fields. 
- EXPECT_THAT(FooVal.getChild(*ADefaultDecl), IsNull()); - EXPECT_THAT(FooVal.getChild(*APrivateDecl), IsNull()); + EXPECT_THAT(FooVal.getChild(*ADefaultDecl), NotNull()); + EXPECT_THAT(FooVal.getChild(*APrivateDecl), NotNull()); EXPECT_THAT(FooVal.getChild(*AProtectedDecl), NotNull()); EXPECT_EQ(Env.getValue(FooLoc.getChild(*APublicDecl)), @@ -1177,6 +1177,40 @@ TEST_F(TransferTest, DerivedBaseMemberClass) { }); } +static void derivedBaseMemberExpectations( + llvm::ArrayRef>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + ASSERT_TRUE(FooDecl->getType()->isRecordType()); + const FieldDecl *BarDecl = nullptr; + for (const clang::CXXBaseSpecifier &Base : + FooDecl->getType()->getAsCXXRecordDecl()->bases()) { + QualType BaseType = Base.getType(); + ASSERT_TRUE(BaseType->isStructureType()); + + for (const FieldDecl *Field : BaseType->getAsRecordDecl()->fields()) { + if (Field->getNameAsString() == "Bar") { + BarDecl = Field; + } else { + FAIL() << "Unexpected field: " << Field->getNameAsString(); + } + } + } + ASSERT_THAT(BarDecl, NotNull()); + + const auto &FooLoc = *cast( + Env.getStorageLocation(*FooDecl, SkipPast::None)); + const auto &FooVal = *cast(Env.getValue(FooLoc)); + EXPECT_THAT(FooVal.getChild(*BarDecl), NotNull()); + EXPECT_EQ(Env.getValue(FooLoc.getChild(*BarDecl)), FooVal.getChild(*BarDecl)); +} + TEST_F(TransferTest, DerivedBaseMemberStructDefault) { std::string Code = R"( struct A { @@ -1190,41 +1224,28 @@ TEST_F(TransferTest, DerivedBaseMemberStructDefault) { // [[p]] } )"; - runDataflow( - Code, [](llvm::ArrayRef< - std::pair>> - Results, - ASTContext &ASTCtx) { - ASSERT_THAT(Results, ElementsAre(Pair("p", _))); - const Environment &Env = Results[0].second.Env; - - const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); - ASSERT_THAT(FooDecl, NotNull()); - - 
ASSERT_TRUE(FooDecl->getType()->isRecordType()); - const FieldDecl *BarDecl = nullptr; - for (const clang::CXXBaseSpecifier &Base : - FooDecl->getType()->getAsCXXRecordDecl()->bases()) { - QualType BaseType = Base.getType(); - ASSERT_TRUE(BaseType->isStructureType()); + runDataflow(Code, derivedBaseMemberExpectations); +} - for (const FieldDecl *Field : BaseType->getAsRecordDecl()->fields()) { - if (Field->getNameAsString() == "Bar") { - BarDecl = Field; - } else { - FAIL() << "Unexpected field: " << Field->getNameAsString(); - } - } - } - ASSERT_THAT(BarDecl, NotNull()); +TEST_F(TransferTest, DerivedBaseMemberPrivateFriend) { + // Include an access to `Foo.Bar` to verify the analysis doesn't crash on that + // access. + std::string Code = R"( + struct A { + private: + friend void target(); + int Bar; + }; + struct B : public A { + }; - const auto &FooLoc = *cast( - Env.getStorageLocation(*FooDecl, SkipPast::None)); - const auto &FooVal = *cast(Env.getValue(FooLoc)); - EXPECT_THAT(FooVal.getChild(*BarDecl), NotNull()); - EXPECT_EQ(Env.getValue(FooLoc.getChild(*BarDecl)), - FooVal.getChild(*BarDecl)); - }); + void target() { + B Foo; + (void)Foo.Bar; + // [[p]] + } + )"; + runDataflow(Code, derivedBaseMemberExpectations); } TEST_F(TransferTest, ClassMember) { From 3682e22ef404e1314ee1eab9daf6de99dc9ea8ee Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Wed, 25 May 2022 18:51:13 +0000 Subject: [PATCH 712/908] [clang][dataflow] Improve handling of constructor initializers. Currently, we assert that `CXXCtorInitializer`s are field initializers. Replace the assertion with an early return. Otherwise, we crash every time we process a constructor with a non-field (e.g. base class) initializer. 
Differential Revision: https://reviews.llvm.org/D126419 --- .../TypeErasedDataflowAnalysis.cpp | 8 ++-- .../Analysis/FlowSensitive/TransferTest.cpp | 43 +++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index ee1723dcd8cc85..8a5d5ca386afeb 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -257,6 +257,11 @@ static void transferCFGInitializer(const CFGInitializer &CfgInit, const CXXCtorInitializer *Initializer = CfgInit.getInitializer(); assert(Initializer != nullptr); + const FieldDecl *Member = Initializer->getMember(); + if (Member == nullptr) + // Not a field initializer. + return; + auto *InitStmt = Initializer->getInit(); assert(InitStmt != nullptr); @@ -269,9 +274,6 @@ static void transferCFGInitializer(const CFGInitializer &CfgInit, if (InitStmtVal == nullptr) return; - const FieldDecl *Member = Initializer->getMember(); - assert(Member != nullptr); - if (Member->getType()->isReferenceType()) { auto &MemberLoc = ThisLoc.getChild(*Member); State.Env.setValue(MemberLoc, diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index dbf59bf695560c..efea7797b45fbc 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1296,6 +1296,49 @@ TEST_F(TransferTest, ClassMember) { }); } +TEST_F(TransferTest, BaseClassInitializer) { + using ast_matchers::cxxConstructorDecl; + using ast_matchers::hasName; + using ast_matchers::ofClass; + + std::string Code = R"( + class A { + public: + A(int I) : Bar(I) {} + int Bar; + }; + + class B : public A { + public: + B(int I) : A(I) { + (void)0; + // [[p]] + } + }; + )"; + ASSERT_THAT_ERROR( + test::checkDataflow( + Code, 
cxxConstructorDecl(ofClass(hasName("B"))), + [](ASTContext &C, Environment &) { + return NoopAnalysis(C, /*ApplyBuiltinTransfer=*/true); + }, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + // Regression test to verify that base-class initializers do not + // trigger an assertion. If we add support for such initializers in + // the future, we can expand this test to check more specific + // properties. + EXPECT_THAT(Results, ElementsAre(Pair("p", _))); + }, + {"-fsyntax-only", "-fno-delayed-template-parsing", + "-std=" + std::string(LangStandard::getLangStandardForKind( + LangStandard::lang_cxx17) + .getName())}), + llvm::Succeeded()); +} + TEST_F(TransferTest, ReferenceMember) { std::string Code = R"( struct A { From 49ad577c0772be38f6691f93c7ac1b5dbb4dc9bf Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Fri, 27 May 2022 16:02:15 +0200 Subject: [PATCH 713/908] [workflow] Don't fail workflow if we already have a PR for an issue When running /cherry-pick several times you will get an error when it tries to create a new PR since there already is one. This checks if we have PR first. 
Fixes #54862 Reviewed By: tstellar Differential Revision: https://reviews.llvm.org/D123657 --- llvm/utils/git/github-automation.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 7dbcf02a820b6d..e094fe810f9978 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -191,6 +191,9 @@ def create_branch(self, commits:List[str]) -> bool: self.issue_remove_cherry_pick_failed_label() return True + def check_if_pull_request_exists(self, repo:github.Repository.Repository, head:str) -> bool: + pulls = repo.get_pulls(head=head) + return pulls != None def create_pull_request(self, owner:str, branch:str) -> bool: """ @@ -208,11 +211,15 @@ def create_pull_request(self, owner:str, branch:str) -> bool: release_branch_for_issue = self.release_branch_for_issue if release_branch_for_issue is None: return False + head = f"{owner}:{branch}" + if self.check_if_pull_request_exists(repo, head): + print("PR already exists...") + return True try: - pull = repo.create_pull(title='PR for {}'.format(issue_ref), + pull = repo.create_pull(title=f"PR for {issue_ref}", body='resolves {}'.format(issue_ref), base=release_branch_for_issue, - head='{}:{}'.format(owner, branch), + head=head, maintainer_can_modify=False) except Exception as e: self.issue_notify_pull_request_failure(branch) From e651ed8621c3719937517ddb0b0815b18ec888e4 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 27 May 2022 16:05:33 +0200 Subject: [PATCH 714/908] [analyzer] Fix wrong annotation of LazyCompoundVal::getStore The build bot https://lab.llvm.org/buildbot#builders/5/builds/24183 uncovered a wrong nonnull annotation introduced by D126198. 
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index ec7f05333f7b2f..7c5f23b6d466a4 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -421,7 +421,7 @@ class LazyCompoundVal : public NonLoc { return static_cast(Data); } - LLVM_ATTRIBUTE_RETURNS_NONNULL + /// It might return null. const void *getStore() const; LLVM_ATTRIBUTE_RETURNS_NONNULL From b479ea4b0afc537cd4608e95e9cafa15b803ed16 Mon Sep 17 00:00:00 2001 From: Daniele Vettorel Date: Fri, 27 May 2022 10:17:29 -0400 Subject: [PATCH 715/908] Add llvm-debuginfod-find tool to Bazel build Add missing `llvm-debuginfod-find` tool to the Bazel build. Patch by: vettoreldaniele. Reviewed By: GMNGeoffrey Differential Revision: https://reviews.llvm.org/D126489 --- .../bazel/llvm-project-overlay/llvm/BUILD.bazel | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index a6f25064715d5c..570df56dda1a27 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -2877,6 +2877,23 @@ cc_binary( ], ) +cc_binary( + name = "llvm-debuginfod-find", + srcs = glob([ + "tools/llvm-debuginfod-find/*.cpp", + "tools/llvm-debuginfod-find/*.h", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Symbolize", + ":BitReader", + ":Core", + ":Support", + ":Debuginfod", + ], +) + cc_binary( name = "llvm-dis", srcs = glob([ From af430944b3ba8ca55c4fd6b73f53c198c469ffee Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Thu, 26 May 2022 09:24:12 -0500 Subject: [PATCH 716/908] [PowerPC][AIX] Allow VSX patterns to be 32-bit and 64-bit safe on P8+. 
This patch updates two patterns involving `scalar_to_vector` and `SCALAR_TO_VECTOR_PERMUTED` nodes to be safe for both 64-bit and 32-bit by pulling the patterns out of the 64-bit specific guard. These patterns are matched on POWER8 and above. Differential Revision: https://reviews.llvm.org/D125389 --- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 17 +-- .../PowerPC/aix_scalar_vector_permuted.ll | 29 ++--- .../PowerPC/canonical-merge-shuffles.ll | 3 +- .../CodeGen/PowerPC/float-vector-gather.ll | 18 ++- llvm/test/CodeGen/PowerPC/load-and-splat.ll | 3 +- .../CodeGen/PowerPC/load-v4i8-improved.ll | 3 +- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 5 +- .../CodeGen/PowerPC/reduce_scalarization.ll | 57 ++++----- .../CodeGen/PowerPC/scalar_vector_test_4.ll | 113 ++++-------------- llvm/test/CodeGen/PowerPC/vec_insert_elt.ll | 25 ++-- 10 files changed, 93 insertions(+), 180 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 1e87af516e52b5..6e562498dcf907 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3367,6 +3367,15 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)), def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)), (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>; + +defm : ScalToVecWPermute< + v4i32, (i32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; +defm : ScalToVecWPermute< + v4f32, (f32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; } // HasVSX, HasP8Vector, IsBigEndian // Big endian Power8 64Bit VSX subtarget. 
@@ -3381,14 +3390,6 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>; -defm : ScalToVecWPermute< - v4i32, (i32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; -defm : ScalToVecWPermute< - v4f32, (f32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; def : Pat @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca ; ; P8-AIX-32-LABEL: testSplat4hi: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r3, 0(r3) -; P8-AIX-32-NEXT: mtfprwz f0, r3 +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX-32-NEXT: xxspltw v2, vs0, 1 ; P8-AIX-32-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll index 0b3e75e1c370d7..7d97598a66b86f 100644 --- a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll +++ b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll @@ -43,17 +43,13 @@ float* nocapture readonly %d) { ; CHECK-BE-AIX-32-LABEL: vector_gatherf: ; CHECK-BE-AIX-32-LABEL: # %bb.0: # %entry -; CHECK-BE-AIX-32-DAG: lfs f[[REG0:[0-9]+]] -; CHECK-BE-AIX-32-DAG: lfs f[[REG1:[0-9]+]] -; CHECK-BE-AIX-32-DAG: lfs f[[REG2:[0-9]+]] -; CHECK-BE-AIX-32-DAG: lfs f[[REG3:[0-9]+]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG0]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG1:[0-9]+]], f[[REG1]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG2:[0-9]+]], f[[REG2]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG3]] -; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG1]], v[[VREG0]], v[[VREG1]] -; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG0]], v[[VREG2]], v[[VREG0]] -; CHECK-BE-AIX-32-NEXT: xxmrghd v[[VREG1]], v[[VREG0]], v[[VREG1]] +; CHECK-BE-AIX-32-DAG: 
lxsiwzx v[[REG0:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG1:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG2:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG3:[0-9]+]] +; CHECK-BE-AIX-32-DAG: vmrgow v[[REG0]], v[[REG1]], v[[REG0]] +; CHECK-BE-AIX-32-DAG: vmrgow v[[REG3]], v[[REG2]], v[[REG3]] +; CHECK-BE-AIX-32-NEXT: xxmrghd v[[REG0]], v[[REG3]], v[[REG0]] ; CHECK-BE-AIX-32-NEXT: blr entry: %0 = load float, float* %a, align 4 diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll index 469a56dfac1a8c..699f5a8c60b7b1 100644 --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -560,8 +560,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) { ; ; P8-AIX32-LABEL: unadjusted_lxvwsx: ; P8-AIX32: # %bb.0: # %entry -; P8-AIX32-NEXT: lwz r3, 0(r3) -; P8-AIX32-NEXT: mtfprwz f0, r3 +; P8-AIX32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX32-NEXT: xxspltw v2, vs0, 1 ; P8-AIX32-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll index 81602637922460..d332f548d33ba3 100644 --- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -28,8 +28,7 @@ define <16 x i8> @test(i32* %s, i32* %t) { ; ; CHECK-AIX-32-LABEL: test: ; CHECK-AIX-32: # %bb.0: # %entry -; CHECK-AIX-32-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-NEXT: mtfprwz f0, r3 +; CHECK-AIX-32-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-32-NEXT: xxspltw v2, vs0, 1 ; CHECK-AIX-32-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index f3959c3c8ec900..d170dc7e41537d 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -66,9 +66,8 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; P9BE: lxsiwzx [[REG:[0-9]+]] ; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] ; P9BE-32-LABEL: test32: -; 
P9BE-32: lwzx [[REG1:[0-9]+]] -; P9BE-32: mtvsrwz [[REG2:[0-9]+]], [[REG1]] -; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG2]] +; P9BE-32: lxsiwzx [[REG:[0-9]+]] +; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] entry: %idx.ext63 = sext i32 %i_pix2 to i64 %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63 diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll index d7883b540d892e..5034778592a5c2 100644 --- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll +++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll @@ -68,18 +68,15 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl ; ; AIX-32-LABEL: test2: ; AIX-32: # %bb.0: # %entry -; AIX-32-NEXT: lfs f0, 4(r3) -; AIX-32-NEXT: lfs f1, 0(r3) ; AIX-32-NEXT: lwz r5, L..C0(r2) # %const.0 -; AIX-32-NEXT: lfs f2, 4(r4) -; AIX-32-NEXT: xscvdpspn v2, f0 -; AIX-32-NEXT: lfs f0, 0(r4) -; AIX-32-NEXT: lxvw4x v0, 0, r5 -; AIX-32-NEXT: xscvdpspn v3, f1 -; AIX-32-NEXT: xscvdpspn v4, f2 -; AIX-32-NEXT: xscvdpspn v5, f0 -; AIX-32-NEXT: vperm v2, v3, v2, v0 -; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: li r6, 4 +; AIX-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-32-NEXT: lxsiwzx v0, 0, r4 +; AIX-32-NEXT: lxsiwzx v2, r3, r6 +; AIX-32-NEXT: lxsiwzx v5, r4, r6 +; AIX-32-NEXT: lxvw4x v4, 0, r5 +; AIX-32-NEXT: vperm v2, v3, v2, v4 +; AIX-32-NEXT: vperm v3, v0, v5, v4 ; AIX-32-NEXT: xvsubsp vs0, v2, v3 ; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 ; AIX-32-NEXT: xscvspdpn f0, vs0 @@ -117,18 +114,15 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl ; ; AIX-32-LABEL: test3: ; AIX-32: # %bb.0: # %entry -; AIX-32-NEXT: lfs f0, 4(r3) -; AIX-32-NEXT: lfs f1, 0(r3) ; AIX-32-NEXT: lwz r5, L..C1(r2) # %const.0 -; AIX-32-NEXT: lfs f2, 4(r4) -; AIX-32-NEXT: xscvdpspn v2, f0 -; AIX-32-NEXT: lfs f0, 0(r4) -; AIX-32-NEXT: lxvw4x v0, 0, r5 -; AIX-32-NEXT: xscvdpspn v3, f1 -; AIX-32-NEXT: xscvdpspn v4, f2 -; AIX-32-NEXT: 
xscvdpspn v5, f0 -; AIX-32-NEXT: vperm v2, v3, v2, v0 -; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: li r6, 4 +; AIX-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-32-NEXT: lxsiwzx v0, 0, r4 +; AIX-32-NEXT: lxsiwzx v2, r3, r6 +; AIX-32-NEXT: lxsiwzx v5, r4, r6 +; AIX-32-NEXT: lxvw4x v4, 0, r5 +; AIX-32-NEXT: vperm v2, v3, v2, v4 +; AIX-32-NEXT: vperm v3, v0, v5, v4 ; AIX-32-NEXT: xvaddsp vs0, v2, v3 ; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 ; AIX-32-NEXT: xscvspdpn f0, vs0 @@ -166,18 +160,15 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl ; ; AIX-32-LABEL: test4: ; AIX-32: # %bb.0: # %entry -; AIX-32-NEXT: lfs f0, 4(r3) -; AIX-32-NEXT: lfs f1, 0(r3) ; AIX-32-NEXT: lwz r5, L..C2(r2) # %const.0 -; AIX-32-NEXT: lfs f2, 4(r4) -; AIX-32-NEXT: xscvdpspn v2, f0 -; AIX-32-NEXT: lfs f0, 0(r4) -; AIX-32-NEXT: lxvw4x v0, 0, r5 -; AIX-32-NEXT: xscvdpspn v3, f1 -; AIX-32-NEXT: xscvdpspn v4, f2 -; AIX-32-NEXT: xscvdpspn v5, f0 -; AIX-32-NEXT: vperm v2, v3, v2, v0 -; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: li r6, 4 +; AIX-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-32-NEXT: lxsiwzx v0, 0, r4 +; AIX-32-NEXT: lxsiwzx v2, r3, r6 +; AIX-32-NEXT: lxsiwzx v5, r4, r6 +; AIX-32-NEXT: lxvw4x v4, 0, r5 +; AIX-32-NEXT: vperm v2, v3, v2, v4 +; AIX-32-NEXT: vperm v3, v0, v5, v4 ; AIX-32-NEXT: xvmulsp vs0, v2, v3 ; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 ; AIX-32-NEXT: xscvspdpn f0, vs0 diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll index e0fd7526faaf92..87e4c7194d966c 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -10,16 +10,16 @@ ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64-ibm-aix-xcoff< %s | FileCheck %s \ -; RUN: --check-prefixes=P9-AIX,P9-AIX-64 +; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-64 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr 
-ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \ -; RUN: --check-prefixes=P9-AIX,P9-AIX-32 +; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-32 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ -; RUN: --check-prefixes=P8-AIX-64 +; RUN: --check-prefixes=AIX,P8-AIX-64 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \ -; RUN: --check-prefixes=P8-AIX-32 +; RUN: --check-prefixes=AIX,P8-AIX-32 ; Function Attrs: norecurse nounwind readonly define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { @@ -422,9 +422,8 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec ; ; P8-AIX-32-LABEL: s2v_test_f1: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 0(r3) ; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0 -; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: lxsiwzx v3, 0, r3 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 ; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr @@ -466,33 +465,12 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f2: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: addi r3, r3, 4 -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f2: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 4(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f2: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: addi r3, r3, 4 -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f2: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs 
f0, 4(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f2: +; AIX: # %bb.0: # %entry +; AIX-NEXT: addi r3, r3, 4 +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -542,8 +520,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P9-AIX-32-LABEL: s2v_test_f3: ; P9-AIX-32: # %bb.0: # %entry ; P9-AIX-32-NEXT: slwi r4, r4, 2 -; P9-AIX-32-NEXT: lfsx f0, r3, r4 -; P9-AIX-32-NEXT: xscvdpspn v3, f0 +; P9-AIX-32-NEXT: lxsiwzx v3, r3, r4 ; P9-AIX-32-NEXT: vmrgow v2, v3, v2 ; P9-AIX-32-NEXT: blr ; @@ -557,8 +534,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P8-AIX-32-LABEL: s2v_test_f3: ; P8-AIX-32: # %bb.0: # %entry ; P8-AIX-32-NEXT: slwi r4, r4, 2 -; P8-AIX-32-NEXT: lfsx f0, r3, r4 -; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: lxsiwzx v3, r3, r4 ; P8-AIX-32-NEXT: vmrgow v2, v3, v2 ; P8-AIX-32-NEXT: blr entry: @@ -601,33 +577,12 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f4: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: addi r3, r3, 4 -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f4: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 4(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f4: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: addi r3, r3, 4 -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f4: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 4(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; 
P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f4: +; AIX: # %bb.0: # %entry +; AIX-NEXT: addi r3, r3, 4 +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -663,31 +618,11 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f5: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f5: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 0(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f5: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f5: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 0(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f5: +; AIX: # %bb.0: # %entry +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %0 = load float, float* %ptr1, align 8 %vecins = insertelement <2 x float> %vec, float %0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll index 225367f5a886dd..dd873aad5ec824 100644 --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -659,15 +659,14 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) { ; ; AIX-P8-32-LABEL: testFloatImm2: ; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lfs f0, 0(r3) ; AIX-P8-32-NEXT: lwz r4, L..C8(r2) # %const.0 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxsiwzx v3, 0, r3 +; 
AIX-P8-32-NEXT: li r5, 4 ; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 -; AIX-P8-32-NEXT: lfs f0, 4(r3) -; AIX-P8-32-NEXT: lwz r3, L..C9(r2) # %const.1 +; AIX-P8-32-NEXT: lwz r4, L..C9(r2) # %const.1 ; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 -; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 ; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 ; AIX-P8-32-NEXT: blr entry: @@ -732,17 +731,15 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) { ; ; AIX-P8-32-LABEL: testFloatImm3: ; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lis r4, 4 -; AIX-P8-32-NEXT: lfsx f0, r3, r4 ; AIX-P8-32-NEXT: lwz r4, L..C10(r2) # %const.0 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lis r5, 4 +; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5 ; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 -; AIX-P8-32-NEXT: lfs f0, 0(r3) -; AIX-P8-32-NEXT: lwz r3, L..C11(r2) # %const.1 +; AIX-P8-32-NEXT: lwz r4, L..C11(r2) # %const.1 ; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 -; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 -; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r4 +; AIX-P8-32-NEXT: lxsiwzx v4, 0, r3 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 ; AIX-P8-32-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 From 684c080108766b4f112f172fed4a49059484614d Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 27 May 2022 15:49:13 +0000 Subject: [PATCH 717/908] [Clang] Extend -gen-reproducer flag -gen-reproducer causes crash reproduction to be emitted even when clang didn't crash, and now can optionally take an argument of never, on-crash (default), on-error and always. 
Differential revision: https://reviews.llvm.org/D120201 --- clang/include/clang/Driver/Driver.h | 31 ++++++++++++--- clang/include/clang/Driver/Options.td | 4 ++ clang/lib/Driver/Driver.cpp | 6 +-- clang/test/Driver/emit-reproducer.c | 39 +++++++++++++++++++ clang/tools/driver/driver.cpp | 55 +++++++++++++++++---------- 5 files changed, 104 insertions(+), 31 deletions(-) create mode 100644 clang/test/Driver/emit-reproducer.c diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index f0f294a669d98a..86eea9375f6737 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -276,11 +276,6 @@ class Driver { unsigned ProbePrecompiled : 1; public: - /// Force clang to emit reproducer for driver invocation. This is enabled - /// indirectly by setting FORCE_CLANG_DIAGNOSTICS_CRASH environment variable - /// or when using the -gen-reproducer driver flag. - unsigned GenReproducer : 1; - // getFinalPhase - Determine which compilation mode we are in and record // which option we used to determine the final phase. // TODO: Much of what getFinalPhase returns are not actually true compiler @@ -505,6 +500,32 @@ class Driver { StringRef AdditionalInformation = "", CompilationDiagnosticReport *GeneratedReport = nullptr); + enum class CommandStatus { + Crash = 1, + Error, + Ok, + }; + + enum class ReproLevel { + Off = 0, + OnCrash = static_cast(CommandStatus::Crash), + OnError = static_cast(CommandStatus::Error), + Always = static_cast(CommandStatus::Ok), + }; + + bool maybeGenerateCompilationDiagnostics( + CommandStatus CS, ReproLevel Level, Compilation &C, + const Command &FailingCommand, StringRef AdditionalInformation = "", + CompilationDiagnosticReport *GeneratedReport = nullptr) { + if (static_cast(CS) > static_cast(Level)) + return false; + // Hack to ensure that diagnostic notes get emitted. 
+ Diags.setLastDiagnosticIgnored(false); + generateCompilationDiagnostics(C, FailingCommand, AdditionalInformation, + GeneratedReport); + return true; + } + /// @} /// @name Helper Methods /// @{ diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e19baac5860aab..2a5e8bf27e3531 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -558,7 +558,10 @@ def arcmt_migrate_report_output : Separate<["-"], "arcmt-migrate-report-output"> def arcmt_migrate_emit_arc_errors : Flag<["-"], "arcmt-migrate-emit-errors">, HelpText<"Emit ARC errors even if the migrator can fix them">, Flags<[CC1Option]>, MarshallingInfoFlag>; +def gen_reproducer_eq: Joined<["-"], "gen-reproducer=">, Flags<[NoArgumentUnused, CoreOption]>, + HelpText<"Emit reproducer on (option: off, crash (default), error, always)">; def gen_reproducer: Flag<["-"], "gen-reproducer">, InternalDebugOpt, + Alias, AliasArgs<["always"]>, HelpText<"Auto-generates preprocessed source files and a reproduction script">; def gen_cdb_fragment_path: Separate<["-"], "gen-cdb-fragment-path">, InternalDebugOpt, HelpText<"Emit a compilation database fragment to the specified directory">; @@ -1397,6 +1400,7 @@ def fexperimental_new_constant_interpreter : Flag<["-"], "fexperimental-new-cons def fconstexpr_backtrace_limit_EQ : Joined<["-"], "fconstexpr-backtrace-limit=">, Group; def fno_crash_diagnostics : Flag<["-"], "fno-crash-diagnostics">, Group, Flags<[NoArgumentUnused, CoreOption]>, + Alias, AliasArgs<["off"]>, HelpText<"Disable auto-generation of preprocessed source files and a script for reproduction during a clang crash">; def fcrash_diagnostics_dir : Joined<["-"], "fcrash-diagnostics-dir=">, Group, Flags<[NoArgumentUnused, CoreOption]>, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 94925568aec238..aaef2e1ded327c 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -198,8 +198,7 @@ 
Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, CCPrintOptions(false), CCPrintHeaders(false), CCLogDiagnostics(false), CCGenDiagnostics(false), CCPrintProcessStats(false), TargetTriple(TargetTriple), Saver(Alloc), CheckInputsExist(true), - ProbePrecompiled(true), GenReproducer(false), - SuppressMissingInputWarning(false) { + ProbePrecompiled(true), SuppressMissingInputWarning(false) { // Provide a sane fallback if no VFS is specified. if (!this->VFS) this->VFS = llvm::vfs::getRealFileSystem(); @@ -1217,9 +1216,6 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings); if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name)) CCCGenericGCCName = A->getValue(); - GenReproducer = Args.hasFlag(options::OPT_gen_reproducer, - options::OPT_fno_crash_diagnostics, - !!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")); // Process -fproc-stat-report options. if (const Arg *A = Args.getLastArg(options::OPT_fproc_stat_report_EQ)) { diff --git a/clang/test/Driver/emit-reproducer.c b/clang/test/Driver/emit-reproducer.c new file mode 100644 index 00000000000000..f50171b7d9a9bf --- /dev/null +++ b/clang/test/Driver/emit-reproducer.c @@ -0,0 +1,39 @@ +// RUN: rm -rf %t && mkdir %t + +// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=off 2>&1 | FileCheck %s --check-prefix=NOT +// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -fno-crash-diagnostics 2>&1 | FileCheck %s --check-prefix=NOT +// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t 2>&1 | FileCheck %s +// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=crash 2>&1 | FileCheck %s +// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=error 2>&1 | FileCheck %s +// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=always 2>&1 | FileCheck %s +// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer 2>&1 | FileCheck %s + +// RUN: not 
%clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=off 2>&1 | FileCheck %s --check-prefix=NOT +// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -fno-crash-diagnostics 2>&1 | FileCheck %s --check-prefix=NOT +// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t 2>&1 | FileCheck %s --check-prefix=NOT +// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=crash 2>&1 | FileCheck %s --check-prefix=NOT +// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=error 2>&1 | FileCheck %s +// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=always 2>&1 | FileCheck %s +// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer 2>&1 | FileCheck %s + +// RUN: %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=off 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty +// RUN: %clang %s -fcrash-diagnostics-dir=%t -fno-crash-diagnostics 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty +// RUN: %clang %s -fcrash-diagnostics-dir=%t 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty +// RUN: %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=crash 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty +// RUN: %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=error 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty +// RUN: not %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=always 2>&1 | FileCheck %s +// RUN: not %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer 2>&1 | FileCheck %s + +// RUN: not %clang $s -gen-reproducer=badvalue 2>&1 | FileCheck %s --check-prefix=BAD-VALUE +// BAD-VALUE: Unknown value for -gen-reproducer=: 'badvalue' + +// CHECK: note: diagnostic msg: {{.*}}emit-reproducer-{{.*}}.c +// NOT-NOT: note: diagnostic msg: {{.*}}emit-reproducer-{{.*}}.c + +#ifdef FATAL +#pragma clang __debug crash +#elif ERROR +int main +#else +int main() {} +#endif diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index a7bfb07e002bf9..c9aee57d92cf49 
100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -482,32 +482,37 @@ int main(int Argc, const char **Argv) { } std::unique_ptr C(TheDriver.BuildCompilation(Args)); + + Driver::ReproLevel ReproLevel = Driver::ReproLevel::OnCrash; + if (Arg *A = C->getArgs().getLastArg(options::OPT_gen_reproducer_eq)) { + auto Level = llvm::StringSwitch>(A->getValue()) + .Case("off", Driver::ReproLevel::Off) + .Case("crash", Driver::ReproLevel::OnCrash) + .Case("error", Driver::ReproLevel::OnError) + .Case("always", Driver::ReproLevel::Always) + .Default(None); + if (!Level) { + llvm::errs() << "Unknown value for " << A->getSpelling() << ": '" + << A->getValue() << "'\n"; + return 1; + } + ReproLevel = *Level; + } + if (!!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) + ReproLevel = Driver::ReproLevel::Always; + int Res = 1; bool IsCrash = false; + Driver::CommandStatus CommandStatus = Driver::CommandStatus::Ok; + // Pretend the first command failed if ReproStatus is Always. + const Command *FailingCommand = &*C->getJobs().begin(); if (C && !C->containsError()) { SmallVector, 4> FailingCommands; Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) - << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - - // Print the bug report message that would be printed if we did actually - // crash, but only if we're crashing due to FORCE_CLANG_DIAGNOSTICS_CRASH. 
- if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) - llvm::dbgs() << llvm::getBugReportMsg(); - } - for (const auto &P : FailingCommands) { int CommandRes = P.first; - const Command *FailingCommand = P.second; + FailingCommand = P.second; if (!Res) Res = CommandRes; @@ -526,13 +531,21 @@ int main(int Argc, const char **Argv) { // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html IsCrash |= CommandRes > 128; #endif - if (IsCrash) { - TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); + CommandStatus = + IsCrash ? Driver::CommandStatus::Crash : Driver::CommandStatus::Error; + if (IsCrash) break; - } } } + // Print the bug report message that would be printed if we did actually + // crash, but only if we're crashing due to FORCE_CLANG_DIAGNOSTICS_CRASH. + if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) + llvm::dbgs() << llvm::getBugReportMsg(); + if (TheDriver.maybeGenerateCompilationDiagnostics(CommandStatus, ReproLevel, + *C, *FailingCommand)) + Res = 1; + Diags.getClient()->finish(); if (!UseNewCC1Process && IsCrash) { From e917801eddbe1b32f1adc81391fd434557391b5e Mon Sep 17 00:00:00 2001 From: Egor Zhdan Date: Tue, 24 May 2022 12:00:50 +0100 Subject: [PATCH 718/908] [Clang][Driver] Fix include paths for `--sysroot /` on Linux Currently if `--sysroot /` is passed to the Clang driver, the include paths generated by the Clang driver will start with a double slash: `//usr/include/...`. If VFS is used to inject files into the include paths (for example, the Swift compiler does this), VFS will get confused and the injected files won't be visible. This change makes sure that the include paths start with a single slash. Fixes #28283. 
Differential Revision: https://reviews.llvm.org/D126289 --- clang/include/clang/Driver/ToolChain.h | 3 ++ clang/lib/Driver/ToolChain.cpp | 8 +++ clang/lib/Driver/ToolChains/Gnu.cpp | 22 ++++----- clang/lib/Driver/ToolChains/Linux.cpp | 49 ++++++++++--------- .../basic_linux_libstdcxx_tree/usr/bin/.keep | 0 .../x86_64-unknown-linux-gnu/4.8/crtbegin.o | 0 clang/test/Driver/linux-header-search.cpp | 31 +++++++++++- 7 files changed, 77 insertions(+), 36 deletions(-) create mode 100644 clang/test/Driver/Inputs/basic_linux_libstdcxx_tree/usr/bin/.keep create mode 100644 clang/test/Driver/Inputs/basic_linux_libstdcxx_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/4.8/crtbegin.o diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 0bc345a970dfb4..f20ab164531b82 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -215,6 +215,9 @@ class ToolChain { static void addSystemIncludes(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, ArrayRef Paths); + + static std::string concat(StringRef Path, const Twine &A, const Twine &B = "", + const Twine &C = "", const Twine &D = ""); ///@} public: diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index eb3117e689d8cc..5130eb9b72c131 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -942,6 +942,14 @@ void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs, } } +/*static*/ std::string ToolChain::concat(StringRef Path, const Twine &A, + const Twine &B, const Twine &C, + const Twine &D) { + SmallString<128> Result(Path); + llvm::sys::path::append(Result, llvm::sys::path::Style::posix, A, B, C, D); + return std::string(Result); +} + std::string ToolChain::detectLibcxxVersion(StringRef IncludePath) const { std::error_code EC; int MaxVersion = 0; diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 
38280a2e36526d..b3d414f2dc78f7 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2038,7 +2038,7 @@ void Generic_GCC::GCCInstallationDetector::init( if (!VFS.exists(Prefix)) continue; for (StringRef Suffix : CandidateLibDirs) { - const std::string LibDir = Prefix + Suffix.str(); + const std::string LibDir = concat(Prefix, Suffix); if (!VFS.exists(LibDir)) continue; // Maybe filter out /gcc and /gcc-cross. @@ -2104,7 +2104,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // so we need to find those /usr/gcc/*/lib/gcc libdirs and go with // /usr/gcc/ as a prefix. - std::string PrefixDir = SysRoot.str() + "/usr/gcc"; + std::string PrefixDir = concat(SysRoot, "/usr/gcc"); std::error_code EC; for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(PrefixDir, EC), LE; @@ -2144,7 +2144,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( } // Fall back to /usr which is used by most non-Solaris systems. - Prefixes.push_back(SysRoot.str() + "/usr"); + Prefixes.push_back(concat(SysRoot, "/usr")); } /*static*/ void Generic_GCC::GCCInstallationDetector::CollectLibDirsAndTriples( @@ -2667,7 +2667,7 @@ bool Generic_GCC::GCCInstallationDetector::ScanGentooConfigs( const llvm::Triple &TargetTriple, const ArgList &Args, const SmallVectorImpl &CandidateTriples, const SmallVectorImpl &CandidateBiarchTriples) { - if (!D.getVFS().exists(D.SysRoot + GentooConfigDir)) + if (!D.getVFS().exists(concat(D.SysRoot, GentooConfigDir))) return false; for (StringRef CandidateTriple : CandidateTriples) { @@ -2686,8 +2686,8 @@ bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig( const llvm::Triple &TargetTriple, const ArgList &Args, StringRef CandidateTriple, bool NeedsBiarchSuffix) { llvm::ErrorOr> File = - D.getVFS().getBufferForFile(D.SysRoot + GentooConfigDir + "/config-" + - CandidateTriple.str()); + D.getVFS().getBufferForFile(concat(D.SysRoot, GentooConfigDir, + "/config-" + 
CandidateTriple.str())); if (File) { SmallVector Lines; File.get()->getBuffer().split(Lines, "\n"); @@ -2698,8 +2698,8 @@ bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig( continue; // Process the config file pointed to by CURRENT. llvm::ErrorOr> ConfigFile = - D.getVFS().getBufferForFile(D.SysRoot + GentooConfigDir + "/" + - Line.str()); + D.getVFS().getBufferForFile( + concat(D.SysRoot, GentooConfigDir, "/" + Line)); std::pair ActiveVersion = Line.rsplit('-'); // List of paths to scan for libraries. SmallVector GentooScanPaths; @@ -2732,7 +2732,7 @@ bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig( // Scan all paths for GCC libraries. for (const auto &GentooScanPath : GentooScanPaths) { - std::string GentooPath = D.SysRoot + std::string(GentooScanPath); + std::string GentooPath = concat(D.SysRoot, GentooScanPath); if (D.getVFS().exists(GentooPath + "/crtbegin.o")) { if (!ScanGCCForMultilibs(TargetTriple, Args, GentooPath, NeedsBiarchSuffix)) @@ -3025,9 +3025,9 @@ Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, // If this is a development, non-installed, clang, libcxx will // not be found at ../include/c++ but it likely to be found at // one of the following two locations: - if (AddIncludePath(SysRoot + "/usr/local/include")) + if (AddIncludePath(concat(SysRoot, "/usr/local/include"))) return; - if (AddIncludePath(SysRoot + "/usr/include")) + if (AddIncludePath(concat(SysRoot, "/usr/include"))) return; } diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index a7c1cc0e22c506..43b2bf06e8b43a 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -97,9 +97,9 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::mips64: { std::string MT = std::string(IsMipsR6 ? "mipsisa64r6" : "mips64") + "-linux-" + (IsMipsN32Abi ? 
"gnuabin32" : "gnuabi64"); - if (D.getVFS().exists(SysRoot + "/lib/" + MT)) + if (D.getVFS().exists(concat(SysRoot, "/lib", MT))) return MT; - if (D.getVFS().exists(SysRoot + "/lib/mips64-linux-gnu")) + if (D.getVFS().exists(concat(SysRoot, "/lib/mips64-linux-gnu"))) return "mips64-linux-gnu"; break; } @@ -108,14 +108,14 @@ std::string Linux::getMultiarchTriple(const Driver &D, return "mips64el-linux-android"; std::string MT = std::string(IsMipsR6 ? "mipsisa64r6el" : "mips64el") + "-linux-" + (IsMipsN32Abi ? "gnuabin32" : "gnuabi64"); - if (D.getVFS().exists(SysRoot + "/lib/" + MT)) + if (D.getVFS().exists(concat(SysRoot, "/lib", MT))) return MT; - if (D.getVFS().exists(SysRoot + "/lib/mips64el-linux-gnu")) + if (D.getVFS().exists(concat(SysRoot, "/lib/mips64el-linux-gnu"))) return "mips64el-linux-gnu"; break; } case llvm::Triple::ppc: - if (D.getVFS().exists(SysRoot + "/lib/powerpc-linux-gnuspe")) + if (D.getVFS().exists(concat(SysRoot, "/lib/powerpc-linux-gnuspe"))) return "powerpc-linux-gnuspe"; return "powerpc-linux-gnu"; case llvm::Triple::ppcle: @@ -269,13 +269,13 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) // used. We need add both libo32 and /lib. 
if (Arch == llvm::Triple::mips || Arch == llvm::Triple::mipsel) { Generic_GCC::AddMultilibPaths(D, SysRoot, "libo32", MultiarchTriple, Paths); - addPathIfExists(D, SysRoot + "/libo32", Paths); - addPathIfExists(D, SysRoot + "/usr/libo32", Paths); + addPathIfExists(D, concat(SysRoot, "/libo32"), Paths); + addPathIfExists(D, concat(SysRoot, "/usr/libo32"), Paths); } Generic_GCC::AddMultilibPaths(D, SysRoot, OSLibDir, MultiarchTriple, Paths); - addPathIfExists(D, SysRoot + "/lib/" + MultiarchTriple, Paths); - addPathIfExists(D, SysRoot + "/lib/../" + OSLibDir, Paths); + addPathIfExists(D, concat(SysRoot, "/lib", MultiarchTriple), Paths); + addPathIfExists(D, concat(SysRoot, "/lib/..", OSLibDir), Paths); if (IsAndroid) { // Android sysroots contain a library directory for each supported OS @@ -283,24 +283,24 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) // directory. addPathIfExists( D, - SysRoot + "/usr/lib/" + MultiarchTriple + "/" + - llvm::to_string(Triple.getEnvironmentVersion().getMajor()), + concat(SysRoot, "/usr/lib", MultiarchTriple, + llvm::to_string(Triple.getEnvironmentVersion().getMajor())), Paths); } - addPathIfExists(D, SysRoot + "/usr/lib/" + MultiarchTriple, Paths); + addPathIfExists(D, concat(SysRoot, "/usr/lib", MultiarchTriple), Paths); // 64-bit OpenEmbedded sysroots may not have a /usr/lib dir. So they cannot // find /usr/lib64 as it is referenced as /usr/lib/../lib64. So we handle // this here. 
if (Triple.getVendor() == llvm::Triple::OpenEmbedded && Triple.isArch64Bit()) - addPathIfExists(D, SysRoot + "/usr/" + OSLibDir, Paths); + addPathIfExists(D, concat(SysRoot, "/usr", OSLibDir), Paths); else - addPathIfExists(D, SysRoot + "/usr/lib/../" + OSLibDir, Paths); + addPathIfExists(D, concat(SysRoot, "/usr/lib/..", OSLibDir), Paths); if (IsRISCV) { StringRef ABIName = tools::riscv::getRISCVABI(Args, Triple); - addPathIfExists(D, SysRoot + "/" + OSLibDir + "/" + ABIName, Paths); - addPathIfExists(D, SysRoot + "/usr/" + OSLibDir + "/" + ABIName, Paths); + addPathIfExists(D, concat(SysRoot, "/", OSLibDir, ABIName), Paths); + addPathIfExists(D, concat(SysRoot, "/usr", OSLibDir, ABIName), Paths); } Generic_GCC::AddMultiarchPaths(D, SysRoot, OSLibDir, Paths); @@ -312,8 +312,8 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) D.getVFS().exists(D.Dir + "/../lib/libc++.so")) addPathIfExists(D, D.Dir + "/../lib", Paths); - addPathIfExists(D, SysRoot + "/lib", Paths); - addPathIfExists(D, SysRoot + "/usr/lib", Paths); + addPathIfExists(D, concat(SysRoot, "/lib"), Paths); + addPathIfExists(D, concat(SysRoot, "/usr/lib"), Paths); } ToolChain::RuntimeLibType Linux::GetDefaultRuntimeLibType() const { @@ -586,7 +586,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, return; // LOCAL_INCLUDE_DIR - addSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/local/include"); + addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include")); // TOOL_INCLUDE_DIR AddMultilibIncludeArgs(DriverArgs, CC1Args); @@ -607,9 +607,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, // /usr/include. 
std::string MultiarchIncludeDir = getMultiarchTriple(D, getTriple(), SysRoot); if (!MultiarchIncludeDir.empty() && - D.getVFS().exists(SysRoot + "/usr/include/" + MultiarchIncludeDir)) - addExternCSystemInclude(DriverArgs, CC1Args, - SysRoot + "/usr/include/" + MultiarchIncludeDir); + D.getVFS().exists(concat(SysRoot, "/usr/include", MultiarchIncludeDir))) + addExternCSystemInclude( + DriverArgs, CC1Args, + concat(SysRoot, "/usr/include", MultiarchIncludeDir)); if (getTriple().getOS() == llvm::Triple::RTEMS) return; @@ -617,9 +618,9 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, // Add an include of '/include' directly. This isn't provided by default by // system GCCs, but is often used with cross-compiling GCCs, and harmless to // add even when Clang is acting as-if it were a system compiler. - addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/include"); + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include")); - addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include"); + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include")); if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl()) addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude); diff --git a/clang/test/Driver/Inputs/basic_linux_libstdcxx_tree/usr/bin/.keep b/clang/test/Driver/Inputs/basic_linux_libstdcxx_tree/usr/bin/.keep new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/clang/test/Driver/Inputs/basic_linux_libstdcxx_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/4.8/crtbegin.o b/clang/test/Driver/Inputs/basic_linux_libstdcxx_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/4.8/crtbegin.o new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/clang/test/Driver/linux-header-search.cpp b/clang/test/Driver/linux-header-search.cpp index 6f89985f218975..91e4f8825e49a9 100644 --- a/clang/test/Driver/linux-header-search.cpp +++ b/clang/test/Driver/linux-header-search.cpp @@ -16,6 
+16,22 @@ // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/x86_64-unknown-linux-gnu/c++/v1" // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/include/c++/v1" // CHECK-BASIC-LIBCXX-SYSROOT: "-internal-isystem" "[[SYSROOT]]/usr/local/include" + +// Test include paths when the sysroot path ends with `/`. +// RUN: %clang -### %s -fsyntax-only 2>&1 \ +// RUN: --target=x86_64-unknown-linux-gnu \ +// RUN: -stdlib=libc++ \ +// RUN: -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: --sysroot=%S/Inputs/basic_linux_libcxx_tree/ \ +// RUN: --gcc-toolchain="" \ +// RUN: | FileCheck --check-prefix=CHECK-BASIC-LIBCXX-SYSROOT-SLASH %s +// CHECK-BASIC-LIBCXX-SYSROOT-SLASH: "-cc1" +// CHECK-BASIC-LIBCXX-SYSROOT-SLASH-SAME: "-isysroot" "[[SYSROOT:[^"]+/]]" +// CHECK-BASIC-LIBCXX-SYSROOT-SLASH-SAME: "-internal-isystem" "[[SYSROOT]]usr/include/x86_64-unknown-linux-gnu/c++/v1" +// CHECK-BASIC-LIBCXX-SYSROOT-SLASH-SAME: "-internal-isystem" "[[SYSROOT]]usr/include/c++/v1" +// CHECK-BASIC-LIBCXX-SYSROOT-SLASH-SAME: "-internal-isystem" "[[SYSROOT]]usr/local/include" + // RUN: %clang -### %s -fsyntax-only 2>&1 \ // RUN: --target=x86_64-unknown-linux-gnu \ // RUN: -stdlib=libc++ \ @@ -56,7 +72,20 @@ // CHECK-BASIC-LIBCXXV2-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/x86_64-unknown-linux-gnu/c++/v2" // CHECK-BASIC-LIBCXXV2-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/bin/../include/c++/v2" // CHECK-BASIC-LIBCXXV2-INSTALL: "-internal-isystem" "[[SYSROOT]]/usr/local/include" -// + +// Test Linux with libstdc++ when the sysroot path ends with `/`. 
+// RUN: %clang -### %s -fsyntax-only 2>&1 \ +// RUN: --target=x86_64-unknown-linux-gnu \ +// RUN: -stdlib=libstdc++ \ +// RUN: -ccc-install-dir %S/Inputs/basic_linux_tree/usr/bin \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: --sysroot=%S/Inputs/basic_linux_libstdcxx_tree/ \ +// RUN: --gcc-toolchain="" \ +// RUN: | FileCheck --check-prefix=CHECK-BASIC-LIBSTDCXX-SYSROOT-SLASH %s +// CHECK-BASIC-LIBSTDCXX-SYSROOT-SLASH: "-cc1" +// CHECK-BASIC-LIBSTDCXX-SYSROOT-SLASH-SAME: "-isysroot" "[[SYSROOT:[^"]+/]]" +// CHECK-BASIC-LIBSTDCXX-SYSROOT-SLASH-SAME: "-internal-isystem" "[[SYSROOT]]usr/lib/gcc/x86_64-unknown-linux-gnu/4.8/../../../../x86_64-unknown-linux-gnu/include" + // Test Linux with both libc++ and libstdc++ installed. // RUN: %clang -### %s -fsyntax-only 2>&1 \ // RUN: --target=x86_64-unknown-linux-gnu \ From 5a6e0857577e6dafb03716cfd47fa319441691dc Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 27 May 2022 11:07:51 -0400 Subject: [PATCH 719/908] [InstCombine] reduce code duplication; NFC --- .../InstCombine/InstCombineMulDivRem.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 0441203e249a24..a2feacc1943b0b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -301,8 +301,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { } } - /// i1 mul -> i1 and. - if (I.getType()->isIntOrIntVectorTy(1)) + // i1 mul -> i1 and. 
+ Type *Ty = I.getType(); + if (Ty->isIntOrIntVectorTy(1)) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -335,7 +336,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() && (Op0->hasOneUse() || Op1->hasOneUse() || X == Y)) { Value *And = Builder.CreateAnd(X, Y, "mulbool"); - return CastInst::Create(Instruction::ZExt, And, I.getType()); + return CastInst::Create(Instruction::ZExt, And, Ty); } // (sext bool X) * (zext bool Y) --> sext (and X, Y) // (zext bool X) * (sext bool Y) --> sext (and X, Y) @@ -345,22 +346,22 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() && (Op0->hasOneUse() || Op1->hasOneUse())) { Value *And = Builder.CreateAnd(X, Y, "mulbool"); - return CastInst::Create(Instruction::SExt, And, I.getType()); + return CastInst::Create(Instruction::SExt, And, Ty); } // (zext bool X) * Y --> X ? Y : 0 // Y * (zext bool X) --> X ? Y : 0 if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(X, Op1, ConstantInt::get(I.getType(), 0)); + return SelectInst::Create(X, Op1, ConstantInt::getNullValue(Ty)); if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(X, Op0, ConstantInt::get(I.getType(), 0)); + return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty)); // (sext bool X) * C --> X ? -C : 0 Constant *ImmC; if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1) && match(Op1, m_ImmConstant(ImmC))) { Constant *NegC = ConstantExpr::getNeg(ImmC); - return SelectInst::Create(X, NegC, ConstantInt::getNullValue(I.getType())); + return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty)); } // (lshr X, 31) * Y --> (X < 0) ? 
Y : 0 @@ -370,7 +371,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (match(&I, m_c_Mul(m_LShr(m_Value(X), m_APInt(C)), m_Value(Y))) && *C == C->getBitWidth() - 1) { Value *IsNeg = Builder.CreateIsNeg(X, "isneg"); - return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(I.getType())); + return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty)); } // ((ashr X, 31) | 1) * X --> abs(X) From b5b6aa4d53407ff1f7805fc000def63669d64a9f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 27 May 2022 11:27:14 -0400 Subject: [PATCH 720/908] [InstCombine] fold multiply by signbit-splat to cmp+select (ashr i32 X, 31) * C --> (X < 0) ? -C : 0 https://alive2.llvm.org/ce/z/G8u9SS With a constant operand, this is an improvement in IR and codegen (where it can be converted to a mask op). Without a constant operand, we would have to negate the operand, so that is probably better left to the backend. This is similar but not the same optimization that is requested in #55618. --- .../InstCombine/InstCombineMulDivRem.cpp | 20 ++++++++++++++----- llvm/test/Transforms/InstCombine/mul.ll | 12 +++++++---- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index a2feacc1943b0b..897580a4148768 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -356,12 +356,22 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty)); - // (sext bool X) * C --> X ? 
-C : 0 Constant *ImmC; - if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1) && - match(Op1, m_ImmConstant(ImmC))) { - Constant *NegC = ConstantExpr::getNeg(ImmC); - return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty)); + if (match(Op1, m_ImmConstant(ImmC))) { + // (sext bool X) * C --> X ? -C : 0 + if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { + Constant *NegC = ConstantExpr::getNeg(ImmC); + return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty)); + } + + // (ashr i32 X, 31) * C --> (X < 0) ? -C : 0 + const APInt *C; + if (match(Op0, m_OneUse(m_AShr(m_Value(X), m_APInt(C)))) && + *C == C->getBitWidth() - 1) { + Constant *NegC = ConstantExpr::getNeg(ImmC); + Value *IsNeg = Builder.CreateIsNeg(X, "isneg"); + return SelectInst::Create(IsNeg, NegC, ConstantInt::getNullValue(Ty)); + } } // (lshr X, 31) * Y --> (X < 0) ? Y : 0 diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 47fd5fc87b10ea..895ce433630c61 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -464,8 +464,8 @@ define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) { define i32 @signsplat_mul(i32 %x) { ; CHECK-LABEL: @signsplat_mul( -; CHECK-NEXT: [[ASH:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ASH]], 42 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[MUL:%.*]] = select i1 [[ISNEG]], i32 -42, i32 0 ; CHECK-NEXT: ret i32 [[MUL]] ; %ash = ashr i32 %x, 31 @@ -475,8 +475,8 @@ define i32 @signsplat_mul(i32 %x) { define <2 x i32> @signsplat_mul_vec(<2 x i32> %x) { ; CHECK-LABEL: @signsplat_mul_vec( -; CHECK-NEXT: [[ASH:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = mul nsw <2 x i32> [[ASH]], +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[MUL:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> , <2 x i32> 
zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %ash = ashr <2 x i32> %x, @@ -484,6 +484,8 @@ define <2 x i32> @signsplat_mul_vec(<2 x i32> %x) { ret <2 x i32> %mul } +; negative test - wrong shift amount + define i32 @not_signsplat_mul(i32 %x) { ; CHECK-LABEL: @not_signsplat_mul( ; CHECK-NEXT: [[ASH:%.*]] = ashr i32 [[X:%.*]], 30 @@ -495,6 +497,8 @@ define i32 @not_signsplat_mul(i32 %x) { ret i32 %mul } +; negative test - extra use + define i32 @signsplat_mul_use(i32 %x) { ; CHECK-LABEL: @signsplat_mul_use( ; CHECK-NEXT: [[ASH:%.*]] = ashr i32 [[X:%.*]], 31 From 042ae89556572d045619dbc7ede1be20c644fa6d Mon Sep 17 00:00:00 2001 From: PeixinQiao Date: Sat, 28 May 2022 00:06:57 +0800 Subject: [PATCH 721/908] [OpenMP] Support operation conversion to LLVM for threadprivate directive This supports the operation conversion for threadprivate directive. The support for memref type conversion is not implemented. Reviewed By: kiranchandramohan, shraiysh Differential Revision: https://reviews.llvm.org/D124610 --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 41 +++++++++++++++++++ .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 29 ++++++++++--- .../OpenMPToLLVM/convert-to-llvmir.mlir | 38 ++++++++++++++++- 3 files changed, 101 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index e69891ec9abf31..ce96f6f5c3fbb4 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -813,6 +813,20 @@ def AtomicReadOp : OpenMP_Op<"atomic.read", [AllTypesMatch<["x", "v"]>]> { `:` type($x) attr-dict }]; let hasVerifier = 1; + let extraClassDeclaration = [{ + /// The number of variable operands. + unsigned getNumVariableOperands() { + assert(x() && "expected 'x' operand"); + assert(v() && "expected 'v' operand"); + return 2; + } + + /// The i-th variable operand passed. 
+ Value getVariableOperand(unsigned i) { + assert(0 <= i < 2 && "invalid index position for an operand"); + return i == 0 ? x() : v(); + } + }]; } def AtomicWriteOp : OpenMP_Op<"atomic.write"> { @@ -847,6 +861,20 @@ def AtomicWriteOp : OpenMP_Op<"atomic.write"> { attr-dict }]; let hasVerifier = 1; + let extraClassDeclaration = [{ + /// The number of variable operands. + unsigned getNumVariableOperands() { + assert(address() && "expected address operand"); + assert(value() && "expected value operand"); + return 2; + } + + /// The i-th variable operand passed. + Value getVariableOperand(unsigned i) { + assert(0 <= i < 2 && "invalid index position for an operand"); + return i == 0 ? address() : value(); + } + }]; } def AtomicUpdateOp : OpenMP_Op<"atomic.update", @@ -996,6 +1024,19 @@ def ThreadprivateOp : OpenMP_Op<"threadprivate"> { let assemblyFormat = [{ $sym_addr `:` type($sym_addr) `->` type($tls_addr) attr-dict }]; + let extraClassDeclaration = [{ + /// The number of variable operands. + unsigned getNumVariableOperands() { + assert(sym_addr() && "expected one variable operand"); + return 1; + } + + /// The i-th variable operand passed. 
+ Value getVariableOperand(unsigned i) { + assert(i == 0 && "invalid index position for an operand"); + return sym_addr(); + } + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index d16e51e0eb24de..25eb2bf5ddc442 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -52,7 +52,23 @@ struct RegionLessOpConversion : public ConvertOpToLLVMPattern { LogicalResult matchAndRewrite(T curOp, typename T::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(curOp, TypeRange(), adaptor.getOperands(), + TypeConverter *converter = ConvertToLLVMPattern::getTypeConverter(); + SmallVector resTypes; + if (failed(converter->convertTypes(curOp->getResultTypes(), resTypes))) + return failure(); + SmallVector convertedOperands; + for (unsigned idx = 0; idx < curOp.getNumVariableOperands(); ++idx) { + Value originalVariableOperand = curOp.getVariableOperand(idx); + if (!originalVariableOperand) + return failure(); + if (originalVariableOperand.getType().isa()) { + // TODO: Support memref type in variable operands + rewriter.notifyMatchFailure(curOp, "memref is not supported yet"); + } else { + convertedOperands.emplace_back(adaptor.getOperands()[idx]); + } + } + rewriter.replaceOpWithNewOp(curOp, resTypes, convertedOperands, curOp->getAttrs()); return success(); } @@ -65,10 +81,10 @@ void mlir::configureOpenMPToLLVMConversionLegality( mlir::omp::MasterOp>( [&](Operation *op) { return typeConverter.isLegal(&op->getRegion(0)); }); target - .addDynamicallyLegalOp( - [&](Operation *op) { - return typeConverter.isLegal(op->getOperandTypes()); - }); + .addDynamicallyLegalOp([&](Operation *op) { + return typeConverter.isLegal(op->getOperandTypes()); + }); } void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter 
&converter, @@ -77,7 +93,8 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter, RegionOpConversion, RegionOpConversion, RegionLessOpConversion, - RegionLessOpConversion>(converter); + RegionLessOpConversion, + RegionLessOpConversion>(converter); } namespace { diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index cb1e2411e8091d..341e283ffd3338 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-openmp-to-llvm %s -split-input-file | FileCheck %s +// RUN: mlir-opt -convert-openmp-to-llvm -split-input-file %s | FileCheck %s // CHECK-LABEL: llvm.func @master_block_arg func.func @master_block_arg() { @@ -15,6 +15,8 @@ func.func @master_block_arg() { return } +// ----- + // CHECK-LABEL: llvm.func @branch_loop func.func @branch_loop() { %start = arith.constant 0 : index @@ -44,6 +46,8 @@ func.func @branch_loop() { return } +// ----- + // CHECK-LABEL: @wsloop // CHECK: (%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: i64, %[[ARG3:.*]]: i64, %[[ARG4:.*]]: i64, %[[ARG5:.*]]: i64) func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { @@ -62,3 +66,35 @@ func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: } return } + +// ----- + +// CHECK-LABEL: @atomic_write +// CHECK: (%[[ARG0:.*]]: !llvm.ptr) +// CHECK: %[[VAL0:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: omp.atomic.write %[[ARG0]] = %[[VAL0]] hint(none) memory_order(relaxed) : !llvm.ptr, i32 +func.func @atomic_write(%a: !llvm.ptr) -> () { + %1 = arith.constant 1 : i32 + omp.atomic.write %a = %1 hint(none) memory_order(relaxed) : !llvm.ptr, i32 + return +} + +// ----- + +// CHECK-LABEL: @atomic_read +// CHECK: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr) +// CHECK: omp.atomic.read %[[ARG1]] = 
%[[ARG0]] memory_order(acquire) hint(contended) : !llvm.ptr +func.func @atomic_read(%a: !llvm.ptr, %b: !llvm.ptr) -> () { + omp.atomic.read %b = %a memory_order(acquire) hint(contended) : !llvm.ptr + return +} + +// ----- + +// CHECK-LABEL: @threadprivate +// CHECK: (%[[ARG0:.*]]: !llvm.ptr) +// CHECK: %[[VAL0:.*]] = omp.threadprivate %[[ARG0]] : !llvm.ptr -> !llvm.ptr +func.func @threadprivate(%a: !llvm.ptr) -> () { + %1 = omp.threadprivate %a : !llvm.ptr -> !llvm.ptr + return +} From a205f2904d0ae4e3c6ce73b6e0a0aff29e46bd96 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Fri, 27 May 2022 08:23:37 -0700 Subject: [PATCH 722/908] [WebAssembly] Consolidate sectionTypeToString in BinaryFormat [NFC] Currently there are 2 duplicate implementations, and I want to add a use in a 3rd place. Combine them in lib/BinaryFormat so they can be shared. Also update toString for symbol and reloc types to use StringRef Differential Revision: https://reviews.llvm.org/D126553 --- lld/wasm/OutputSections.cpp | 35 --------------------------- llvm/include/llvm/BinaryFormat/Wasm.h | 5 ++-- llvm/lib/BinaryFormat/Wasm.cpp | 29 ++++++++++++++++++++-- llvm/lib/Object/WasmObjectFile.cpp | 24 +++--------------- 4 files changed, 33 insertions(+), 60 deletions(-) diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp index d23fcc35883ccf..f7e4df8900a21d 100644 --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -33,41 +33,6 @@ std::string toString(const wasm::OutputSection &sec) { } namespace wasm { -static StringRef sectionTypeToString(uint32_t sectionType) { - switch (sectionType) { - case WASM_SEC_CUSTOM: - return "CUSTOM"; - case WASM_SEC_TYPE: - return "TYPE"; - case WASM_SEC_IMPORT: - return "IMPORT"; - case WASM_SEC_FUNCTION: - return "FUNCTION"; - case WASM_SEC_TABLE: - return "TABLE"; - case WASM_SEC_MEMORY: - return "MEMORY"; - case WASM_SEC_GLOBAL: - return "GLOBAL"; - case WASM_SEC_TAG: - return "TAG"; - case WASM_SEC_EXPORT: - return "EXPORT"; - 
case WASM_SEC_START: - return "START"; - case WASM_SEC_ELEM: - return "ELEM"; - case WASM_SEC_CODE: - return "CODE"; - case WASM_SEC_DATA: - return "DATA"; - case WASM_SEC_DATACOUNT: - return "DATACOUNT"; - default: - fatal("invalid section type"); - } -} - StringRef OutputSection::getSectionName() const { return sectionTypeToString(type); } diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 2419a06fbe96c6..4b6a1d584077ba 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -470,8 +470,9 @@ inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) { return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits; } -std::string toString(WasmSymbolType type); -std::string relocTypetoString(uint32_t type); +llvm::StringRef toString(WasmSymbolType type); +llvm::StringRef relocTypetoString(uint32_t type); +llvm::StringRef sectionTypeToString(uint32_t type); bool relocTypeHasAddend(uint32_t type); } // end namespace wasm diff --git a/llvm/lib/BinaryFormat/Wasm.cpp b/llvm/lib/BinaryFormat/Wasm.cpp index 55efe31f266901..babeb12e49ef37 100644 --- a/llvm/lib/BinaryFormat/Wasm.cpp +++ b/llvm/lib/BinaryFormat/Wasm.cpp @@ -8,7 +8,7 @@ #include "llvm/BinaryFormat/Wasm.h" -std::string llvm::wasm::toString(wasm::WasmSymbolType Type) { +llvm::StringRef llvm::wasm::toString(wasm::WasmSymbolType Type) { switch (Type) { case wasm::WASM_SYMBOL_TYPE_FUNCTION: return "WASM_SYMBOL_TYPE_FUNCTION"; @@ -26,7 +26,7 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType Type) { llvm_unreachable("unknown symbol type"); } -std::string llvm::wasm::relocTypetoString(uint32_t Type) { +llvm::StringRef llvm::wasm::relocTypetoString(uint32_t Type) { switch (Type) { #define WASM_RELOC(NAME, VALUE) \ case VALUE: \ @@ -38,6 +38,31 @@ std::string llvm::wasm::relocTypetoString(uint32_t Type) { } } +llvm::StringRef llvm::wasm::sectionTypeToString(uint32_t Type) { +#define ECase(X) \ + case 
wasm::WASM_SEC_##X: \ + return #X; + switch (Type) { + ECase(CUSTOM); + ECase(TYPE); + ECase(IMPORT); + ECase(FUNCTION); + ECase(TABLE); + ECase(MEMORY); + ECase(GLOBAL); + ECase(EXPORT); + ECase(START); + ECase(ELEM); + ECase(CODE); + ECase(DATA); + ECase(DATACOUNT); + ECase(TAG); + default: + llvm_unreachable("unknown section type"); + } +#undef ECase +} + bool llvm::wasm::relocTypeHasAddend(uint32_t Type) { switch (Type) { case R_WASM_MEMORY_ADDR_LEB: diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index dfb9e2b3eab2b5..73c2f868492e2a 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -1727,29 +1727,11 @@ void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } Expected WasmObjectFile::getSectionName(DataRefImpl Sec) const { const WasmSection &S = Sections[Sec.d.a]; -#define ECase(X) \ - case wasm::WASM_SEC_##X: \ - return #X; - switch (S.Type) { - ECase(TYPE); - ECase(IMPORT); - ECase(FUNCTION); - ECase(TABLE); - ECase(MEMORY); - ECase(GLOBAL); - ECase(TAG); - ECase(EXPORT); - ECase(START); - ECase(ELEM); - ECase(CODE); - ECase(DATA); - ECase(DATACOUNT); - case wasm::WASM_SEC_CUSTOM: + if (S.Type == wasm::WASM_SEC_CUSTOM) return S.Name; - default: + if (S.Type > wasm::WASM_SEC_TAG) return createStringError(object_error::invalid_section_index, ""); - } -#undef ECase + return wasm::sectionTypeToString(S.Type); } uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } From bf1ab1f0eb9578914343f48096229ecccd0ecf52 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Tue, 24 May 2022 18:21:34 +0200 Subject: [PATCH 723/908] cmake: use llvm dir variables for clang/utils/hmaptool Install hmaptool using the LLVM specific variables, so everything goes in the right place in case llvm is included from a top level CMakeLists.txt. 
Signed-off-by: Matheus Izvekov Reviewed By: stephenneuendorffer Differential Revision: https://reviews.llvm.org/D126308 --- clang/utils/hmaptool/CMakeLists.txt | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/clang/utils/hmaptool/CMakeLists.txt b/clang/utils/hmaptool/CMakeLists.txt index f0d9866782b88a..320a24da346d63 100644 --- a/clang/utils/hmaptool/CMakeLists.txt +++ b/clang/utils/hmaptool/CMakeLists.txt @@ -1,19 +1,9 @@ -set(CLANG_HMAPTOOL hmaptool) +add_custom_command(OUTPUT "${LLVM_TOOLS_BINARY_DIR}/hmaptool" + COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/hmaptool" "${LLVM_TOOLS_BINARY_DIR}" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/hmaptool") -add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/${CLANG_HMAPTOOL} - COMMAND ${CMAKE_COMMAND} -E make_directory - ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin - COMMAND ${CMAKE_COMMAND} -E copy - ${CMAKE_CURRENT_SOURCE_DIR}/${CLANG_HMAPTOOL} - ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/ - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${CLANG_HMAPTOOL}) - -list(APPEND Depends ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/${CLANG_HMAPTOOL}) -install(PROGRAMS ${CLANG_HMAPTOOL} - DESTINATION "${CMAKE_INSTALL_BINDIR}" - COMPONENT hmaptool) - -add_custom_target(hmaptool ALL DEPENDS ${Depends}) +install(PROGRAMS hmaptool DESTINATION "${LLVM_UTILS_INSTALL_DIR}}" COMPONENT hmaptool) +add_custom_target(hmaptool ALL DEPENDS "${LLVM_TOOLS_BINARY_DIR}/hmaptool") set_target_properties(hmaptool PROPERTIES FOLDER "Utils") if(NOT LLVM_ENABLE_IDE) From 865ad6bd21656aef5fabd2f4603ad4f759df6e62 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 19 May 2022 11:06:11 +0200 Subject: [PATCH 724/908] [libc++] Use __libcpp_clz for a tighter __log2i function While looking at D113413 I noticed that __log2i could perhaps be improved to be both slightly smaller and faster. 
projects/libcxx/benchmarks/sort.libcxx.out --benchmark_filter=BM_Sort_uint32_QuickSortAdversary* suggests this is performance neutral, but it shaves a few bytes off the benchmark binary, and even more off a Chromium build. Differential revision: https://reviews.llvm.org/D125958 --- libcxx/include/__algorithm/sort.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index d108dc31bd299b..a6ff5e3ac54ad5 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -14,8 +14,10 @@ #include <__algorithm/min_element.h> #include <__algorithm/partial_sort.h> #include <__algorithm/unwrap_iter.h> +#include <__bits> #include <__config> #include <__utility/swap.h> +#include #include #if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY) @@ -498,6 +500,15 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C template inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) { + if (__n == 0) + return 0; + if (sizeof(__n) <= sizeof(unsigned)) + return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast(__n)); + if (sizeof(__n) <= sizeof(unsigned long)) + return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast(__n)); + if (sizeof(__n) <= sizeof(unsigned long long)) + return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast(__n)); + _Number __log2 = 0; while (__n > 1) { __log2++; From 4dc3893eeb47bef9298c34cdc993165af88721a5 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 27 May 2022 17:03:32 +0000 Subject: [PATCH 725/908] Revert "[Clang] Extend -gen-reproducer flag" This reverts commit 684c080108766b4f112f172fed4a49059484614d. 
--- clang/include/clang/Driver/Driver.h | 31 +++------------ clang/include/clang/Driver/Options.td | 4 -- clang/lib/Driver/Driver.cpp | 6 ++- clang/test/Driver/emit-reproducer.c | 39 ------------------- clang/tools/driver/driver.cpp | 55 ++++++++++----------------- 5 files changed, 31 insertions(+), 104 deletions(-) delete mode 100644 clang/test/Driver/emit-reproducer.c diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 86eea9375f6737..f0f294a669d98a 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -276,6 +276,11 @@ class Driver { unsigned ProbePrecompiled : 1; public: + /// Force clang to emit reproducer for driver invocation. This is enabled + /// indirectly by setting FORCE_CLANG_DIAGNOSTICS_CRASH environment variable + /// or when using the -gen-reproducer driver flag. + unsigned GenReproducer : 1; + // getFinalPhase - Determine which compilation mode we are in and record // which option we used to determine the final phase. // TODO: Much of what getFinalPhase returns are not actually true compiler @@ -500,32 +505,6 @@ class Driver { StringRef AdditionalInformation = "", CompilationDiagnosticReport *GeneratedReport = nullptr); - enum class CommandStatus { - Crash = 1, - Error, - Ok, - }; - - enum class ReproLevel { - Off = 0, - OnCrash = static_cast(CommandStatus::Crash), - OnError = static_cast(CommandStatus::Error), - Always = static_cast(CommandStatus::Ok), - }; - - bool maybeGenerateCompilationDiagnostics( - CommandStatus CS, ReproLevel Level, Compilation &C, - const Command &FailingCommand, StringRef AdditionalInformation = "", - CompilationDiagnosticReport *GeneratedReport = nullptr) { - if (static_cast(CS) > static_cast(Level)) - return false; - // Hack to ensure that diagnostic notes get emitted. 
- Diags.setLastDiagnosticIgnored(false); - generateCompilationDiagnostics(C, FailingCommand, AdditionalInformation, - GeneratedReport); - return true; - } - /// @} /// @name Helper Methods /// @{ diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2a5e8bf27e3531..e19baac5860aab 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -558,10 +558,7 @@ def arcmt_migrate_report_output : Separate<["-"], "arcmt-migrate-report-output"> def arcmt_migrate_emit_arc_errors : Flag<["-"], "arcmt-migrate-emit-errors">, HelpText<"Emit ARC errors even if the migrator can fix them">, Flags<[CC1Option]>, MarshallingInfoFlag>; -def gen_reproducer_eq: Joined<["-"], "gen-reproducer=">, Flags<[NoArgumentUnused, CoreOption]>, - HelpText<"Emit reproducer on (option: off, crash (default), error, always)">; def gen_reproducer: Flag<["-"], "gen-reproducer">, InternalDebugOpt, - Alias, AliasArgs<["always"]>, HelpText<"Auto-generates preprocessed source files and a reproduction script">; def gen_cdb_fragment_path: Separate<["-"], "gen-cdb-fragment-path">, InternalDebugOpt, HelpText<"Emit a compilation database fragment to the specified directory">; @@ -1400,7 +1397,6 @@ def fexperimental_new_constant_interpreter : Flag<["-"], "fexperimental-new-cons def fconstexpr_backtrace_limit_EQ : Joined<["-"], "fconstexpr-backtrace-limit=">, Group; def fno_crash_diagnostics : Flag<["-"], "fno-crash-diagnostics">, Group, Flags<[NoArgumentUnused, CoreOption]>, - Alias, AliasArgs<["off"]>, HelpText<"Disable auto-generation of preprocessed source files and a script for reproduction during a clang crash">; def fcrash_diagnostics_dir : Joined<["-"], "fcrash-diagnostics-dir=">, Group, Flags<[NoArgumentUnused, CoreOption]>, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index aaef2e1ded327c..94925568aec238 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -198,7 +198,8 @@ 
Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, CCPrintOptions(false), CCPrintHeaders(false), CCLogDiagnostics(false), CCGenDiagnostics(false), CCPrintProcessStats(false), TargetTriple(TargetTriple), Saver(Alloc), CheckInputsExist(true), - ProbePrecompiled(true), SuppressMissingInputWarning(false) { + ProbePrecompiled(true), GenReproducer(false), + SuppressMissingInputWarning(false) { // Provide a sane fallback if no VFS is specified. if (!this->VFS) this->VFS = llvm::vfs::getRealFileSystem(); @@ -1216,6 +1217,9 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings); if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name)) CCCGenericGCCName = A->getValue(); + GenReproducer = Args.hasFlag(options::OPT_gen_reproducer, + options::OPT_fno_crash_diagnostics, + !!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")); // Process -fproc-stat-report options. if (const Arg *A = Args.getLastArg(options::OPT_fproc_stat_report_EQ)) { diff --git a/clang/test/Driver/emit-reproducer.c b/clang/test/Driver/emit-reproducer.c deleted file mode 100644 index f50171b7d9a9bf..00000000000000 --- a/clang/test/Driver/emit-reproducer.c +++ /dev/null @@ -1,39 +0,0 @@ -// RUN: rm -rf %t && mkdir %t - -// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=off 2>&1 | FileCheck %s --check-prefix=NOT -// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -fno-crash-diagnostics 2>&1 | FileCheck %s --check-prefix=NOT -// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t 2>&1 | FileCheck %s -// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=crash 2>&1 | FileCheck %s -// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=error 2>&1 | FileCheck %s -// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer=always 2>&1 | FileCheck %s -// RUN: not %clang -DFATAL %s -fcrash-diagnostics-dir=%t -gen-reproducer 2>&1 | FileCheck %s - -// RUN: not 
%clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=off 2>&1 | FileCheck %s --check-prefix=NOT -// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -fno-crash-diagnostics 2>&1 | FileCheck %s --check-prefix=NOT -// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t 2>&1 | FileCheck %s --check-prefix=NOT -// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=crash 2>&1 | FileCheck %s --check-prefix=NOT -// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=error 2>&1 | FileCheck %s -// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer=always 2>&1 | FileCheck %s -// RUN: not %clang -DERROR %s -fcrash-diagnostics-dir=%t -gen-reproducer 2>&1 | FileCheck %s - -// RUN: %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=off 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty -// RUN: %clang %s -fcrash-diagnostics-dir=%t -fno-crash-diagnostics 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty -// RUN: %clang %s -fcrash-diagnostics-dir=%t 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty -// RUN: %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=crash 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty -// RUN: %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=error 2>&1 | FileCheck %s --check-prefix=NOT --allow-empty -// RUN: not %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer=always 2>&1 | FileCheck %s -// RUN: not %clang %s -fcrash-diagnostics-dir=%t -gen-reproducer 2>&1 | FileCheck %s - -// RUN: not %clang $s -gen-reproducer=badvalue 2>&1 | FileCheck %s --check-prefix=BAD-VALUE -// BAD-VALUE: Unknown value for -gen-reproducer=: 'badvalue' - -// CHECK: note: diagnostic msg: {{.*}}emit-reproducer-{{.*}}.c -// NOT-NOT: note: diagnostic msg: {{.*}}emit-reproducer-{{.*}}.c - -#ifdef FATAL -#pragma clang __debug crash -#elif ERROR -int main -#else -int main() {} -#endif diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index c9aee57d92cf49..a7bfb07e002bf9 
100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -482,37 +482,32 @@ int main(int Argc, const char **Argv) { } std::unique_ptr C(TheDriver.BuildCompilation(Args)); - - Driver::ReproLevel ReproLevel = Driver::ReproLevel::OnCrash; - if (Arg *A = C->getArgs().getLastArg(options::OPT_gen_reproducer_eq)) { - auto Level = llvm::StringSwitch>(A->getValue()) - .Case("off", Driver::ReproLevel::Off) - .Case("crash", Driver::ReproLevel::OnCrash) - .Case("error", Driver::ReproLevel::OnError) - .Case("always", Driver::ReproLevel::Always) - .Default(None); - if (!Level) { - llvm::errs() << "Unknown value for " << A->getSpelling() << ": '" - << A->getValue() << "'\n"; - return 1; - } - ReproLevel = *Level; - } - if (!!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) - ReproLevel = Driver::ReproLevel::Always; - int Res = 1; bool IsCrash = false; - Driver::CommandStatus CommandStatus = Driver::CommandStatus::Ok; - // Pretend the first command failed if ReproStatus is Always. - const Command *FailingCommand = &*C->getJobs().begin(); if (C && !C->containsError()) { SmallVector, 4> FailingCommands; Res = TheDriver.ExecuteCompilation(*C, FailingCommands); + // Force a crash to test the diagnostics. + if (TheDriver.GenReproducer) { + Diags.Report(diag::err_drv_force_crash) + << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); + + // Pretend that every command failed. + FailingCommands.clear(); + for (const auto &J : C->getJobs()) + if (const Command *C = dyn_cast(&J)) + FailingCommands.push_back(std::make_pair(-1, C)); + + // Print the bug report message that would be printed if we did actually + // crash, but only if we're crashing due to FORCE_CLANG_DIAGNOSTICS_CRASH. 
+ if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) + llvm::dbgs() << llvm::getBugReportMsg(); + } + for (const auto &P : FailingCommands) { int CommandRes = P.first; - FailingCommand = P.second; + const Command *FailingCommand = P.second; if (!Res) Res = CommandRes; @@ -531,21 +526,13 @@ int main(int Argc, const char **Argv) { // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html IsCrash |= CommandRes > 128; #endif - CommandStatus = - IsCrash ? Driver::CommandStatus::Crash : Driver::CommandStatus::Error; - if (IsCrash) + if (IsCrash) { + TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); break; + } } } - // Print the bug report message that would be printed if we did actually - // crash, but only if we're crashing due to FORCE_CLANG_DIAGNOSTICS_CRASH. - if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) - llvm::dbgs() << llvm::getBugReportMsg(); - if (TheDriver.maybeGenerateCompilationDiagnostics(CommandStatus, ReproLevel, - *C, *FailingCommand)) - Res = 1; - Diags.getClient()->finish(); if (!UseNewCC1Process && IsCrash) { From a5d7e2a8ac7ec30957da288dcd3ca4d7aa946d22 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 27 May 2022 09:48:50 -0700 Subject: [PATCH 726/908] [OpenMP][mlir] fix broken build Reviewed By: Mogball Differential Revision: https://reviews.llvm.org/D126556 --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 ++-- mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index ce96f6f5c3fbb4..18b7eb9457139a 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -823,7 +823,7 @@ def AtomicReadOp : OpenMP_Op<"atomic.read", [AllTypesMatch<["x", "v"]>]> { /// The i-th variable operand passed. 
Value getVariableOperand(unsigned i) { - assert(0 <= i < 2 && "invalid index position for an operand"); + assert(0 <= i && i < 2 && "invalid index position for an operand"); return i == 0 ? x() : v(); } }]; @@ -871,7 +871,7 @@ def AtomicWriteOp : OpenMP_Op<"atomic.write"> { /// The i-th variable operand passed. Value getVariableOperand(unsigned i) { - assert(0 <= i < 2 && "invalid index position for an operand"); + assert(0 <= i && i < 2 && "invalid index position for an operand"); return i == 0 ? address() : value(); } }]; diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 25eb2bf5ddc442..3f0964dc1115b4 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -63,7 +63,8 @@ struct RegionLessOpConversion : public ConvertOpToLLVMPattern { return failure(); if (originalVariableOperand.getType().isa()) { // TODO: Support memref type in variable operands - rewriter.notifyMatchFailure(curOp, "memref is not supported yet"); + return rewriter.notifyMatchFailure(curOp, + "memref is not supported yet"); } else { convertedOperands.emplace_back(adaptor.getOperands()[idx]); } From c358d98b998fc5e34510e4d2c7c25b8d1eb32fb7 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 May 2022 15:24:59 -0400 Subject: [PATCH 727/908] [libc++] Add various missing _LIBCPP_HIDE_FROM_ABI Those were spotted when a project unintentionally started exporting these symbols from its ABI just by using the libc++ headers. 
Differential Revision: https://reviews.llvm.org/D126496 --- .../include/__filesystem/directory_iterator.h | 29 ++- libcxx/include/__filesystem/path.h | 220 +++++++++++------- libcxx/include/any | 1 + 3 files changed, 161 insertions(+), 89 deletions(-) diff --git a/libcxx/include/__filesystem/directory_iterator.h b/libcxx/include/__filesystem/directory_iterator.h index 6034fe50752cf8..5ff2f01ac7b276 100644 --- a/libcxx/include/__filesystem/directory_iterator.h +++ b/libcxx/include/__filesystem/directory_iterator.h @@ -44,25 +44,31 @@ class directory_iterator { public: //ctor & dtor + _LIBCPP_HIDE_FROM_ABI directory_iterator() noexcept {} + _LIBCPP_HIDE_FROM_ABI explicit directory_iterator(const path& __p) : directory_iterator(__p, nullptr) {} + _LIBCPP_HIDE_FROM_ABI directory_iterator(const path& __p, directory_options __opts) : directory_iterator(__p, nullptr, __opts) {} + _LIBCPP_HIDE_FROM_ABI directory_iterator(const path& __p, error_code& __ec) : directory_iterator(__p, &__ec) {} + _LIBCPP_HIDE_FROM_ABI directory_iterator(const path& __p, directory_options __opts, error_code& __ec) : directory_iterator(__p, &__ec, __opts) {} - directory_iterator(const directory_iterator&) = default; - directory_iterator(directory_iterator&&) = default; - directory_iterator& operator=(const directory_iterator&) = default; + _LIBCPP_HIDE_FROM_ABI directory_iterator(const directory_iterator&) = default; + _LIBCPP_HIDE_FROM_ABI directory_iterator(directory_iterator&&) = default; + _LIBCPP_HIDE_FROM_ABI directory_iterator& operator=(const directory_iterator&) = default; + _LIBCPP_HIDE_FROM_ABI directory_iterator& operator=(directory_iterator&& __o) noexcept { // non-default implementation provided to support self-move assign. 
if (this != &__o) { @@ -71,27 +77,32 @@ class directory_iterator { return *this; } - ~directory_iterator() = default; + _LIBCPP_HIDE_FROM_ABI ~directory_iterator() = default; + _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { _LIBCPP_ASSERT(__imp_, "The end iterator cannot be dereferenced"); return __dereference(); } + _LIBCPP_HIDE_FROM_ABI const directory_entry* operator->() const { return &**this; } + _LIBCPP_HIDE_FROM_ABI directory_iterator& operator++() { return __increment(); } + _LIBCPP_HIDE_FROM_ABI __dir_element_proxy operator++(int) { __dir_element_proxy __p(**this); __increment(); return __p; } + _LIBCPP_HIDE_FROM_ABI directory_iterator& increment(error_code& __ec) { return __increment(&__ec); } private: - inline _LIBCPP_INLINE_VISIBILITY friend bool + inline _LIBCPP_HIDE_FROM_ABI friend bool operator==(const directory_iterator& __lhs, const directory_iterator& __rhs) noexcept; @@ -110,25 +121,25 @@ class directory_iterator { shared_ptr<__dir_stream> __imp_; }; -inline _LIBCPP_INLINE_VISIBILITY bool +inline _LIBCPP_HIDE_FROM_ABI bool operator==(const directory_iterator& __lhs, const directory_iterator& __rhs) noexcept { return __lhs.__imp_ == __rhs.__imp_; } -inline _LIBCPP_INLINE_VISIBILITY bool +inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const directory_iterator& __lhs, const directory_iterator& __rhs) noexcept { return !(__lhs == __rhs); } // enable directory_iterator range-based for statements -inline _LIBCPP_INLINE_VISIBILITY directory_iterator +inline _LIBCPP_HIDE_FROM_ABI directory_iterator begin(directory_iterator __iter) noexcept { return __iter; } -inline _LIBCPP_INLINE_VISIBILITY directory_iterator +inline _LIBCPP_HIDE_FROM_ABI directory_iterator end(directory_iterator) noexcept { return directory_iterator(); } diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h index 29bb1e94a8f3ff..8fafd72c7e4f68 100644 --- a/libcxx/include/__filesystem/path.h +++ b/libcxx/include/__filesystem/path.h @@ -71,6 
+71,7 @@ struct __can_convert_char { }; template +_LIBCPP_HIDE_FROM_ABI typename enable_if<__can_convert_char<_ECharT>::value, bool>::type __is_separator(_ECharT __e) { #if defined(_LIBCPP_WIN32API) @@ -101,10 +102,16 @@ struct __is_pathable_string< : public __can_convert_char<_ECharT> { using _Str = basic_string<_ECharT, _Traits, _Alloc>; using _Base = __can_convert_char<_ECharT>; + + _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + + _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_end(_Str const& __s) { return __s.data() + __s.length(); } + + _LIBCPP_HIDE_FROM_ABI static _ECharT __first_or_null(_Str const& __s) { return __s.empty() ? _ECharT{} : __s[0]; } @@ -117,10 +124,16 @@ struct __is_pathable_string< : public __can_convert_char<_ECharT> { using _Str = basic_string_view<_ECharT, _Traits>; using _Base = __can_convert_char<_ECharT>; + + _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + + _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_end(_Str const& __s) { return __s.data() + __s.length(); } + + _LIBCPP_HIDE_FROM_ABI static _ECharT __first_or_null(_Str const& __s) { return __s.empty() ? 
_ECharT{} : __s[0]; } @@ -138,7 +151,10 @@ struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true> : __can_convert_char::type> { using _Base = __can_convert_char::type>; + _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(const _ECharT* __b) { return __b; } + + _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_end(const _ECharT* __b) { using _Iter = const _ECharT*; const _ECharT __sentinel = _ECharT{}; @@ -148,6 +164,7 @@ struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true> return __e; } + _LIBCPP_HIDE_FROM_ABI static _ECharT __first_or_null(const _ECharT* __b) { return *__b; } }; @@ -164,9 +181,13 @@ struct __is_pathable_iter< using _ECharT = typename iterator_traits<_Iter>::value_type; using _Base = __can_convert_char<_ECharT>; + _LIBCPP_HIDE_FROM_ABI static _Iter __range_begin(_Iter __b) { return __b; } + + _LIBCPP_HIDE_FROM_ABI static _NullSentinel __range_end(_Iter) { return _NullSentinel{}; } + _LIBCPP_HIDE_FROM_ABI static _ECharT __first_or_null(_Iter __b) { return *__b; } }; @@ -216,6 +237,7 @@ struct _PathCVT { typedef __widen_from_utf8 _Widener; #endif + _LIBCPP_HIDE_FROM_ABI static void __append_range(__path_string& __dest, _ECharT const* __b, _ECharT const* __e) { #if defined(_LIBCPP_WIN32API) @@ -228,6 +250,7 @@ struct _PathCVT { } template + _LIBCPP_HIDE_FROM_ABI static void __append_range(__path_string& __dest, _Iter __b, _Iter __e) { static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); if (__b == __e) @@ -245,6 +268,7 @@ struct _PathCVT { } template + _LIBCPP_HIDE_FROM_ABI static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); const _ECharT __sentinel = _ECharT{}; @@ -265,6 +289,7 @@ struct _PathCVT { } template + _LIBCPP_HIDE_FROM_ABI static void __append_source(__path_string& __dest, _Source const& __s) { using _Traits = __is_pathable<_Source>; __append_range(__dest, _Traits::__range_begin(__s), @@ -277,6 
+302,7 @@ template <> struct _PathCVT<__path_value> { template + _LIBCPP_HIDE_FROM_ABI static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type __append_range(__path_string& __dest, _Iter __b, _Iter __e) { for (; __b != __e; ++__b) @@ -284,12 +310,14 @@ struct _PathCVT<__path_value> { } template + _LIBCPP_HIDE_FROM_ABI static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type __append_range(__path_string& __dest, _Iter __b, _Iter __e) { __dest.append(__b, __e); } template + _LIBCPP_HIDE_FROM_ABI static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { const char __sentinel = char{}; for (; *__b != __sentinel; ++__b) @@ -297,6 +325,7 @@ struct _PathCVT<__path_value> { } template + _LIBCPP_HIDE_FROM_ABI static void __append_source(__path_string& __dest, _Source const& __s) { using _Traits = __is_pathable<_Source>; __append_range(__dest, _Traits::__range_begin(__s), @@ -308,6 +337,7 @@ struct _PathCVT<__path_value> { template <> struct _PathCVT { + _LIBCPP_HIDE_FROM_ABI static void __append_string(__path_string& __dest, const basic_string &__str) { size_t __size = __char_to_wide(__str, nullptr, 0); @@ -317,6 +347,7 @@ struct _PathCVT { } template + _LIBCPP_HIDE_FROM_ABI static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type __append_range(__path_string& __dest, _Iter __b, _Iter __e) { basic_string __tmp(__b, __e); @@ -324,6 +355,7 @@ struct _PathCVT { } template + _LIBCPP_HIDE_FROM_ABI static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type __append_range(__path_string& __dest, _Iter __b, _Iter __e) { basic_string __tmp(__b, __e); @@ -331,6 +363,7 @@ struct _PathCVT { } template + _LIBCPP_HIDE_FROM_ABI static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { const char __sentinel = char{}; basic_string __tmp; @@ -340,6 +373,7 @@ struct _PathCVT { } template + _LIBCPP_HIDE_FROM_ABI static void __append_source(__path_string& __dest, _Source 
const& __s) { using _Traits = __is_pathable<_Source>; __append_range(__dest, _Traits::__range_begin(__s), @@ -353,6 +387,7 @@ struct _PathExport { typedef __widen_from_utf8 _Widener; template + _LIBCPP_HIDE_FROM_ABI static void __append(_Str& __dest, const __path_string& __src) { string __utf8; _Narrower()(back_inserter(__utf8), __src.data(), __src.data() + __src.size()); @@ -363,6 +398,7 @@ struct _PathExport { template <> struct _PathExport { template + _LIBCPP_HIDE_FROM_ABI static void __append(_Str& __dest, const __path_string& __src) { size_t __size = __wide_to_char(__src, nullptr, 0); size_t __pos = __dest.size(); @@ -374,6 +410,7 @@ struct _PathExport { template <> struct _PathExport { template + _LIBCPP_HIDE_FROM_ABI static void __append(_Str& __dest, const __path_string& __src) { __dest.append(__src.begin(), __src.end()); } @@ -382,6 +419,7 @@ struct _PathExport { template <> struct _PathExport { template + _LIBCPP_HIDE_FROM_ABI static void __append(_Str& __dest, const __path_string& __src) { __dest.append(__src.begin(), __src.end()); } @@ -393,6 +431,7 @@ struct _PathExport { typedef __narrow_to_utf8 _Narrower; template + _LIBCPP_HIDE_FROM_ABI static void __append(_Str& __dest, const __path_string& __src) { _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); } @@ -429,21 +468,23 @@ class _LIBCPP_TYPE_VIS path { }; // constructors and destructor - _LIBCPP_INLINE_VISIBILITY path() noexcept {} - _LIBCPP_INLINE_VISIBILITY path(const path& __p) : __pn_(__p.__pn_) {} - _LIBCPP_INLINE_VISIBILITY path(path&& __p) noexcept + _LIBCPP_HIDE_FROM_ABI path() noexcept {} + _LIBCPP_HIDE_FROM_ABI path(const path& __p) : __pn_(__p.__pn_) {} + _LIBCPP_HIDE_FROM_ABI path(path&& __p) noexcept : __pn_(_VSTD::move(__p.__pn_)) {} - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path(string_type&& __s, format = format::auto_format) noexcept : __pn_(_VSTD::move(__s)) {} template > + _LIBCPP_HIDE_FROM_ABI path(const _Source& __src, format = 
format::auto_format) { _SourceCVT<_Source>::__append_source(__pn_, __src); } template + _LIBCPP_HIDE_FROM_ABI path(_InputIt __first, _InputIt __last, format = format::auto_format) { typedef typename iterator_traits<_InputIt>::value_type _ItVal; _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); @@ -460,41 +501,42 @@ class _LIBCPP_TYPE_VIS path { #endif */ - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI ~path() = default; // assignments - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator=(const path& __p) { __pn_ = __p.__pn_; return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator=(path&& __p) noexcept { __pn_ = _VSTD::move(__p.__pn_); return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator=(string_type&& __s) noexcept { __pn_ = _VSTD::move(__s); return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& assign(string_type&& __s) noexcept { __pn_ = _VSTD::move(__s); return *this; } template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + _LIBCPP_HIDE_FROM_ABI _EnableIfPathable<_Source> operator=(const _Source& __src) { return this->assign(__src); } template + _LIBCPP_HIDE_FROM_ABI _EnableIfPathable<_Source> assign(const _Source& __src) { __pn_.clear(); _SourceCVT<_Source>::__append_source(__pn_, __src); @@ -502,6 +544,7 @@ class _LIBCPP_TYPE_VIS path { } template + _LIBCPP_HIDE_FROM_ABI path& assign(_InputIt __first, _InputIt __last) { typedef typename iterator_traits<_InputIt>::value_type _ItVal; __pn_.clear(); @@ -512,6 +555,7 @@ class _LIBCPP_TYPE_VIS path { public: // appends #if defined(_LIBCPP_WIN32API) + _LIBCPP_HIDE_FROM_ABI path& operator/=(const path& __p) { auto __p_root_name = __p.__root_name(); auto __p_root_name_size = __p_root_name.size(); @@ -538,15 +582,18 @@ class _LIBCPP_TYPE_VIS path { } template + _LIBCPP_HIDE_FROM_ABI _EnableIfPathable<_Source> append(const _Source& __src) { return operator/=(path(__src)); } template + _LIBCPP_HIDE_FROM_ABI 
path& append(_InputIt __first, _InputIt __last) { return operator/=(path(__first, __last)); } #else + _LIBCPP_HIDE_FROM_ABI path& operator/=(const path& __p) { if (__p.is_absolute()) { __pn_ = __p.__pn_; @@ -562,12 +609,13 @@ class _LIBCPP_TYPE_VIS path { // is known at compile time to be "/' since the user almost certainly intended // to append a separator instead of overwriting the path with "/" template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + _LIBCPP_HIDE_FROM_ABI _EnableIfPathable<_Source> operator/=(const _Source& __src) { return this->append(__src); } template + _LIBCPP_HIDE_FROM_ABI _EnableIfPathable<_Source> append(const _Source& __src) { using _Traits = __is_pathable<_Source>; using _CVT = _PathCVT<_SourceChar<_Source> >; @@ -581,6 +629,7 @@ class _LIBCPP_TYPE_VIS path { } template + _LIBCPP_HIDE_FROM_ABI path& append(_InputIt __first, _InputIt __last) { typedef typename iterator_traits<_InputIt>::value_type _ItVal; static_assert(__can_convert_char<_ItVal>::value, "Must convertible"); @@ -595,37 +644,38 @@ class _LIBCPP_TYPE_VIS path { #endif // concatenation - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator+=(const path& __x) { __pn_ += __x.__pn_; return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator+=(const string_type& __x) { __pn_ += __x; return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator+=(__string_view __x) { __pn_ += __x; return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator+=(const value_type* __x) { __pn_ += __x; return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& operator+=(value_type __x) { __pn_ += __x; return *this; } template + _LIBCPP_HIDE_FROM_ABI typename enable_if<__can_convert_char<_ECharT>::value, path&>::type operator+=(_ECharT __x) { _PathCVT<_ECharT>::__append_source(__pn_, @@ -634,17 +684,20 @@ class _LIBCPP_TYPE_VIS path { } template + _LIBCPP_HIDE_FROM_ABI _EnableIfPathable<_Source> 
operator+=(const _Source& __x) { return this->concat(__x); } template + _LIBCPP_HIDE_FROM_ABI _EnableIfPathable<_Source> concat(const _Source& __x) { _SourceCVT<_Source>::__append_source(__pn_, __x); return *this; } template + _LIBCPP_HIDE_FROM_ABI path& concat(_InputIt __first, _InputIt __last) { typedef typename iterator_traits<_InputIt>::value_type _ItVal; _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); @@ -652,9 +705,10 @@ class _LIBCPP_TYPE_VIS path { } // modifiers - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI void clear() noexcept { __pn_.clear(); } + _LIBCPP_HIDE_FROM_ABI path& make_preferred() { #if defined(_LIBCPP_WIN32API) _VSTD::replace(__pn_.begin(), __pn_.end(), L'/', L'\\'); @@ -662,7 +716,7 @@ class _LIBCPP_TYPE_VIS path { return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI path& remove_filename() { auto __fname = __filename(); if (!__fname.empty()) @@ -670,6 +724,7 @@ class _LIBCPP_TYPE_VIS path { return *this; } + _LIBCPP_HIDE_FROM_ABI path& replace_filename(const path& __replacement) { remove_filename(); return (*this /= __replacement); @@ -677,25 +732,26 @@ class _LIBCPP_TYPE_VIS path { path& replace_extension(const path& __replacement = path()); - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI void swap(path& __rhs) noexcept { __pn_.swap(__rhs.__pn_); } // private helper to allow reserving memory in the path - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __s) { __pn_.reserve(__s); } // native format observers - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI const string_type& native() const noexcept { return __pn_; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_HIDE_FROM_ABI const value_type* c_str() const noexcept { return __pn_.c_str(); } - _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; } + _LIBCPP_HIDE_FROM_ABI operator string_type() const { return __pn_; } #if defined(_LIBCPP_WIN32API) - _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { return __pn_; 
} + _LIBCPP_HIDE_FROM_ABI _VSTD::wstring wstring() const { return __pn_; } + _LIBCPP_HIDE_FROM_ABI _VSTD::wstring generic_wstring() const { _VSTD::wstring __s; __s.resize(__pn_.size()); @@ -706,6 +762,7 @@ class _LIBCPP_TYPE_VIS path { #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) template , class _Allocator = allocator<_ECharT> > + _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a = _Allocator()) const { using _Str = basic_string<_ECharT, _Traits, _Allocator>; @@ -715,10 +772,10 @@ class _LIBCPP_TYPE_VIS path { return __s; } - _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { + _LIBCPP_HIDE_FROM_ABI _VSTD::string string() const { return string(); } - _LIBCPP_INLINE_VISIBILITY __u8_string u8string() const { + _LIBCPP_HIDE_FROM_ABI __u8_string u8string() const { using _CVT = __narrow_to_utf8; __u8_string __s; __s.reserve(__pn_.size()); @@ -726,16 +783,17 @@ class _LIBCPP_TYPE_VIS path { return __s; } - _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { + _LIBCPP_HIDE_FROM_ABI _VSTD::u16string u16string() const { return string(); } - _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { + _LIBCPP_HIDE_FROM_ABI _VSTD::u32string u32string() const { return string(); } // generic format observers template , class _Allocator = allocator<_ECharT> > + _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> generic_string(const _Allocator& __a = _Allocator()) const { using _Str = basic_string<_ECharT, _Traits, _Allocator>; @@ -748,9 +806,10 @@ class _LIBCPP_TYPE_VIS path { return __s; } - _VSTD::string generic_string() const { return generic_string(); } - _VSTD::u16string generic_u16string() const { return generic_string(); } - _VSTD::u32string generic_u32string() const { return generic_string(); } + _LIBCPP_HIDE_FROM_ABI _VSTD::string generic_string() const { return generic_string(); } + _LIBCPP_HIDE_FROM_ABI _VSTD::u16string generic_u16string() const { return generic_string(); } + 
_LIBCPP_HIDE_FROM_ABI _VSTD::u32string generic_u32string() const { return generic_string(); } + _LIBCPP_HIDE_FROM_ABI __u8_string generic_u8string() const { __u8_string __s = u8string(); _VSTD::replace(__s.begin(), __s.end(), '\\', '/'); @@ -759,16 +818,17 @@ class _LIBCPP_TYPE_VIS path { #endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ #else /* _LIBCPP_WIN32API */ - _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { return __pn_; } + _LIBCPP_HIDE_FROM_ABI _VSTD::string string() const { return __pn_; } #ifndef _LIBCPP_HAS_NO_CHAR8_T - _LIBCPP_INLINE_VISIBILITY _VSTD::u8string u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } + _LIBCPP_HIDE_FROM_ABI _VSTD::u8string u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } #else - _LIBCPP_INLINE_VISIBILITY _VSTD::string u8string() const { return __pn_; } + _LIBCPP_HIDE_FROM_ABI _VSTD::string u8string() const { return __pn_; } #endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) template , class _Allocator = allocator<_ECharT> > + _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a = _Allocator()) const { using _CVT = __widen_from_utf8; @@ -780,39 +840,40 @@ class _LIBCPP_TYPE_VIS path { } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS - _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { + _LIBCPP_HIDE_FROM_ABI _VSTD::wstring wstring() const { return string(); } #endif - _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { + _LIBCPP_HIDE_FROM_ABI _VSTD::u16string u16string() const { return string(); } - _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { + _LIBCPP_HIDE_FROM_ABI _VSTD::u32string u32string() const { return string(); } #endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ // generic format observers - _VSTD::string generic_string() const { return __pn_; } + _LIBCPP_HIDE_FROM_ABI _VSTD::string generic_string() const { return __pn_; } #ifndef _LIBCPP_HAS_NO_CHAR8_T - _VSTD::u8string generic_u8string() const { return 
_VSTD::u8string(__pn_.begin(), __pn_.end()); } + _LIBCPP_HIDE_FROM_ABI _VSTD::u8string generic_u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } #else - _VSTD::string generic_u8string() const { return __pn_; } + _LIBCPP_HIDE_FROM_ABI _VSTD::string generic_u8string() const { return __pn_; } #endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) template , class _Allocator = allocator<_ECharT> > + _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> generic_string(const _Allocator& __a = _Allocator()) const { return string<_ECharT, _Traits, _Allocator>(__a); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS - _VSTD::wstring generic_wstring() const { return string(); } + _LIBCPP_HIDE_FROM_ABI _VSTD::wstring generic_wstring() const { return string(); } #endif - _VSTD::u16string generic_u16string() const { return string(); } - _VSTD::u32string generic_u32string() const { return string(); } + _LIBCPP_HIDE_FROM_ABI _VSTD::u16string generic_u16string() const { return string(); } + _LIBCPP_HIDE_FROM_ABI _VSTD::u32string generic_u32string() const { return string(); } #endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ #endif /* !_LIBCPP_WIN32API */ @@ -829,77 +890,77 @@ class _LIBCPP_TYPE_VIS path { public: // compare - _LIBCPP_INLINE_VISIBILITY int compare(const path& __p) const noexcept { + _LIBCPP_HIDE_FROM_ABI int compare(const path& __p) const noexcept { return __compare(__p.__pn_); } - _LIBCPP_INLINE_VISIBILITY int compare(const string_type& __s) const { + _LIBCPP_HIDE_FROM_ABI int compare(const string_type& __s) const { return __compare(__s); } - _LIBCPP_INLINE_VISIBILITY int compare(__string_view __s) const { + _LIBCPP_HIDE_FROM_ABI int compare(__string_view __s) const { return __compare(__s); } - _LIBCPP_INLINE_VISIBILITY int compare(const value_type* __s) const { + _LIBCPP_HIDE_FROM_ABI int compare(const value_type* __s) const { return __compare(__s); } // decomposition - _LIBCPP_INLINE_VISIBILITY path root_name() const { + _LIBCPP_HIDE_FROM_ABI path 
root_name() const { return string_type(__root_name()); } - _LIBCPP_INLINE_VISIBILITY path root_directory() const { + _LIBCPP_HIDE_FROM_ABI path root_directory() const { return string_type(__root_directory()); } - _LIBCPP_INLINE_VISIBILITY path root_path() const { + _LIBCPP_HIDE_FROM_ABI path root_path() const { #if defined(_LIBCPP_WIN32API) return string_type(__root_path_raw()); #else return root_name().append(string_type(__root_directory())); #endif } - _LIBCPP_INLINE_VISIBILITY path relative_path() const { + _LIBCPP_HIDE_FROM_ABI path relative_path() const { return string_type(__relative_path()); } - _LIBCPP_INLINE_VISIBILITY path parent_path() const { + _LIBCPP_HIDE_FROM_ABI path parent_path() const { return string_type(__parent_path()); } - _LIBCPP_INLINE_VISIBILITY path filename() const { + _LIBCPP_HIDE_FROM_ABI path filename() const { return string_type(__filename()); } - _LIBCPP_INLINE_VISIBILITY path stem() const { return string_type(__stem()); } - _LIBCPP_INLINE_VISIBILITY path extension() const { + _LIBCPP_HIDE_FROM_ABI path stem() const { return string_type(__stem()); } + _LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); } // query - _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY bool + _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_root_name() const { + _LIBCPP_HIDE_FROM_ABI bool has_root_name() const { return !__root_name().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_root_directory() const { + _LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_root_path() const { + _LIBCPP_HIDE_FROM_ABI bool has_root_path() const { return !__root_path_raw().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_relative_path() const { + _LIBCPP_HIDE_FROM_ABI bool has_relative_path() const { return !__relative_path().empty(); } - _LIBCPP_INLINE_VISIBILITY bool 
has_parent_path() const { + _LIBCPP_HIDE_FROM_ABI bool has_parent_path() const { return !__parent_path().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_filename() const { + _LIBCPP_HIDE_FROM_ABI bool has_filename() const { return !__filename().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_stem() const { return !__stem().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_extension() const { + _LIBCPP_HIDE_FROM_ABI bool has_stem() const { return !__stem().empty(); } + _LIBCPP_HIDE_FROM_ABI bool has_extension() const { return !__extension().empty(); } - _LIBCPP_INLINE_VISIBILITY bool is_absolute() const { + _LIBCPP_HIDE_FROM_ABI bool is_absolute() const { #if defined(_LIBCPP_WIN32API) __string_view __root_name_str = __root_name(); __string_view __root_dir = __root_directory(); @@ -923,13 +984,13 @@ class _LIBCPP_TYPE_VIS path { return has_root_directory(); #endif } - _LIBCPP_INLINE_VISIBILITY bool is_relative() const { return !is_absolute(); } + _LIBCPP_HIDE_FROM_ABI bool is_relative() const { return !is_absolute(); } // relative paths path lexically_normal() const; path lexically_relative(const path& __base) const; - _LIBCPP_INLINE_VISIBILITY path lexically_proximate(const path& __base) const { + _LIBCPP_HIDE_FROM_ABI path lexically_proximate(const path& __base) const { path __result = this->lexically_relative(__base); if (__result.native().empty()) return *this; @@ -945,7 +1006,7 @@ class _LIBCPP_TYPE_VIS path { #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) template - _LIBCPP_INLINE_VISIBILITY friend + _LIBCPP_HIDE_FROM_ABI friend typename enable_if::value && is_same<_Traits, char_traits >::value, basic_ostream<_CharT, _Traits>&>::type @@ -955,7 +1016,7 @@ class _LIBCPP_TYPE_VIS path { } template - _LIBCPP_INLINE_VISIBILITY friend + _LIBCPP_HIDE_FROM_ABI friend typename enable_if::value || !is_same<_Traits, char_traits >::value, basic_ostream<_CharT, _Traits>&>::type @@ -965,7 +1026,7 @@ class _LIBCPP_TYPE_VIS path { } template - _LIBCPP_INLINE_VISIBILITY friend 
basic_istream<_CharT, _Traits>& + _LIBCPP_HIDE_FROM_ABI friend basic_istream<_CharT, _Traits>& operator>>(basic_istream<_CharT, _Traits>& __is, path& __p) { basic_string<_CharT, _Traits> __tmp; __is >> _VSTD::__quoted(__tmp); @@ -974,33 +1035,32 @@ class _LIBCPP_TYPE_VIS path { } #endif // !_LIBCPP_HAS_NO_LOCALIZATION - friend _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs, const path& __rhs) noexcept { + friend _LIBCPP_HIDE_FROM_ABI bool operator==(const path& __lhs, const path& __rhs) noexcept { return __lhs.__compare(__rhs.__pn_) == 0; } - friend _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs, const path& __rhs) noexcept { + friend _LIBCPP_HIDE_FROM_ABI bool operator!=(const path& __lhs, const path& __rhs) noexcept { return __lhs.__compare(__rhs.__pn_) != 0; } - friend _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs, const path& __rhs) noexcept { + friend _LIBCPP_HIDE_FROM_ABI bool operator<(const path& __lhs, const path& __rhs) noexcept { return __lhs.__compare(__rhs.__pn_) < 0; } - friend _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs, const path& __rhs) noexcept { + friend _LIBCPP_HIDE_FROM_ABI bool operator<=(const path& __lhs, const path& __rhs) noexcept { return __lhs.__compare(__rhs.__pn_) <= 0; } - friend _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs, const path& __rhs) noexcept { + friend _LIBCPP_HIDE_FROM_ABI bool operator>(const path& __lhs, const path& __rhs) noexcept { return __lhs.__compare(__rhs.__pn_) > 0; } - friend _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs, const path& __rhs) noexcept { + friend _LIBCPP_HIDE_FROM_ABI bool operator>=(const path& __lhs, const path& __rhs) noexcept { return __lhs.__compare(__rhs.__pn_) >= 0; } - friend _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs, - const path& __rhs) { + friend _LIBCPP_HIDE_FROM_ABI path operator/(const path& __lhs, const path& __rhs) { path __result(__lhs); __result /= __rhs; return __result; } 
private: - inline _LIBCPP_INLINE_VISIBILITY path& + inline _LIBCPP_HIDE_FROM_ABI path& __assign_view(__string_view const& __s) noexcept { __pn_ = string_type(__s); return *this; @@ -1008,7 +1068,7 @@ class _LIBCPP_TYPE_VIS path { string_type __pn_; }; -inline _LIBCPP_INLINE_VISIBILITY void swap(path& __lhs, path& __rhs) noexcept { +inline _LIBCPP_HIDE_FROM_ABI void swap(path& __lhs, path& __rhs) noexcept { __lhs.swap(__rhs); } diff --git a/libcxx/include/any b/libcxx/include/any index bd0ea5677d8419..308a49e810369e 100644 --- a/libcxx/include/any +++ b/libcxx/include/any @@ -664,6 +664,7 @@ _RetType __pointer_or_func_cast(void*, /*IsFunction*/true_type) noexcept { } template +_LIBCPP_HIDE_FROM_ABI add_pointer_t<_ValueType> any_cast(any * __any) _NOEXCEPT { From 719bf2d9d9fdb8792b15d93364c9c6b2ddd68a36 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 May 2022 11:12:45 -0400 Subject: [PATCH 728/908] [runtimes] Officially deprecate the legacy testing configuration system Add a warning and tweak the release note to explain that the deprecation targets libc++, libc++abi and libuwnind as well. Also, as a fly-by, ensure that our CI runs the legacy testing configuration for libc++, libc++abi and libunwind. This doesn't matter too much since it's deprecated, but we might as well test it properly. Differential Revision: https://reviews.llvm.org/D126478 --- libcxx/docs/ReleaseNotes.rst | 9 +++++---- libcxx/test/configs/legacy.cfg.in | 3 +++ libcxx/utils/ci/run-buildbot | 4 +++- libcxxabi/test/lit.site.cfg.in | 3 +++ libunwind/test/lit.site.cfg.in | 3 +++ 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst index 514de5de3213de..b2bd0718001e55 100644 --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -148,7 +148,8 @@ Build System Changes its own headers. - The legacy testing configuration is now deprecated and will be removed in the next release. 
For - most users, this should not have any impact. However, if you are testing libc++ in a configuration - or on a platform that used to be supported by the legacy testing configuration and isn't supported - by one of the configurations in ``libcxx/test/configs``, please reach out to the libc++ developers - to get your configuration supported officially. + most users, this should not have any impact. However, if you are testing libc++, libc++abi or + libunwind in a configuration or on a platform that used to be supported by the legacy testing + configuration and isn't supported by one of the configurations in ``libcxx/test/configs``, + ``libcxxabi/test/configs`` or ``libunwind/test/configs``, please move to one of those + configurations or define your own. diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index 8bb5bc76aa5ee3..a7ac6f8042402c 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -62,3 +62,6 @@ import libcxx.test.config configuration = libcxx.test.config.Configuration(lit_config, config) configuration.configure() configuration.print_config_info() + +lit_config.warning("This is a legacy testing configuration which will be removed in LLVM 16. 
" + "Please use one of the configurations in libcxx/test/configs or define your own.") diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 23708e680893fa..2b9f129fd25886 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -462,7 +462,9 @@ bootstrapping-build) ;; legacy-test-config) clean - generate-cmake -DLIBCXX_TEST_CONFIG="legacy.cfg.in" + generate-cmake -DLIBCXX_TEST_CONFIG="legacy.cfg.in" \ + -DLIBCXXABI_TEST_CONFIG="${MONOREPO_ROOT}/libcxxabi/test/lit.site.cfg.in" \ + -DLIBUNWIND_TEST_CONFIG="${MONOREPO_ROOT}/libunwind/test/lit.site.cfg.in" check-runtimes ;; legacy-project-build) diff --git a/libcxxabi/test/lit.site.cfg.in b/libcxxabi/test/lit.site.cfg.in index ae387f55e03560..7a786a3a7691e3 100644 --- a/libcxxabi/test/lit.site.cfg.in +++ b/libcxxabi/test/lit.site.cfg.in @@ -60,3 +60,6 @@ import libcxxabi.test.config configuration = libcxxabi.test.config.Configuration(lit_config, config) configuration.configure() configuration.print_config_info() + +lit_config.warning("This is a legacy testing configuration which will be removed in LLVM 16. " + "Please use one of the configurations in libcxxabi/test/configs or define your own.") diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in index 9fb324007a55aa..31530935ce8760 100644 --- a/libunwind/test/lit.site.cfg.in +++ b/libunwind/test/lit.site.cfg.in @@ -56,3 +56,6 @@ import libunwind.test.config configuration = libunwind.test.config.Configuration(lit_config, config) configuration.configure() configuration.print_config_info() + +lit_config.warning("This is a legacy testing configuration which will be removed in LLVM 16. 
" + "Please use one of the configurations in libunwind/test/configs or define your own.") From 8e724ad96565de7f91d7d9f89c24d0086086a903 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Fri, 27 May 2022 10:15:18 -0700 Subject: [PATCH 729/908] [TSan][Darwin] Deflake test The asserted order of THREAD_DESTROY and end of main() is not guaranteed: ``` 7: Hello from pthread 8: THREAD_TERMINATE 0x7e8000104000, self: 0x7e8000104000, name: child thread 9: Done. 10: THREAD_DESTROY 0x7e8000104000, self: 0x7e8000104000, name: child thread ``` Resulting in: ``` error: CHECK: expected string not found in input // CHECK: Done. ``` Remove checking for "Done." (end of main()) to deflake this test. Alternatively, we could use `CHECK-DAG`. rdar://94036145 --- compiler-rt/test/tsan/Darwin/dyld-insert-libraries.c | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler-rt/test/tsan/Darwin/dyld-insert-libraries.c b/compiler-rt/test/tsan/Darwin/dyld-insert-libraries.c index a52f4dea07bb18..2e28a23c227bc9 100644 --- a/compiler-rt/test/tsan/Darwin/dyld-insert-libraries.c +++ b/compiler-rt/test/tsan/Darwin/dyld-insert-libraries.c @@ -110,4 +110,3 @@ int main() { // CHECK: Hello from pthread // CHECK: THREAD_TERMINATE [[CHILD]], self: [[CHILD]], name: child thread // CHECK: THREAD_DESTROY [[CHILD]] -// CHECK: Done. From 5df2893a9a1355b6a19ef04f79c5502d67e2caa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 28 Apr 2022 17:52:44 -0500 Subject: [PATCH 730/908] AMDGPU: Add G_AMDGPU_MAD_64_32 instructions These generic instructions are trivially selected to V_MAD_[IU]64_[IU]32 instructions when run on the VALU. When at least both factors are scalar, it is usually better to execute some or all of the instruction on the SALU. To this end, we lower the instruction to simpler instructions that are supported on the SALU when applying the register bank mapping. 
Differential Revision: https://reviews.llvm.org/D124843 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 16 + .../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 197 +++++++ .../Target/AMDGPU/AMDGPURegisterBankInfo.h | 2 + llvm/lib/Target/AMDGPU/SIInstructions.td | 13 + .../GlobalISel/inst-select-mad_64_32.mir | 48 ++ .../GlobalISel/regbankselect-mad_64_32.mir | 550 ++++++++++++++++++ 7 files changed, 827 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 2e507b31ec813e..defd8c3496ac04 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -458,6 +458,19 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE( return true; } +bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32( + MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32; + + I.setDesc(TII.get(IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 + : AMDGPU::V_MAD_I64_I32_e64)); + I.addOperand(*MF, MachineOperand::CreateImm(0)); + I.addImplicitDefUseOperands(*MF); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + // TODO: We should probably legalize these to only using 32-bit results. 
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -3335,6 +3348,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_UADDE: case TargetOpcode::G_USUBE: return selectG_UADDO_USUBO_UADDE_USUBE(I); + case AMDGPU::G_AMDGPU_MAD_U64_U32: + case AMDGPU::G_AMDGPU_MAD_I64_I32: + return selectG_AMDGPU_MAD_64_32(I); case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_BITCAST: case TargetOpcode::G_PTRTOINT: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 04c769a7ce924d..6a101f950abf92 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -97,6 +97,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool selectG_AND_OR_XOR(MachineInstr &I) const; bool selectG_ADD_SUB(MachineInstr &I) const; bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const; + bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const; bool selectG_EXTRACT(MachineInstr &I) const; bool selectG_MERGE_VALUES(MachineInstr &I) const; bool selectG_UNMERGE_VALUES(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 6cac8d98ec7aa4..90c21863186603 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1555,6 +1555,157 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper, return true; } +bool AMDGPURegisterBankInfo::applyMappingMAD_64_32( + const OperandsMapper &OpdMapper) const { + MachineInstr &MI = OpdMapper.getMI(); + MachineRegisterInfo &MRI = OpdMapper.getMRI(); + + // Insert basic copies. 
+ applyDefaultMapping(OpdMapper); + + Register Dst0 = MI.getOperand(0).getReg(); + Register Dst1 = MI.getOperand(1).getReg(); + Register Src0 = MI.getOperand(2).getReg(); + Register Src1 = MI.getOperand(3).getReg(); + Register Src2 = MI.getOperand(4).getReg(); + + if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank) + return true; + + bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32; + LLT S1 = LLT::scalar(1); + LLT S32 = LLT::scalar(32); + + bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank; + bool Accumulate = true; + + if (!DstOnValu) { + if (mi_match(Src2, MRI, m_ZeroInt())) + Accumulate = false; + } + + // Keep the multiplication on the SALU. + MachineIRBuilder B(MI); + + Register DstHi; + Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0); + bool MulHiInVgpr = false; + + MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank); + + if (Subtarget.hasSMulHi()) { + DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0) + : B.buildSMulH(S32, Src0, Src1).getReg(0); + MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank); + } else { + Register VSrc0 = B.buildCopy(S32, Src0).getReg(0); + Register VSrc1 = B.buildCopy(S32, Src1).getReg(0); + + MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank); + MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank); + + DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0) + : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0); + MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank); + + if (!DstOnValu) { + DstHi = buildReadFirstLane(B, MRI, DstHi); + } else { + MulHiInVgpr = true; + } + } + + // Accumulate and produce the "carry-out" bit. + // + // The "carry-out" is defined as bit 64 of the result when computed as a + // big integer. For unsigned multiply-add, this matches the usual definition + // of carry-out. For signed multiply-add, bit 64 is the sign bit of the + // result, which is determined as: + // sign(Src0 * Src1) + sign(Src2) + carry-out from unsigned 64-bit add + LLT CarryType = DstOnValu ? 
S1 : S32; + const RegisterBank &CarryBank = + DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank; + const RegisterBank &DstBank = + DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank; + Register Carry; + Register Zero; + + if (!IsUnsigned) { + Zero = B.buildConstant(S32, 0).getReg(0); + MRI.setRegBank(Zero, + MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank); + + Carry = B.buildICmp(CmpInst::ICMP_SLT, MulHiInVgpr ? S1 : S32, DstHi, Zero) + .getReg(0); + MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank + : AMDGPU::SGPRRegBank); + + if (DstOnValu && !MulHiInVgpr) { + Carry = B.buildTrunc(S1, Carry).getReg(0); + MRI.setRegBank(Carry, AMDGPU::VCCRegBank); + } + } + + if (Accumulate) { + if (DstOnValu) { + DstLo = B.buildCopy(S32, DstLo).getReg(0); + DstHi = B.buildCopy(S32, DstHi).getReg(0); + MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank); + MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank); + } + + auto Unmerge = B.buildUnmerge(S32, Src2); + Register Src2Lo = Unmerge.getReg(0); + Register Src2Hi = Unmerge.getReg(1); + MRI.setRegBank(Src2Lo, DstBank); + MRI.setRegBank(Src2Hi, DstBank); + + if (!IsUnsigned) { + auto Src2Sign = B.buildICmp(CmpInst::ICMP_SLT, CarryType, Src2Hi, Zero); + MRI.setRegBank(Src2Sign.getReg(0), CarryBank); + + Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0); + MRI.setRegBank(Carry, CarryBank); + } + + auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo); + DstLo = AddLo.getReg(0); + Register CarryLo = AddLo.getReg(1); + MRI.setRegBank(DstLo, DstBank); + MRI.setRegBank(CarryLo, CarryBank); + + auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo); + DstHi = AddHi.getReg(0); + MRI.setRegBank(DstHi, DstBank); + + Register CarryHi = AddHi.getReg(1); + MRI.setRegBank(CarryHi, CarryBank); + + if (IsUnsigned) { + Carry = CarryHi; + } else { + Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0); + MRI.setRegBank(Carry, CarryBank); + } + } else { + if (IsUnsigned) { + Carry = B.buildConstant(CarryType, 0).getReg(0); + 
MRI.setRegBank(Carry, CarryBank); + } + } + + B.buildMerge(Dst0, {DstLo, DstHi}); + + if (DstOnValu) { + B.buildCopy(Dst1, Carry); + } else { + B.buildTrunc(Dst1, Carry); + } + + MI.eraseFromParent(); + return true; +} + // Return a suitable opcode for extending the operands of Opc when widening. static unsigned getExtendOp(unsigned Opc) { switch (Opc) { @@ -3093,6 +3244,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_UBFX: applyMappingBFE(OpdMapper, /*Signed*/ false); return; + case AMDGPU::G_AMDGPU_MAD_U64_U32: + case AMDGPU::G_AMDGPU_MAD_I64_I32: + applyMappingMAD_64_32(OpdMapper); + return; default: break; } @@ -3618,6 +3773,48 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getDefaultMappingSOP(MI); return getDefaultMappingVOP(MI); } + case AMDGPU::G_AMDGPU_MAD_U64_U32: + case AMDGPU::G_AMDGPU_MAD_I64_I32: { + // Three possible mappings: + // + // - Default SOP + // - Default VOP + // - Scalar multiply: src0 and src1 are SGPRs, the rest is VOP. + // + // This allows instruction selection to keep the multiplication part of the + // instruction on the SALU. + bool AllSalu = true; + bool MulSalu = true; + for (unsigned i = 0; i < 5; ++i) { + Register Reg = MI.getOperand(i).getReg(); + if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { + if (Bank->getID() != AMDGPU::SGPRRegBankID) { + AllSalu = false; + if (i == 2 || i == 3) { + MulSalu = false; + break; + } + } + } + } + + if (AllSalu) + return getDefaultMappingSOP(MI); + + // If the multiply-add is full-rate in VALU, use that even if the + // multiplication part is scalar. Accumulating separately on the VALU would + // take two instructions. + if (!MulSalu || Subtarget.hasFullRate64Ops()) + return getDefaultMappingVOP(MI); + + // Keep the multiplication on the SALU, then accumulate on the VALU. 
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); + OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64); + break; + } case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 4079f34d18b391..c9741c2202e652 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -86,6 +86,8 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo { bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const; + bool applyMappingMAD_64_32(const OperandsMapper &OpdMapper) const; + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d248992ad0c287..e39763a5ebe00d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3110,6 +3110,19 @@ def G_AMDGPU_CLAMP : AMDGPUGenericInstruction { let hasSideEffects = 0; } +// Integer multiply-add: arg0 * arg1 + arg2. +// +// arg0 and arg1 are 32-bit integers (interpreted as signed or unsigned), +// arg2 is a 64-bit integer. Result is a 64-bit integer and a 1-bit carry-out. +class G_AMDGPU_MAD_64_32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst, type1:$carry_out); + let InOperandList = (ins type2:$arg0, type2:$arg1, type0:$arg2); + let hasSideEffects = 0; +} + +def G_AMDGPU_MAD_U64_U32 : G_AMDGPU_MAD_64_32; +def G_AMDGPU_MAD_I64_I32 : G_AMDGPU_MAD_64_32; + // Atomic cmpxchg. 
$cmpval ad $newval are packed in a single vector // operand Expects a MachineMemOperand in addition to explicit // operands. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir new file mode 100644 index 00000000000000..3ce440a66d8790 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s + +--- +name: mad_u64_u32_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GCN-LABEL: name: mad_u64_u32_vvv + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 + ; GCN-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = COPY $vgpr3 + %4:vgpr(s64) = G_MERGE_VALUES %2, %3 + %5:vgpr(s64), %6:vcc(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %3 + S_ENDPGM 0, implicit %5, implicit %6 +... 
+ +--- +name: mad_i64_i32_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GCN-LABEL: name: mad_i64_i32_vvv + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 + ; GCN-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = COPY $vgpr3 + %4:vgpr(s64) = G_MERGE_VALUES %2, %3 + %5:vgpr(s64), %6:vcc(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %3 + S_ENDPGM 0, implicit %5, implicit %6 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir new file mode 100644 index 00000000000000..88b0bd14276fe5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir @@ -0,0 +1,550 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX9MI %s +# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX10 %s + +--- +name: mad_u64_u32_sss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; + ; + ; GFX8-LABEL: name: mad_u64_u32_sss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec + ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; GFX9MI-LABEL: name: mad_u64_u32_sss + ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] + ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] + ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9MI-NEXT: 
[[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; GFX10-LABEL: name: mad_u64_u32_sss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... 
+ +--- +name: mad_u64_u32_ssv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; + ; + ; GFX8-LABEL: name: mad_u64_u32_ssv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX9MI-LABEL: name: mad_u64_u32_ssv + ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX10-LABEL: name: 
mad_u64_u32_ssv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... 
+ +--- +name: mad_u64_u32_svs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 + ; + ; + ; CHECK-LABEL: name: mad_u64_u32_svs + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $sgpr2 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... + +--- +name: mad_u64_u32_svv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 + ; + ; + ; CHECK-LABEL: name: mad_u64_u32_svv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... 
+ +--- +name: mad_u64_u32_vss +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 + ; + ; + ; CHECK-LABEL: name: mad_u64_u32_vss + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $sgpr2 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... + +--- +name: mad_u64_u32_vsv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2 + ; + ; + ; CHECK-LABEL: name: mad_u64_u32_vsv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... 
+ +--- +name: mad_u64_u32_vvs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1 + ; + ; + ; CHECK-LABEL: name: mad_u64_u32_vvs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $sgpr2 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... + +--- +name: mad_u64_u32_vvv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; + ; + ; CHECK-LABEL: name: mad_u64_u32_vvv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[MV]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 +... 
+ +--- +name: mad_i64_i32_sss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; + ; + ; GFX8-LABEL: name: mad_i64_i32_sss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) + ; GFX9MI-LABEL: name: mad_i64_i32_sss + ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; 
GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] + ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX9MI-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] + ; GFX9MI-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] + ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] + ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] + ; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] + ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) + ; GFX10-LABEL: name: mad_i64_i32_sss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] + ; 
GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4 +... + +--- +name: mad_i64_i32_ssv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; + ; + ; GFX8-LABEL: name: mad_i64_i32_ssv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] + ; 
GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX9MI-LABEL: name: mad_i64_i32_ssv + ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX10-LABEL: name: mad_i64_i32_ssv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[ICMP]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), 
[[C]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[TRUNC]], [[ICMP1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(s64) = G_MERGE_VALUES %2, %3 + %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4 +... + +--- +name: mad_u64_u32_ss0 +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; + ; + ; GFX8-LABEL: name: mad_u64_u32_ss0 + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec + ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) + ; GFX9MI-LABEL: name: mad_u64_u32_ss0 + ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] + ; 
GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32) + ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) + ; GFX10-LABEL: name: mad_u64_u32_ss0 + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 +... + +--- +name: mad_u64_u32_vv0 +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; + ; + ; CHECK-LABEL: name: mad_u64_u32_vv0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 +... 
+ +--- +name: mad_i64_i32_ss0 +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; + ; + ; GFX8-LABEL: name: mad_i64_i32_ss0 + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec + ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; GFX9MI-LABEL: name: mad_i64_i32_ss0 + ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]] + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32) + ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; GFX10-LABEL: name: mad_i64_i32_ss0 + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: 
[[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] + ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2 +... + +--- +name: mad_i64_i32_vv0 +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; + ; + ; CHECK-LABEL: name: mad_i64_i32_vv0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[COPY2]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2 +... From aaad507546a5cc788aabc907ec47bbbfb8283e8e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 May 2022 10:16:32 -0700 Subject: [PATCH 731/908] [RISCV] Return false from isOffsetFoldingLegal instead of reversing the fold in lowering. When lowering GlobalAddressNodes, we were removing a non-zero offset and creating a separate ADD. It already comes out of SelectionDAGBuilder with a separate ADD. The ADD was being removed by DAGCombiner. This patch disables the DAG combine so we don't have to reverse it. Test changes all look to be instruction order changes. Probably due to different DAG node ordering. 
Differential Revision: https://reviews.llvm.org/D126558 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 31 +++++++------------ llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 + llvm/test/CodeGen/RISCV/double-mem.ll | 12 +++---- llvm/test/CodeGen/RISCV/float-mem.ll | 12 +++---- llvm/test/CodeGen/RISCV/half-mem.ll | 12 +++---- .../CodeGen/RISCV/hoist-global-addr-base.ll | 10 +++--- ...op-strength-reduce-add-cheaper-than-mul.ll | 4 +-- .../RISCV/loop-strength-reduce-loop-invar.ll | 4 +-- llvm/test/CodeGen/RISCV/mem.ll | 6 ++-- llvm/test/CodeGen/RISCV/mem64.ll | 6 ++-- .../CodeGen/RISCV/zext-with-load-is-free.ll | 24 +++++++------- 11 files changed, 58 insertions(+), 64 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4f74f3f01795c1..17c8870f24192a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1231,6 +1231,15 @@ bool RISCVTargetLowering::shouldSinkOperands( return true; } +bool RISCVTargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // In order to maximise the opportunity for common subexpression elimination, + // keep a separate ADD node for the global address offset instead of folding + // it in the global address node. Later peephole optimisations may choose to + // fold it back in when profitable. + return false; +} + bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin. 
@@ -3578,21 +3587,12 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SDLoc DL(Op); EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast(Op); - int64_t Offset = N->getOffset(); + assert(N->getOffset() == 0 && "unexpected offset in global node"); MVT XLenVT = Subtarget.getXLenVT(); const GlobalValue *GV = N->getGlobal(); bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); - SDValue Addr = getAddr(N, DAG, IsLocal); - - // In order to maximise the opportunity for common subexpression elimination, - // emit a separate ADD node for the global address offset instead of folding - // it in the global address node. Later peephole optimisations may choose to - // fold it back in when profitable. - if (Offset != 0) - return DAG.getNode(ISD::ADD, DL, Ty, Addr, - DAG.getConstant(Offset, DL, XLenVT)); - return Addr; + return getAddr(N, DAG, IsLocal); } SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, @@ -3701,7 +3701,7 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, SDLoc DL(Op); EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast(Op); - int64_t Offset = N->getOffset(); + assert(N->getOffset() == 0 && "unexpected offset in global node"); MVT XLenVT = Subtarget.getXLenVT(); TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); @@ -3724,13 +3724,6 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, break; } - // In order to maximise the opportunity for common subexpression elimination, - // emit a separate ADD node for the global address offset instead of folding - // it in the global address node. Later peephole optimisations may choose to - // fold it back in when profitable. 
- if (Offset != 0) - return DAG.getNode(ISD::ADD, DL, Ty, Addr, - DAG.getConstant(Offset, DL, XLenVT)); return Addr; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 1c461fb38ceb0e..b3e9ec6c117c87 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -354,6 +354,7 @@ class RISCVTargetLowering : public TargetLowering { SelectionDAG &DAG) const override; bool shouldSinkOperands(Instruction *I, SmallVectorImpl &Ops) const override; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll index ba0b652ddffc7c..fc42f2a2364537 100644 --- a/llvm/test/CodeGen/RISCV/double-mem.ll +++ b/llvm/test/CodeGen/RISCV/double-mem.ll @@ -59,10 +59,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind { ; RV32IFD-NEXT: fadd.d fa0, fa0, fa1 ; RV32IFD-NEXT: lui a0, %hi(G) ; RV32IFD-NEXT: fld ft0, %lo(G)(a0) +; RV32IFD-NEXT: addi a1, a0, %lo(G) ; RV32IFD-NEXT: fsd fa0, %lo(G)(a0) -; RV32IFD-NEXT: addi a0, a0, %lo(G) -; RV32IFD-NEXT: fld ft0, 72(a0) -; RV32IFD-NEXT: fsd fa0, 72(a0) +; RV32IFD-NEXT: fld ft0, 72(a1) +; RV32IFD-NEXT: fsd fa0, 72(a1) ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fld_fsd_global: @@ -70,10 +70,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind { ; RV64IFD-NEXT: fadd.d fa0, fa0, fa1 ; RV64IFD-NEXT: lui a0, %hi(G) ; RV64IFD-NEXT: fld ft0, %lo(G)(a0) +; RV64IFD-NEXT: addi a1, a0, %lo(G) ; RV64IFD-NEXT: fsd fa0, %lo(G)(a0) -; RV64IFD-NEXT: addi a0, a0, %lo(G) -; RV64IFD-NEXT: fld ft0, 72(a0) -; RV64IFD-NEXT: fsd fa0, 72(a0) +; RV64IFD-NEXT: fld ft0, 72(a1) +; RV64IFD-NEXT: fsd fa0, 72(a1) ; RV64IFD-NEXT: ret ; Use %a and %b in an FP op to ensure floating point registers are used, even ; for the soft float ABI diff --git 
a/llvm/test/CodeGen/RISCV/float-mem.ll b/llvm/test/CodeGen/RISCV/float-mem.ll index fe1e9bcf431b8f..f90272c816afb2 100644 --- a/llvm/test/CodeGen/RISCV/float-mem.ll +++ b/llvm/test/CodeGen/RISCV/float-mem.ll @@ -61,10 +61,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind { ; RV32IF-NEXT: fadd.s fa0, fa0, fa1 ; RV32IF-NEXT: lui a0, %hi(G) ; RV32IF-NEXT: flw ft0, %lo(G)(a0) +; RV32IF-NEXT: addi a1, a0, %lo(G) ; RV32IF-NEXT: fsw fa0, %lo(G)(a0) -; RV32IF-NEXT: addi a0, a0, %lo(G) -; RV32IF-NEXT: flw ft0, 36(a0) -; RV32IF-NEXT: fsw fa0, 36(a0) +; RV32IF-NEXT: flw ft0, 36(a1) +; RV32IF-NEXT: fsw fa0, 36(a1) ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: flw_fsw_global: @@ -72,10 +72,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind { ; RV64IF-NEXT: fadd.s fa0, fa0, fa1 ; RV64IF-NEXT: lui a0, %hi(G) ; RV64IF-NEXT: flw ft0, %lo(G)(a0) +; RV64IF-NEXT: addi a1, a0, %lo(G) ; RV64IF-NEXT: fsw fa0, %lo(G)(a0) -; RV64IF-NEXT: addi a0, a0, %lo(G) -; RV64IF-NEXT: flw ft0, 36(a0) -; RV64IF-NEXT: fsw fa0, 36(a0) +; RV64IF-NEXT: flw ft0, 36(a1) +; RV64IF-NEXT: fsw fa0, 36(a1) ; RV64IF-NEXT: ret %1 = fadd float %a, %b %2 = load volatile float, float* @G diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll index 3e99e17057660e..9049b92946a622 100644 --- a/llvm/test/CodeGen/RISCV/half-mem.ll +++ b/llvm/test/CodeGen/RISCV/half-mem.ll @@ -61,10 +61,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind { ; RV32IZFH-NEXT: fadd.h fa0, fa0, fa1 ; RV32IZFH-NEXT: lui a0, %hi(G) ; RV32IZFH-NEXT: flh ft0, %lo(G)(a0) +; RV32IZFH-NEXT: addi a1, a0, %lo(G) ; RV32IZFH-NEXT: fsh fa0, %lo(G)(a0) -; RV32IZFH-NEXT: addi a0, a0, %lo(G) -; RV32IZFH-NEXT: flh ft0, 18(a0) -; RV32IZFH-NEXT: fsh fa0, 18(a0) +; RV32IZFH-NEXT: flh ft0, 18(a1) +; RV32IZFH-NEXT: fsh fa0, 18(a1) ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: flh_fsh_global: @@ -72,10 +72,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind { ; RV64IZFH-NEXT: fadd.h 
fa0, fa0, fa1 ; RV64IZFH-NEXT: lui a0, %hi(G) ; RV64IZFH-NEXT: flh ft0, %lo(G)(a0) +; RV64IZFH-NEXT: addi a1, a0, %lo(G) ; RV64IZFH-NEXT: fsh fa0, %lo(G)(a0) -; RV64IZFH-NEXT: addi a0, a0, %lo(G) -; RV64IZFH-NEXT: flh ft0, 18(a0) -; RV64IZFH-NEXT: fsh fa0, 18(a0) +; RV64IZFH-NEXT: flh ft0, 18(a1) +; RV64IZFH-NEXT: fsh fa0, 18(a1) ; RV64IZFH-NEXT: ret %1 = fadd half %a, %b %2 = load volatile half, half* @G diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll index 85df696eb6a501..7ebc526bb4d33e 100644 --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -99,11 +99,11 @@ define dso_local i32* @big_offset_one_use() local_unnamed_addr nounwind { ; ; RV64-LABEL: big_offset_one_use: ; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a0, 4 -; RV64-NEXT: addiw a0, a0, 188 -; RV64-NEXT: lui a1, %hi(s) -; RV64-NEXT: addi a1, a1, %lo(s) -; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: lui a0, %hi(s) +; RV64-NEXT: addi a0, a0, %lo(s) +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: addiw a1, a1, 188 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: ret entry: ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5) diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll index d062709000f043..25d32d76dbce8a 100644 --- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll @@ -42,7 +42,7 @@ define void @test(i32 signext %i) nounwind { ; RV32-NEXT: addi a3, a3, 1 ; RV32-NEXT: .LBB0_2: # %bb ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: add a4, a1, a2 +; RV32-NEXT: add a4, a2, a1 ; RV32-NEXT: add a1, a1, a0 ; RV32-NEXT: sb zero, 0(a4) ; RV32-NEXT: blt a1, a3, .LBB0_2 @@ -65,7 +65,7 @@ define void @test(i32 signext %i) nounwind { ; RV64-NEXT: addw a5, a5, a1 ; RV64-NEXT: 
slli a6, a5, 32 ; RV64-NEXT: srli a6, a6, 32 -; RV64-NEXT: add a6, a6, a3 +; RV64-NEXT: add a6, a3, a6 ; RV64-NEXT: sb zero, 0(a6) ; RV64-NEXT: addw a5, a5, a0 ; RV64-NEXT: addiw a2, a2, 1 diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll index a288b4ea77787c..ede08528198b20 100644 --- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll @@ -54,10 +54,10 @@ define void @test(i32 signext %row, i32 signext %N.in) nounwind { ; RV64-NEXT: blez a1, .LBB0_3 ; RV64-NEXT: # %bb.1: # %cond_true.preheader ; RV64-NEXT: li a4, 0 +; RV64-NEXT: slli a0, a0, 6 ; RV64-NEXT: lui a2, %hi(A) ; RV64-NEXT: addi a2, a2, %lo(A) -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: add a0, a2, a0 ; RV64-NEXT: li a2, 4 ; RV64-NEXT: li a3, 5 ; RV64-NEXT: .LBB0_2: # %cond_true diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll index e18f5018e403ca..7af934fdacabf2 100644 --- a/llvm/test/CodeGen/RISCV/mem.ll +++ b/llvm/test/CodeGen/RISCV/mem.ll @@ -170,10 +170,10 @@ define dso_local i32 @lw_sw_global(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, %hi(G) ; RV32I-NEXT: lw a1, %lo(G)(a2) +; RV32I-NEXT: addi a3, a2, %lo(G) ; RV32I-NEXT: sw a0, %lo(G)(a2) -; RV32I-NEXT: addi a2, a2, %lo(G) -; RV32I-NEXT: lw a3, 36(a2) -; RV32I-NEXT: sw a0, 36(a2) +; RV32I-NEXT: lw a2, 36(a3) +; RV32I-NEXT: sw a0, 36(a3) ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = load volatile i32, i32* @G diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll index 19f741335370d2..47ab20e950c5e6 100644 --- a/llvm/test/CodeGen/RISCV/mem64.ll +++ b/llvm/test/CodeGen/RISCV/mem64.ll @@ -215,10 +215,10 @@ define dso_local i64 @ld_sd_global(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, %hi(G) ; RV64I-NEXT: ld a1, %lo(G)(a2) +; RV64I-NEXT: addi a3, a2, %lo(G) ; RV64I-NEXT: sd a0, 
%lo(G)(a2) -; RV64I-NEXT: addi a2, a2, %lo(G) -; RV64I-NEXT: ld a3, 72(a2) -; RV64I-NEXT: sd a0, 72(a2) +; RV64I-NEXT: ld a2, 72(a3) +; RV64I-NEXT: sd a0, 72(a3) ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret %1 = load volatile i64, i64* @G diff --git a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll index 4b5f18de7d1754..55ea1826166434 100644 --- a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll +++ b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll @@ -10,12 +10,12 @@ define dso_local i32 @test_zext_i8() nounwind { ; RV32I-LABEL: test_zext_i8: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: lui a0, %hi(bytes) -; RV32I-NEXT: lbu a1, %lo(bytes)(a0) -; RV32I-NEXT: addi a0, a0, %lo(bytes) -; RV32I-NEXT: lbu a0, 1(a0) -; RV32I-NEXT: xori a1, a1, 136 -; RV32I-NEXT: xori a0, a0, 7 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: addi a1, a0, %lo(bytes) +; RV32I-NEXT: lbu a0, %lo(bytes)(a0) +; RV32I-NEXT: lbu a1, 1(a1) +; RV32I-NEXT: xori a0, a0, 136 +; RV32I-NEXT: xori a1, a1, 7 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %if.then ; RV32I-NEXT: li a0, 1 @@ -42,14 +42,14 @@ define dso_local i32 @test_zext_i16() nounwind { ; RV32I-LABEL: test_zext_i16: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: lui a0, %hi(shorts) -; RV32I-NEXT: lhu a1, %lo(shorts)(a0) -; RV32I-NEXT: addi a0, a0, %lo(shorts) -; RV32I-NEXT: lhu a0, 2(a0) +; RV32I-NEXT: addi a1, a0, %lo(shorts) +; RV32I-NEXT: lhu a0, %lo(shorts)(a0) +; RV32I-NEXT: lhu a1, 2(a1) ; RV32I-NEXT: lui a2, 16 ; RV32I-NEXT: addi a2, a2, -120 -; RV32I-NEXT: xor a1, a1, a2 -; RV32I-NEXT: xori a0, a0, 7 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: xori a1, a1, 7 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: beqz a0, .LBB1_2 ; RV32I-NEXT: # %bb.1: # %if.then ; RV32I-NEXT: li a0, 1 From 4f12a721f1790f3fafadcd0ad4ce456bb0e04677 Mon Sep 17 00:00:00 2001 From: Pengxuan Zheng Date: Thu, 19 May 2022 11:30:57 -0700 Subject: [PATCH 732/908] 
[llvm-lib] Ignore /LTCG option "The /LTCG option to LIB specifies that the inputs from cl.exe include object files generated by using the /GL compiler option." Based on Microsoft's description above (https://docs.microsoft.com/en-us/cpp/build/reference/running-lib?view=msvc-170), there doesn't seem to be anything llvm-lib needs to do to support the flag. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D126000 --- llvm/lib/ToolDrivers/llvm-lib/Options.td | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td index 7d9668396e4b5b..747dcc82df5e66 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/Options.td +++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td @@ -43,4 +43,5 @@ def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias; // The flags below do nothing. They are defined only for lib.exe compatibility. //============================================================================== +def ltcg : F<"ltcg">; def nologo : F<"nologo">; From 0e8f4ce31df192d188239575839d8cceb47caf85 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 27 May 2022 08:34:31 -0700 Subject: [PATCH 733/908] [lld][WebAssembly] Fix crash on undefined+weak function syms in LTO objects Symbols from LTO objects don't contain Wasm signatures, but we need a signature when we create undefined/stub functions for missing weakly undefined symbols. Luckily, after LTO, we know that symbols that are not referenced by a regular object file must not be needed in the final output so there is no need to generate undefined/stub functions for them.
Differential Revision: https://reviews.llvm.org/D126554 --- lld/test/wasm/lto/weak-undefined.ll | 11 +++++++++++ lld/wasm/SymbolTable.cpp | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lld/test/wasm/lto/weak-undefined.ll b/lld/test/wasm/lto/weak-undefined.ll index 36e422896f35d4..9a42c2bb5dbf70 100644 --- a/lld/test/wasm/lto/weak-undefined.ll +++ b/lld/test/wasm/lto/weak-undefined.ll @@ -11,6 +11,17 @@ target triple = "wasm32-unknown-unknown" declare extern_weak i32 @foo() +declare extern_weak i32 @bar() + +; The reference to bar here will exist in the LTO file, but the LTO process will +; remove it, so it will never be referenced by a normal object file, and +; therefore never have a signature. +define void @unused_function() #0 { +entry: + %call2 = call i32 @bar() + ret void +} + define void @_start() #0 { entry: %call2 = call i32 @foo() diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp index ef1402248eec83..2969d99e3ab9ac 100644 --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -854,7 +854,7 @@ void SymbolTable::replaceWithUndefined(Symbol *sym) { // the call instruction that passes Wasm validation. void SymbolTable::handleWeakUndefines() { for (Symbol *sym : getSymbols()) { - if (sym->isUndefWeak()) { + if (sym->isUndefWeak() && sym->isUsedInRegularObj) { if (sym->getSignature()) { replaceWithUndefined(sym); } else { From d0f65eaa850dca87fb3d58868d42b4fa4c06537b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 May 2022 12:13:41 -0700 Subject: [PATCH 734/908] [RISCV] Remove unused variables.
NFC --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 17c8870f24192a..67445adb3e1759 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3585,10 +3585,8 @@ template SDValue RISCVTargetLowering::getAddr( SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); - MVT XLenVT = Subtarget.getXLenVT(); const GlobalValue *GV = N->getGlobal(); bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); @@ -3699,10 +3697,8 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); - MVT XLenVT = Subtarget.getXLenVT(); TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); From eb1c50378e73f3e05678633b3d2b4b7b23d5e709 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 23 May 2022 18:47:52 +0200 Subject: [PATCH 735/908] [libc++][NFC] Rename rand.dis to rand.dist Reviewed By: ldionne, #libc Spies: libcxx-commits, mgrang, mstorsjo Differential Revision: https://reviews.llvm.org/D126221 --- .../rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp | 0 
.../rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/io.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/max.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/min.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.bin/types.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp | 0 
.../rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/io.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/max.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/min.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.geo/types.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp | 0 
.../rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp | 0 .../rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp | 0 .../rand.dist.norm.cauchy/ctor_double_double.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp | 0 
.../rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/eval.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/io.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/max.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/min.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.f/types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp | 0 .../rand.dist.norm.lognormal/ctor_double_double.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp | 0 .../rand.dist.norm.lognormal/eval_param.PR52906.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp | 0 
.../rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp | 0 .../rand.dist.norm.normal/ctor_double_double.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/io.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/max.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/min.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.normal/types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp | 
0 .../rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/eval.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/io.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/max.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/min.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp | 0 .../rand.dist.norm/rand.dist.norm.t/types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/io.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/max.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/min.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.exp/types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp | 0 
.../rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp | 0 .../rand.dist.pois.extreme/ctor_double_double.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp | 0 .../rand.dist.pois.gamma/ctor_double_double.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp | 0 
.../rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp | 0 .../rand.dist.pois.weibull/ctor_double_double.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp | 0 
.../rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp | 0 .../rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp | 0 .../rand.dist.samp.discrete/param_ctor_default.pass.cpp | 0 .../rand.dist.samp.discrete/param_ctor_func.pass.cpp | 0 .../rand.dist.samp.discrete/param_ctor_init.pass.cpp | 0 .../rand.dist.samp.discrete/param_ctor_iterator.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp | 0 
.../rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp | 0 .../rand.dist.samp.pconst/param_ctor_default.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp | 0 .../rand.dist.samp.pconst/param_ctor_init_func.pass.cpp | 0 .../rand.dist.samp.pconst/param_ctor_iterator.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp | 0 
.../rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp | 0 .../rand.dist.samp.plinear/param_ctor_default.pass.cpp | 0 .../rand.dist.samp.plinear/param_ctor_func.pass.cpp | 0 .../rand.dist.samp.plinear/param_ctor_init_func.pass.cpp | 0 .../rand.dist.samp.plinear/param_ctor_iterator.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp | 0 .../rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/assign.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/copy.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/eq.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/eval.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/int128.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/io.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/max.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/min.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp | 0 
.../rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.int/types.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/assign.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/copy.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/ctor_real_real.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/eq.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/eval.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/io.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/max.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/min.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp | 0 .../rand.dist.uni/rand.dist.uni.real/types.pass.cpp | 0 381 files changed, 0 insertions(+), 0 deletions(-) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/max.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.f/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/eval_param.PR52906.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.norm/rand.dist.norm.t/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp 
(100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => 
rand.dist}/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.int/types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/ctor_real_real.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/io.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/max.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/min.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp (100%) rename 
libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp (100%) rename libcxx/test/std/numerics/rand/{rand.dis => rand.dist}/rand.dist.uni/rand.dist.uni.real/types.pass.cpp (100%) diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp similarity index 100% rename 
from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp similarity index 100% rename 
from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp similarity index 
100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/io.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/types.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.PR52906.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eval_param.PR52906.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.PR52906.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eval_param.PR52906.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp rename 
to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp similarity index 100% 
rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp similarity index 100% 
rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp diff 
--git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp similarity index 100% 
rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp similarity index 100% 
rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp similarity index 100% 
rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp similarity 
index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp 
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp similarity index 100% 
rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/min.pass.cpp diff --git 
a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp similarity index 100% rename from 
libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_real_real.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/ctor_real_real.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_real_real.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/ctor_real_real.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp 
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/io.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/io.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/io.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/max.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/max.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/max.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/min.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/min.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/min.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp rename to 
libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/types.pass.cpp similarity index 100% rename from libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/types.pass.cpp rename to libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/types.pass.cpp From a7f9895cc18995549c7facb96e72718da282a864 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 May 2022 11:49:47 -0400 Subject: [PATCH 736/908] [runtimes] Rename various libcpp-has-no-XYZ Lit features to just no-XYZ Since those features are general properties of the environment, it makes sense to use them from libc++abi too, and so the name libcpp-has-no-xxx doesn't make sense. 
Differential Revision: https://reviews.llvm.org/D126482 --- .../incompatible_with_stdatomic.verify.cpp | 2 +- .../dont_hijack_header.compile.pass.cpp | 2 +- .../debug/containers.multithread.pass.cpp | 2 +- .../test/libcxx/debug/extern-templates.sh.cpp | 2 +- .../depr/depr.c.headers/locale_h.pass.cpp | 2 +- .../depr/depr.str.strstreams/version.pass.cpp | 2 +- .../header_regex_libcpp_version.pass.cpp | 2 +- libcxx/test/libcxx/fuzzing/regex.pass.cpp | 2 +- .../libcxx/gdb/gdb_pretty_printer_test.sh.cpp | 2 +- .../filesystem.inclusions.compile.pass.cpp | 2 +- .../ios.inclusions.compile.pass.cpp | 2 +- .../iostream.inclusions.compile.pass.cpp | 2 +- .../regex.inclusions.compile.pass.cpp | 2 +- .../thread.inclusions.compile.pass.cpp | 2 +- .../path.native.obs/string_alloc.pass.cpp | 2 +- .../input.streams/traits_mismatch.fail.cpp | 2 +- .../output.streams/traits_mismatch.fail.cpp | 2 +- .../string.streams/traits_mismatch.fail.cpp | 2 +- .../locale_dependent.compile.pass.cpp | 2 +- .../conversions.string/ctor_move.pass.cpp | 2 +- libcxx/test/libcxx/modules_include.sh.cpp | 2 +- .../has-no-random-device.verify.cpp | 2 +- .../has-no-incomplete-ranges.compile.pass.cpp | 2 +- .../strings/c.strings/version_cwchar.pass.cpp | 2 +- .../c.strings/version_cwctype.pass.cpp | 2 +- .../assert.set_exception.pass.cpp | 2 +- ...sert.set_exception_at_thread_exit.pass.cpp | 2 +- .../futures/futures.task/types.pass.cpp | 2 +- .../libcxx/thread/futures/version.pass.cpp | 2 +- .../thread.barrier/version.compile.pass.cpp | 2 +- ...otify_from_pthread_created_thread.pass.cpp | 2 +- .../native_handle.pass.cpp | 2 +- .../thread/thread.condition/version.pass.cpp | 2 +- .../thread.latch/version.compile.pass.cpp | 2 +- .../thread.lock.guard/nodiscard.verify.cpp | 2 +- .../thread.mutex.class/native_handle.pass.cpp | 2 +- .../native_handle.pass.cpp | 2 +- ...ad_safety_annotations_not_enabled.pass.cpp | 2 +- .../thread_safety_lock_guard.pass.cpp | 2 +- .../thread_safety_lock_unlock.pass.cpp | 2 +- 
.../thread_safety_missing_unlock.fail.cpp | 2 +- ...thread_safety_requires_capability.pass.cpp | 2 +- .../thread/thread.mutex/version.pass.cpp | 2 +- .../thread.semaphore/version.compile.pass.cpp | 2 +- .../thread.threads/create_late.pass.cpp | 2 +- .../native_handle.pass.cpp | 2 +- .../thread.thread.class/types.pass.cpp | 2 +- .../thread.thread.this/sleep_for.pass.cpp | 2 +- .../sleep_for.signals.pass.cpp | 2 +- .../thread/thread.threads/version.pass.cpp | 2 +- .../util.smartptr/race_condition.pass.cpp | 2 +- .../atomic_notify_all.pass.cpp | 2 +- .../atomic_notify_one.pass.cpp | 2 +- .../atomic_wait.pass.cpp | 2 +- .../atomic_wait_explicit.pass.cpp | 2 +- .../stdatomic.h.syn/types.compile.pass.cpp | 2 +- .../iterator.rel_ops.compile.pass.cpp | 2 +- .../std/depr/depr.c.headers/locale_h.pass.cpp | 2 +- .../std/depr/depr.c.headers/wchar_h.pass.cpp | 2 +- .../std/depr/depr.c.headers/wctype_h.pass.cpp | 2 +- .../std/depr/depr.ios.members/lit.local.cfg | 2 +- .../depr/depr.str.strstreams/lit.local.cfg | 2 +- .../stream_inserter.pass.cpp | 2 +- .../ostream_joiner.cons.pass.cpp | 2 +- .../make_ostream_joiner.pass.cpp | 2 +- .../ostream_joiner.op.assign.pass.cpp | 2 +- .../ostream_joiner.op.postincrement.pass.cpp | 2 +- .../ostream_joiner.op.pretincrement.pass.cpp | 2 +- .../ostream_joiner.op.star.pass.cpp | 2 +- .../header_regex_synop.pass.cpp | 2 +- .../filebuf.members/open_path.pass.cpp | 2 +- .../fstreams/fstream.cons/path.pass.cpp | 2 +- .../fstream.members/open_path.pass.cpp | 2 +- .../fstreams/ifstream.cons/path.pass.cpp | 2 +- .../ifstream.members/open_path.pass.cpp | 2 +- .../fstreams/ofstream.cons/path.pass.cpp | 2 +- .../ofstream.members/open_path.pass.cpp | 2 +- .../input.output/file.streams/lit.local.cfg | 2 +- .../directory_entry.io.pass.cpp | 2 +- .../path.member/path.append.pass.cpp | 2 +- .../path.member/path.assign/source.pass.cpp | 2 +- .../path.member/path.charconv.pass.cpp | 2 +- .../path.member/path.concat.pass.cpp | 2 +- 
.../path.construct/source.pass.cpp | 2 +- .../generic_string_alloc.pass.cpp | 2 +- .../path.generic.obs/named_overloads.pass.cpp | 2 +- .../path.native.obs/named_overloads.pass.cpp | 2 +- .../path.nonmember/path.io.pass.cpp | 2 +- .../fs.op.remove_all/toctou.pass.cpp | 4 +-- .../input.output/filesystems/lit.local.cfg | 2 +- .../input.output.general/lit.local.cfg | 2 +- .../iostream.format/lit.local.cfg | 2 +- .../CharT.pass.cpp | 2 +- .../CharT_pointer.pass.cpp | 2 +- .../char_to_wide.pass.cpp | 2 +- .../char_to_wide_pointer.pass.cpp | 2 +- .../quoted.manip/quoted_char.verify.cpp | 2 +- .../iostream.forward/lit.local.cfg | 2 +- .../iostream.objects/lit.local.cfg | 2 +- .../wide.stream.objects/wcerr.sh.cpp | 2 +- .../wide.stream.objects/wcin.sh.cpp | 2 +- .../wide.stream.objects/wclog.sh.cpp | 2 +- .../wide.stream.objects/wcout.sh.cpp | 2 +- .../ios/basic.ios.members/narrow.pass.cpp | 2 +- .../input.output/iostreams.base/lit.local.cfg | 2 +- .../iostreams.requirements/lit.local.cfg | 2 +- .../input.output/stream.buffers/lit.local.cfg | 2 +- .../input.output/string.streams/lit.local.cfg | 2 +- .../iterators/stream.iterators/lit.local.cfg | 2 +- .../barrier.version.compile.pass.cpp | 2 +- .../filesystem.version.compile.pass.cpp | 2 +- .../iomanip.version.compile.pass.cpp | 2 +- .../istream.version.compile.pass.cpp | 2 +- .../latch.version.compile.pass.cpp | 2 +- .../locale.version.compile.pass.cpp | 2 +- .../mutex.version.compile.pass.cpp | 2 +- .../ostream.version.compile.pass.cpp | 2 +- .../regex.version.compile.pass.cpp | 2 +- .../semaphore.version.compile.pass.cpp | 2 +- .../shared_mutex.version.compile.pass.cpp | 2 +- .../stdatomic.h.version.compile.pass.cpp | 2 +- .../thread.version.compile.pass.cpp | 2 +- libcxx/test/std/localization/lit.local.cfg | 2 +- .../ctor_wchar_t.pass.cpp | 2 +- .../locale.codecvt/ctor_wchar_t.pass.cpp | 2 +- .../wchar_t_always_noconv.pass.cpp | 2 +- .../wchar_t_encoding.pass.cpp | 2 +- .../wchar_t_in.pass.cpp | 2 +- 
.../wchar_t_length.pass.cpp | 2 +- .../wchar_t_max_length.pass.cpp | 2 +- .../wchar_t_out.pass.cpp | 2 +- .../wchar_t_unshift.pass.cpp | 2 +- .../locale.codecvt/types_wchar_t.pass.cpp | 2 +- .../locale.ctype.byname/is_1.pass.cpp | 2 +- .../locale.ctype.byname/is_many.pass.cpp | 2 +- .../locale.ctype.byname/narrow_1.pass.cpp | 2 +- .../locale.ctype.byname/narrow_many.pass.cpp | 2 +- .../locale.ctype.byname/scan_is.pass.cpp | 2 +- .../locale.ctype.byname/scan_not.pass.cpp | 2 +- .../locale.ctype.byname/widen_1.pass.cpp | 2 +- .../locale.ctype.byname/widen_many.pass.cpp | 2 +- .../category.ctype/locale.ctype/ctor.pass.cpp | 2 +- .../locale.ctype.members/is_1.pass.cpp | 2 +- .../locale.ctype.members/is_many.pass.cpp | 2 +- .../locale.ctype.members/narrow_1.pass.cpp | 2 +- .../locale.ctype.members/narrow_many.pass.cpp | 2 +- .../locale.ctype.members/scan_is.pass.cpp | 2 +- .../locale.ctype.members/scan_not.pass.cpp | 2 +- .../locale.ctype.members/tolower_1.pass.cpp | 2 +- .../tolower_many.pass.cpp | 2 +- .../locale.ctype.members/toupper_1.pass.cpp | 2 +- .../toupper_many.pass.cpp | 2 +- .../locale.ctype.members/widen_1.pass.cpp | 2 +- .../locale.ctype.members/widen_many.pass.cpp | 2 +- .../locale.ctype/types.pass.cpp | 2 +- .../date_order_wide.pass.cpp | 2 +- .../get_date_wide.pass.cpp | 2 +- .../get_monthname_wide.pass.cpp | 2 +- .../get_one_wide.pass.cpp | 2 +- .../get_time_wide.pass.cpp | 2 +- .../get_weekday_wide.pass.cpp | 2 +- .../get_year_wide.pass.cpp | 2 +- .../get_date_wide.pass.cpp | 2 +- .../get_monthname_wide.pass.cpp | 2 +- .../get_time_wide.pass.cpp | 2 +- .../get_weekday_wide.pass.cpp | 2 +- .../locale.stdcvt/codecvt_utf16.pass.cpp | 2 +- .../locale.stdcvt/codecvt_utf8.pass.cpp | 2 +- .../conversions.buffer/ctor.pass.cpp | 2 +- .../conversions.buffer/overflow.pass.cpp | 2 +- .../conversions.buffer/pbackfail.pass.cpp | 2 +- .../conversions.buffer/rdbuf.pass.cpp | 2 +- .../conversions.buffer/seekoff.pass.cpp | 2 +- .../conversions.buffer/state.pass.cpp | 2 
+- .../conversions.buffer/test.pass.cpp | 2 +- .../conversions.buffer/underflow.pass.cpp | 2 +- .../conversions.string/converted.pass.cpp | 2 +- .../conversions.string/ctor_codecvt.pass.cpp | 2 +- .../ctor_codecvt_state.pass.cpp | 2 +- .../conversions.string/ctor_copy.pass.cpp | 2 +- .../ctor_err_string.pass.cpp | 2 +- .../conversions.string/from_bytes.pass.cpp | 2 +- .../conversions.string/state.pass.cpp | 2 +- .../conversions.string/to_bytes.pass.cpp | 2 +- .../conversions.string/types.pass.cpp | 2 +- .../namespace/addressable_functions.sh.cpp | 2 +- .../complex.ops/stream_input.pass.cpp | 2 +- .../complex.ops/stream_output.pass.cpp | 2 +- .../rand.adapt.disc/ctor_result_type.pass.cpp | 2 +- .../rand.adapt.disc/ctor_sseq.pass.cpp | 2 +- .../rand.adapt/rand.adapt.disc/io.pass.cpp | 2 +- .../ctor_result_type.pass.cpp | 2 +- .../rand.adapt.ibits/ctor_sseq.pass.cpp | 2 +- .../rand.adapt/rand.adapt.ibits/io.pass.cpp | 2 +- .../rand.adapt.shuf/ctor_result_type.pass.cpp | 2 +- .../rand.adapt.shuf/ctor_sseq.pass.cpp | 2 +- .../rand.adapt/rand.adapt.shuf/io.pass.cpp | 2 +- .../numerics/rand/rand.device/ctor.pass.cpp | 2 +- .../rand/rand.device/entropy.pass.cpp | 2 +- .../numerics/rand/rand.device/eval.pass.cpp | 2 +- .../rand.dist.bern.bernoulli/io.pass.cpp | 2 +- .../rand.dist.bern.bin/eval.PR44847.pass.cpp | 2 +- .../rand.dist.bern.bin/io.pass.cpp | 2 +- .../rand.dist.bern.geo/io.pass.cpp | 2 +- .../rand.dist.bern.negbin/io.pass.cpp | 2 +- .../rand.dist.norm.cauchy/io.pass.cpp | 2 +- .../rand.dist.norm.chisq/io.pass.cpp | 2 +- .../rand.dist.norm.f/io.pass.cpp | 2 +- .../rand.dist.norm.lognormal/io.pass.cpp | 2 +- .../rand.dist.norm.normal/io.pass.cpp | 2 +- .../rand.dist.norm.t/io.pass.cpp | 2 +- .../rand.dist.pois.exp/io.pass.cpp | 2 +- .../rand.dist.pois.extreme/io.pass.cpp | 2 +- .../rand.dist.pois.gamma/io.pass.cpp | 2 +- .../rand.dist.pois.poisson/io.pass.cpp | 2 +- .../rand.dist.pois.weibull/io.pass.cpp | 2 +- .../rand.dist.samp.discrete/io.pass.cpp | 2 +- 
.../rand.dist.samp.pconst/io.pass.cpp | 2 +- .../rand.dist.samp.plinear/io.pass.cpp | 2 +- .../rand.dist.uni.int/io.pass.cpp | 2 +- .../rand.dist.uni.real/io.pass.cpp | 2 +- .../rand.eng.lcong/ctor_result_type.pass.cpp | 2 +- .../rand/rand.eng/rand.eng.lcong/io.pass.cpp | 2 +- .../rand.eng.mers/ctor_result_type.pass.cpp | 2 +- .../rand.eng/rand.eng.mers/ctor_sseq.pass.cpp | 2 +- .../rand/rand.eng/rand.eng.mers/io.pass.cpp | 2 +- .../rand.eng.sub/ctor_result_type.pass.cpp | 2 +- .../rand.eng/rand.eng.sub/ctor_sseq.pass.cpp | 2 +- .../rand/rand.eng/rand.eng.sub/io.pass.cpp | 2 +- libcxx/test/std/re/lit.local.cfg | 2 +- libcxx/test/std/re/re.syn/wcmatch.pass.cpp | 2 +- .../std/re/re.syn/wcregex_iterator.pass.cpp | 2 +- .../re/re.syn/wcregex_token_iterator.pass.cpp | 2 +- .../test/std/re/re.syn/wcsub_match.pass.cpp | 2 +- libcxx/test/std/re/re.syn/wregex.pass.cpp | 2 +- libcxx/test/std/re/re.syn/wsmatch.pass.cpp | 2 +- .../std/re/re.syn/wsregex_iterator.pass.cpp | 2 +- .../re/re.syn/wsregex_token_iterator.pass.cpp | 2 +- .../test/std/re/re.syn/wssub_match.pass.cpp | 2 +- ...hrink_to_fit.explicit_instantiation.sh.cpp | 2 +- .../string.nonmembers/string.io/lit.local.cfg | 2 +- .../std/strings/c.strings/cwchar.pass.cpp | 2 +- .../std/strings/c.strings/cwctype.pass.cpp | 2 +- .../assign2.pass.cpp | 2 +- .../assign3.pass.cpp | 2 +- .../compare.pass.cpp | 2 +- .../copy.pass.cpp | 2 +- .../eof.pass.cpp | 2 +- .../eq.pass.cpp | 2 +- .../eq_int_type.pass.cpp | 2 +- .../find.pass.cpp | 2 +- .../length.pass.cpp | 2 +- .../lt.pass.cpp | 2 +- .../move.pass.cpp | 2 +- .../not_eof.pass.cpp | 2 +- .../to_char_type.pass.cpp | 2 +- .../to_int_type.pass.cpp | 2 +- .../types.pass.cpp | 2 +- .../string.conversions/to_wstring.pass.cpp | 2 +- .../string.view.io/stream_insert.pass.cpp | 2 +- .../string.view.nonmem/quoted.pass.cpp | 2 +- .../futures/futures.async/async.pass.cpp | 2 +- .../futures/futures.async/async.verify.cpp | 2 +- .../futures.async/async_race.38682.pass.cpp | 2 +- 
.../futures/futures.async/async_race.pass.cpp | 2 +- .../default_error_condition.pass.cpp | 2 +- .../equivalent_error_code_int.pass.cpp | 2 +- .../equivalent_int_error_condition.pass.cpp | 2 +- .../futures.errors/future_category.pass.cpp | 2 +- .../futures.errors/make_error_code.pass.cpp | 2 +- .../make_error_condition.pass.cpp | 2 +- .../futures.future_error/code.pass.cpp | 2 +- .../futures.future_error/types.pass.cpp | 2 +- .../futures.future_error/what.pass.cpp | 2 +- .../futures.overview/future_errc.pass.cpp | 2 +- .../futures.overview/future_status.pass.cpp | 2 +- .../is_error_code_enum_future_errc.pass.cpp | 2 +- .../futures/futures.overview/launch.pass.cpp | 2 +- .../futures.promise/alloc_ctor.pass.cpp | 2 +- .../futures.promise/copy_assign.verify.cpp | 2 +- .../futures.promise/copy_ctor.verify.cpp | 2 +- .../futures/futures.promise/default.pass.cpp | 2 +- .../futures/futures.promise/dtor.pass.cpp | 2 +- .../futures.promise/get_future.pass.cpp | 2 +- .../futures.promise/move_assign.pass.cpp | 2 +- .../futures.promise/move_ctor.pass.cpp | 2 +- .../futures.promise/set_exception.pass.cpp | 2 +- .../set_exception_at_thread_exit.pass.cpp | 2 +- .../futures.promise/set_lvalue.pass.cpp | 2 +- .../set_lvalue_at_thread_exit.pass.cpp | 2 +- .../futures.promise/set_rvalue.pass.cpp | 2 +- .../set_rvalue_at_thread_exit.pass.cpp | 2 +- .../set_value_at_thread_exit_const.pass.cpp | 2 +- .../set_value_at_thread_exit_void.pass.cpp | 2 +- .../futures.promise/set_value_const.pass.cpp | 2 +- .../futures.promise/set_value_void.pass.cpp | 2 +- .../futures/futures.promise/swap.pass.cpp | 2 +- .../futures.promise/uses_allocator.pass.cpp | 2 +- .../copy_assign.pass.cpp | 2 +- .../futures.shared_future/copy_ctor.pass.cpp | 2 +- .../ctor_future.pass.cpp | 2 +- .../futures.shared_future/default.pass.cpp | 2 +- .../futures.shared_future/dtor.pass.cpp | 2 +- .../futures.shared_future/get.pass.cpp | 2 +- .../move_assign.pass.cpp | 2 +- .../futures.shared_future/move_ctor.pass.cpp | 2 +- 
.../futures.shared_future/wait.pass.cpp | 2 +- .../futures.shared_future/wait_for.pass.cpp | 2 +- .../futures.shared_future/wait_until.pass.cpp | 2 +- .../futures.task.members/assign_copy.fail.cpp | 2 +- .../futures.task.members/assign_move.pass.cpp | 2 +- .../futures.task.members/ctor1.fail.cpp | 2 +- .../ctor2.compile.pass.cpp | 2 +- .../futures.task.members/ctor_copy.fail.cpp | 2 +- .../ctor_default.pass.cpp | 2 +- .../futures.task.members/ctor_func.pass.cpp | 2 +- .../ctor_func_alloc.pass.cpp | 2 +- .../futures.task.members/ctor_move.pass.cpp | 2 +- .../futures.task.members/dtor.pass.cpp | 2 +- .../futures.task.members/get_future.pass.cpp | 2 +- .../make_ready_at_thread_exit.pass.cpp | 2 +- .../futures.task.members/operator.pass.cpp | 2 +- .../futures.task.members/reset.pass.cpp | 2 +- .../futures.task.members/swap.pass.cpp | 2 +- .../futures.task.nonmembers/swap.pass.cpp | 2 +- .../uses_allocator.compile.pass.cpp | 2 +- .../copy_assign.verify.cpp | 2 +- .../copy_ctor.verify.cpp | 2 +- .../futures.unique_future/default.pass.cpp | 2 +- .../futures.unique_future/dtor.pass.cpp | 2 +- .../futures.unique_future/get.pass.cpp | 2 +- .../move_assign.pass.cpp | 2 +- .../futures.unique_future/move_ctor.pass.cpp | 2 +- .../futures.unique_future/share.pass.cpp | 2 +- .../futures.unique_future/wait.pass.cpp | 2 +- .../futures.unique_future/wait_for.pass.cpp | 2 +- .../futures.unique_future/wait_until.pass.cpp | 2 +- libcxx/test/std/thread/macro.pass.cpp | 2 +- .../std/thread/thread.barrier/arrive.pass.cpp | 2 +- .../thread.barrier/arrive_and_drop.pass.cpp | 2 +- .../thread.barrier/arrive_and_wait.pass.cpp | 2 +- .../thread/thread.barrier/completion.pass.cpp | 2 +- .../std/thread/thread.barrier/max.pass.cpp | 2 +- .../thread.condition/cv_status.pass.cpp | 2 +- .../notify_all_at_thread_exit.pass.cpp | 2 +- .../thread.condition.condvar/default.pass.cpp | 2 +- .../destructor.pass.cpp | 2 +- .../notify_all.pass.cpp | 2 +- .../notify_one.pass.cpp | 2 +- 
.../thread.condition.condvar/wait.pass.cpp | 2 +- .../wait_for.pass.cpp | 2 +- .../wait_for_pred.pass.cpp | 2 +- .../wait_pred.pass.cpp | 2 +- .../wait_until.pass.cpp | 2 +- .../wait_until_pred.pass.cpp | 2 +- .../default.pass.cpp | 2 +- .../destructor.pass.cpp | 2 +- .../notify_all.pass.cpp | 2 +- .../notify_one.pass.cpp | 2 +- .../thread.condition.condvarany/wait.pass.cpp | 2 +- .../wait_for.pass.cpp | 2 +- .../wait_for_pred.pass.cpp | 2 +- .../wait_pred.pass.cpp | 2 +- .../wait_terminates.sh.cpp | 2 +- .../wait_until.pass.cpp | 2 +- .../wait_until_pred.pass.cpp | 2 +- .../thread.latch/arrive_and_wait.pass.cpp | 2 +- .../thread/thread.latch/count_down.pass.cpp | 2 +- .../test/std/thread/thread.latch/max.pass.cpp | 2 +- .../std/thread/thread.latch/try_wait.pass.cpp | 2 +- .../thread.lock.algorithm/lock.pass.cpp | 2 +- .../thread.lock.algorithm/try_lock.pass.cpp | 2 +- .../thread.lock.guard/adopt_lock.pass.cpp | 2 +- .../thread.lock.guard/mutex.fail.cpp | 2 +- .../thread.lock.guard/mutex.pass.cpp | 2 +- .../thread.lock.guard/types.pass.cpp | 2 +- .../thread.lock.scoped/adopt_lock.pass.cpp | 2 +- .../thread.lock.scoped/assign.fail.cpp | 2 +- .../thread.lock.scoped/copy.fail.cpp | 2 +- .../thread.lock.scoped/mutex.fail.cpp | 2 +- .../thread.lock.scoped/mutex.pass.cpp | 2 +- .../thread.lock.scoped/types.pass.cpp | 2 +- .../thread.lock.shared.cons/default.pass.cpp | 2 +- .../move_assign.pass.cpp | 2 +- .../move_ctor.pass.cpp | 2 +- .../thread.lock.shared.cons/mutex.pass.cpp | 2 +- .../mutex_adopt_lock.pass.cpp | 2 +- .../mutex_defer_lock.pass.cpp | 2 +- .../mutex_duration.pass.cpp | 2 +- .../mutex_time_point.pass.cpp | 2 +- .../mutex_try_to_lock.pass.cpp | 2 +- .../thread.lock.shared.locking/lock.pass.cpp | 2 +- .../try_lock.pass.cpp | 2 +- .../try_lock_for.pass.cpp | 2 +- .../try_lock_until.pass.cpp | 2 +- .../unlock.pass.cpp | 2 +- .../member_swap.pass.cpp | 2 +- .../nonmember_swap.pass.cpp | 2 +- .../thread.lock.shared.mod/release.pass.cpp | 2 +- 
.../thread.lock.shared.obs/mutex.pass.cpp | 2 +- .../thread.lock.shared.obs/op_bool.pass.cpp | 2 +- .../thread.lock.shared.obs/owns_lock.pass.cpp | 2 +- .../thread.lock.shared/types.pass.cpp | 2 +- .../thread.lock.unique.cons/default.pass.cpp | 2 +- .../move_assign.pass.cpp | 2 +- .../move_ctor.pass.cpp | 2 +- .../thread.lock.unique.cons/mutex.pass.cpp | 2 +- .../mutex_adopt_lock.pass.cpp | 2 +- .../mutex_defer_lock.pass.cpp | 2 +- .../mutex_duration.pass.cpp | 2 +- .../mutex_time_point.pass.cpp | 2 +- .../mutex_try_to_lock.pass.cpp | 2 +- .../thread.lock.unique.locking/lock.pass.cpp | 2 +- .../try_lock.pass.cpp | 2 +- .../try_lock_for.pass.cpp | 2 +- .../try_lock_until.pass.cpp | 2 +- .../unlock.pass.cpp | 2 +- .../member_swap.pass.cpp | 2 +- .../nonmember_swap.pass.cpp | 2 +- .../thread.lock.unique.mod/release.pass.cpp | 2 +- .../thread.lock.unique.obs/mutex.pass.cpp | 2 +- .../thread.lock.unique.obs/op_bool.pass.cpp | 2 +- .../thread.lock.unique.obs/owns_lock.pass.cpp | 2 +- .../thread.lock.unique/types.pass.cpp | 2 +- .../thread.mutex/thread.lock/types.fail.cpp | 2 +- .../thread.mutex/thread.lock/types.pass.cpp | 2 +- .../thread.mutex.class/default.pass.cpp | 2 +- .../thread.mutex.class/lock.pass.cpp | 2 +- .../thread.mutex.class/try_lock.pass.cpp | 2 +- .../thread.mutex.recursive/default.pass.cpp | 2 +- .../thread.mutex.recursive/lock.pass.cpp | 2 +- .../thread.mutex.recursive/try_lock.pass.cpp | 2 +- .../thread.shared_mutex.class/assign.fail.cpp | 2 +- .../thread.shared_mutex.class/copy.fail.cpp | 2 +- .../default.pass.cpp | 2 +- .../thread.shared_mutex.class/lock.pass.cpp | 2 +- .../lock_shared.pass.cpp | 2 +- .../try_lock.pass.cpp | 2 +- .../try_lock_shared.pass.cpp | 2 +- .../default.pass.cpp | 2 +- .../lock.pass.cpp | 2 +- .../lock_shared.pass.cpp | 2 +- .../try_lock.pass.cpp | 2 +- .../try_lock_for.pass.cpp | 2 +- .../try_lock_shared.pass.cpp | 2 +- .../try_lock_shared_for.pass.cpp | 2 +- .../try_lock_shared_until.pass.cpp | 2 +- 
.../try_lock_until.pass.cpp | 2 +- .../try_lock_until_deadlock_bug.pass.cpp | 2 +- .../thread.timedmutex.class/default.pass.cpp | 2 +- .../thread.timedmutex.class/lock.pass.cpp | 2 +- .../thread.timedmutex.class/try_lock.pass.cpp | 2 +- .../try_lock_for.pass.cpp | 2 +- .../try_lock_until.pass.cpp | 2 +- .../default.pass.cpp | 2 +- .../thread.timedmutex.recursive/lock.pass.cpp | 2 +- .../try_lock.pass.cpp | 2 +- .../try_lock_for.pass.cpp | 2 +- .../try_lock_until.pass.cpp | 2 +- .../thread.once.callonce/call_once.pass.cpp | 2 +- .../thread.once.callonce/race.pass.cpp | 2 +- .../assign.compile.fail.cpp | 2 +- .../copy.compile.fail.cpp | 2 +- .../thread.once.onceflag/default.pass.cpp | 2 +- .../thread/thread.semaphore/acquire.pass.cpp | 2 +- .../thread/thread.semaphore/binary.pass.cpp | 2 +- .../thread.semaphore/ctor.compile.pass.cpp | 2 +- .../std/thread/thread.semaphore/max.pass.cpp | 2 +- .../thread/thread.semaphore/release.pass.cpp | 2 +- .../thread/thread.semaphore/timed.pass.cpp | 2 +- .../thread.semaphore/try_acquire.pass.cpp | 2 +- .../thread.thread.algorithm/swap.pass.cpp | 2 +- .../thread.thread.assign/move.pass.cpp | 2 +- .../thread.thread.assign/move2.pass.cpp | 2 +- .../thread.thread.constr/F.pass.cpp | 2 +- .../thread.thread.constr/default.pass.cpp | 2 +- .../thread.thread.constr/move.pass.cpp | 2 +- .../robust_against_adl.pass.cpp | 2 +- .../thread.thread.destr/dtor.pass.cpp | 2 +- .../thread.thread.id/assign.pass.cpp | 2 +- .../thread.thread.id/copy.pass.cpp | 2 +- .../thread.thread.id/default.pass.cpp | 2 +- .../thread.thread.id/enabled_hashes.pass.cpp | 2 +- .../thread.thread.id/eq.pass.cpp | 2 +- .../thread.thread.id/lt.pass.cpp | 2 +- .../thread.thread.id/stream.pass.cpp | 4 +-- .../thread.thread.id/thread_id.pass.cpp | 2 +- .../thread.thread.member/detach.pass.cpp | 2 +- .../thread.thread.member/get_id.pass.cpp | 2 +- .../thread.thread.member/join.pass.cpp | 2 +- .../thread.thread.member/joinable.pass.cpp | 2 +- 
.../thread.thread.member/swap.pass.cpp | 2 +- .../hardware_concurrency.pass.cpp | 2 +- .../thread.thread.this/get_id.pass.cpp | 2 +- .../thread.thread.this/sleep_until.pass.cpp | 2 +- .../thread.thread.this/yield.pass.cpp | 2 +- .../time.clock/time.clock.file/now.pass.cpp | 2 +- .../time.clock.file/to_from_sys.pass.cpp | 2 +- .../time.clock.steady/consistency.pass.cpp | 2 +- .../time.clock/time.clock.steady/now.pass.cpp | 2 +- .../charconv/charconv.msvc/test.pass.cpp | 6 ++-- .../format.arg.store/make_format_args.sh.cpp | 2 +- .../make_wformat_args.pass.cpp | 2 +- .../format.context/ctor.pass.cpp | 2 +- .../format.context/locale.pass.cpp | 2 +- .../format.functions/format.locale.pass.cpp | 2 +- .../format_to.locale.pass.cpp | 2 +- .../format_to_n.locale.pass.cpp | 2 +- .../formatted_size.locale.pass.cpp | 2 +- .../locale-specific_form.pass.cpp | 2 +- .../format.functions/vformat.locale.pass.cpp | 2 +- .../vformat_to.locale.pass.cpp | 2 +- .../unique.ptr/unique.ptr.special/io.fail.cpp | 2 +- .../unique.ptr/unique.ptr.special/io.pass.cpp | 2 +- .../atomic_compare_exchange_strong.pass.cpp | 2 +- ..._compare_exchange_strong_explicit.pass.cpp | 2 +- .../atomic_compare_exchange_weak.pass.cpp | 2 +- ...ic_compare_exchange_weak_explicit.pass.cpp | 2 +- .../atomic_exchange.pass.cpp | 2 +- .../atomic_exchange_explicit.pass.cpp | 2 +- .../atomic_is_lock_free.pass.cpp | 2 +- .../atomic_load.pass.cpp | 2 +- .../atomic_load_explicit.pass.cpp | 2 +- .../atomic_store.pass.cpp | 2 +- .../atomic_store_explicit.pass.cpp | 2 +- .../util.smartptr.shared.io/io.pass.cpp | 2 +- .../bitset.operators/stream_in.pass.cpp | 2 +- .../bitset.operators/stream_out.pass.cpp | 2 +- .../test.support/make_string_header.pass.cpp | 2 +- .../generate_feature_test_macro_components.py | 30 +++++++++---------- .../utils/generate_header_inclusion_tests.py | 28 ++++++++--------- libcxx/utils/libcxx/test/features.py | 15 ++++++---- libcxx/utils/libcxx/test/target_info.py | 2 +- 
.../test/cxa_thread_atexit_test.pass.cpp | 2 +- libcxxabi/test/forced_unwind3.pass.cpp | 2 +- libcxxabi/test/guard_threaded_test.pass.cpp | 2 +- libcxxabi/test/libcxxabi/test/config.py | 2 +- libcxxabi/test/test_fallback_malloc.pass.cpp | 2 +- .../thread_local_destruction_order.pass.cpp | 2 +- 539 files changed, 578 insertions(+), 575 deletions(-) diff --git a/libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp b/libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp index a8d56c5617a86e..ca092d9c602753 100644 --- a/libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp +++ b/libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // REQUIRES: c++03 || c++11 || c++14 || c++17 || c++20 // This test ensures that we issue a reasonable diagnostic when including after diff --git a/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp b/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp index f00c3efa8cf75f..f35eddfb353354 100644 --- a/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp +++ b/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // This test ensures that we don't hijack the header even when compiling // before C++23, since Clang used to provide that header before libc++ provided one. 
diff --git a/libcxx/test/libcxx/debug/containers.multithread.pass.cpp b/libcxx/test/libcxx/debug/containers.multithread.pass.cpp index 6cc9effb919841..e8f87b1d20d75c 100644 --- a/libcxx/test/libcxx/debug/containers.multithread.pass.cpp +++ b/libcxx/test/libcxx/debug/containers.multithread.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++11, c++14 -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: libcxx-no-debug-mode, c++03 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 diff --git a/libcxx/test/libcxx/debug/extern-templates.sh.cpp b/libcxx/test/libcxx/debug/extern-templates.sh.cpp index b43f4d10af5c5d..774f675a785ae8 100644 --- a/libcxx/test/libcxx/debug/extern-templates.sh.cpp +++ b/libcxx/test/libcxx/debug/extern-templates.sh.cpp @@ -11,7 +11,7 @@ // necessary for correctness. See https://llvm.org/D94718 for details. // UNSUPPORTED: libcxx-no-debug-mode -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: cant-build-shared-library // This test relies on linking a shared library and then passing that shared diff --git a/libcxx/test/libcxx/depr/depr.c.headers/locale_h.pass.cpp b/libcxx/test/libcxx/depr/depr.c.headers/locale_h.pass.cpp index d2a438808cee66..72468612933cd0 100644 --- a/libcxx/test/libcxx/depr/depr.c.headers/locale_h.pass.cpp +++ b/libcxx/test/libcxx/depr/depr.c.headers/locale_h.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp b/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp index ac7268969c92c6..f57921f859a543 100644 --- a/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp +++ b/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp index ca63a212dbc7bb..7072603b7db070 100644 --- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp +++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/libcxx/fuzzing/regex.pass.cpp b/libcxx/test/libcxx/fuzzing/regex.pass.cpp index fca7a9accf2ccd..7e3b362fd6161b 100644 --- a/libcxx/test/libcxx/fuzzing/regex.pass.cpp +++ b/libcxx/test/libcxx/fuzzing/regex.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11 // UNSUPPORTED: no-exceptions -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index 27656af8f1cf39..6ad6ccea0e35fd 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -8,7 +8,7 @@ // REQUIRES: host-has-gdb-with-python // REQUIRES: locale.en_US.UTF-8 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: c++03 // TODO: Investigate this failure, which happens only with the Bootstrapping build. 
diff --git a/libcxx/test/libcxx/inclusions/filesystem.inclusions.compile.pass.cpp b/libcxx/test/libcxx/inclusions/filesystem.inclusions.compile.pass.cpp index 61123206970009..a726f3a5d5f334 100644 --- a/libcxx/test/libcxx/inclusions/filesystem.inclusions.compile.pass.cpp +++ b/libcxx/test/libcxx/inclusions/filesystem.inclusions.compile.pass.cpp @@ -12,7 +12,7 @@ // clang-format off // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // diff --git a/libcxx/test/libcxx/inclusions/ios.inclusions.compile.pass.cpp b/libcxx/test/libcxx/inclusions/ios.inclusions.compile.pass.cpp index 52fb9ee034dd58..560c819b67260f 100644 --- a/libcxx/test/libcxx/inclusions/ios.inclusions.compile.pass.cpp +++ b/libcxx/test/libcxx/inclusions/ios.inclusions.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/libcxx/inclusions/iostream.inclusions.compile.pass.cpp b/libcxx/test/libcxx/inclusions/iostream.inclusions.compile.pass.cpp index 4d1b448ef69b2b..a8f223a507e405 100644 --- a/libcxx/test/libcxx/inclusions/iostream.inclusions.compile.pass.cpp +++ b/libcxx/test/libcxx/inclusions/iostream.inclusions.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/libcxx/inclusions/regex.inclusions.compile.pass.cpp b/libcxx/test/libcxx/inclusions/regex.inclusions.compile.pass.cpp index a2a66f4be6b0fc..f7cf8b9a751beb 100644 --- a/libcxx/test/libcxx/inclusions/regex.inclusions.compile.pass.cpp +++ b/libcxx/test/libcxx/inclusions/regex.inclusions.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/libcxx/inclusions/thread.inclusions.compile.pass.cpp b/libcxx/test/libcxx/inclusions/thread.inclusions.compile.pass.cpp 
index fc5467423efde4..5e5ad7e605d788 100644 --- a/libcxx/test/libcxx/inclusions/thread.inclusions.compile.pass.cpp +++ b/libcxx/test/libcxx/inclusions/thread.inclusions.compile.pass.cpp @@ -12,7 +12,7 @@ // clang-format off // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp b/libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp index 87f9766435d4c1..a9ed30c7ca5bd9 100644 --- a/libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp +++ b/libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/libcxx/input.output/iostream.format/input.streams/traits_mismatch.fail.cpp b/libcxx/test/libcxx/input.output/iostream.format/input.streams/traits_mismatch.fail.cpp index faa693ad1b48be..9d109eeb2a7c13 100644 --- a/libcxx/test/libcxx/input.output/iostream.format/input.streams/traits_mismatch.fail.cpp +++ b/libcxx/test/libcxx/input.output/iostream.format/input.streams/traits_mismatch.fail.cpp @@ -13,7 +13,7 @@ // The char type of the stream and the char_type of the traits have to match -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/libcxx/input.output/iostream.format/output.streams/traits_mismatch.fail.cpp b/libcxx/test/libcxx/input.output/iostream.format/output.streams/traits_mismatch.fail.cpp index bf974d3cf8bcc0..0079349783a750 100644 --- a/libcxx/test/libcxx/input.output/iostream.format/output.streams/traits_mismatch.fail.cpp +++ 
b/libcxx/test/libcxx/input.output/iostream.format/output.streams/traits_mismatch.fail.cpp @@ -13,7 +13,7 @@ // The char type of the stream and the char_type of the traits have to match -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/libcxx/input.output/string.streams/traits_mismatch.fail.cpp b/libcxx/test/libcxx/input.output/string.streams/traits_mismatch.fail.cpp index 91d992d2c33bb3..29b56726d10934 100644 --- a/libcxx/test/libcxx/input.output/string.streams/traits_mismatch.fail.cpp +++ b/libcxx/test/libcxx/input.output/string.streams/traits_mismatch.fail.cpp @@ -14,7 +14,7 @@ // // The char type of the stream and the char_type of the traits have to match -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp index df85906fc4eafc..683a2dcc79a02a 100644 --- a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include diff --git a/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp b/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp index db0c6603d13008..b0e9956f553fcf 100644 --- 
a/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp +++ b/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // UNSUPPORTED: c++03 diff --git a/libcxx/test/libcxx/modules_include.sh.cpp b/libcxx/test/libcxx/modules_include.sh.cpp index d165d53eb6a4d8..4dd8a8f73e87c1 100644 --- a/libcxx/test/libcxx/modules_include.sh.cpp +++ b/libcxx/test/libcxx/modules_include.sh.cpp @@ -21,7 +21,7 @@ // TODO: Some headers produce errors when we include them and the library has been // configured without support for them, which breaks the modules build. -// UNSUPPORTED: libcpp-has-no-localization, libcpp-has-no-filesystem-library, libcpp-has-no-threads, libcpp-has-no-wide-characters +// UNSUPPORTED: no-localization, no-filesystem, no-threads, no-wide-characters // Prevent from generating deprecated warnings for this test. #if defined(__DEPRECATED) diff --git a/libcxx/test/libcxx/numerics/rand/rand.device/has-no-random-device.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.device/has-no-random-device.verify.cpp index 66a3afb25645fa..7559c88b59e9f6 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.device/has-no-random-device.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.device/has-no-random-device.verify.cpp @@ -9,7 +9,7 @@ // Make sure that std::random_device is not available in namespace std:: when // libc++ is built without support for random device. 
-// REQUIRES: libcpp-has-no-random-device +// REQUIRES: no-random-device #include diff --git a/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp b/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp index 3dd6b20cc37332..a058275b47fb88 100644 --- a/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/has-no-incomplete-ranges.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // REQUIRES: libcpp-has-no-incomplete-ranges // Test that _LIBCPP_HAS_NO_INCOMPLETE_RANGES disables the std::ranges namespace. diff --git a/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp index 571bdc62cbd303..09afb0294e7d84 100644 --- a/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp +++ b/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp index 453ceb198f7899..f40fb3a8a7eee8 100644 --- a/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp +++ b/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception.pass.cpp index ac025696fc2c04..c4090ed89c73b3 100644 --- 
a/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception.pass.cpp +++ b/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: has-unix-headers -// UNSUPPORTED: c++03, libcpp-has-no-threads +// UNSUPPORTED: c++03, no-threads // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0}} // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_ASSERTIONS=1 diff --git a/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception_at_thread_exit.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception_at_thread_exit.pass.cpp index 7e098c02fd91f3..a4c21ec2061132 100644 --- a/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception_at_thread_exit.pass.cpp +++ b/libcxx/test/libcxx/thread/futures/futures.promise/assert.set_exception_at_thread_exit.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: has-unix-headers -// UNSUPPORTED: c++03, libcpp-has-no-threads +// UNSUPPORTED: c++03, no-threads // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0}} // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_ASSERTIONS=1 diff --git a/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp index 2381b0d5c7c869..1f17d745134717 100644 --- a/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp +++ b/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/libcxx/thread/futures/version.pass.cpp 
b/libcxx/test/libcxx/thread/futures/version.pass.cpp index 601e6a48e79555..b32f7579e9a656 100644 --- a/libcxx/test/libcxx/thread/futures/version.pass.cpp +++ b/libcxx/test/libcxx/thread/futures/version.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/libcxx/thread/thread.barrier/version.compile.pass.cpp b/libcxx/test/libcxx/thread/thread.barrier/version.compile.pass.cpp index eeacdd88e4bd67..7890787b6a2a16 100644 --- a/libcxx/test/libcxx/thread/thread.barrier/version.compile.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.barrier/version.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp index 30c03913688147..0d682cdddc65f5 100644 --- a/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // REQUIRES: libcpp-has-thread-api-pthread // notify_all_at_thread_exit(...) 
requires move semantics to transfer the diff --git a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp index a4c7342f8a61e7..374aa2fd15350f 100644 --- a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, libcpp-has-thread-api-external +// UNSUPPORTED: no-threads, libcpp-has-thread-api-external // XFAIL: windows diff --git a/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp index 552223a6361b4e..114f0b8035f6da 100644 --- a/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/libcxx/thread/thread.latch/version.compile.pass.cpp b/libcxx/test/libcxx/thread/thread.latch/version.compile.pass.cpp index 14cbb366060e93..365ee9276be174 100644 --- a/libcxx/test/libcxx/thread/thread.latch/version.compile.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.latch/version.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp b/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp index 2ad1370c9d61e7..09a8ba8f2a2c46 100644 --- 
a/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp +++ b/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // [[nodiscard]] isn't supported in C++03 (not even as an extension) // UNSUPPORTED: c++03 diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp index 269de52cbbf80a..3de6635f13be25 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, libcpp-has-thread-api-external +// UNSUPPORTED: no-threads, libcpp-has-thread-api-external // XFAIL: windows diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp index dda2d94a95c924..d76b3d71d36e31 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, libcpp-has-thread-api-external +// UNSUPPORTED: no-threads, libcpp-has-thread-api-external // XFAIL: windows diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp index 2e181dc3b65e45..02072f5e6f8a76 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_guard.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_guard.pass.cpp index 10ad116657704b..1c1f36459c8379 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_guard.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_guard.pass.cpp @@ -10,7 +10,7 @@ // the processing goes awry preventing the definition of the types. // XFAIL: msvc -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // REQUIRES: thread-safety // diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp index 0b599f75b684dd..ddfe3269fdfa76 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp @@ -10,7 +10,7 @@ // the processing goes awry preventing the definition of the types. 
// XFAIL: msvc -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // REQUIRES: thread-safety // diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_missing_unlock.fail.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_missing_unlock.fail.cpp index beb508f4e30f4d..07a51b5db90cfb 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_missing_unlock.fail.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_missing_unlock.fail.cpp @@ -10,7 +10,7 @@ // the processing goes awry preventing the definition of the types. // XFAIL: msvc -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // REQUIRES: thread-safety // diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp index edfcfea496fb0d..81ce3b25a83944 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp @@ -10,7 +10,7 @@ // the processing goes awry preventing the definition of the types. 
// XFAIL: msvc -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // REQUIRES: thread-safety // diff --git a/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp index bf565318a1a7bf..fca02ffef7dd93 100644 --- a/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/libcxx/thread/thread.semaphore/version.compile.pass.cpp b/libcxx/test/libcxx/thread/thread.semaphore/version.compile.pass.cpp index 9e2fe49b43ba19..3f59448a8524aa 100644 --- a/libcxx/test/libcxx/thread/thread.semaphore/version.compile.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.semaphore/version.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/libcxx/thread/thread.threads/create_late.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/create_late.pass.cpp index 8c1b6ebade54d1..847b354702ed8b 100644 --- a/libcxx/test/libcxx/thread/thread.threads/create_late.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/create_late.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 #include "make_test_thread.h" diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp index 0fa0d7ce764272..96ec3332519bcd 100644 --- 
a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, libcpp-has-thread-api-external +// UNSUPPORTED: no-threads, libcpp-has-thread-api-external // XFAIL: windows diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp index ce47404884c63c..bb60647ef05d8f 100644 --- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, libcpp-has-thread-api-external +// UNSUPPORTED: no-threads, libcpp-has-thread-api-external // REQUIRES: libcpp-has-thread-api-pthread // diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp index fed874da3224f4..59bcec46cfda71 100644 --- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // Until 58a0a70fb2f1, this_thread::sleep_for could sometimes get interrupted // by signals and this test would fail spuriously. 
Disable the test on the diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp index 7024c9d8e686f7..5e5005c7128247 100644 --- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // This test uses the POSIX header which Windows doesn't provide // UNSUPPORTED: windows diff --git a/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp index 4ced04d5ee2d40..7c6b2f2c4bab21 100644 --- a/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp b/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp index 5ea05cd20d5ee9..cbecd2d1dc7abd 100644 --- a/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp +++ b/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // // // diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp index fe81374957cf0f..58d97723e483c1 100644 --- 
a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // XFAIL: c++03 // XFAIL: !non-lockfree-atomics diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp index f8bfcb23bb0ecd..2c187425f87d22 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // XFAIL: c++03 // XFAIL: !non-lockfree-atomics diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp index 6d83442da5924e..1557f41f1c79b5 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // XFAIL: c++03 // XFAIL: !non-lockfree-atomics diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp 
b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp index 27370961aa3e3f..9799c77a9b12d6 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // XFAIL: c++03 // XFAIL: !non-lockfree-atomics diff --git a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp index 6d2f810f0163bf..5d0a2859bfcdc7 100644 --- a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp +++ b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/containers/iterator.rel_ops.compile.pass.cpp b/libcxx/test/std/containers/iterator.rel_ops.compile.pass.cpp index 46d46b7c3013c7..89f08e1a3de831 100644 --- a/libcxx/test/std/containers/iterator.rel_ops.compile.pass.cpp +++ b/libcxx/test/std/containers/iterator.rel_ops.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // std::filesystem is unavailable prior to macOS 10.15 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp index 00e6f03113acae..9848f07a6d0d83 100644 --- 
a/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp index e5b8c2c69b8088..78716da2cbd675 100644 --- a/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp index 1738c6c4c727cd..c40623a9dcacce 100644 --- a/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/depr/depr.ios.members/lit.local.cfg b/libcxx/test/std/depr/depr.ios.members/lit.local.cfg index ebbd06ec931cc4..3d9bcf36aeb155 100644 --- a/libcxx/test/std/depr/depr.ios.members/lit.local.cfg +++ b/libcxx/test/std/depr/depr.ios.members/lit.local.cfg @@ -1,2 +1,2 @@ -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/depr/depr.str.strstreams/lit.local.cfg b/libcxx/test/std/depr/depr.str.strstreams/lit.local.cfg index ebbd06ec931cc4..3d9bcf36aeb155 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/lit.local.cfg +++ b/libcxx/test/std/depr/depr.str.strstreams/lit.local.cfg @@ -1,2 +1,2 @@ -if 'libcpp-has-no-localization' in 
config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp index 2ba88489d452b6..03199d5e7bfe4c 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.cons/ostream_joiner.cons.pass.cpp b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.cons/ostream_joiner.cons.pass.cpp index a4210d0ad81ab8..af67bbefd8d7dd 100644 --- a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.cons/ostream_joiner.cons.pass.cpp +++ b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.cons/ostream_joiner.cons.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // // diff --git a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.creation/make_ostream_joiner.pass.cpp b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.creation/make_ostream_joiner.pass.cpp index ed58745d79f0a5..05f798fcae3d67 100644 --- a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.creation/make_ostream_joiner.pass.cpp +++ b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.creation/make_ostream_joiner.pass.cpp @@ -7,7 +7,7 @@ 
//===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // // diff --git a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.assign.pass.cpp b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.assign.pass.cpp index c0bb1fdc3a3fd2..7a193e839f2a31 100644 --- a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.assign.pass.cpp +++ b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.assign.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // // diff --git a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.postincrement.pass.cpp b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.postincrement.pass.cpp index 774ee36e389bb2..055280d62354cf 100644 --- a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.postincrement.pass.cpp +++ b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.postincrement.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // // diff --git a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.pretincrement.pass.cpp b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.pretincrement.pass.cpp index d8929c44da7bc3..f7de32d7ce7780 100644 --- 
a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.pretincrement.pass.cpp +++ b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.pretincrement.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // // diff --git a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.star.pass.cpp b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.star.pass.cpp index a0804ccb68e1e3..4eb6da5c39f032 100644 --- a/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.star.pass.cpp +++ b/libcxx/test/std/experimental/iterator/ostream.joiner/ostream.joiner.ops/ostream_joiner.op.star.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // // diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp index 7d5c2636b4fd2a..eb30b202d9d578 100644 --- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp +++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp index 81749cfe19c434..035dd1fc5c26f9 100644 --- 
a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp index 6714e62ce0b5c1..835cce670131a1 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. 
// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp index e44dde9eecc29c..f75d1151511c5a 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp index 98643765fddf93..afaaa543ccc712 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. 
// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp index 67f741efb479a9..5a53c173f43376 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp index 3a8be7fe0f4490..f8a6d28f3d8042 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. 
// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp index 36f81dfb110f96..db7a267fe950ad 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/input.output/file.streams/lit.local.cfg b/libcxx/test/std/input.output/file.streams/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/file.streams/lit.local.cfg +++ b/libcxx/test/std/input.output/file.streams/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.io/directory_entry.io.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.io/directory_entry.io.pass.cpp index bc3b55df589691..77b9dcffebe440 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.io/directory_entry.io.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.io/directory_entry.io.pass.cpp @@ -7,7 +7,7 @@ 
//===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // // diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp index b2dd3c95731c9b..afbad4c062dfb1 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp index 68ef55d7f0fd66..68c24426b423bd 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.charconv.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.charconv.pass.cpp index f8e8da49dbadc0..cf27cd28a74ae8 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.charconv.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.charconv.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: c++03 // 
ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp index 1c91759f2f3e5e..5c0fb2070b2123 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/source.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/source.pass.cpp index c97e4ddd1d04b7..f119f34338e72a 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/source.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/source.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp index 7d8893adadf7f2..d67fc85bf84981 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git 
a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp index bc547e5485f36b..f381fb5a41369b 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp index 6611da48d74ab4..0811e90aed0a0b 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // These tests require locale for non-char paths -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp index 501ded7c37c6ad..2f21649df60d11 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git 
a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp index 04564ac9d34586..387044777d1b5a 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-localization -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-localization +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/input.output/filesystems/lit.local.cfg b/libcxx/test/std/input.output/filesystems/lit.local.cfg index 95e9ec4ccd979a..b22876f0ac148a 100644 --- a/libcxx/test/std/input.output/filesystems/lit.local.cfg +++ b/libcxx/test/std/input.output/filesystems/lit.local.cfg @@ -6,5 +6,5 @@ if 'use_system_cxx_lib' in config.available_features: if any('target=x86_64-apple-macosx{}'.format(v) in config.available_features for v in too_old): config.unsupported = True -if 'libcpp-has-no-filesystem-library' in config.available_features: +if 'no-filesystem' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/input.output.general/lit.local.cfg b/libcxx/test/std/input.output/input.output.general/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/input.output.general/lit.local.cfg +++ b/libcxx/test/std/input.output/input.output.general/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/iostream.format/lit.local.cfg b/libcxx/test/std/input.output/iostream.format/lit.local.cfg index 
cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/iostream.format/lit.local.cfg +++ b/libcxx/test/std/input.output/iostream.format/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp index f0867ef1043f27..2611bb494b0092 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp @@ -14,7 +14,7 @@ // template // basic_ostream& operator<<(basic_ostream& out, charT c); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp index 8046dfd3da608e..190d4ce786fe34 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp @@ -14,7 +14,7 @@ // template // basic_ostream& operator<<(basic_ostream& out, const charT* s); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git 
a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp index 797e4297c9f012..34bc847af47a6c 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp @@ -14,7 +14,7 @@ // template // basic_ostream& operator<<(basic_ostream& out, char c); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp index 1015da8b0fac30..08a66e81810047 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp @@ -14,7 +14,7 @@ // template // basic_ostream& operator<<(basic_ostream& out, const char* s); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.verify.cpp b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.verify.cpp index 29a084e62f6688..4d9635183d6634 100644 --- a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.verify.cpp +++ b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted_char.verify.cpp @@ -11,7 +11,7 @@ // quoted // UNSUPPORTED: c++03, c++11 
-// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/input.output/iostream.forward/lit.local.cfg b/libcxx/test/std/input.output/iostream.forward/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/iostream.forward/lit.local.cfg +++ b/libcxx/test/std/input.output/iostream.forward/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/iostream.objects/lit.local.cfg b/libcxx/test/std/input.output/iostream.objects/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/iostream.objects/lit.local.cfg +++ b/libcxx/test/std/input.output/iostream.objects/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp index 65c9cd52f58fa9..f67aefcdc2b768 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp @@ -10,7 +10,7 @@ // istream wcerr; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp index c9be17125b832c..bc15a3d4498905 100644 --- 
a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp @@ -10,7 +10,7 @@ // istream wcin; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../send-stdin.sh diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp index 8ce29180b0fdfa..e2799547c9026e 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp @@ -10,7 +10,7 @@ // istream wclog; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp index a58feb4007355a..3276041496086e 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp @@ -10,7 +10,7 @@ // istream wcout; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stdout.sh diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp index fbc3ae88784c7f..bc862cc131b22f 100644 --- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp +++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp @@ -12,7 +12,7 @@ // char narrow(char_type c, char dfault) const; -// XFAIL: libcpp-has-no-wide-characters +// 
XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/input.output/iostreams.base/lit.local.cfg b/libcxx/test/std/input.output/iostreams.base/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/iostreams.base/lit.local.cfg +++ b/libcxx/test/std/input.output/iostreams.base/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/iostreams.requirements/lit.local.cfg b/libcxx/test/std/input.output/iostreams.requirements/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/iostreams.requirements/lit.local.cfg +++ b/libcxx/test/std/input.output/iostreams.requirements/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/stream.buffers/lit.local.cfg b/libcxx/test/std/input.output/stream.buffers/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/stream.buffers/lit.local.cfg +++ b/libcxx/test/std/input.output/stream.buffers/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/input.output/string.streams/lit.local.cfg b/libcxx/test/std/input.output/string.streams/lit.local.cfg index cebfbadef2a0e6..781b508d3e6e8f 100644 --- a/libcxx/test/std/input.output/string.streams/lit.local.cfg +++ b/libcxx/test/std/input.output/string.streams/lit.local.cfg @@ -1,3 +1,3 @@ # All non-trivial uses of 
iostreams require localization support -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/iterators/stream.iterators/lit.local.cfg b/libcxx/test/std/iterators/stream.iterators/lit.local.cfg index 4d0904117654bb..504efc647672bf 100644 --- a/libcxx/test/std/iterators/stream.iterators/lit.local.cfg +++ b/libcxx/test/std/iterators/stream.iterators/lit.local.cfg @@ -1,3 +1,3 @@ # stream iterators rely on the streams library, which requires localization -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.compile.pass.cpp index e0d81e8442610f..a9f9a6ea020b70 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp index 2e456ee5285ef6..ee28327294a67c 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // diff --git 
a/libcxx/test/std/language.support/support.limits/support.limits.general/iomanip.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/iomanip.version.compile.pass.cpp index b057f1cce0d7bb..b367c28a3e282f 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/iomanip.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/iomanip.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/istream.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/istream.version.compile.pass.cpp index 5bdc336bc3c704..8494787b8a92cc 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/istream.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/istream.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.compile.pass.cpp index 70f95b15d0457f..c0655b67988232 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/locale.version.compile.pass.cpp 
b/libcxx/test/std/language.support/support.limits/support.limits.general/locale.version.compile.pass.cpp index dd5cf37b1127aa..74b69d048f2fef 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/locale.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/locale.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/mutex.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/mutex.version.compile.pass.cpp index 2091c128dd8653..fca784e7a77e43 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/mutex.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/mutex.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/ostream.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/ostream.version.compile.pass.cpp index 0889b20b8a1a1f..9d582144476b76 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/ostream.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/ostream.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/regex.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/regex.version.compile.pass.cpp index 99be5df83ffb62..38b9969e929100 100644 --- 
a/libcxx/test/std/language.support/support.limits/support.limits.general/regex.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/regex.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.compile.pass.cpp index 72162b6c3dc511..7390b6df336240 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp index c804503fc4ec0c..9cd1ce0f06da04 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/stdatomic.h.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/stdatomic.h.version.compile.pass.cpp index a2e1aa026d4f88..8492b855fc62f1 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/stdatomic.h.version.compile.pass.cpp +++ 
b/libcxx/test/std/language.support/support.limits/support.limits.general/stdatomic.h.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/thread.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/thread.version.compile.pass.cpp index dbf70e9cee55b2..0f5c066fc0ffbb 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/thread.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/thread.version.compile.pass.cpp @@ -11,7 +11,7 @@ // // clang-format off -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/localization/lit.local.cfg b/libcxx/test/std/localization/lit.local.cfg index ed0331c463d645..de374016022128 100644 --- a/libcxx/test/std/localization/lit.local.cfg +++ b/libcxx/test/std/localization/lit.local.cfg @@ -1,3 +1,3 @@ # tests are obviously not supported when localization support is disabled -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp index a5dd77bd449bf5..b15540c37f8cc7 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp @@ -15,7 +15,7 @@ // explicit codecvt_byname(const char* nm, size_t refs = 0); // explicit codecvt_byname(const string& nm, size_t refs = 0); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters 
#include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp index a10aa7566f7b64..dd7fdb63fc52c5 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp @@ -12,7 +12,7 @@ // explicit codecvt(size_t refs = 0); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp index dde838575255d8..6da9a6da18daad 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp @@ -12,7 +12,7 @@ // bool always_noconv() const throw(); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp index 5a6a99d0ab0544..0e7e9f99c68a4e 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp @@ -12,7 +12,7 @@ // int encoding() const throw(); 
-// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp index b3c6f714969f85..075409fb27866d 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp @@ -14,7 +14,7 @@ // const externT* from, const externT* from_end, const externT*& from_next, // internT* to, internT* to_end, internT*& to_next) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp index 997712be299b15..7ff9c8eb1eb1ac 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp @@ -12,7 +12,7 @@ // int length(stateT& state, const externT* from, const externT* from_end, size_t max) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp index 2831e26d594316..6c5b223f89928e 100644 --- 
a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp @@ -12,7 +12,7 @@ // int max_length() const throw(); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp index 1fd7fafa388b1e..d037fbb4220993 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp @@ -14,7 +14,7 @@ // const internT* from, const internT* from_end, const internT*& from_next, // externT* to, externT* to_end, externT*& to_next) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp index ad61fb6824e24f..c5671971528362 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp @@ -13,7 +13,7 @@ // result unshift(stateT& state, // externT* to, externT* to_end, externT*& to_next) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // This is pretty much just an "are you 
breathing" test diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp index e108491cdcd210..7b2cea2aa09f04 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp @@ -20,7 +20,7 @@ // ... // }; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp index 392e37373f81b2..d08a71219fce85 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp @@ -13,7 +13,7 @@ // bool is(mask m, charT c) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp index 1c7c1f81fc399a..8aa1aa85d97e89 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp @@ -13,7 +13,7 @@ // const charT* do_is(const charT* low, const charT* high, mask* vec) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git 
a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp index c4cb4f7728af03..c1b470609ce939 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp @@ -8,7 +8,7 @@ // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_CA.ISO8859-1 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp index 1442f1a9e0cad2..e33486ccef300a 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp @@ -8,7 +8,7 @@ // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_CA.ISO8859-1 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp index 163bd7a501d236..540efde1ea6adc 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp @@ -13,7 +13,7 @@ // const charT* scan_is(mask m, const charT* low, const charT* high) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git 
a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp index e2c34f2527fd46..73e3c138575d37 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp @@ -13,7 +13,7 @@ // const charT* scan_not(mask m, const charT* low, const charT* high) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp index 6ecf65278be358..bafdfcea04600e 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp @@ -8,7 +8,7 @@ // REQUIRES: locale.en_US.UTF-8 // XFAIL: LIBCXX-AIX-FIXME -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp index 38157de78731ce..552eab1f2ab41a 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp @@ -8,7 +8,7 @@ // REQUIRES: locale.en_US.UTF-8 // XFAIL: LIBCXX-AIX-FIXME -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git 
a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp index c43e9e6123de6f..c40e644aadc087 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp @@ -12,7 +12,7 @@ // explicit ctype(size_t refs = 0); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp index df7981a33111a7..b2b5215484d856 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp @@ -12,7 +12,7 @@ // bool is(mask m, charT c) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp index f95e58e8273eb2..ca9f220a99b666 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp @@ -12,7 +12,7 @@ // const charT* do_is(const charT* low, const charT* high, mask* vec) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git 
a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp index 647724758c9a68..215abaf28cb6b2 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp @@ -12,7 +12,7 @@ // char narrow(charT c, char dfault) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp index db5f8ae85a2ab9..ab26e7777a38a0 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp @@ -12,7 +12,7 @@ // const charT* narrow(const charT* low, const charT*, char dfault, char* to) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp index 864c20fdf5f962..2078af74209eb0 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp @@ -12,7 +12,7 @@ // const charT* scan_is(mask m, const charT* low, const 
charT* high) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp index 41362cf0b64802..77f8c9bb26f4e3 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp @@ -12,7 +12,7 @@ // const charT* scan_not(mask m, const charT* low, const charT* high) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp index 50628ad26f19aa..53228f7a2c89f9 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp @@ -12,7 +12,7 @@ // charT tolower(charT) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp index d93d51d6f3fb99..04a663bbae8c80 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp +++ 
b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp @@ -12,7 +12,7 @@ // const charT* tolower(charT* low, const charT* high) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp index 3905161f10814a..a9275ddf03ce02 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp @@ -12,7 +12,7 @@ // charT toupper(charT) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp index 70bb0fd9d4fd71..976d2a9306fdb5 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp @@ -12,7 +12,7 @@ // const charT* toupper(charT* low, const charT* high) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp index 7cdc8327f7ebf6..ab927ad0cf4cb7 100644 --- 
a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp @@ -12,7 +12,7 @@ // charT widen(char c) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp index eb600d77cb5b28..55f6610ab0826e 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp @@ -12,7 +12,7 @@ // const char* widen(const char* low, const char* high, charT* to) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp index a1e7cedcef9019..cf70539654a506 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp @@ -17,7 +17,7 @@ // typedef CharT char_type; // }; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp index 1560125338bd5c..5928c87345afbd 100644 --- 
a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp index 2691bb42c58d70..25a718e369e564 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp @@ -9,7 +9,7 @@ // NetBSD does not support LC_TIME at the moment // XFAIL: netbsd -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // XFAIL: LIBCXX-AIX-FIXME // REQUIRES: locale.en_US.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp index 87eec9c7e1ce0d..ff241691cec843 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp @@ -9,7 +9,7 @@ // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.zh_CN.UTF-8 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp 
b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp index 6ab1d949eab8a5..86176f65a51f3f 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp @@ -10,7 +10,7 @@ // XFAIL: netbsd // XFAIL: LIBCXX-AIX-FIXME -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp index 39e3adf3f53156..c71e65bce5d8b0 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp index bdc4653ad0d28d..2813c5dec568c2 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // 
REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp index beb85fe11812db..c608381ab4146a 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp index 2af988ce69944a..e6a73d1139b96e 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp @@ -14,7 +14,7 @@ // get_date(iter_type s, iter_type end, ios_base& str, // ios_base::iostate& err, tm* t) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp index 0bd4ea325cd336..9a872e21d291a1 100644 --- 
a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp @@ -14,7 +14,7 @@ // get_monthname(iter_type s, iter_type end, ios_base& str, // ios_base::iostate& err, tm* t) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp index b137bea451cb0d..a7425a17f3d1e0 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp @@ -14,7 +14,7 @@ // get_time(iter_type s, iter_type end, ios_base& str, // ios_base::iostate& err, tm* t) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp index b328286720cb5a..2e63114c1f4aa1 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp @@ -14,7 +14,7 @@ // get_weekday(iter_type s, iter_type end, ios_base& str, // ios_base::iostate& err, tm* t) const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: 
no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp index 47b9ea434f94c9..2a7161092a1301 100644 --- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp +++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp @@ -16,7 +16,7 @@ // // unspecified // }; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // Not a portable test diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp index e281f1ae51bfb9..271117b8b97105 100644 --- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp +++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp @@ -16,7 +16,7 @@ // // unspecified // }; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // Not a portable test diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp index bc179f4d6d01f1..8d1ab9eead3a27 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp @@ -18,7 +18,7 @@ // explicit wbuffer_convert(streambuf* bytebuf, Codecvt* pcvt = new Codecvt, // state_type state = state_type()); // C++20 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp index 27ced20f23841d..881e7b3501add7 100644 --- 
a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp @@ -14,7 +14,7 @@ // This test is not entirely portable -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp index b07b5ee1f96d19..95d410b31bbf49 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp @@ -16,7 +16,7 @@ // This test is not entirely portable -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp index 72c7c8c6e823ef..3d5d3b3e3c8c13 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp @@ -12,7 +12,7 @@ // streambuf *rdbuf(streambuf *bytebuf); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp index 8a3ca75766f80d..e0246144da2f07 100644 --- 
a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp @@ -17,7 +17,7 @@ // This test is not entirely portable -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp index 8736da7a6e91af..6dca17ca76e8f3 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp @@ -12,7 +12,7 @@ // state_type state() const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp index 0f4a49b84fcb18..c201d20c6cfc72 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp @@ -10,7 +10,7 @@ // wbuffer_convert -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp index a198a9577dbf76..23ceedf9437c0f 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp +++ 
b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp @@ -16,7 +16,7 @@ // This test is not entirely portable -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp index 5ce556101e8207..5d5504ac10512f 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp @@ -12,7 +12,7 @@ // size_t converted() const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp index d4b19125ac3dc0..6c62c551ff36c9 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp @@ -15,7 +15,7 @@ // wstring_convert() : wstring_convert(new Codecvt) {} // C++20 // explicit wstring_convert(Codecvt* pcvt); // C++20 -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp index c5a3dd8f8cd06a..96f7b3b52d548a 100644 --- 
a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp @@ -12,7 +12,7 @@ // wstring_convert(Codecvt* pcvt, state_type state); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp index df386dfbd4708c..2ca8862c79e128 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp @@ -17,7 +17,7 @@ // wstring_convert(wstring_convert const&) = delete; // wstring_convert& operator=(wstring_convert const&) = delete; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp index 5ad3dbe4e9f2fa..59d73e84e3f8de 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp @@ -13,7 +13,7 @@ // wstring_convert(const byte_string& byte_err, // const wide_string& wide_err = wide_string()); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp 
b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp index bd0191de0cc19b..cfd9de68b2ac3c 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp @@ -15,7 +15,7 @@ // wide_string from_bytes(const byte_string& str); // wide_string from_bytes(const char* first, const char* last); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp index 1602b1a3360236..5ddfc647251517 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp @@ -12,7 +12,7 @@ // state_type state() const; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp index 5ccaca01c7a602..247376976a454a 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp @@ -15,7 +15,7 @@ // byte_string to_bytes(const wide_string& wstr); // byte_string to_bytes(const Elem* first, const Elem* last); -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git 
a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp index 88c4bcc012a264..82cb9dcfda5212 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp @@ -19,7 +19,7 @@ // typedef typename Codecvt::state_type state_type; // typedef typename wide_string::traits_type::int_type int_type; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/namespace/addressable_functions.sh.cpp b/libcxx/test/std/namespace/addressable_functions.sh.cpp index d149a6bb244921..172659f4395834 100644 --- a/libcxx/test/std/namespace/addressable_functions.sh.cpp +++ b/libcxx/test/std/namespace/addressable_functions.sh.cpp @@ -18,7 +18,7 @@ // RUN: %{exec} %t.exe // The functions checked below come from & friends -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp index 344caa2841c123..bed585fc18a879 100644 --- a/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp index 9d9f4011b1dc23..b7111086257627 100644 --- 
a/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp index 68e5d8d0e4fa25..1df5615a465700 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp @@ -14,7 +14,7 @@ // explicit discard_block_engine(result_type s = default_seed); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp index 2e598664ea9d9e..67c13763019f76 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp @@ -14,7 +14,7 @@ // template explicit discard_block_engine(Sseq& q); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp index c5acba18e5d172..c912287c1cab87 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp index b54aa24a42c521..7ffbd3656bf8c4 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp @@ -14,7 +14,7 @@ // explicit independent_bits_engine(result_type s = default_seed); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp index 791ba3c1c675d8..73f760711b1c81 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp @@ -14,7 +14,7 @@ // template explicit independent_bits_engine(Sseq& q); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp index f7cc60961fae69..52045c57ed307b 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git 
a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp index b0202291958268..e7b576256393eb 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp @@ -14,7 +14,7 @@ // explicit shuffle_order_engine(result_type s = default_seed); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp index 3ab141e799273c..1b9c6a49ddbed4 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp @@ -14,7 +14,7 @@ // template explicit shuffle_order_engine(Sseq& q); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp index 2970c1a2215cfe..9521bc78c13195 100644 --- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp index 3acefa1ad1ef61..b92a5838c32b3e 100644 --- 
a/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp @@ -14,7 +14,7 @@ // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{11.0|12.0}} -// UNSUPPORTED: libcpp-has-no-random-device +// UNSUPPORTED: no-random-device // diff --git a/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp b/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp index 3f9ea85bfc0516..39fbfe838077c7 100644 --- a/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-random-device +// UNSUPPORTED: no-random-device // diff --git a/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp index 3705ac14532f78..3339a54c7e5343 100644 --- a/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-random-device +// UNSUPPORTED: no-random-device // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp index b55c7b2da079d0..c3c0a08405791f 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git 
a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp index 2a02546138ea1f..9213d2bac5a615 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp @@ -16,7 +16,7 @@ // Test the fix for https://llvm.org/PR44847. // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp index accc6c3aea73cb..35f579903a4605 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp index c8b47227d6c5af..394f6aab60d241 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp index a8ca74c23ca26b..63c4b0d1f68463 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp index 2873aefae57032..0793473b60dba3 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp index 8a2c3805d7293e..0294ff52ea3574 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/io.pass.cpp index a6aca373767a9f..a1adc064b6f08a 100644 --- 
a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp index 92abb3a49a5f70..37641549e899a4 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp index fc136a3be38c00..dcbeaa39a30733 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/io.pass.cpp index 1e5e9f693eaab0..f6a7c1117fb735 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/io.pass.cpp @@ -6,7 
+6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp index 0cf3a5bf95ddb7..a7525b6f104fee 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp index 19ac0ea02f3a91..df925f8c009e62 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp index c5cb803a77e653..bb9d2d8d5f0931 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git 
a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp index e5e634af2dbb42..d78f19d6b8b710 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp index 06b3325b9f95bb..89f7fbac53d396 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp index 3b21092069e674..7485dae0cdbb62 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp index 7c761c045cafee..72a66c7be77636 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp index f01b38b93c51fc..e3983b9c2d0c6e 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/io.pass.cpp index a52b253a6b260f..ad01da4706d9a2 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/io.pass.cpp index 7935e6c2c8d0e9..2ee9683306c437 100644 --- 
a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp index e78bce24c22e91..807e698ef1be81 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp @@ -16,7 +16,7 @@ // explicit linear_congruential_engine(result_type s); // C++20 // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp index f9e9b2d19b2ac6..7f7b75e09c1104 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp index 22c5fa61b12962..a8140566f07af9 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp @@ -18,7 +18,7 @@ // explicit mersenne_twister_engine(result_type s); // C++20 // 
Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp index a820468f1f93af..ff028929747a68 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp @@ -16,7 +16,7 @@ // template explicit mersenne_twister_engine(Sseq& q); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp index cdb71638ff6876..c83e53f20305fd 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp index f6d1cc03065080..291dbf70ff1e1c 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp @@ -16,7 +16,7 @@ // explicit subtract_with_carry_engine(result_type s); // C++20 // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp index 0a8d7e661b4d31..16db08361dd8db 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp @@ -14,7 +14,7 @@ // template explicit subtract_with_carry_engine(Sseq& q); // Serializing/deserializing the state of the RNG requires iostreams -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp index 472ed2dc257161..b5a8e7750127e2 100644 --- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/re/lit.local.cfg b/libcxx/test/std/re/lit.local.cfg index a2664746cc18ca..9626384f580ed9 100644 --- a/libcxx/test/std/re/lit.local.cfg +++ b/libcxx/test/std/re/lit.local.cfg @@ -1,3 +1,3 @@ # Unfortunately, uses locales in regex_traits -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/re/re.syn/wcmatch.pass.cpp b/libcxx/test/std/re/re.syn/wcmatch.pass.cpp index 3f35901cb4a392..5b23042c520c97 100644 --- a/libcxx/test/std/re/re.syn/wcmatch.pass.cpp +++ b/libcxx/test/std/re/re.syn/wcmatch.pass.cpp @@ -10,7 +10,7 @@ // typedef match_results wcmatch; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wcregex_iterator.pass.cpp b/libcxx/test/std/re/re.syn/wcregex_iterator.pass.cpp index 78d354c21421c3..c0f2e8aca2411e 100644 --- 
a/libcxx/test/std/re/re.syn/wcregex_iterator.pass.cpp +++ b/libcxx/test/std/re/re.syn/wcregex_iterator.pass.cpp @@ -10,7 +10,7 @@ // typedef regex_iterator wcregex_iterator; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wcregex_token_iterator.pass.cpp b/libcxx/test/std/re/re.syn/wcregex_token_iterator.pass.cpp index 2f88b895455cb6..5fd0ecb485a527 100644 --- a/libcxx/test/std/re/re.syn/wcregex_token_iterator.pass.cpp +++ b/libcxx/test/std/re/re.syn/wcregex_token_iterator.pass.cpp @@ -10,7 +10,7 @@ // typedef regex_token_iterator wcregex_token_iterator; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wcsub_match.pass.cpp b/libcxx/test/std/re/re.syn/wcsub_match.pass.cpp index 0b71ee71a4b332..6f86199978d469 100644 --- a/libcxx/test/std/re/re.syn/wcsub_match.pass.cpp +++ b/libcxx/test/std/re/re.syn/wcsub_match.pass.cpp @@ -10,7 +10,7 @@ // typedef sub_match wcsub_match; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wregex.pass.cpp b/libcxx/test/std/re/re.syn/wregex.pass.cpp index 15df351c42e186..1bb2af9d816f1d 100644 --- a/libcxx/test/std/re/re.syn/wregex.pass.cpp +++ b/libcxx/test/std/re/re.syn/wregex.pass.cpp @@ -10,7 +10,7 @@ // typedef basic_regex wregex; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wsmatch.pass.cpp b/libcxx/test/std/re/re.syn/wsmatch.pass.cpp index 5518a5c2a26ad7..2c26ddad8264fe 100644 --- a/libcxx/test/std/re/re.syn/wsmatch.pass.cpp +++ b/libcxx/test/std/re/re.syn/wsmatch.pass.cpp @@ -10,7 +10,7 @@ // typedef match_results wsmatch; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wsregex_iterator.pass.cpp 
b/libcxx/test/std/re/re.syn/wsregex_iterator.pass.cpp index 461b1471d9fd55..5e0fa272ce9bf9 100644 --- a/libcxx/test/std/re/re.syn/wsregex_iterator.pass.cpp +++ b/libcxx/test/std/re/re.syn/wsregex_iterator.pass.cpp @@ -10,7 +10,7 @@ // typedef regex_iterator wsregex_iterator; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wsregex_token_iterator.pass.cpp b/libcxx/test/std/re/re.syn/wsregex_token_iterator.pass.cpp index 520671c723fe74..9a33568cb33d3c 100644 --- a/libcxx/test/std/re/re.syn/wsregex_token_iterator.pass.cpp +++ b/libcxx/test/std/re/re.syn/wsregex_token_iterator.pass.cpp @@ -10,7 +10,7 @@ // typedef regex_token_iterator wsregex_token_iterator; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/re/re.syn/wssub_match.pass.cpp b/libcxx/test/std/re/re.syn/wssub_match.pass.cpp index cf53f2888d140b..f963c70bbfabc8 100644 --- a/libcxx/test/std/re/re.syn/wssub_match.pass.cpp +++ b/libcxx/test/std/re/re.syn/wssub_match.pass.cpp @@ -10,7 +10,7 @@ // typedef sub_match wssub_match; -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp b/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp index a47f0888e40983..bf27795154bc93 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp @@ -15,7 +15,7 @@ // RUN: %{cxx} %{flags} %{compile_flags} %s %{link_flags} -DTU2 -c -o %t.tu2.o // RUN: %{cxx} %{flags} %t.tu1.o %t.tu2.o %{link_flags} -o %t.exe -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization #include #include diff --git 
a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/lit.local.cfg b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/lit.local.cfg index 8f204af4a58cf6..77804094932251 100644 --- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/lit.local.cfg +++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/lit.local.cfg @@ -1,3 +1,3 @@ # These std::string functions require iostreams, which requires localization -if 'libcpp-has-no-localization' in config.available_features: +if 'no-localization' in config.available_features: config.unsupported = True diff --git a/libcxx/test/std/strings/c.strings/cwchar.pass.cpp b/libcxx/test/std/strings/c.strings/cwchar.pass.cpp index 1ac1f28800910d..4e9909e97a95dc 100644 --- a/libcxx/test/std/strings/c.strings/cwchar.pass.cpp +++ b/libcxx/test/std/strings/c.strings/cwchar.pass.cpp @@ -8,7 +8,7 @@ // -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/c.strings/cwctype.pass.cpp b/libcxx/test/std/strings/c.strings/cwctype.pass.cpp index 294b8de8bb55f7..171ebf8866b696 100644 --- a/libcxx/test/std/strings/c.strings/cwctype.pass.cpp +++ b/libcxx/test/std/strings/c.strings/cwctype.pass.cpp @@ -8,7 +8,7 @@ // -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign2.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign2.pass.cpp index 8ef8cfe6290a1f..3348efbd451232 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign2.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign2.pass.cpp @@ -13,7 +13,7 @@ // static void assign(char_type& c1, const char_type& c2); // 
constexpr in C++17 -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp index 8c58696725a80e..bc60599258e602 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp @@ -12,7 +12,7 @@ // static char_type* assign(char_type* s, size_t n, char_type a); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/compare.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/compare.pass.cpp index 493e3d0cf4a41a..fb9617d6335adf 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/compare.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/compare.pass.cpp @@ -13,7 +13,7 @@ // static int compare(const char_type* s1, const char_type* s2, size_t n); // constexpr in C++17 -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp index 02fdb8198a1e48..b1bfba9db94698 100644 --- 
a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp @@ -12,7 +12,7 @@ // static char_type* copy(char_type* s1, const char_type* s2, size_t n); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp index a1f9be8f51f8f0..5cfb1f4efa6527 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp @@ -12,7 +12,7 @@ // static constexpr int_type eof(); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp index 51b31d9eff6f04..9ca8ea1d2b1e23 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp @@ -12,7 +12,7 @@ // static constexpr bool eq(char_type c1, char_type c2); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp 
b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp index f5a5cc047d1598..d3536d714ed620 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp @@ -12,7 +12,7 @@ // static constexpr bool eq_int_type(int_type c1, int_type c2); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/find.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/find.pass.cpp index 11e0f8e0d1d3dc..2293723354225b 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/find.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/find.pass.cpp @@ -13,7 +13,7 @@ // static const char_type* find(const char_type* s, size_t n, const char_type& a); // constexpr in C++17 -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/length.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/length.pass.cpp index 12ebe97c3af3ea..d9d62a3b124877 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/length.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/length.pass.cpp @@ -13,7 +13,7 @@ // static size_t length(const char_type* s); // constexpr in C++17 -// UNSUPPORTED: 
libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp index 2bd23e99db1bed..4ec86aa1434ee3 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp @@ -12,7 +12,7 @@ // static constexpr bool lt(char_type c1, char_type c2); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp index d09663346462d1..d91048c52914cc 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp @@ -12,7 +12,7 @@ // static char_type* move(char_type* s1, const char_type* s2, size_t n); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp index c20ba810d1bf2e..bebf917df20a04 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp +++ 
b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp @@ -12,7 +12,7 @@ // static constexpr int_type not_eof(int_type c); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp index cea9e46616a25c..4ded256b458243 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp @@ -12,7 +12,7 @@ // static constexpr char_type to_char_type(int_type c); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp index 02cc92e4e15b34..b1259876b6731b 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp @@ -12,7 +12,7 @@ // static constexpr int_type to_int_type(char_type c); -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp 
index 165b766b69909e..5e0e3a1ae5b007 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp @@ -16,7 +16,7 @@ // typedef streampos pos_type; // typedef mbstate_t state_type; -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters #include #include diff --git a/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp b/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp index 7909cc860859ac..530982defd4cd1 100644 --- a/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp +++ b/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp b/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp index fb7dffeef368ab..a240851b22d24c 100644 --- a/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp +++ b/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/strings/string.view/string.view.nonmem/quoted.pass.cpp b/libcxx/test/std/strings/string.view/string.view.nonmem/quoted.pass.cpp index 3e7f85ec04a5f3..18f9e10defcb6a 100644 --- a/libcxx/test/std/strings/string.view/string.view.nonmem/quoted.pass.cpp +++ b/libcxx/test/std/strings/string.view/string.view.nonmem/quoted.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// 
UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/thread/futures/futures.async/async.pass.cpp b/libcxx/test/std/thread/futures/futures.async/async.pass.cpp index 2fdafec138f58e..4e328db6cce900 100644 --- a/libcxx/test/std/thread/futures/futures.async/async.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.async/async.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.async/async.verify.cpp b/libcxx/test/std/thread/futures/futures.async/async.verify.cpp index 176459eb571c3b..997d27a8ceacc7 100644 --- a/libcxx/test/std/thread/futures/futures.async/async.verify.cpp +++ b/libcxx/test/std/thread/futures/futures.async/async.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14, c++17 // diff --git a/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp b/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp index 4fe7cd05433984..d0cee304f8b829 100644 --- a/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // There's currently no release of OS X whose dylib contains the patch for diff --git a/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp b/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp index 9d69550c8a6c77..91a70c2f8b2957 100644 --- a/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp 
+++ b/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp index b712254fed127c..ea250277cac069 100644 --- a/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp index 379191dac068ee..c2fa17c359d4be 100644 --- a/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp index 9c0dade18e8967..1d8d14b2329b67 100644 --- a/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git 
a/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp index 3e5aed2e670b40..4c8f9da6c2bfc3 100644 --- a/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp index c28e157704701d..ff87e5ad1be31b 100644 --- a/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp index 31997dcd564eaf..549ea2152c8072 100644 --- a/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.future_error/code.pass.cpp b/libcxx/test/std/thread/futures/futures.future_error/code.pass.cpp index 53acba393fb8a6..99d2ade796e458 100644 --- a/libcxx/test/std/thread/futures/futures.future_error/code.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.future_error/code.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp b/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp index d8ea4ce2de9d58..8e77c01e73cea4 100644 --- a/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp b/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp index 0b093fc235bcbb..f62212ec26234c 100644 --- a/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // LWG 2056 changed the values of future_errc, so if we're using new headers // with an old library we'll get incorrect messages. diff --git a/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp b/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp index bf4442d537fe11..49e2bcbbc8511f 100644 --- a/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // Libc++'s enum class emulation does not allow static_cast(0) to work. 
diff --git a/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp b/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp index 3b6304d4745071..d73625374f3200 100644 --- a/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.overview/is_error_code_enum_future_errc.pass.cpp b/libcxx/test/std/thread/futures/futures.overview/is_error_code_enum_future_errc.pass.cpp index c7e2c2aebaa352..f6f93f0e13f223 100644 --- a/libcxx/test/std/thread/futures/futures.overview/is_error_code_enum_future_errc.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.overview/is_error_code_enum_future_errc.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.overview/launch.pass.cpp b/libcxx/test/std/thread/futures/futures.overview/launch.pass.cpp index 590ddd05b9e7ea..37ad2ff4a8645f 100644 --- a/libcxx/test/std/thread/futures/futures.overview/launch.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.overview/launch.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp index cbb2c2e7e25fe7..2e8b2aa53df949 100644 --- a/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/copy_assign.verify.cpp b/libcxx/test/std/thread/futures/futures.promise/copy_assign.verify.cpp index 4724f58d12b3f8..96d7e572f2a06a 100644 --- a/libcxx/test/std/thread/futures/futures.promise/copy_assign.verify.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/copy_assign.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.promise/copy_ctor.verify.cpp b/libcxx/test/std/thread/futures/futures.promise/copy_ctor.verify.cpp index fa0f54282cdb3a..b4f3d7f452f626 100644 --- a/libcxx/test/std/thread/futures/futures.promise/copy_ctor.verify.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/copy_ctor.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp index 9994057e62127c..80a5f0a61df3ee 100644 --- a/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/dtor.pass.cpp index d7ada1bfed4c05..2ee79eaa2b6e0a 100644 --- a/libcxx/test/std/thread/futures/futures.promise/dtor.pass.cpp +++ 
b/libcxx/test/std/thread/futures/futures.promise/dtor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/get_future.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/get_future.pass.cpp index 2e9bce246d5eb1..e90e395a96b849 100644 --- a/libcxx/test/std/thread/futures/futures.promise/get_future.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/get_future.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp index ac504efbd60278..350db2acf2f9a8 100644 --- a/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp index 9775e6655835cd..b473c1b346fdf1 100644 --- a/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp 
b/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp index 004e179346a2d7..bc6bc26dbbb01e 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-exceptions -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp index ab666641d139a5..a1a351ec02b041 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-exceptions -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_lvalue.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_lvalue.pass.cpp index 790ed50023e462..f61eee8f779c95 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_lvalue.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_lvalue.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp index d3e1da2197a9ae..314293cbdf28be 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp +++ 
b/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp index a2d2555361d4b8..f5a6ab937d6111 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-threads, no-exceptions +// UNSUPPORTED: no-threads, no-exceptions // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp index 5a982eabab8de9..b51f549f26d333 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++03 +// UNSUPPORTED: no-threads, c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp index da9b7a8d8d0dd4..7bf34b390b097b 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// 
UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp index 9786973c0c6c45..05dca4dbd053c0 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_value_const.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_value_const.pass.cpp index 222c83b6933005..c4c89550272a5a 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_value_const.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_value_const.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp index 0506d869cd636e..9f12e498b84ec5 100644 --- a/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-exceptions -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp index fbf77b121d474b..efb32a58f58f3f 100644 --- 
a/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp index ad75ff033b6761..7c859a999aced3 100644 --- a/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/copy_assign.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/copy_assign.pass.cpp index 0ee06e11008319..f7f8d272c28c7f 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/copy_assign.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/copy_assign.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/copy_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/copy_ctor.pass.cpp index 393657f8b1eab0..35f19e44ecba10 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/copy_ctor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/copy_ctor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git 
a/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp index c3c52acceaac86..f120fa03b9e66d 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp index 90f164203724a8..a808eb94c298a9 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp index 027073fde8cc32..4a65daf640dada 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-exceptions -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/get.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/get.pass.cpp index 61835df782cd8e..62da170a90c422 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/get.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/get.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp index cc94b294062f02..cc3f3f85a84c2d 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp index 07a5099efdc19a..ecc0e81d7477df 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp index 26a5f1f49f82df..12c71ab05e26ed 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp index 
44b88f38d0f0f1..80674387a38d18 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp index fc356dc0d7724d..904735f5176ae6 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_copy.fail.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_copy.fail.cpp index 9c1e68b51204f7..09db1a80c10107 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_copy.fail.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_copy.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp index dbab27ba3f08e1..64db057d804918 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor1.fail.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor1.fail.cpp index 89c7c2a4cafec8..3c61c76ea9802a 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor1.fail.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor1.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp index 2bc5c62758add8..3ab59909cfafbe 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor2.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_copy.fail.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_copy.fail.cpp index 2129b0233c57f6..5250ae739b97d7 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_copy.fail.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_copy.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git 
a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp index 3217847a7d9f3d..491e8d97b74171 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp index e35f428ccf3243..b3581a751e39d6 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp index 788afbdffdfe3b..1b2177ed8f4444 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // packaged_task allocator support was removed in C++17 (LWG 2921) // REQUIRES: c++11 || c++14 diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp 
b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp index e3ffa66022805c..eb80e749199e1f 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/dtor.pass.cpp index e6fa378210da41..606dccc345dbf3 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/dtor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/dtor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/get_future.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/get_future.pass.cpp index c918bad4676870..7c5931d18bc30c 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/get_future.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/get_future.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/make_ready_at_thread_exit.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/make_ready_at_thread_exit.pass.cpp index 3863b995189873..71c63b54358d78 100644 --- 
a/libcxx/test/std/thread/futures/futures.task/futures.task.members/make_ready_at_thread_exit.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/make_ready_at_thread_exit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/operator.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/operator.pass.cpp index f3a27ccf772cff..e6c833ef23a83f 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/operator.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/operator.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/reset.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/reset.pass.cpp index e2e11faea66a55..0d3b76b82a9981 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/reset.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/reset.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp index c56510366e6aca..f0e8bfe92baa50 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp index 5cf4064b00521e..3ac2712d0412cd 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.compile.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.compile.pass.cpp index ce55773d4e8d50..44d9facc94119b 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.compile.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // packaged_task allocator support was removed in C++17 (LWG 2976) // REQUIRES: c++11 || c++14 diff --git a/libcxx/test/std/thread/futures/futures.unique_future/copy_assign.verify.cpp b/libcxx/test/std/thread/futures/futures.unique_future/copy_assign.verify.cpp index c3c2923ea1e061..284ccd86390a37 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/copy_assign.verify.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/copy_assign.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// 
UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/copy_ctor.verify.cpp b/libcxx/test/std/thread/futures/futures.unique_future/copy_ctor.verify.cpp index 56ba432e3c92c8..442489025b61ff 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/copy_ctor.verify.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/copy_ctor.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp index d3dc86e3c40954..8dc1384ac3e189 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp index 643fb6a4259a6e..58c95d5486e3e9 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-exceptions -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/get.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/get.pass.cpp index 3262a2b7ce57c6..9c5b3340e997f4 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/get.pass.cpp +++ 
b/libcxx/test/std/thread/futures/futures.unique_future/get.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp index 551162459ebca1..0378e8719d4ee2 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++03 +// UNSUPPORTED: no-threads, c++03 // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp index 3c6d364ae08a28..168ba1953e1846 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++03 +// UNSUPPORTED: no-threads, c++03 // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp index 6a96417d6bb57c..4cc8e26030d0a2 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp 
b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp index 2b9b0f66e1178c..2385156c3154b0 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp index 3f41bc1a81cb5a..5d5107a17159c1 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // ALLOW_RETRIES: 3 diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp index d4e27d9d9761fe..00c1b8dfedd5ff 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/macro.pass.cpp b/libcxx/test/std/thread/macro.pass.cpp index 5d24d5f36a3799..c435aa206f2234 100644 --- a/libcxx/test/std/thread/macro.pass.cpp +++ b/libcxx/test/std/thread/macro.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git 
a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp index 2f4149ea2ef263..c982c274ed507b 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp index f3c7ffb78b731c..da75565d9ff738 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp index 1a0fea1ff08a08..14503b432fe94f 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. 
diff --git a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp index 6c436434a5cab2..d9760803d2e851 100644 --- a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. diff --git a/libcxx/test/std/thread/thread.barrier/max.pass.cpp b/libcxx/test/std/thread/thread.barrier/max.pass.cpp index c48d03f5b9338b..0060e31ccd3476 100644 --- a/libcxx/test/std/thread/thread.barrier/max.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/max.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp b/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp index 1de9bc7e22175c..b13c926ee703e8 100644 --- a/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp b/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp index 48a077455b368f..19da74ca657356 100644 --- a/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // notify_all_at_thread_exit(...) requires move semantics to transfer the // unique_lock. diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp index 5291e119eef3dd..9baa73e05df55c 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp index 6f2d6b97fb700f..530cc1264f3b69 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp index b095cd06e4e4f0..e0d587dbca0e9a 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git 
a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp index 821fbdda5b62cc..b0c476fa19dec8 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp index a2dc4466f0d693..1a8222b11e9cff 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp index e0b9736c65b37e..42150207c3c4d8 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp index bdea74eb04ba8a..872bcb6d8a57db 100644 --- 
a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp index d640e5724cf225..15feba55616b00 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp index 5b5143fa0113a2..03205e68dca673 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp index 6ab61c0ec90572..fb8bd6e3806939 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp +++ 
b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp index 29e8a8ed65371d..0cf70ad3c517b2 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp index 7ac99550b11c12..6f5111a03b78a5 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp index 05825ea26c946f..17124908deb3c7 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// 
UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp index 6223698178e267..4fc436b728bb9a 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp index a29fd9d7bd7c44..ffce91be0e59fb 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp index bbd93f22f59641..95acef90470ec3 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp 
b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp index 2fe1d94ab8920e..0b560022bc67c9 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp index 2b88acb7b048ec..a5e28137bef8c4 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_terminates.sh.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_terminates.sh.cpp index c507917b531730..fd61634bd96e5d 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_terminates.sh.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_terminates.sh.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: no-exceptions -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp index 435bc967d87b14..0f2334393d8331 
100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp index 8a0b827a01b980..aa60ae4715df6d 100644 --- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp +++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp index ac7d76f7eb8a2b..37b0c6e70c04d7 100644 --- a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. 
diff --git a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp index 27c59bfe1759b0..fe9578bd79cb93 100644 --- a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. diff --git a/libcxx/test/std/thread/thread.latch/max.pass.cpp b/libcxx/test/std/thread/thread.latch/max.pass.cpp index 5506cd0e70a6df..8b9176c8cac570 100644 --- a/libcxx/test/std/thread/thread.latch/max.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/max.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp index c960f822ac181d..6533b6cc57b8e9 100644 --- a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/lock.pass.cpp index 8dd837b0a06c1e..c6866fa58fdb70 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // This test hangs forever when built against libstdc++ (Oct 2016). // UNSUPPORTED: stdlib=libstdc++ diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/try_lock.pass.cpp index f41f70d8edf986..ac508b5f9161f7 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock.algorithm/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp index db6a2e35f9c598..cb105f20db597d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.fail.cpp index 383c1539a60bc7..82f672891c4522 100644 --- 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.pass.cpp index ddcc0805a85594..6025b0c3b465bd 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/mutex.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp index 03d7351b4a5b5e..8b10d9dab8f2a4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/adopt_lock.pass.cpp index 491d2e9b110bb8..83b29ff669dcad 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/adopt_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/adopt_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // 
-// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/assign.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/assign.fail.cpp index 7269b195aa5593..9d5fd6049e0c78 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/assign.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/assign.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/copy.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/copy.fail.cpp index 4caab8f52c151d..37e60b885e083c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/copy.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/copy.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.fail.cpp index 663dfd0b989013..44f1ee37e6afa7 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.pass.cpp index 816eb7c0ea34db..48a96f90254e3e 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/mutex.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/types.pass.cpp index 8caa621aa417e2..7c938745e7dfc3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/types.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.scoped/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp index 3f3868e1289f40..e901591275e386 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp index 07b1b32108910a..1db771d218ed40 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp index 612fc34e8acdea..9bbbb9301abf10 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp index 2e9f64379834fa..09bdb835b45455 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp index b4ee76cf2660c3..da4db9e58d5cb9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp index 4e868298337f26..5c698e031f7b23 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp index 66e810b02cc5c6..4c4f04afd6f065 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp index 074c906b2d0f30..7ab3c2ca00efb9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp index 256d4facd82fd0..d3555747b51aa6 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp index c9189987cb1135..e633251d7288a0 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp index 57148253a4341d..ae387cdc8446df 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp index bb62472a64a5b6..7f8189c2985770 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp index 1a931636588919..fb4afdd4d91014 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/unlock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/unlock.pass.cpp index 838baab8ddf39f..210824e1a7ee96 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/unlock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/unlock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: 
libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp index 09590ae373cf1a..aeca2022ba9c9d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp index 97dfcbde15e3a5..25e5cf624caa80 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp index 71e9541b3ebd0a..9dfdd0b0ded2fa 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp index 7c2fdcddf082a3..216a6a04bb5e5a 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp index 8f15135d81c1cb..1186671ad04d28 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp index 
23eae754bcfef0..abe9198cd48adc 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp index 495360d622cedf..7c1f5f3fe40291 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp index 7dc374c2323881..2034a26b1d9132 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp index a98f128827a958..0af918c1e20eee 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++03 +// UNSUPPORTED: no-threads, c++03 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp index a912314542b600..cce0eb5fb90572 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++03 +// UNSUPPORTED: no-threads, c++03 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp index e6ae4809c71de7..bba78cf24178a6 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp index 9d723aa71b0b65..b2e80659abb359 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp index a16dcbe4f33f52..fa00da655b0df8 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp index 156ea8ae690786..4bfabab919f177 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp index d806218439eaed..b85bbace3233c1 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp index 1ebbb6b7e7ea25..39d3bde9c12a83 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp index 2dce5a57113486..b1b5af08d4df9d 100644 --- 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp index 88ac9f843565f7..83e1d7eaa62fd6 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp index a6166ceda869c1..1a0fba6146f5cb 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp index 6c7da1c0cdff2b..3f6f2137fe24af 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp index 1f0a0e529dc49e..4cd72b6bd8d2fb 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp index 8641bcbeab7334..fc12d3baea202c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp index ecb262009b5030..03d268c4b9306a 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp index eb26114bb1034f..4f2d59c3d333d4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp index f9f3050b09604a..dd2f5239645f91 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// 
UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp index 5304f79485f9f5..ea05eb7d023a9f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp index c25e9a21768751..9b192fbcd621e8 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp index 85aa6f5d75efc1..d8497888f59bba 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.lock/types.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/types.fail.cpp index e8d57b17119cdd..b4acac85b7e307 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/types.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/types.fail.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp index c03ca4622aabf7..2776e00aa31ade 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp index f754e7b7031d39..d8115cd1bf3a08 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp index 7ceea1f575f0e4..6f91cc3d3ab112 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp index 7bc74aa4676ca3..645b74bca920ef 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp index 2e27d157f2662c..43dc38d7cab517 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp index 1ce9a4cde192e7..51b64b9aaddbb4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp index 060b910ae3afa8..801e2e738c5cb2 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp index 7fc5442c64fb8d..4f95c5cf6b5579 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // shared_mutex was introduced in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp index 27086a84919c9c..182ba376ddea58 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // shared_mutex was introduced in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp index 76c6cebbee49e8..0402a89cf67277 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // shared_mutex was introduced in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp index 4d97129cd38d20..08257e4f1428f6 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp index fec48900fab4c1..c83877356bfc46 100644 --- 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp index f1d18c80d6a8e9..92727eadbd9b35 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp index fc9eb36937ff41..98c48adc1a7c30 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp index cd27c79cc94223..1456606fb75d24 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp index def36ca5e35f49..c4836a574e9dc3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp index aad2c43473be7c..1ec0814e207edd 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp index 172742161d81a5..20eda45677f4e2 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: 
c++03, c++11 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp index b61dba0630c06a..f4a610c4cf4a59 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp index 16e9291876333d..fdf5899ad58a15 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp index eda6453d58eeb8..2cfaa7985bf887 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp index 02ee39876a7a38..a3a37d586b6834 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp index 3118893efd21ce..19d03a8459eb55 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp index 5dd2b7cf2fa4c3..9589c8d4a50238 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp index b2e8e72e282fa8..d13a0ad435aea8 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp index a61fd99368f327..a2a91bc26a7dc7 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp index 30cfcaccf9dfb7..02d0874c080694 100644 --- 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp index fc757def0bbfff..acfa5560962ae3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp index 85649041c948aa..23385c100807d5 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp index afde76c2c45629..73e2e7a3a5ab9e 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp index d58cee5d4a9b39..91c6f1c0643240 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp index 6e36056a736d74..5915698553f5ae 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp index 4a9fd3ec522e01..b0b27801c8c74d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp index 37e4410c0af98f..5c5807d6736c92 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp 
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/call_once.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/call_once.pass.cpp index 4748c6866aebae..7708efcb54c7f9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/call_once.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/call_once.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp index 0084f0c1a5cd28..1bc608afef3536 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/assign.compile.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/assign.compile.fail.cpp index c5da06c36c9fd6..73d86a0a756484 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/assign.compile.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/assign.compile.fail.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/copy.compile.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/copy.compile.fail.cpp index 70b145aee01921..f84ce8fba0c1e9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/copy.compile.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/copy.compile.fail.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/default.pass.cpp index c6b48f373a00a4..e99f9410adf425 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.onceflag/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp index d7182078b9e1d1..32dd77cc8871c7 100644 --- a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. 
diff --git a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp index 562681e1958b67..fe1d49ffd7c676 100644 --- a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. diff --git a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp index a3569abc229c6e..9ffee4df04977e 100644 --- a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp index 8f48640a194596..5a3026bc351e9f 100644 --- a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // diff --git a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp index 88c17113eab730..31a9264aaac932 100644 --- a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// 
UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. diff --git a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp index 12054a26d7cc27..9cdee8e0bd057d 100644 --- a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. diff --git a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp index 28559bf8dbf446..d6c898e4785cdc 100644 --- a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 // This test requires the dylib support introduced in D68480, which shipped in macOS 11.0. 
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp index 332754ae2a5241..ecca7e60d08e0f 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp index 371d8266dc2a7b..b5d0b9167102f5 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp index ccae438018c225..9c31a2c65b2d43 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp 
b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp index 404eb70c04c477..fc3d5c12227464 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp @@ -12,7 +12,7 @@ // template thread(F&& f, Args&&... args); -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: sanitizer-new-delete #include diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp index 2e3af5d14fb5c7..9e0c50df7c6b3d 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp index 3611fc623d1412..b9f733c4850da5 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/robust_against_adl.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/robust_against_adl.pass.cpp index 
400f95263156ec..9ba8d950b7b056 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/robust_against_adl.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/robust_against_adl.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp index 1af063c02e07ce..77665050d1848c 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp index fddc9538b2f8ca..4095ac0b8ce58c 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp index 7da109752b968a..141903c9c62bea 100644 --- 
a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp index 4d2bcfaee6e1a0..6a146200ce0e71 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp index bd34f4b3cabc92..52725c9f119c8f 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp index e58f1d7d0d4a83..8d083e29451b85 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp +++ 
b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp index 3ef7f362532ae5..e1438da1d19d75 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp index f04a0ebdf299f6..9050d9cce2921c 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-threads +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/thread_id.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/thread_id.pass.cpp index 80bcbf97c1dccf..7ce5593c8af211 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/thread_id.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/thread_id.pass.cpp @@ -6,7 +6,7 @@ 
// //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/detach.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/detach.pass.cpp index 03dc79d2d83791..a6b3a34d85706a 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/detach.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/detach.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp index 314b1e072deb9c..2682750e1861e5 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/join.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/join.pass.cpp index 2286f74eda72fa..509d0197c0332b 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/join.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/join.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// 
UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp index d3c1405c5a1531..32fed906946d04 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp index f390899aa5d339..ab5f891fcf619c 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp index 65e99e015af958..e00e16a8399387 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git 
a/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp index 4149f5a43a6ce9..5b0df20b93c79d 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp index 908635f30b7698..3b4ae203d2c3bb 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp index bcb1a2f0f871a9..8084dc328fccf6 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/time/time.clock/time.clock.file/now.pass.cpp b/libcxx/test/std/time/time.clock/time.clock.file/now.pass.cpp index 6011424f6e7eec..ad3daf770a018d 100644 --- a/libcxx/test/std/time/time.clock/time.clock.file/now.pass.cpp +++ b/libcxx/test/std/time/time.clock/time.clock.file/now.pass.cpp @@ -7,7 +7,7 @@ 
//===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/time/time.clock/time.clock.file/to_from_sys.pass.cpp b/libcxx/test/std/time/time.clock/time.clock.file/to_from_sys.pass.cpp index 525894df8a9288..df84bee308b9ed 100644 --- a/libcxx/test/std/time/time.clock/time.clock.file/to_from_sys.pass.cpp +++ b/libcxx/test/std/time/time.clock/time.clock.file/to_from_sys.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-filesystem-library +// UNSUPPORTED: no-filesystem // Filesystem is supported on Apple platforms starting with macosx10.15. // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/time/time.clock/time.clock.steady/consistency.pass.cpp b/libcxx/test/std/time/time.clock/time.clock.steady/consistency.pass.cpp index 5e7ad46c0cb815..fa0ff4f50db836 100644 --- a/libcxx/test/std/time/time.clock/time.clock.steady/consistency.pass.cpp +++ b/libcxx/test/std/time/time.clock/time.clock.steady/consistency.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-monotonic-clock +// UNSUPPORTED: no-monotonic-clock // Due to C++17 inline variables ASAN flags this test as containing an ODR // violation because Clock::is_steady is defined in both the dylib and this TU. 
diff --git a/libcxx/test/std/time/time.clock/time.clock.steady/now.pass.cpp b/libcxx/test/std/time/time.clock/time.clock.steady/now.pass.cpp index 14dc9a9832dc4b..1d439a39e3fb2d 100644 --- a/libcxx/test/std/time/time.clock/time.clock.steady/now.pass.cpp +++ b/libcxx/test/std/time/time.clock/time.clock.steady/now.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-monotonic-clock +// UNSUPPORTED: no-monotonic-clock // diff --git a/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp index 65c61ce5b1e8bf..c1a46994c047db 100644 --- a/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp +++ b/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp @@ -13,9 +13,9 @@ // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // steady_clock requires threads. -// UNSUPPORTED: libcpp-has-no-threads -// UNSUPPORTED: libcpp-has-no-random-device -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-threads +// UNSUPPORTED: no-random-device +// UNSUPPORTED: no-localization // XFAIL: LIBCXX-AIX-FIXME diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp index 80e911ad24c6cc..d36f189d7bad94 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp @@ -7,7 +7,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// UNSUPPORTED: libcpp-has-no-wide-characters +// UNSUPPORTED: no-wide-characters // Validate it works regardless of the signedness of `char`. 
// RUN: %{cxx} %{flags} %{compile_flags} -fsigned-char -fsyntax-only %s diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp index f3740e240adde7..62fa5fa8823439 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp @@ -7,7 +7,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// XFAIL: libcpp-has-no-wide-characters +// XFAIL: no-wide-characters // diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp index 65938ea6d9c7df..bb4fcdc1d90d01 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // REQUIRES: locale.en_US.UTF-8 diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp index a8c353578a5922..a8752bfc3043b1 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // 
UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // REQUIRES: locale.en_US.UTF-8 diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp index 63e6478ce34831..92f1f4b122a540 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // TODO FMT Evaluate gcc-11 status // UNSUPPORTED: gcc-11 diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp index 8856704555c8cc..e45331b1dd1863 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // TODO FMT Evaluate gcc-11 status // UNSUPPORTED: gcc-11 diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp index b1777d71329ff5..6c2f2c530f1cbe 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp @@ -6,7 +6,7 @@ 
//===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // TODO FMT Evaluate gcc-11 status // UNSUPPORTED: gcc-11 diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp index 6b59a83ed5bf0b..a487d558ebf44e 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // TODO FMT Evaluate gcc-11 status // UNSUPPORTED: gcc-11 diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp index f37cd4e62970c0..504ebc3dbeabcb 100644 --- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // The issue is caused in __format_spec::__determine_grouping(). 
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp index 5649cd9c21e937..092773ea0c9445 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // TODO FMT Evaluate gcc-11 status // UNSUPPORTED: gcc-11 diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp index 5f1921fa0a8027..fe41ad0001c583 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format // TODO FMT Evaluate gcc-11 status // UNSUPPORTED: gcc-11 diff --git a/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.fail.cpp b/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.fail.cpp index 583ca60e7681f9..12c9f2018083f3 100644 --- a/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.fail.cpp +++ b/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.fail.cpp @@ -9,7 +9,7 @@ // Because we don't have a functioning decltype in C++03 // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git 
a/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp b/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp index d09e029878b074..68c9a27e1e4a71 100644 --- a/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp +++ b/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // Because we don't have a functioning decltype in C++03 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong.pass.cpp index f37b6e0d1919b0..37be6ceea3e094 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong_explicit.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong_explicit.pass.cpp index 32cce75a4874c2..3965863b86ccf5 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong_explicit.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_strong_explicit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git 
a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak.pass.cpp index e00c8471d1d54a..6dd04f924a105e 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak_explicit.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak_explicit.pass.cpp index 7f697fcbca842f..4837fa9793a5e8 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak_explicit.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_compare_exchange_weak_explicit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange.pass.cpp index 700d56d3f19c20..f488e0ed1d14e1 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// 
UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange_explicit.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange_explicit.pass.cpp index c38c47a2b315c7..1945f7bba6dc06 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange_explicit.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_exchange_explicit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_is_lock_free.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_is_lock_free.pass.cpp index dc112305479657..37f7d12eda2c62 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_is_lock_free.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_is_lock_free.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load.pass.cpp index efbe6f9f45a541..1b9a15ac92ac1f 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: 
libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load_explicit.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load_explicit.pass.cpp index 565f66433db729..5c2970133328f3 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load_explicit.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_load_explicit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store.pass.cpp index 5b12be4d552976..5b7bd5fad69c47 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store_explicit.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store_explicit.pass.cpp index 8e05844a6d3237..5712190421308d 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store_explicit.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared.atomic/atomic_store_explicit.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads +// 
UNSUPPORTED: no-threads // diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp index 6c3c9e47f1059b..34706aa3c8683e 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // diff --git a/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_in.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_in.pass.cpp index e2667e5379878d..e04b38f58932c8 100644 --- a/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_in.pass.cpp +++ b/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_in.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // test: diff --git a/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp index 79621e25cc5a1d..dfdb9fe9585163 100644 --- a/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp +++ b/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // test: diff --git a/libcxx/test/support/test.support/make_string_header.pass.cpp b/libcxx/test/support/test.support/make_string_header.pass.cpp index 
041e71efb404a3..d0871669750712 100644 --- a/libcxx/test/support/test.support/make_string_header.pass.cpp +++ b/libcxx/test/support/test.support/make_string_header.pass.cpp @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-localization +// UNSUPPORTED: no-localization // "support/make_string.h" diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 1e31677fb75769..b8e82806c9f2e7 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -764,23 +764,23 @@ def add_version_header(tc): # should be marked as UNSUPPORTED, because including # is a hard error in that case. lit_markup = { - "barrier": ["UNSUPPORTED: libcpp-has-no-threads"], - "filesystem": ["UNSUPPORTED: libcpp-has-no-filesystem-library"], + "barrier": ["UNSUPPORTED: no-threads"], + "filesystem": ["UNSUPPORTED: no-filesystem"], "format": ["UNSUPPORTED: libcpp-has-no-incomplete-format"], - "iomanip": ["UNSUPPORTED: libcpp-has-no-localization"], - "ios": ["UNSUPPORTED: libcpp-has-no-localization"], - "iostream": ["UNSUPPORTED: libcpp-has-no-localization"], - "istream": ["UNSUPPORTED: libcpp-has-no-localization"], - "latch": ["UNSUPPORTED: libcpp-has-no-threads"], - "locale": ["UNSUPPORTED: libcpp-has-no-localization"], - "mutex": ["UNSUPPORTED: libcpp-has-no-threads"], - "ostream": ["UNSUPPORTED: libcpp-has-no-localization"], + "iomanip": ["UNSUPPORTED: no-localization"], + "ios": ["UNSUPPORTED: no-localization"], + "iostream": ["UNSUPPORTED: no-localization"], + "istream": ["UNSUPPORTED: no-localization"], + "latch": ["UNSUPPORTED: no-threads"], + "locale": ["UNSUPPORTED: no-localization"], + "mutex": ["UNSUPPORTED: no-threads"], + "ostream": ["UNSUPPORTED: no-localization"], "ranges": ["UNSUPPORTED: libcpp-has-no-incomplete-ranges"], - "regex": ["UNSUPPORTED: 
libcpp-has-no-localization"], - "semaphore": ["UNSUPPORTED: libcpp-has-no-threads"], - "shared_mutex": ["UNSUPPORTED: libcpp-has-no-threads"], - "stdatomic.h": ["UNSUPPORTED: libcpp-has-no-threads"], - "thread": ["UNSUPPORTED: libcpp-has-no-threads"], + "regex": ["UNSUPPORTED: no-localization"], + "semaphore": ["UNSUPPORTED: no-threads"], + "shared_mutex": ["UNSUPPORTED: no-threads"], + "stdatomic.h": ["UNSUPPORTED: no-threads"], + "thread": ["UNSUPPORTED: no-threads"], } def get_std_dialects(): diff --git a/libcxx/utils/generate_header_inclusion_tests.py b/libcxx/utils/generate_header_inclusion_tests.py index 01a9deeac8b75b..b6ff3e4b332f29 100755 --- a/libcxx/utils/generate_header_inclusion_tests.py +++ b/libcxx/utils/generate_header_inclusion_tests.py @@ -92,22 +92,22 @@ def get_libcxx_paths(): # should be marked as UNSUPPORTED, because including # is a hard error in that case. lit_markup = { - "barrier": ["UNSUPPORTED: libcpp-has-no-threads"], - "filesystem": ["UNSUPPORTED: libcpp-has-no-filesystem-library"], + "barrier": ["UNSUPPORTED: no-threads"], + "filesystem": ["UNSUPPORTED: no-filesystem"], "format": ["UNSUPPORTED: libcpp-has-no-incomplete-format"], - "iomanip": ["UNSUPPORTED: libcpp-has-no-localization"], - "ios": ["UNSUPPORTED: libcpp-has-no-localization"], - "iostream": ["UNSUPPORTED: libcpp-has-no-localization"], - "istream": ["UNSUPPORTED: libcpp-has-no-localization"], - "latch": ["UNSUPPORTED: libcpp-has-no-threads"], - "locale": ["UNSUPPORTED: libcpp-has-no-localization"], - "mutex": ["UNSUPPORTED: libcpp-has-no-threads"], - "ostream": ["UNSUPPORTED: libcpp-has-no-localization"], + "iomanip": ["UNSUPPORTED: no-localization"], + "ios": ["UNSUPPORTED: no-localization"], + "iostream": ["UNSUPPORTED: no-localization"], + "istream": ["UNSUPPORTED: no-localization"], + "latch": ["UNSUPPORTED: no-threads"], + "locale": ["UNSUPPORTED: no-localization"], + "mutex": ["UNSUPPORTED: no-threads"], + "ostream": ["UNSUPPORTED: no-localization"], "ranges": 
["UNSUPPORTED: libcpp-has-no-incomplete-ranges"], - "regex": ["UNSUPPORTED: libcpp-has-no-localization"], - "semaphore": ["UNSUPPORTED: libcpp-has-no-threads"], - "shared_mutex": ["UNSUPPORTED: libcpp-has-no-threads"], - "thread": ["UNSUPPORTED: libcpp-has-no-threads"] + "regex": ["UNSUPPORTED: no-localization"], + "semaphore": ["UNSUPPORTED: no-threads"], + "shared_mutex": ["UNSUPPORTED: no-threads"], + "thread": ["UNSUPPORTED: no-threads"] } diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index 180949ef41ec6e..859f5d1467cacd 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -174,17 +174,20 @@ def _hasSuitableClangTidy(cfg): # is defined after including <__config_site>, add a Lit feature called # `libcpp-xxx-yyy-zzz`. When a macro is defined to a specific value # (e.g. `_LIBCPP_ABI_VERSION=2`), the feature is `libcpp-xxx-yyy-zzz=`. +# +# Note that features that are more strongly tied to libc++ are named libcpp-foo, +# while features that are more general in nature are not prefixed with 'libcpp-'. 
macros = { - '_LIBCPP_HAS_NO_MONOTONIC_CLOCK': 'libcpp-has-no-monotonic-clock', - '_LIBCPP_HAS_NO_THREADS': 'libcpp-has-no-threads', + '_LIBCPP_HAS_NO_MONOTONIC_CLOCK': 'no-monotonic-clock', + '_LIBCPP_HAS_NO_THREADS': 'no-threads', '_LIBCPP_HAS_THREAD_API_EXTERNAL': 'libcpp-has-thread-api-external', '_LIBCPP_HAS_THREAD_API_PTHREAD': 'libcpp-has-thread-api-pthread', '_LIBCPP_NO_VCRUNTIME': 'libcpp-no-vcruntime', '_LIBCPP_ABI_VERSION': 'libcpp-abi-version', - '_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY': 'libcpp-has-no-filesystem-library', - '_LIBCPP_HAS_NO_RANDOM_DEVICE': 'libcpp-has-no-random-device', - '_LIBCPP_HAS_NO_LOCALIZATION': 'libcpp-has-no-localization', - '_LIBCPP_HAS_NO_WIDE_CHARACTERS': 'libcpp-has-no-wide-characters', + '_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY': 'no-filesystem', + '_LIBCPP_HAS_NO_RANDOM_DEVICE': 'no-random-device', + '_LIBCPP_HAS_NO_LOCALIZATION': 'no-localization', + '_LIBCPP_HAS_NO_WIDE_CHARACTERS': 'no-wide-characters', '_LIBCPP_HAS_NO_INCOMPLETE_FORMAT': 'libcpp-has-no-incomplete-format', '_LIBCPP_HAS_NO_INCOMPLETE_RANGES': 'libcpp-has-no-incomplete-ranges', '_LIBCPP_HAS_NO_UNICODE': 'libcpp-has-no-unicode', diff --git a/libcxx/utils/libcxx/test/target_info.py b/libcxx/utils/libcxx/test/target_info.py index 198d2eebe979e9..a4b0852670a0f6 100644 --- a/libcxx/utils/libcxx/test/target_info.py +++ b/libcxx/utils/libcxx/test/target_info.py @@ -96,7 +96,7 @@ def add_cxx_compile_flags(self, flags): '-D__STDC_CONSTANT_MACROS'] def add_cxx_link_flags(self, flags): - enable_threads = ('libcpp-has-no-threads' not in + enable_threads = ('no-threads' not in self.full_config.config.available_features) llvm_unwinder = self.full_config.get_lit_bool('llvm_unwinder', False) shared_libcxx = self.full_config.get_lit_bool('enable_shared', True) diff --git a/libcxxabi/test/cxa_thread_atexit_test.pass.cpp b/libcxxabi/test/cxa_thread_atexit_test.pass.cpp index 16186e361df4a5..d641591697b3c7 100644 --- a/libcxxabi/test/cxa_thread_atexit_test.pass.cpp +++ 
b/libcxxabi/test/cxa_thread_atexit_test.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // REQUIRES: linux #include diff --git a/libcxxabi/test/forced_unwind3.pass.cpp b/libcxxabi/test/forced_unwind3.pass.cpp index 3468813a7dfd94..50f7139428fc7a 100644 --- a/libcxxabi/test/forced_unwind3.pass.cpp +++ b/libcxxabi/test/forced_unwind3.pass.cpp @@ -10,7 +10,7 @@ // what pthread_cancel does. // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: no-exceptions #include diff --git a/libcxxabi/test/guard_threaded_test.pass.cpp b/libcxxabi/test/guard_threaded_test.pass.cpp index fc94dd970a1189..78dafba7f7ecd9 100644 --- a/libcxxabi/test/guard_threaded_test.pass.cpp +++ b/libcxxabi/test/guard_threaded_test.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads // UNSUPPORTED: no-exceptions #define TESTING_CXA_GUARD diff --git a/libcxxabi/test/libcxxabi/test/config.py b/libcxxabi/test/libcxxabi/test/config.py index be93f153c75377..4adc566c9edb7b 100644 --- a/libcxxabi/test/libcxxabi/test/config.py +++ b/libcxxabi/test/libcxxabi/test/config.py @@ -50,7 +50,7 @@ def configure_compile_flags(self): self.cxx.compile_flags += ['-funwind-tables'] if not self.get_lit_bool('enable_threads', True): self.cxx.compile_flags += ['-D_LIBCXXABI_HAS_NO_THREADS'] - self.config.available_features.add('libcpp-has-no-threads') + self.config.available_features.add('no-threads') super(Configuration, self).configure_compile_flags() def configure_compile_flags_header_includes(self): diff --git a/libcxxabi/test/test_fallback_malloc.pass.cpp b/libcxxabi/test/test_fallback_malloc.pass.cpp index 55713d2520de2e..5f429ef0e3ef0f 100644 --- a/libcxxabi/test/test_fallback_malloc.pass.cpp +++ 
b/libcxxabi/test/test_fallback_malloc.pass.cpp @@ -11,7 +11,7 @@ #include <__threading_support> -// UNSUPPORTED: modules-build && libcpp-has-no-threads +// UNSUPPORTED: modules-build && no-threads // Necessary because we include a private source file of libc++abi, which // only understands _LIBCXXABI_HAS_NO_THREADS. diff --git a/libcxxabi/test/thread_local_destruction_order.pass.cpp b/libcxxabi/test/thread_local_destruction_order.pass.cpp index 765cd7cc8a23aa..e25636001a346f 100644 --- a/libcxxabi/test/thread_local_destruction_order.pass.cpp +++ b/libcxxabi/test/thread_local_destruction_order.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: no-threads #include #include From 03a09079d6cd6fd227868e5cd76c67f49df325ef Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 27 May 2022 13:06:45 -0700 Subject: [PATCH 737/908] [CMake] Make FindLibEdit.cmake more robust FindLibEdit uses pkg-config to find the necessary flags, but this may break with cross-compilation, because the PkgConfig module in CMake doesn't respect the SYSROOT specified in a toolchain file. Instead of taking the parameters from pkg-config for granted, we check whether our compiler can actually include and link against the library. 
Fixes #55445 Fixes #55671 Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D126450 --- cmake/Modules/FindLibEdit.cmake | 69 +++++++++++++++------------------ 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/cmake/Modules/FindLibEdit.cmake b/cmake/Modules/FindLibEdit.cmake index 94c6150cefda79..7e62d4d839ae17 100644 --- a/cmake/Modules/FindLibEdit.cmake +++ b/cmake/Modules/FindLibEdit.cmake @@ -13,55 +13,50 @@ # LibEdit_LIBRARIES - libraries to link # LibEdit_VERSION_STRING - version number -if(LibEdit_INCLUDE_DIRS AND LibEdit_LIBRARIES) - set(LibEdit_FOUND TRUE) -else() - find_package(PkgConfig QUIET) - pkg_check_modules(PC_LIBEDIT QUIET libedit) +find_package(PkgConfig QUIET) +pkg_check_modules(PC_LIBEDIT QUIET libedit) - find_path(LibEdit_INCLUDE_DIRS - NAMES - histedit.h - HINTS - ${PC_LIBEDIT_INCLUDEDIR} - ${PC_LIBEDIT_INCLUDE_DIRS} - "${CMAKE_INSTALL_FULL_INCLUDEDIR}") - find_library(LibEdit_LIBRARIES - NAMES - edit libedit - HINTS - ${PC_LIBEDIT_LIBDIR} - ${PC_LIBEDIT_LIBRARY_DIRS} - "${CMAKE_INSTALL_FULL_LIBDIR}") +find_path(LibEdit_INCLUDE_DIRS NAMES histedit.h HINTS ${PC_LIBEDIT_INCLUDE_DIRS}) +find_library(LibEdit_LIBRARIES NAMES edit HINTS ${PC_LIBEDIT_LIBRARY_DIRS}) - if(LibEdit_INCLUDE_DIRS AND EXISTS "${LibEdit_INCLUDE_DIRS}/histedit.h") +include(CheckIncludeFile) +if(LibEdit_INCLUDE_DIRS AND EXISTS "${LibEdit_INCLUDE_DIRS}/histedit.h") + cmake_push_check_state() + list(APPEND CMAKE_REQUIRED_INCLUDES ${LibEdit_INCLUDE_DIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES ${LibEdit_LIBRARIES}) + check_include_file(histedit.h HAVE_HISTEDIT_H) + cmake_pop_check_state() + if (HAVE_HISTEDIT_H) file(STRINGS "${LibEdit_INCLUDE_DIRS}/histedit.h" - libedit_major_version_str - REGEX "^#define[ \t]+LIBEDIT_MAJOR[ \t]+[0-9]+") + libedit_major_version_str + REGEX "^#define[ \t]+LIBEDIT_MAJOR[ \t]+[0-9]+") string(REGEX REPLACE "^#define[ \t]+LIBEDIT_MAJOR[ \t]+([0-9]+)" "\\1" - LIBEDIT_MAJOR_VERSION "${libedit_major_version_str}") + 
libedit_major_version "${libedit_major_version_str}") file(STRINGS "${LibEdit_INCLUDE_DIRS}/histedit.h" - libedit_minor_version_str - REGEX "^#define[ \t]+LIBEDIT_MINOR[ \t]+[0-9]+") + libedit_minor_version_str + REGEX "^#define[ \t]+LIBEDIT_MINOR[ \t]+[0-9]+") string(REGEX REPLACE "^#define[ \t]+LIBEDIT_MINOR[ \t]+([0-9]+)" "\\1" - LIBEDIT_MINOR_VERSION "${libedit_minor_version_str}") + libedit_minor_version "${libedit_minor_version_str}") set(LibEdit_VERSION_STRING "${libedit_major_version}.${libedit_minor_version}") + else() + set(LibEdit_INCLUDE_DIRS "") + set(LibEdit_LIBRARIES "") endif() - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(LibEdit - FOUND_VAR - LibEdit_FOUND - REQUIRED_VARS - LibEdit_INCLUDE_DIRS - LibEdit_LIBRARIES - VERSION_VAR - LibEdit_VERSION_STRING) - mark_as_advanced(LibEdit_INCLUDE_DIRS LibEdit_LIBRARIES) endif() +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LibEdit + FOUND_VAR + LibEdit_FOUND + REQUIRED_VARS + LibEdit_INCLUDE_DIRS + LibEdit_LIBRARIES + VERSION_VAR + LibEdit_VERSION_STRING) +mark_as_advanced(LibEdit_INCLUDE_DIRS LibEdit_LIBRARIES) + if (LibEdit_FOUND AND NOT TARGET LibEdit::LibEdit) add_library(LibEdit::LibEdit UNKNOWN IMPORTED) set_target_properties(LibEdit::LibEdit PROPERTIES From b09e54541a926a62c5379086ab1a710508ef518e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 May 2022 13:10:47 -0700 Subject: [PATCH 738/908] [RISCV] Use template version of SignExtend64 for constant extends. NFC We were inconsistent about which one we used. 
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 10 +++++----- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 7d6e27f8e7808a..26f3614770c7a2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -646,13 +646,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { int64_t Imm = ConstNode->getSExtValue(); // If the upper XLen-16 bits are not used, try to convert this to a simm12 // by sign extending bit 15. - if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && + if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && hasAllHUsers(Node)) - Imm = SignExtend64(Imm, 16); + Imm = SignExtend64<16>(Imm); // If the upper 32-bits are not used try to convert this into a simm32 by // sign extending bit 32. if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) - Imm = SignExtend64(Imm, 32); + Imm = SignExtend64<32>(Imm); ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); return; @@ -970,7 +970,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { uint64_t ShiftedC1 = C1 << ConstantShift; // If this RV32, we need to sign extend the constant. if (XLen == 32) - ShiftedC1 = SignExtend64(ShiftedC1, 32); + ShiftedC1 = SignExtend64<32>(ShiftedC1); // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); @@ -2168,7 +2168,7 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { // First the LUI. uint64_t Imm = Op1.getOperand(0).getConstantOperandVal(0); Imm <<= 12; - Imm = SignExtend64(Imm, 32); + Imm = SignExtend64<32>(Imm); // Then the ADDI. 
uint64_t LoImm = cast(Op1.getOperand(1))->getSExtValue(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 67445adb3e1759..16a71e1cf16643 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1991,7 +1991,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // our vector and clear our accumulated data. if (I != 0 && I % NumViaIntegerBits == 0) { if (NumViaIntegerBits <= 32) - Bits = SignExtend64(Bits, 32); + Bits = SignExtend64<32>(Bits); SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT)); @@ -2007,7 +2007,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // Insert the (remaining) scalar value into position in our integer // vector type. if (NumViaIntegerBits <= 32) - Bits = SignExtend64(Bits, 32); + Bits = SignExtend64<32>(Bits); SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT)); @@ -2155,7 +2155,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // On RV64, sign-extend from 32 to 64 bits where possible in order to // achieve better constant materializion. if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) - SplatValue = SignExtend64(SplatValue, 32); + SplatValue = SignExtend64<32>(SplatValue); // Since we can't introduce illegal i64 types at this stage, we can only // perform an i64 splat on RV32 if it is its own sign-extended value. 
That From d4905a7b20b19383f84dd5249c9dae5325cb7305 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 27 May 2022 12:51:54 -0700 Subject: [PATCH 739/908] [RISCV] Add a vsetvli PRE test involving non-1 LMUL --- .../RISCV/rvv/vsetvli-insert-crossbb.ll | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index b554b11a6ab88d..ce8eabdfb41816 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -832,6 +832,36 @@ fallthrough: ret %res } +define @pre_lmul( %x, %y, i1 %cond) nounwind { +; CHECK-LABEL: pre_lmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: beqz a1, .LBB18_2 +; CHECK-NEXT: # %bb.1: # %if +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: .LBB18_2: # %if.end +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0) + %a = call @llvm.riscv.vadd.nxv2i32( undef, %x, %y, i64 %vl) + br i1 %cond, label %if, label %if.end + +if: + ; Deliberately change vtype - this could be an unknown call, but the broader + ; code quality is distractingly bad + tail call i64 @llvm.riscv.vsetvlimax.i64(i64 2, i64 1) + br label %if.end + +if.end: + %b = call @llvm.riscv.vadd.nxv2i32( undef, %a, %y, i64 %vl) + ret %b +} + declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64) declare @llvm.riscv.vle.nxv1f64.i64(, * nocapture, i64) declare @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(, , , i64) From 0fbe3f3f486e01448121f7931a4ca29fac1504ab Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Fri, 27 May 2022 12:42:46 -0700 Subject: [PATCH 740/908] [mlir][sparse] Fixes 
C++98 warning The semicolons were introduced in D126105 in order to correct clang-format, but I forgot this file must be compiled as C++98 rather than C++11. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D126561 --- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index ccc8c7a19387ae..3a595fdb26e934 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -1551,14 +1551,17 @@ FOREVERY_O(IMPL_SPARSEINDICES) FOREVERY_SIMPLEX_V(IMPL_ADDELT) IMPL_ADDELT(C64, complex64) // Marked static because it's not part of the public API. -// (The `static` keyword confuses clang-format, so the extraneous trailing -// semicolon is required to teach clang-format not to indent the prototype -// of `_mlir_ciface_addEltC32` below.) -static IMPL_ADDELT(C32ABI, complex32); +// NOTE: the `static` keyword confuses clang-format here, causing +// the strange indentation of the `_mlir_ciface_addEltC32` prototype. +// In C++11 we can add a semicolon after the call to `IMPL_ADDELT` +// and that will correct clang-format. Alas, this file is compiled +// in C++98 mode where that semicolon is illegal (and there's no portable +// macro magic to license a no-op semicolon at the top level). +static IMPL_ADDELT(C32ABI, complex32) #undef IMPL_ADDELT -void *_mlir_ciface_addEltC32(void *coo, float r, float i, - StridedMemRefType *iref, - StridedMemRefType *pref) { + void *_mlir_ciface_addEltC32(void *coo, float r, float i, + StridedMemRefType *iref, + StridedMemRefType *pref) { return _mlir_ciface_addEltC32ABI(coo, complex32(r, i), iref, pref); } @@ -1595,14 +1598,12 @@ FOREVERY_V(IMPL_GETNEXT) FOREVERY_SIMPLEX_V(IMPL_LEXINSERT) IMPL_LEXINSERT(C64, complex64) // Marked static because it's not part of the public API. 
-// (The `static` keyword confuses clang-format, so the extraneous trailing -// semicolon is required to teach clang-format not to indent the prototype -// of `_mlir_ciface_lexInsertC32` below.) -static IMPL_LEXINSERT(C32ABI, complex32); +// NOTE: see the note for `_mlir_ciface_addEltC32ABI` +static IMPL_LEXINSERT(C32ABI, complex32) #undef IMPL_LEXINSERT -void _mlir_ciface_lexInsertC32(void *tensor, - StridedMemRefType *cref, float r, - float i) { + void _mlir_ciface_lexInsertC32(void *tensor, + StridedMemRefType *cref, + float r, float i) { _mlir_ciface_lexInsertC32ABI(tensor, cref, complex32(r, i)); } From 542a83c3622225f27c72d86c7af1fe1c56621116 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 May 2022 14:39:30 -0700 Subject: [PATCH 741/908] [RISCV] Correct load/store alignments in sink-splat-operands.ll. NFC These should be aligned to the natural alignment of the element. Probably copy/paste mistake from the i32 tests. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D126567 --- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 77 +++++++------------ 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index fb69d3cca636e0..b3669c283e76a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -3775,15 +3775,12 @@ define void @sink_splat_mul_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_mul_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB67_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vse8.v v8, (a0) +; 
CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: bnez a2, .LBB67_1 @@ -3798,10 +3795,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %0 = getelementptr inbounds i64, i64* %a, i64 %index %1 = bitcast i64* %0 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 %2 = mul <4 x i64> %wide.load, %broadcast.splat %3 = bitcast i64* %0 to <4 x i64>* - store <4 x i64> %2, <4 x i64>* %3, align 4 + store <4 x i64> %2, <4 x i64>* %3, align 8 %index.next = add nuw i64 %index, 4 %4 = icmp eq i64 %index.next, 1024 br i1 %4, label %for.cond.cleanup, label %vector.body @@ -3814,15 +3811,12 @@ define void @sink_splat_add_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_add_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB68_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vadd.vx v8, v8, a1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: bnez a2, .LBB68_1 @@ -3837,10 +3831,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %0 = getelementptr inbounds i64, i64* %a, i64 %index %1 = bitcast i64* %0 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 %2 = add <4 x i64> %wide.load, %broadcast.splat %3 = bitcast i64* %0 to <4 x i64>* - store <4 x i64> %2, <4 x i64>* %3, align 4 + store <4 x i64> %2, <4 x i64>* %3, align 8 %index.next = add nuw i64 %index, 4 %4 = icmp eq i64 
%index.next, 1024 br i1 %4, label %for.cond.cleanup, label %vector.body @@ -3853,15 +3847,12 @@ define void @sink_splat_sub_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_sub_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB69_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsub.vx v8, v8, a1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: bnez a2, .LBB69_1 @@ -3876,10 +3867,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %0 = getelementptr inbounds i64, i64* %a, i64 %index %1 = bitcast i64* %0 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 %2 = sub <4 x i64> %wide.load, %broadcast.splat %3 = bitcast i64* %0 to <4 x i64>* - store <4 x i64> %2, <4 x i64>* %3, align 4 + store <4 x i64> %2, <4 x i64>* %3, align 8 %index.next = add nuw i64 %index, 4 %4 = icmp eq i64 %index.next, 1024 br i1 %4, label %for.cond.cleanup, label %vector.body @@ -3892,15 +3883,12 @@ define void @sink_splat_rsub_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_rsub_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB70_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a0) ; 
CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: bnez a2, .LBB70_1 @@ -3915,10 +3903,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %0 = getelementptr inbounds i64, i64* %a, i64 %index %1 = bitcast i64* %0 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 %2 = sub <4 x i64> %broadcast.splat, %wide.load %3 = bitcast i64* %0 to <4 x i64>* - store <4 x i64> %2, <4 x i64>* %3, align 4 + store <4 x i64> %2, <4 x i64>* %3, align 8 %index.next = add nuw i64 %index, 4 %4 = icmp eq i64 %index.next, 1024 br i1 %4, label %for.cond.cleanup, label %vector.body @@ -3931,15 +3919,12 @@ define void @sink_splat_and_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_and_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB71_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: bnez a2, .LBB71_1 @@ -3954,10 +3939,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %0 = getelementptr inbounds i64, i64* %a, i64 %index %1 = bitcast i64* %0 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 %2 = and <4 x i64> %wide.load, %broadcast.splat %3 = bitcast i64* %0 to <4 x i64>* - store <4 x i64> %2, <4 x i64>* %3, align 4 + store <4 x i64> %2, <4 x i64>* %3, align 8 %index.next = add nuw i64 %index, 4 %4 = icmp eq i64 %index.next, 1024 br i1 %4, label 
%for.cond.cleanup, label %vector.body @@ -3970,15 +3955,12 @@ define void @sink_splat_or_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_or_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB72_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 32 ; CHECK-NEXT: bnez a2, .LBB72_1 @@ -3993,10 +3975,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %0 = getelementptr inbounds i64, i64* %a, i64 %index %1 = bitcast i64* %0 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 %2 = or <4 x i64> %wide.load, %broadcast.splat %3 = bitcast i64* %0 to <4 x i64>* - store <4 x i64> %2, <4 x i64>* %3, align 4 + store <4 x i64> %2, <4 x i64>* %3, align 8 %index.next = add nuw i64 %index, 4 %4 = icmp eq i64 %index.next, 1024 br i1 %4, label %for.cond.cleanup, label %vector.body @@ -4009,15 +3991,12 @@ define void @sink_splat_xor_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_xor_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB73_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: 
addi a0, a0, 32 ; CHECK-NEXT: bnez a2, .LBB73_1 @@ -4032,10 +4011,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %0 = getelementptr inbounds i64, i64* %a, i64 %index %1 = bitcast i64* %0 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %1, align 4 + %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 %2 = xor <4 x i64> %wide.load, %broadcast.splat %3 = bitcast i64* %0 to <4 x i64>* - store <4 x i64> %2, <4 x i64>* %3, align 4 + store <4 x i64> %2, <4 x i64>* %3, align 8 %index.next = add nuw i64 %index, 4 %4 = icmp eq i64 %index.next, 1024 br i1 %4, label %for.cond.cleanup, label %vector.body From 9191078707390c32b691ff84633ab9450b380ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 27 May 2022 16:37:43 +0300 Subject: [PATCH 742/908] [lldb] Fix cross compiling on macOS When cross compiling, a separate nested cmake is spawned, for building host code generation tools such as lldb-tblgen. When cross compiling on macOS, the nested native build would trigger the lldb check for libc++, if testing is enabled (which it is by default). (Even if `LLDB_INCLUDE_TESTS=OFF` is set on the main build, it has to be passed separately in `CROSS_TOOLCHAIN_FLAGS_NATIVE` to reach the nested build.) Skip this check when building the host tools when cross compiling, as the user won't try to run tests in that nested build. (Alternatively, we could consider disabling all the `*_INCLUDE_TESTS` by default in the nested host tools build.) 
Differential Revision: https://reviews.llvm.org/D126557 --- lldb/test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 7a55bf47b5d342..e23ac6651a557d 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -103,7 +103,7 @@ if(TARGET clang) add_lldb_test_dependency(tsan) endif() - if(APPLE) + if(APPLE AND NOT LLVM_TARGET_IS_CROSSCOMPILE_HOST) # FIXME: Standalone builds should import the cxx target as well. if(LLDB_BUILT_STANDALONE) # For now check that the include directory exists. From de20fb72adb45bb3a876f960daf567a9b96c3412 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Fri, 27 May 2022 23:39:52 +0900 Subject: [PATCH 743/908] [bazel] BLAKE3: Adopt aarch64 and x86_64. FIXME: arm(32) may be applicable here. I haven't tested yet. Differential Revision: https://reviews.llvm.org/D126543 --- .../llvm-project-overlay/llvm/BUILD.bazel | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 570df56dda1a27..2e9faad29af966 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -193,7 +193,19 @@ cc_library( "lib/Support/BLAKE3/blake3.c", "lib/Support/BLAKE3/blake3_dispatch.c", "lib/Support/BLAKE3/blake3_portable.c", - ], + ] + select({ + "@platforms//cpu:aarch64": [ + "lib/Support/BLAKE3/blake3_neon.c", + ], + "@platforms//cpu:x86_64": [ + "lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S", + "lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S", + "lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S", + "lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S", + ], + "//conditions:default": [ + ] + }), hdrs = glob([ "include/llvm/Support/**/*.h", "include/llvm/ADT/*.h", @@ -213,13 +225,22 @@ cc_library( "include/llvm/Support/VCSRevision.h", ], copts = llvm_copts, - defines = [ - 
"BLAKE3_NO_AVX2", - "BLAKE3_NO_AVX512", - "BLAKE3_NO_SSE2", - "BLAKE3_NO_SSE41", - "BLAKE3_USE_NEON=0", - ], + defines = select({ + "@platforms//cpu:aarch64": [ + ], + "//conditions:default": [ + "BLAKE3_USE_NEON=0", + ] + }) + select({ + "@platforms//cpu:x86_64": [ + ], + "//conditions:default": [ + "BLAKE3_NO_AVX2", + "BLAKE3_NO_AVX512", + "BLAKE3_NO_SSE2", + "BLAKE3_NO_SSE41", + ] + }), includes = ["include"], linkopts = select({ "@bazel_tools//src/conditions:windows": [], From 6a8457924335623eb2303775894dbde39fcf4e64 Mon Sep 17 00:00:00 2001 From: eopXD Date: Tue, 24 May 2022 20:06:34 -0700 Subject: [PATCH 744/908] [LSR][TTI][PowerPC][SystemZ][X86] Add const-ness to TTI::isLSRCostLess. NFC Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D126350 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 12 ++++++------ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 2 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 3 ++- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4 ++-- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 4 ++-- .../Target/SystemZ/SystemZTargetTransformInfo.cpp | 4 ++-- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h | 4 ++-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 4 ++-- llvm/lib/Target/X86/X86TargetTransformInfo.h | 4 ++-- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 4 ++-- 10 files changed, 23 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 2a4620e5abcacf..8d2f9b50745281 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -616,8 +616,8 @@ class TargetTransformInfo { Instruction *I = nullptr) const; /// Return true if LSR cost of C1 is lower than C1. 
- bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) const; + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) const; /// Return true if LSR major cost is number of registers. Targets which /// implement their own isLSRCostLess and unset number of registers as major @@ -1557,8 +1557,8 @@ class TargetTransformInfo::Concept { int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I) = 0; - virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) = 0; + virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) = 0; virtual bool isNumRegsMajorCostOfLSR() = 0; virtual bool isProfitableLSRChainElement(Instruction *I) = 0; virtual bool canMacroFuseCmp() = 0; @@ -1949,8 +1949,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace, I); } - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) override { + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) override { return Impl.isLSRCostLess(C1, C2); } bool isNumRegsMajorCostOfLSR() override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 044e9bc179a5cd..9423e7bb7f3688 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -210,7 +210,7 @@ class TargetTransformInfoImplBase { return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } - bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const { + bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const { return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, C1.ScaleCost, 
C1.ImmCost, C1.SetupCost) < std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index ac5c56d5bd964a..caa59386588949 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -348,7 +348,8 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, AddrSpace, I); } -bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { +bool TargetTransformInfo::isLSRCostLess(const LSRCost &C1, + const LSRCost &C2) const { return TTIImpl->isLSRCostLess(C1, C2); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 7b956fd3478e0f..6bc0472f088ed2 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1318,8 +1318,8 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, return true; } -bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) { +bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { // PowerPC default behaviour here is "instruction number 1st priority". // If LsrNoInsnsCost is set, call default implementation. 
if (!LsrNoInsnsCost) diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 4c0005552285dd..790eb0b42afae9 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -76,8 +76,8 @@ class PPCTTIImpl : public BasicTTIImplBase { OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2); + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); bool isNumRegsMajorCostOfLSR(); bool shouldBuildRelLookupTables() const; /// @} diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index b4428923d18fcc..c06d43c4f7f22f 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -339,8 +339,8 @@ void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, BaseT::getPeelingPreferences(L, SE, PP); } -bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) { +bool SystemZTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { // SystemZ specific: check instruction count (first), and don't care about // ImmCost, since offsets are checked explicitly. 
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 190f7717328ac6..33317e799eabb1 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -59,8 +59,8 @@ class SystemZTTIImpl : public BasicTTIImplBase { void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2); + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); /// @} /// \name Vector TTI Implementations diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index dd7c1bd8d9252f..2adbdcb693d537 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5188,8 +5188,8 @@ InstructionCost X86TTIImpl::getGatherScatterOpCost( return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); } -bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) { +bool X86TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { // X86 specific here are "instruction number 1st priority". 
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 473350365a8eda..91ea6c35d41b7f 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -225,8 +225,8 @@ class X86TTIImpl : public BasicTTIImplBase { InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2); + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); bool canMacroFuseCmp(); bool isLegalMaskedLoad(Type *DataType, Align Alignment); bool isLegalMaskedStore(Type *DataType, Align Alignment); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5077ee87484820..e53dea5a231b90 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1067,7 +1067,7 @@ class Cost { C.ScaleCost = 0; } - bool isLess(Cost &Other); + bool isLess(const Cost &Other); void Lose(); @@ -1464,7 +1464,7 @@ void Cost::Lose() { } /// Choose the lower cost. -bool Cost::isLess(Cost &Other) { +bool Cost::isLess(const Cost &Other) { if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && C.Insns != Other.C.Insns) return C.Insns < Other.C.Insns; From 85b4470035b74834dcba3be14e8abb530f302caa Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 27 May 2022 15:37:42 -0700 Subject: [PATCH 745/908] [RISCV] Allow PRE of vsetvli involving non-1 LMUL This is a follow up to address a review comment from D124869. When deciding whether to PRE a vsetvli, we can allow non-LMUL1 vsetvlis. 
Differential Revision: https://reviews.llvm.org/D126563 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 13 ++++++++----- llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll | 14 +++++++------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 6497fb2de3dcca..5a0b242dc889a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1290,14 +1290,17 @@ static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { // vreg def placement. return RISCV::X0 == Info.getAVLReg(); - if (RISCVII::LMUL_1 != Info.getVLMUL()) - // TODO: Generalize the code below to account for LMUL - return false; - unsigned AVL = Info.getAVLImm(); unsigned SEW = Info.getSEW(); unsigned AVLInBits = AVL * SEW; - return ST.getRealMinVLen() >= AVLInBits; + + unsigned LMul; + bool Fractional; + std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL()); + + if (Fractional) + return ST.getRealMinVLen() / LMul >= AVLInBits; + return ST.getRealMinVLen() * LMul >= AVLInBits; } /// Perform simple partial redundancy elimination of the VSETVLI instructions diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index b3669c283e76a7..15716a8ec6e7cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -3775,9 +3775,9 @@ define void @sink_splat_mul_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_mul_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: .LBB67_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) @@ -3811,9 +3811,9 @@ define 
void @sink_splat_add_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_add_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: .LBB68_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vadd.vx v8, v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) @@ -3847,9 +3847,9 @@ define void @sink_splat_sub_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_sub_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: .LBB69_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsub.vx v8, v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) @@ -3883,9 +3883,9 @@ define void @sink_splat_rsub_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_rsub_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: .LBB70_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) @@ -3919,9 +3919,9 @@ define void @sink_splat_and_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_and_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: .LBB71_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) @@ -3955,9 +3955,9 @@ define void @sink_splat_or_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_or_lmul2: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: .LBB72_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) @@ -3991,9 +3991,9 @@ define void @sink_splat_xor_lmul2(i64* nocapture %a, i64 signext %x) { ; CHECK-LABEL: sink_splat_xor_lmul2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: .LBB73_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) From 4a368136693ba9c4e827702e9d390280c3d5e7ac Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Fri, 27 May 2022 18:53:19 -0400 Subject: [PATCH 746/908] [OpenACC][OpenMP] Document atomic-in-teams extension That is, put D126323 in the status doc and explain its relationship to OpenACC support. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D126547 --- clang/docs/OpenMPSupport.rst | 15 ++++++----- openmp/docs/openacc/OpenMPExtensions.rst | 33 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 1292af07a8e417..88e3050d3622c7 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -369,9 +369,12 @@ documentation are provided. As these extensions mature, they will be considered for standardization. Please contact *openmp-dev* at *lists.llvm.org* to provide feedback. 
-+------------------------------+---------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ -|Category | Feature | Status | Reviews | -+==============================+===========================================================================+==========================+========================================================+ -| device extension | `'ompx_hold' map type modifier | :good:`prototyped` | D106509, D106510 | -| | `_ | | | -+------------------------------+---------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ +|Category | Feature | Status | Reviews | ++==============================+===================================================================================+==========================+========================================================+ +| atomic extension | `'atomic' strictly nested within 'teams' | :good:`prototyped` | D126323 | +| | `_ | | | ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ +| device extension | `'ompx_hold' map type modifier | :good:`prototyped` | D106509, D106510 | +| | `_ | | | ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ diff --git a/openmp/docs/openacc/OpenMPExtensions.rst b/openmp/docs/openacc/OpenMPExtensions.rst index bc662c32e81ca2..82cf77548b9a00 100644 --- a/openmp/docs/openacc/OpenMPExtensions.rst +++ 
b/openmp/docs/openacc/OpenMPExtensions.rst @@ -137,3 +137,36 @@ selecting OpenMP reference counts, the implementation is the same at the runtime level. That is, OpenACC's dynamic reference count is OpenMP's dynamic reference count, and OpenACC's structured reference count is our OpenMP hold reference count extension. + +.. _atomicWithinTeams: + +``atomic`` Strictly Nested Within ``teams`` +------------------------------------------- + +Example +^^^^^^^ + +OpenMP 5.2, sec. 10.2 "teams Construct", p. 232, L9-12 restricts what +regions can be strictly nested within a ``teams`` region. As an +extension, Clang relaxes that restriction in the case of the +``atomic`` construct so that, for example, the following case is +permitted: + +.. code-block:: c++ + + #pragma omp target teams map(tofrom:x) + #pragma omp atomic update + x++; + +Relationship with OpenACC +^^^^^^^^^^^^^^^^^^^^^^^^^ + +This extension is important when translating OpenACC to OpenMP because +OpenACC does not have the same restriction for its corresponding +constructs. For example, the following is conforming OpenACC: + +.. 
code-block:: c++ + + #pragma acc parallel copy(x) + #pragma acc atomic update + x++; From 1534177f8f7edd83083ceda7c14d6d40cc872c6e Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 28 May 2022 01:45:55 +0200 Subject: [PATCH 747/908] [mlir][bufferization][NFC] Move OpFilter out of BufferizationOptions Differential Revision: https://reviews.llvm.org/D126568 --- .../IR/BufferizableOpInterface.h | 231 +++++++++--------- .../Arithmetic/Transforms/Bufferize.cpp | 4 +- .../IR/BufferizableOpInterface.cpp | 38 +-- .../Bufferization/Transforms/Bufferize.cpp | 6 +- .../Dialect/Linalg/Transforms/Bufferize.cpp | 2 +- .../Dialect/Shape/Transforms/Bufferize.cpp | 2 +- .../Dialect/Tensor/Transforms/Bufferize.cpp | 2 +- .../Dialect/Vector/Transforms/Bufferize.cpp | 2 +- 8 files changed, 148 insertions(+), 139 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index eb3cff0722b04c..94fc30bafef9eb 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -23,28 +23,11 @@ class AnalysisState; class BufferizableOpInterface; struct DialectAnalysisState; -/// Options for BufferizableOpInterface-based bufferization. -struct BufferizationOptions { - /// Allocator function: Generate a memref allocation with the given type, - /// dynamic extents and alignment. - using AllocationFn = std::function( - OpBuilder &, Location, MemRefType, ValueRange, unsigned int)>; - /// Deallocator function: Deallocate a buffer that was allocated with - /// AllocatorFn. - using DeallocationFn = - std::function; - /// Memcpy function: Generate a memcpy between two buffers. - using MemCpyFn = - std::function; - /// Initializer function for analysis state. - using AnalysisStateInitFn = std::function; - /// Initializer function for dialect-specific analysis state. 
- using DialectStateInitFn = - std::function()>; - +class OpFilter { +public: /// An op filter entry. Filters can be used to specify which ops should be /// processed by the bufferization. - struct OpFilterEntry { + struct Entry { /// If the filter function evaluates to `true`, the filter matches. using FilterFn = std::function; @@ -55,116 +38,156 @@ struct BufferizationOptions { FilterType type; }; - enum class LayoutMapOption : int8_t { - InferLayoutMap = 0, - IdentityLayoutMap = 1, - FullyDynamicLayoutMap = 2 - }; - - BufferizationOptions(); - - /// Return `true` if the filter has at least one ALLOW rule. - bool filterHasAllowRule() const { - for (const OpFilterEntry &e : opFilter) - if (e.type == OpFilterEntry::FilterType::ALLOW) - return true; - return false; - } - - /// Return whether the op should be bufferized or not. + /// Return whether the op is allowed or not. /// - /// If the filter does not have an ALLOW rule, ops are bufferized by default, + /// If the filter does not have an ALLOW rule, ops are allowed by default, /// unless they are explicitly marked as DENY. If the filter has at least one - /// ALLOW rule, ops are ignored by default and only bufferized if they match + /// ALLOW rule, ops are denied by default and only allowed if they match /// an ALLOW rule and no DENY rule. bool isOpAllowed(Operation *op) const; - /// Allow the given dialects in the filter. + /// Allow the given dialects. /// - /// This function adds one or multiple ALLOW filters. - template - void allowDialectInFilter() { - // The following expands a call to allowDialectInFilterImpl for each dialect + /// This function adds one or multiple ALLOW entries. + template void allowDialect() { + // The following expands a call to allowDialectImpl for each dialect // in 'DialectTs'. This magic is necessary due to a limitation in the places // that a parameter pack can be expanded in c++11. // FIXME: In c++17 this can be simplified by using 'fold expressions'. 
- (void)std::initializer_list{ - 0, (allowDialectInFilterImpl(), 0)...}; + (void)std::initializer_list{0, (allowDialectImpl(), 0)...}; } - /// Deny the given dialects in the filter. + /// Deny the given dialects. /// - /// This function adds one or multiple DENY filters. - template void denyDialectInFilter() { + /// This function adds one or multiple DENY entries. + template void denyDialect() { // FIXME: In c++17 this can be simplified by using 'fold expressions'. - (void)std::initializer_list{ - 0, (denyDialectInFilterImpl(), 0)...}; + (void)std::initializer_list{0, (denyDialectImpl(), 0)...}; } - /// Allow the given dialect in the filter. + /// Allow the given dialect. /// - /// This function adds an ALLOW filter. - void allowDialectInFilter(StringRef dialectNamespace) { - OpFilterEntry::FilterFn filterFn = [=](Operation *op) { + /// This function adds an ALLOW entry. + void allowDialect(StringRef dialectNamespace) { + Entry::FilterFn filterFn = [=](Operation *op) { return op->getDialect()->getNamespace() == dialectNamespace; }; - opFilter.push_back( - OpFilterEntry{filterFn, OpFilterEntry::FilterType::ALLOW}); + entries.push_back(Entry{filterFn, Entry::FilterType::ALLOW}); } - /// Allow the given ops in the filter. + /// Allow the given ops. /// - /// This function adds one or multiple ALLOW filters. - template - void allowOperationInFilter() { + /// This function adds one or multiple ALLOW entries. + template void allowOperation() { // FIXME: In c++17 this can be simplified by using 'fold expressions'. - (void)std::initializer_list{ - 0, (allowOperationInFilterImpl(), 0)...}; + (void)std::initializer_list{0, (allowOperationImpl(), 0)...}; } - /// Deny the given ops in the filter. + /// Deny the given ops. /// - /// This function adds one or multiple DENY filters. - template void denyOperationInFilter() { + /// This function adds one or multiple DENY entries. 
+ template void denyOperation() { // FIXME: In c++17 this can be simplified by using 'fold expressions'. - (void)std::initializer_list{ - 0, (denyOperationInFilterImpl(), 0)...}; + (void)std::initializer_list{0, (denyOperationImpl(), 0)...}; } - /// Allow the given op in the filter. + /// Allow the given op. /// - /// This function adds an ALLOW filter. - void allowOperationInFilter(StringRef opName) { - OpFilterEntry::FilterFn filterFn = [=](Operation *op) { + /// This function adds an ALLOW entry. + void allowOperation(StringRef opName) { + Entry::FilterFn filterFn = [=](Operation *op) { return op->getName().getStringRef() == opName; }; - allowOperationInFilter(filterFn); + allowOperation(filterFn); } - /// Deny the given op in the filter. + /// Deny the given op. /// - /// This function adds a DENY filter. - void denyOperationInFilter(StringRef opName) { - OpFilterEntry::FilterFn filterFn = [=](Operation *op) { + /// This function adds a DENY entry. + void denyOperation(StringRef opName) { + Entry::FilterFn filterFn = [=](Operation *op) { return op->getName().getStringRef() == opName; }; - denyOperationInFilter(filterFn); + denyOperation(filterFn); } - /// Allow ops that are matched by `fn` in the filter. + /// Allow ops that are matched by `fn`. /// - /// This function adds an ALLOW filter. - void allowOperationInFilter(OpFilterEntry::FilterFn fn) { - opFilter.push_back(OpFilterEntry{fn, OpFilterEntry::FilterType::ALLOW}); + /// This function adds an ALLOW entry. + void allowOperation(Entry::FilterFn fn) { + entries.push_back(Entry{fn, Entry::FilterType::ALLOW}); } - /// Deny ops that are matched by `fn` in the filter. + /// Deny ops that are matched by `fn`. /// - /// This function adds a DENY filter. - void denyOperationInFilter(OpFilterEntry::FilterFn fn) { - opFilter.push_back(OpFilterEntry{fn, OpFilterEntry::FilterType::DENY}); + /// This function adds a DENY entry. 
+ void denyOperation(Entry::FilterFn fn) { + entries.push_back(Entry{fn, Entry::FilterType::DENY}); + } + +private: + /// Return `true` if the filter has at least one ALLOW rule. + bool hasAllowRule() const { + for (const Entry &e : entries) + if (e.type == Entry::FilterType::ALLOW) + return true; + return false; + } + + /// Allow a dialect. + template void allowDialectImpl() { + allowDialect(DialectT::getDialectNamespace()); + } + + /// Deny a dialect. + template void denyDialectImpl() { + denyDialect(DialectT::getDialectNamespace()); + } + + /// Allow an op. + template void allowOperationImpl() { + allowOperation(OpTy::getOperationName()); } + /// Deny an op. + template void denyOperationImpl() { + denyOperation(OpTy::getOperationName()); + } + + /// A list of filter entries that determine whether an op should be allowed or + /// denied. If the filter has an ALLOW rule, only ops that are allowed and not + /// denied are allowed. If the filter does not have an ALLOW rule, only ops + /// that are not denied are allowed. + SmallVector entries; +}; + +/// Options for BufferizableOpInterface-based bufferization. +struct BufferizationOptions { + /// Allocator function: Generate a memref allocation with the given type, + /// dynamic extents and alignment. + using AllocationFn = std::function( + OpBuilder &, Location, MemRefType, ValueRange, unsigned int)>; + /// Deallocator function: Deallocate a buffer that was allocated with + /// AllocatorFn. + using DeallocationFn = + std::function; + /// Memcpy function: Generate a memcpy between two buffers. + using MemCpyFn = + std::function; + /// Initializer function for analysis state. + using AnalysisStateInitFn = std::function; + /// Initializer function for dialect-specific analysis state. 
+ using DialectStateInitFn = + std::function()>; + + enum class LayoutMapOption : int8_t { + InferLayoutMap = 0, + IdentityLayoutMap = 1, + FullyDynamicLayoutMap = 2 + }; + + BufferizationOptions(); + /// Try to cast the given op to BufferizableOpInterface if the op is allow /// listed. BufferizableOpInterface dynCastBufferizableOp(Operation *op) const; @@ -173,6 +196,13 @@ struct BufferizationOptions { /// listed. BufferizableOpInterface dynCastBufferizableOp(Value value) const; + /// A filter that specifies which ops should be bufferized and which ops + /// should be ignored. + OpFilter opFilter; + + /// Return `true` if the given op should be bufferized. + bool isOpAllowed(Operation *op) const; + /// Helper functions for allocation, deallocation, memory copying. Optional allocationFn; Optional deallocationFn; @@ -276,12 +306,6 @@ struct BufferizationOptions { /// Buffer alignment for new memory allocations. unsigned int bufferAlignment = 128; - /// A list of op filters that determine whether an op should be processed or - /// ignored by the bufferization. If the filter has an ALLOW rule, only ops - /// that are allowed and not denied are bufferized. If the filter does not - /// have an ALLOW rule, only ops that are not denied are bufferized. - SmallVector opFilter; - /// Initializer functions for analysis state. These can be used to /// initialize dialect-specific analysis state. SmallVector stateInitializers; @@ -289,29 +313,6 @@ struct BufferizationOptions { /// Add a analysis state initializer that initializes the specified /// dialect-specific analysis state. void addDialectStateInitializer(StringRef name, const DialectStateInitFn &fn); - -private: - /// Allow a dialect. - template - void allowDialectInFilterImpl() { - allowDialectInFilter(DialectT::getDialectNamespace()); - } - - /// Deny a dialect. - template void denyDialectInFilterImpl() { - denyDialectInFilter(DialectT::getDialectNamespace()); - } - - /// Allow an op. 
- template - void allowOperationInFilterImpl() { - allowOperationInFilter(OpTy::getOperationName()); - } - - /// Deny an op. - template void denyOperationInFilterImpl() { - denyOperationInFilter(OpTy::getOperationName()); - } }; /// Specify fine-grain relationship between buffers to enable more analysis. diff --git a/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp index 4fb3d0e03ff1e8..3237b1a8804419 100644 --- a/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp @@ -32,9 +32,9 @@ struct ArithmeticBufferizePass void runOnOperation() override { BufferizationOptions options = getPartialBufferizationOptions(); if (constantOpOnly) { - options.allowOperationInFilter(); + options.opFilter.allowOperation(); } else { - options.allowDialectInFilter(); + options.opFilter.allowDialect(); } options.bufferAlignment = alignment; diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 79b499545ca9f2..635e7d770e60b5 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -45,28 +45,19 @@ static const char *kBufferAllocationAttr = "bufferization.allocation"; static const char *kSkipDeallocAttr = "bufferization.skip_dealloc"; //===----------------------------------------------------------------------===// -// BufferizationOptions +// OpFilter //===----------------------------------------------------------------------===// -// Default constructor for BufferizationOptions. -BufferizationOptions::BufferizationOptions() = default; - -bool BufferizationOptions::isOpAllowed(Operation *op) const { - // Special case: If function boundary bufferization is deactivated, do not - // allow ops that belong to the `func` dialect. 
- bool isFuncBoundaryOp = isa_and_nonnull(op->getDialect()); - if (!bufferizeFunctionBoundaries && isFuncBoundaryOp) - return false; - +bool OpFilter::isOpAllowed(Operation *op) const { // All other ops: Allow/disallow according to filter. - bool isAllowed = !filterHasAllowRule(); - for (const OpFilterEntry &entry : opFilter) { + bool isAllowed = !hasAllowRule(); + for (const Entry &entry : entries) { bool filterResult = entry.fn(op); switch (entry.type) { - case OpFilterEntry::ALLOW: + case Entry::ALLOW: isAllowed |= filterResult; break; - case OpFilterEntry::DENY: + case Entry::DENY: if (filterResult) // DENY filter matches. This op is no allowed. (Even if other ALLOW // filters may match.) @@ -76,6 +67,23 @@ bool BufferizationOptions::isOpAllowed(Operation *op) const { return isAllowed; } +//===----------------------------------------------------------------------===// +// BufferizationOptions +//===----------------------------------------------------------------------===// + +// Default constructor for BufferizationOptions. +BufferizationOptions::BufferizationOptions() = default; + +bool BufferizationOptions::isOpAllowed(Operation *op) const { + // Special case: If function boundary bufferization is deactivated, do not + // allow ops that belong to the `func` dialect. 
+ bool isFuncBoundaryOp = isa_and_nonnull(op->getDialect()); + if (!bufferizeFunctionBoundaries && isFuncBoundaryOp) + return false; + + return opFilter.isOpAllowed(op); +} + BufferizableOpInterface BufferizationOptions::dynCastBufferizableOp(Operation *op) const { auto bufferizableOp = dyn_cast(op); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 4f621979a5e35a..d4b12bad308f0f 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -194,7 +194,7 @@ struct OneShotBufferizePass opt.promoteBufferResultsToOutParams = promoteBufferResultsToOutParams; opt.unknownTypeConversion = parseLayoutMapOption(unknownTypeConversion); - BufferizationOptions::OpFilterEntry::FilterFn filterFn = + OpFilter::Entry::FilterFn filterFn = [&](Operation *op) { // Filter may be specified via options. if (this->dialectFilter.hasValue()) @@ -204,7 +204,7 @@ struct OneShotBufferizePass // No filter specified: All other ops are allowed. 
return true; }; - opt.allowOperationInFilter(filterFn); + opt.opFilter.allowOperation(filterFn); } else { opt = *options; } @@ -242,7 +242,7 @@ struct BufferizationBufferizePass : public BufferizationBufferizeBase { void runOnOperation() override { BufferizationOptions options = getPartialBufferizationOptions(); - options.allowDialectInFilter(); + options.opFilter.allowDialect(); if (failed(bufferizeOp(getOperation(), options))) signalPassFailure(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp index 143cef1ce5cf07..63433ef50fe2ed 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -28,7 +28,7 @@ namespace { struct LinalgBufferizePass : public LinalgBufferizeBase { void runOnOperation() override { BufferizationOptions options = getPartialBufferizationOptions(); - options.allowDialectInFilter(); + options.opFilter.allowDialect(); if (failed(bufferizeOp(getOperation(), options))) signalPassFailure(); diff --git a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp index b84c3b7e8a4704..373552801673c8 100644 --- a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp @@ -23,7 +23,7 @@ namespace { struct ShapeBufferizePass : public ShapeBufferizeBase { void runOnOperation() override { BufferizationOptions options = getPartialBufferizationOptions(); - options.allowDialectInFilter(); + options.opFilter.allowDialect(); if (failed(bufferizeOp(getOperation(), options))) signalPassFailure(); diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp index c9233989969129..75bfc878b92269 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp @@ -30,7 +30,7 @@ namespace { struct TensorBufferizePass : public TensorBufferizeBase { void 
runOnOperation() override { BufferizationOptions options = getPartialBufferizationOptions(); - options.allowDialectInFilter(); + options.opFilter.allowDialect(); if (failed(bufferizeOp(getOperation(), options))) signalPassFailure(); diff --git a/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp index 4ed2dd629c1b95..f98eeda1bde538 100644 --- a/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp @@ -27,7 +27,7 @@ namespace { struct VectorBufferizePass : public VectorBufferizeBase { void runOnOperation() override { BufferizationOptions options = getPartialBufferizationOptions(); - options.allowDialectInFilter(); + options.opFilter.allowDialect(); if (failed(bufferizeOp(getOperation(), options))) signalPassFailure(); From 3490aadf56c90d42bbf974d84fc168c5fb78cd37 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 28 May 2022 04:36:24 +0200 Subject: [PATCH 748/908] [mlir][bufferization][NFC] Remove post-analysis step infrastructure Now that analysis and bufferization are better separated, post-analysis steps are no longer needed. Users can directly interleave analysis and bufferization as needed. 
Differential Revision: https://reviews.llvm.org/D126571 --- .../IR/BufferizableOpInterface.h | 3 + .../Transforms/OneShotAnalysis.h | 27 ++------ .../Bufferization/Transforms/Bufferize.cpp | 7 ++ .../Transforms/OneShotAnalysis.cpp | 25 ++++--- .../Transforms/OneShotModuleBufferize.cpp | 66 +++++-------------- 5 files changed, 48 insertions(+), 80 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index 94fc30bafef9eb..675959ea486b52 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -409,6 +409,9 @@ class AnalysisState { /// Return true if `v1` and `v2` bufferize to equivalent buffers. virtual bool areEquivalentBufferizedValues(Value v1, Value v2) const = 0; + /// Return true if `v1` and `v2` may bufferize to aliasing buffers. + virtual bool areAliasingBufferizedValues(Value v1, Value v2) const = 0; + /// Return `true` if the given tensor has undefined contents. virtual bool hasUndefinedContents(OpOperand *opOperand) const = 0; diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h index ae2ca6eeccbcb2..8d44d579fbc8db 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h @@ -19,30 +19,10 @@ struct OneShotBufferizationOptions; class BufferizationAliasInfo; class OneShotAnalysisState; -/// PostAnalysisStepFns can be registered with `BufferizationOptions` and are -/// executed after the analysis, but before bufferization. They can be used to -/// implement custom dialect-specific optimizations. They may modify the IR, but -/// must keep `aliasInfo` consistent. 
Newly created operations and operations -/// that should be re-analyzed must be added to `newOps`. -using PostAnalysisStepFn = std::function &)>; - -using PostAnalysisStepList = SmallVector; - /// Options for analysis-enabled bufferization. struct OneShotBufferizationOptions : public BufferizationOptions { OneShotBufferizationOptions() = default; - /// Register a "post analysis" step. Such steps are executed after the - /// analysis, but before bufferization. - void addPostAnalysisStep(PostAnalysisStepFn fn) { - postAnalysisSteps.push_back(fn); - } - - /// Registered post analysis steps. - PostAnalysisStepList postAnalysisSteps; - /// Specifies whether returning newly allocated memrefs should be allowed. /// Otherwise, a pass failure is triggered. bool allowReturnAllocs = false; @@ -165,6 +145,9 @@ class OneShotAnalysisState : public AnalysisState { /// Return true if `v1` and `v2` bufferize to equivalent buffers. bool areEquivalentBufferizedValues(Value v1, Value v2) const override; + /// Return true if `v1` and `v2` may bufferize to aliasing buffers. + bool areAliasingBufferizedValues(Value v1, Value v2) const override; + /// Return `true` if the given tensor has undefined contents. bool hasUndefinedContents(OpOperand *opOperand) const override; @@ -180,6 +163,10 @@ class OneShotAnalysisState : public AnalysisState { /// `yieldedTensors`. Also include all aliasing tensors in the same block. void gatherYieldedTensors(Operation *op); + /// Return true if the buffer of the given tensor value is written to. Must + /// not be called for values inside not yet analyzed functions. + bool isValueWritten(Value value) const; + private: /// `aliasInfo` keeps track of aliasing and equivalent values. Only internal /// functions and `runOneShotBufferize` may access this object. 
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index d4b12bad308f0f..7e63193aaf9536 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -503,6 +503,13 @@ class AlwaysCopyAnalysisState : public AnalysisState { return false; } + /// Return true if `v1` and `v2` may bufferize to aliasing buffers. + bool areAliasingBufferizedValues(Value v1, Value v2) const override { + // There is no analysis, so we do not know if the values are equivalent. The + // conservative answer is "true". + return true; + } + /// Return `true` if the given tensor has undefined contents. bool hasUndefinedContents(OpOperand *opOperand) const override { // There is no analysis, so the conservative answer is "false". diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp index dd43a51b16526b..f29d9a96b0b37a 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp @@ -216,6 +216,11 @@ bool OneShotAnalysisState::areEquivalentBufferizedValues(Value v1, return aliasInfo.areEquivalentBufferizedValues(v1, v2); } +bool OneShotAnalysisState::areAliasingBufferizedValues(Value v1, + Value v2) const { + return aliasInfo.areAliasingBufferizedValues(v1, v2); +} + // Gather yielded tensors in `yieldedTensors` by querying all aliases. This is // to ensure that such information is available during bufferization time. 
// Alias information can no longer be queried through BufferizationAliasInfo @@ -290,6 +295,16 @@ bool OneShotAnalysisState::isTensorYielded(Value tensor) const { return yieldedTensors.contains(tensor); } +bool OneShotAnalysisState::isValueWritten(Value value) const { + bool isWritten = false; + aliasInfo.applyOnAliases(value, [&](Value val) { + for (OpOperand &use : val.getUses()) + if (isInPlace(use) && bufferizesToMemoryWrite(use)) + isWritten = true; + }); + return isWritten; +} + //===----------------------------------------------------------------------===// // Bufferization-specific alias analysis. //===----------------------------------------------------------------------===// @@ -935,16 +950,6 @@ LogicalResult bufferization::analyzeOp(Operation *op, return failure(); equivalenceAnalysis(op, aliasInfo, state); - for (const PostAnalysisStepFn &fn : options.postAnalysisSteps) { - SmallVector newOps; - if (failed(fn(op, state, aliasInfo, newOps))) - return failure(); - // Analyze ops that were created by the PostAnalysisStepFn. - if (failed(inPlaceAnalysis(newOps, aliasInfo, state, domInfo))) - return failure(); - equivalenceAnalysis(newOps, aliasInfo, state); - } - bool failedAnalysis = false; if (!options.allowReturnAllocs) { SmallVector newOps; diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index c9b941c6a34ce2..b93022fd6bebb2 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -16,7 +16,7 @@ // respective callers. // // After analyzing a FuncOp, additional information about its bbArgs is -// gathered through PostAnalysisStepFns and stored in `FuncAnalysisState`. +// gathered and stored in `FuncAnalysisState`. 
// // * `aliasingFuncOpBBArgsAnalysis` determines the equivalent/aliasing bbArgs // for @@ -155,14 +155,11 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, /// Store function BlockArguments that are equivalent to/aliasing a returned /// value in FuncAnalysisState. -static LogicalResult -aliasingFuncOpBBArgsAnalysis(Operation *op, AnalysisState &state, - BufferizationAliasInfo &aliasInfo, - SmallVector &newOps) { +static LogicalResult aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, + OneShotAnalysisState &state) { FuncAnalysisState &funcState = getFuncAnalysisState(state); // Support only single return-terminated block in the function. - auto funcOp = cast(op); func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); assert(returnOp && "expected func with single return op"); @@ -172,12 +169,12 @@ aliasingFuncOpBBArgsAnalysis(Operation *op, AnalysisState &state, if (bbArg.getType().isa()) { int64_t returnIdx = returnVal.getOperandNumber(); int64_t bbArgIdx = bbArg.getArgNumber(); - if (aliasInfo.areEquivalentBufferizedValues(returnVal.get(), bbArg)) { + if (state.areEquivalentBufferizedValues(returnVal.get(), bbArg)) { funcState.equivalentFuncArgs[funcOp][returnIdx] = bbArgIdx; if (state.getOptions().testAnalysisOnly) annotateEquivalentReturnBbArg(returnVal, bbArg); } - if (aliasInfo.areAliasingBufferizedValues(returnVal.get(), bbArg)) { + if (state.areAliasingBufferizedValues(returnVal.get(), bbArg)) { funcState.aliasingFuncArgs[funcOp][returnIdx].push_back(bbArgIdx); funcState.aliasingReturnVals[funcOp][bbArgIdx].push_back(returnIdx); } @@ -186,35 +183,6 @@ aliasingFuncOpBBArgsAnalysis(Operation *op, AnalysisState &state, return success(); } -/// Return true if the buffer of the given tensor value is written to. Must not -/// be called for values inside not yet analyzed functions. (Post-analysis -/// steps do not have to be run yet, i.e., "in progress" is also OK.) 
-static bool isValueWritten(Value value, const AnalysisState &state, - const BufferizationAliasInfo &aliasInfo) { -#ifndef NDEBUG - assert(value.getType().isa() && "expected TensorType"); - func::FuncOp funcOp; - if (auto bbArg = value.dyn_cast()) { - Operation *owner = bbArg.getOwner()->getParentOp(); - funcOp = isa(owner) ? cast(owner) - : owner->getParentOfType(); - } else { - funcOp = value.getDefiningOp()->getParentOfType(); - } - assert(getFuncOpAnalysisState(state, funcOp) != - FuncOpAnalysisState::NotAnalyzed && - "FuncOp must be fully analyzed or analysis in progress"); -#endif // NDEBUG - - bool isWritten = false; - aliasInfo.applyOnAliases(value, [&](Value val) { - for (OpOperand &use : val.getUses()) - if (state.isInPlace(use) && state.bufferizesToMemoryWrite(use)) - isWritten = true; - }); - return isWritten; -} - static void annotateFuncArgAccess(func::FuncOp funcOp, BlockArgument bbArg, bool isRead, bool isWritten) { OpBuilder b(funcOp.getContext()); @@ -231,15 +199,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, BlockArgument bbArg, funcOp.setArgAttr(bbArg.getArgNumber(), "bufferization.access", accessType); } -/// Determine which FuncOp bbArgs are read and which are written. If this -/// PostAnalysisStepFn is run on a function with unknown ops, it will -/// conservatively assume that such ops bufferize to a read + write. -static LogicalResult -funcOpBbArgReadWriteAnalysis(Operation *op, AnalysisState &state, - BufferizationAliasInfo &aliasInfo, - SmallVector &newOps) { +/// Determine which FuncOp bbArgs are read and which are written. When run on a +/// function with unknown ops, we conservatively assume that such ops bufferize +/// to a read + write. +static LogicalResult funcOpBbArgReadWriteAnalysis(FuncOp funcOp, + OneShotAnalysisState &state) { FuncAnalysisState &funcState = getFuncAnalysisState(state); - auto funcOp = cast(op); // If the function has no body, conservatively assume that all args are // read + written. 
@@ -256,7 +221,7 @@ funcOpBbArgReadWriteAnalysis(Operation *op, AnalysisState &state, if (!bbArg.getType().isa()) continue; bool isRead = state.isValueRead(bbArg); - bool isWritten = isValueWritten(bbArg, state, aliasInfo); + bool isWritten = state.isValueWritten(bbArg); if (state.getOptions().testAnalysisOnly) annotateFuncArgAccess(funcOp, bbArg, isRead, isWritten); if (isRead) @@ -434,10 +399,6 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap))) return failure(); - // Collect bbArg/return value information after the analysis. - options.addPostAnalysisStep(aliasingFuncOpBBArgsAnalysis); - options.addPostAnalysisStep(funcOpBbArgReadWriteAnalysis); - // Analyze ops. for (func::FuncOp funcOp : orderedFuncOps) { // No body => no analysis. @@ -454,6 +415,11 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( if (failed(analyzeOp(funcOp, analysisState))) return failure(); + // Run some extra function analyses. + if (failed(aliasingFuncOpBBArgsAnalysis(funcOp, analysisState)) || + failed(funcOpBbArgReadWriteAnalysis(funcOp, analysisState))) + return failure(); + // Mark op as fully analyzed. funcState.analyzedFuncOps[funcOp] = FuncOpAnalysisState::Analyzed; From f470f8cbcefd4a74c654b0e2a2005a62c94047de Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 28 May 2022 04:42:47 +0200 Subject: [PATCH 749/908] [mlir][bufferize][NFC] Split analysis+bufferization of ModuleBufferization Analysis and bufferization can now be run separately. 
Differential Revision: https://reviews.llvm.org/D126572 --- .../Transforms/OneShotModuleBufferize.h | 11 ++++ .../Transforms/OneShotModuleBufferize.cpp | 58 ++++++++++++++----- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h index 5f42e14ce9d46d..367eddeeb45c75 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h @@ -15,8 +15,19 @@ struct LogicalResult; class ModuleOp; namespace bufferization { +struct BufferizationState; +class OneShotAnalysisState; struct OneShotBufferizationOptions; +/// Analyze `moduleOp` and its nested ops. Bufferization decisions are stored in +/// `state`. +LogicalResult analyzeModuleOp(ModuleOp moduleOp, OneShotAnalysisState &state); + +/// Bufferize `op` and its nested ops that implement `BufferizableOpInterface`. +/// Whether buffer copies are needed or not is queried from the given state. +LogicalResult bufferizeModuleOp(ModuleOp moduleOp, + const OneShotAnalysisState &analysisState); + /// Run One-Shot Module Bufferization on the given module. Performs a simple /// function call analysis to determine which function arguments are /// inplaceable. 
Then analyzes and bufferizes FuncOps one-by-one with One-Shot diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index b93022fd6bebb2..e358979868403e 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -380,15 +380,15 @@ static void foldMemRefCasts(func::FuncOp funcOp) { funcOp.setType(newFuncType); } -LogicalResult mlir::bufferization::runOneShotModuleBufferize( - ModuleOp moduleOp, OneShotBufferizationOptions options) { +LogicalResult +mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, + OneShotAnalysisState &state) { + OneShotBufferizationOptions options = + static_cast(state.getOptions()); assert(options.bufferizeFunctionBoundaries && "expected that function boundary bufferization is activated"); - IRRewriter rewriter(moduleOp.getContext()); - OneShotAnalysisState analysisState(moduleOp, options); - BufferizationState bufferizationState(analysisState); - FuncAnalysisState &funcState = getFuncAnalysisState(analysisState); - BufferizationAliasInfo &aliasInfo = analysisState.getAliasInfo(); + FuncAnalysisState &funcState = getFuncAnalysisState(state); + BufferizationAliasInfo &aliasInfo = state.getAliasInfo(); // A list of functions in the order in which they are analyzed + bufferized. SmallVector orderedFuncOps; @@ -412,12 +412,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( equivalenceAnalysis(funcOp, aliasInfo, funcState); // Analyze funcOp. - if (failed(analyzeOp(funcOp, analysisState))) + if (failed(analyzeOp(funcOp, state))) return failure(); // Run some extra function analyses. 
- if (failed(aliasingFuncOpBBArgsAnalysis(funcOp, analysisState)) || - failed(funcOpBbArgReadWriteAnalysis(funcOp, analysisState))) + if (failed(aliasingFuncOpBBArgsAnalysis(funcOp, state)) || + failed(funcOpBbArgReadWriteAnalysis(funcOp, state))) return failure(); // Mark op as fully analyzed. @@ -425,11 +425,29 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( // Add annotations to function arguments. if (options.testAnalysisOnly) - annotateOpsWithBufferizationMarkers(funcOp, analysisState); + annotateOpsWithBufferizationMarkers(funcOp, state); } - if (options.testAnalysisOnly) - return success(); + return success(); +} + +LogicalResult mlir::bufferization::bufferizeModuleOp( + ModuleOp moduleOp, const OneShotAnalysisState &analysisState) { + auto const &options = static_cast( + analysisState.getOptions()); + assert(options.bufferizeFunctionBoundaries && + "expected that function boundary bufferization is activated"); + IRRewriter rewriter(moduleOp.getContext()); + BufferizationState bufferizationState(analysisState); + + // A list of functions in the order in which they are analyzed + bufferized. + SmallVector orderedFuncOps; + + // A mapping of FuncOps to their callers. + FuncCallerMap callerMap; + + if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap))) + return failure(); // Bufferize functions. 
for (func::FuncOp funcOp : orderedFuncOps) { @@ -466,3 +484,17 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( return success(); } + +LogicalResult mlir::bufferization::runOneShotModuleBufferize( + ModuleOp moduleOp, OneShotBufferizationOptions options) { + assert(options.bufferizeFunctionBoundaries && + "expected that function boundary bufferization is activated"); + OneShotAnalysisState analysisState(moduleOp, options); + if (failed(analyzeModuleOp(moduleOp, analysisState))) + return failure(); + if (options.testAnalysisOnly) + return success(); + if (failed(bufferizeModuleOp(moduleOp, analysisState))) + return failure(); + return success(); +} From 2f0a634c5e80ca8625dfb6f505ef7947309078f9 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 28 May 2022 04:48:36 +0200 Subject: [PATCH 750/908] [mlir][bufferization] Add extra filter mechanism to bufferizeOp Differential Revision: https://reviews.llvm.org/D126569 --- .../Bufferization/Transforms/Bufferize.h | 5 +-- .../Bufferization/Transforms/Bufferize.cpp | 31 ++++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h index 94fc36a7387cf6..bb39073ae379a4 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h @@ -27,6 +27,7 @@ namespace bufferization { class AnalysisState; struct BufferizationState; struct BufferizationOptions; +class OpFilter; /// A helper type converter class that automatically populates the relevant /// materializations and type conversions for bufferization. @@ -84,8 +85,8 @@ BufferizationOptions getPartialBufferizationOptions(); /// Reuse an existing `BufferizationState`. /// /// Note: This function overload is useful for extending the bufferization. 
-LogicalResult bufferizeOp(Operation *op, - BufferizationState &bufferizationState); +LogicalResult bufferizeOp(Operation *op, BufferizationState &bufferizationState, + const OpFilter *opFilter = nullptr); /// Finalize all buffer allocations: Create alloc/dealloc ops as specified by /// the bufferization options. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 7e63193aaf9536..3e86637c2cf9dd 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -345,9 +345,10 @@ class BufferizationRewriter : public IRRewriter { public: BufferizationRewriter(MLIRContext *ctx, DenseSet &erasedOps, DenseSet &toMemrefOps, - const BufferizationOptions &options) + const BufferizationOptions &options, + const OpFilter *opFilter) : IRRewriter(ctx), erasedOps(erasedOps), toMemrefOps(toMemrefOps), - options(options) {} + options(options), opFilter(opFilter) {} protected: void notifyOperationRemoved(Operation *op) override { @@ -370,10 +371,18 @@ class BufferizationRewriter : public IRRewriter { if (isa(op)) return; + // Skip non-tensor ops. + if (!hasTensorSemantics(op)) + return; + + // Skip ops that are not allowed. + if (!options.isOpAllowed(op) || (opFilter && !opFilter->isOpAllowed(op))) + return; + // Adding new bufferizable ops is not allowed during bufferization. Such ops // would not be analyzed and can lead to surprising behavior. - assert((!hasTensorSemantics(op) || !options.isOpAllowed(op)) && - "creating new tensor ops is not allowed during bufferization"); + llvm_unreachable( + "creating new tensor ops is not allowed during bufferization"); } private: @@ -387,12 +396,14 @@ class BufferizationRewriter : public IRRewriter { /// Used for debug modes. 
LLVM_ATTRIBUTE_UNUSED const BufferizationOptions &options; + + const OpFilter *opFilter; }; } // namespace -LogicalResult -bufferization::bufferizeOp(Operation *op, - BufferizationState &bufferizationState) { +LogicalResult bufferization::bufferizeOp(Operation *op, + BufferizationState &bufferizationState, + const OpFilter *opFilter) { const auto &options = bufferizationState.getOptions(); assert(options.unknownTypeConversion != BufferizationOptions::LayoutMapOption::InferLayoutMap && @@ -420,7 +431,7 @@ bufferization::bufferizeOp(Operation *op, // Bufferize all ops. BufferizationRewriter rewriter(op->getContext(), erasedOps, toMemrefOps, - bufferizationState.getOptions()); + bufferizationState.getOptions(), opFilter); for (unsigned i = 0; i < worklist.size(); ++i) { Operation *op = worklist[i]; // Skip ops that were erased. @@ -430,6 +441,8 @@ bufferization::bufferizeOp(Operation *op, auto bufferizableOp = options.dynCastBufferizableOp(op); if (!bufferizableOp) continue; + if (opFilter && !opFilter->isOpAllowed(op)) + continue; // Skip ops that no longer have tensor semantics. if (!hasTensorSemantics(op)) continue; @@ -462,6 +475,8 @@ bufferization::bufferizeOp(Operation *op, // Continue ops that are not allowed. if (!options.isOpAllowed(op)) continue; + if (opFilter && !opFilter->isOpAllowed(op)) + continue; // Ops without any uses and no side effects will fold away. if (op->getUses().empty() && MemoryEffectOpInterface::hasNoEffect(op)) continue; From a27b9139ab1bbdc93afaf240de2c2b9e45092884 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 27 May 2022 18:22:38 -0700 Subject: [PATCH 751/908] [scudo] Clean up Zircon header file uses Make fuchsia.h and fuchsia.cpp each include what they use. 
--- compiler-rt/lib/scudo/standalone/fuchsia.cpp | 1 + compiler-rt/lib/scudo/standalone/fuchsia.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp index 3b473bc9e22da2..088f077b4d9ebd 100644 --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -17,6 +17,7 @@ #include // for sync_mutex_t #include // for getenv() #include +#include #include #include diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.h b/compiler-rt/lib/scudo/standalone/fuchsia.h index d6993f892140e1..c1dfd7638ec537 100644 --- a/compiler-rt/lib/scudo/standalone/fuchsia.h +++ b/compiler-rt/lib/scudo/standalone/fuchsia.h @@ -13,7 +13,8 @@ #if SCUDO_FUCHSIA -#include +#include +#include namespace scudo { From 068b8af7961caafadeffd1a99a103ef1d7a36e21 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 27 May 2022 22:03:48 -0700 Subject: [PATCH 752/908] [ARM][AArch64] Change -mharden-sls= to use err_drv_unsupported_option_argument Update the diagnostic in D81404: the convention is to use err_drv_unsupported_option_argument instead of adding a new diagnostic for every option. 
Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D126511 --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 2 -- clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 4 ++-- clang/lib/Driver/ToolChains/Arch/ARM.cpp | 4 ++-- clang/test/Driver/sls-hardening-options.c | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3cc9565e6e8b27..ef08198273a168 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -428,8 +428,6 @@ def err_invalid_branch_protection: Error < "invalid branch protection option '%0' in '%1'">; def warn_unsupported_branch_protection: Warning < "invalid branch protection option '%0' in '%1'">, InGroup; -def err_invalid_sls_hardening : Error< - "invalid sls hardening option '%0' in '%1'">; def err_sls_hardening_arm_not_supported : Error< "-mharden-sls is only supported on armv7-a or later">; diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index d82198ec678bf9..b17743b728d923 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -324,8 +324,8 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, DisableComdat = true; continue; } - D.Diag(diag::err_invalid_sls_hardening) - << Scope << A->getAsString(Args); + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Scope; break; } } diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 68e8dc2e5f052f..dc6b35e39cfdd7 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -868,8 +868,8 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple, DisableComdat = true; continue; } - D.Diag(diag::err_invalid_sls_hardening) - << Scope 
<< A->getAsString(Args); + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Scope; break; } } diff --git a/clang/test/Driver/sls-hardening-options.c b/clang/test/Driver/sls-hardening-options.c index cb35d8b5f99d05..4deb74e66f1de2 100644 --- a/clang/test/Driver/sls-hardening-options.c +++ b/clang/test/Driver/sls-hardening-options.c @@ -99,7 +99,7 @@ // NOCOMDAT-OFF-NOT: "harden-sls-nocomdat" // NOCOMDAT: "+harden-sls-nocomdat" -// BAD-SLS-SPEC: invalid sls hardening option '{{[^']+}}' in '-mharden-sls= +// BAD-SLS-SPEC: unsupported argument '{{[^']+}}' to option '-mharden-sls=' // RUN: %clang -target armv6a--none-eabi -c %s -### -mharden-sls=all 2>&1 | \ // RUN: FileCheck %s --check-prefix=SLS-NOT-SUPPORTED From 3b4500014a481f0de300f42e1e59a8137d136ed1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 27 May 2022 22:28:39 -0700 Subject: [PATCH 753/908] [Driver] Replace err_invalid_branch_protection with err_drv_unsupported_option_argument The convention is to use err_drv_unsupported_option_argument instead of adding a new diagnostic for every option. 
--- clang/include/clang/Basic/DiagnosticDriverKinds.td | 2 -- clang/lib/Driver/ToolChains/Clang.cpp | 11 +++++------ clang/test/Driver/aarch64-security-options.c | 8 ++++---- clang/test/Driver/arm-security-options.c | 6 +++--- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index ef08198273a168..9f4864a33a33b4 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -424,8 +424,6 @@ def warn_ignoring_verify_debuginfo_preserve_export : Warning< "ignoring -fverify-debuginfo-preserve-export=%0 because " "-fverify-debuginfo-preserve wasn't enabled">, InGroup; -def err_invalid_branch_protection: Error < - "invalid branch protection option '%0' in '%1'">; def warn_unsupported_branch_protection: Warning < "invalid branch protection option '%0' in '%1'">, InGroup; def err_sls_hardening_arm_not_supported : Error< diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 367001ddd2d393..936682e49299e6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1693,18 +1693,17 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, if (A->getOption().matches(options::OPT_msign_return_address_EQ)) { Scope = A->getValue(); - if (!Scope.equals("none") && !Scope.equals("non-leaf") && - !Scope.equals("all")) - D.Diag(diag::err_invalid_branch_protection) - << Scope << A->getAsString(Args); + if (Scope != "none" && Scope != "non-leaf" && Scope != "all") + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Scope; Key = "a_key"; IndirectBranches = false; } else { StringRef DiagMsg; llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(A->getValue(), PBP, DiagMsg)) - D.Diag(diag::err_invalid_branch_protection) - << DiagMsg << 
A->getAsString(Args); + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << DiagMsg; if (!isAArch64 && PBP.Key == "b_key") D.Diag(diag::warn_unsupported_branch_protection) << "b-key" << A->getAsString(Args); diff --git a/clang/test/Driver/aarch64-security-options.c b/clang/test/Driver/aarch64-security-options.c index b4bb57e71aa795..2044dbd732ed9a 100644 --- a/clang/test/Driver/aarch64-security-options.c +++ b/clang/test/Driver/aarch64-security-options.c @@ -44,11 +44,11 @@ // CONFLICT: "-msign-return-address=none" -// BAD-RA-PROTECTION: invalid branch protection option 'foo' in '-msign-return-address={{.*}}' -// BAD-BP-PROTECTION: invalid branch protection option 'bar' in '-mbranch-protection={{.*}}' +// BAD-RA-PROTECTION: unsupported argument 'foo' to option '-msign-return-address=' +// BAD-BP-PROTECTION: unsupported argument 'bar' to option '-mbranch-protection=' -// BAD-B-KEY-COMBINATION: invalid branch protection option 'b-key' in '-mbranch-protection={{.*}}' -// BAD-LEAF-COMBINATION: invalid branch protection option 'leaf' in '-mbranch-protection={{.*}}' +// BAD-B-KEY-COMBINATION: unsupported argument 'b-key' to option '-mbranch-protection=' +// BAD-LEAF-COMBINATION: unsupported argument 'leaf' to option '-mbranch-protection=' // Check that the linker driver doesn't warn about -mbranch-protection=standard // as an unused option. 
diff --git a/clang/test/Driver/arm-security-options.c b/clang/test/Driver/arm-security-options.c index fc1b5da78d2cf6..b181f193707152 100644 --- a/clang/test/Driver/arm-security-options.c +++ b/clang/test/Driver/arm-security-options.c @@ -85,9 +85,9 @@ // BTE-OFF-NOT: "-mbranch-target-enforce" // BTE-ON: "-mbranch-target-enforce" -// BAD-BP-PROTECTION: invalid branch protection option 'bar' in '-mbranch-protection={{.*}}' +// BAD-BP-PROTECTION: unsupported argument 'bar' to option '-mbranch-protection=' -// BAD-B-KEY-COMBINATION: invalid branch protection option 'b-key' in '-mbranch-protection={{.*}}' -// BAD-LEAF-COMBINATION: invalid branch protection option 'leaf' in '-mbranch-protection={{.*}}' +// BAD-B-KEY-COMBINATION: unsupported argument 'b-key' to option '-mbranch-protection=' +// BAD-LEAF-COMBINATION: unsupported argument 'leaf' to option '-mbranch-protection=' // INCOMPATIBLE-ARCH: '-mbranch-protection=' option is incompatible with the '{{.*}}' architecture From d3d3e2528e60c93000c6d7f90053d1672cc7b47d Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Fri, 27 May 2022 17:42:49 +0900 Subject: [PATCH 754/908] [CompilerInstance] Fix weird condition on `createCodeCompletionConsumer` Fixes llvm#53545 Differential Revision: https://reviews.llvm.org/D126524 --- clang/lib/Frontend/CompilerInstance.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index bb896449b621f8..0ec7964bc59939 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -710,13 +710,10 @@ static bool EnableCodeCompletion(Preprocessor &PP, void CompilerInstance::createCodeCompletionConsumer() { const ParsedSourceLocation &Loc = getFrontendOpts().CodeCompletionAt; if (!CompletionConsumer) { - setCodeCompletionConsumer( - createCodeCompletionConsumer(getPreprocessor(), - Loc.FileName, Loc.Line, Loc.Column, - 
getFrontendOpts().CodeCompleteOpts, - llvm::outs())); - if (!CompletionConsumer) - return; + setCodeCompletionConsumer(createCodeCompletionConsumer( + getPreprocessor(), Loc.FileName, Loc.Line, Loc.Column, + getFrontendOpts().CodeCompleteOpts, llvm::outs())); + return; } else if (EnableCodeCompletion(getPreprocessor(), Loc.FileName, Loc.Line, Loc.Column)) { setCodeCompletionConsumer(nullptr); From fad6e37995b461a7750bdc203aad37eca9532fd5 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Fri, 27 May 2022 23:59:30 -0700 Subject: [PATCH 755/908] [Lex] Fix crash during dependency scanning while skipping an unmatched `#if` --- clang/lib/Lex/Lexer.cpp | 1 + .../ClangScanDeps/skipping-unmatched-if.c | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 clang/test/ClangScanDeps/skipping-unmatched-if.c diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index a0a7a6ae789b4b..d4601261b58bc6 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -4223,6 +4223,7 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { } break; case pp_eof: + NextDepDirectiveTokenIndex = 0; return LexEndOfFile(Result, BufferEnd); } } while (!Stop); diff --git a/clang/test/ClangScanDeps/skipping-unmatched-if.c b/clang/test/ClangScanDeps/skipping-unmatched-if.c new file mode 100644 index 00000000000000..fec7857d6bfde2 --- /dev/null +++ b/clang/test/ClangScanDeps/skipping-unmatched-if.c @@ -0,0 +1,27 @@ +// Check dependency scanning when skipping an unmatched #if + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: not clang-scan-deps -compilation-database %t/cdb.json 2>&1 | FileCheck %s +// CHECK: header1.h:1:2: error: unterminated conditional directive + +//--- cdb.json.template +[{ + "directory" : "DIR", + "command" : "clang -target x86_64-apple-macosx10.7 -c DIR/test.cpp -o DIR/test.o", + "file" : "DIR/test.o" +}] + +//--- test.cpp +#include "header1.h" 
+#include "header2.h" + +//--- header1.h +#if 0 + +//--- header2.h +#ifndef _HEADER2_H_ +#define _HEADER2_H_ +#endif From 34f73804ed60e90276e2303398162c2fc05f47ec Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sat, 21 May 2022 00:45:51 +0200 Subject: [PATCH 756/908] [libc++] Remove unused __functional includes Reviewed By: ldionne, #libc Spies: arichardson, smeenai, libcxx-commits, arphaman Differential Revision: https://reviews.llvm.org/D126098 --- libcxx/include/__algorithm/sort.h | 1 + libcxx/include/__format/format_arg.h | 2 ++ libcxx/include/experimental/coroutine | 1 + libcxx/include/ext/hash_map | 1 + libcxx/include/ext/hash_set | 1 + libcxx/include/iterator | 6 ------ libcxx/include/map | 2 ++ libcxx/include/memory | 8 -------- libcxx/include/queue | 1 + libcxx/include/set | 1 + libcxx/include/string | 7 +------ libcxx/include/string_view | 1 + libcxx/include/tuple | 6 ------ libcxx/include/typeindex | 5 ----- libcxx/include/unordered_map | 1 + libcxx/include/unordered_set | 1 + libcxx/include/variant | 4 ---- libcxx/include/vector | 7 +------ libcxx/test/libcxx/algorithms/debug_less.pass.cpp | 3 ++- libcxx/test/libcxx/algorithms/sort_stability.pass.cpp | 1 + .../alg.nonmodifying/alg.foreach/for_each_n.pass.cpp | 1 + .../concept.invocable/invocable.compile.pass.cpp | 1 + .../regular_invocable.compile.pass.cpp | 1 + .../forwardlist/forwardlist.ops/remove_if.pass.cpp | 1 + .../forwardlist/forwardlist.ops/unique_pred.pass.cpp | 3 ++- .../sequences/list/list.ops/unique_pred.pass.cpp | 1 + .../coroutine.handle/coroutine.handle.hash/hash.pass.cpp | 1 + .../move.iterators/move.iterator/types.pass.cpp | 1 + .../numerics/numeric.ops/reduce/reduce_init_op.pass.cpp | 1 + .../range.adaptors/range.join.view/sentinel/eq.pass.cpp | 1 + libcxx/test/std/ranges/range.adaptors/range.zip/types.h | 1 + libcxx/test/support/deduction_guides_sfinae_checks.h | 1 + 32 files changed, 31 insertions(+), 43 deletions(-) diff --git a/libcxx/include/__algorithm/sort.h 
b/libcxx/include/__algorithm/sort.h index a6ff5e3ac54ad5..b5eaf75055ff18 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -16,6 +16,7 @@ #include <__algorithm/unwrap_iter.h> #include <__bits> #include <__config> +#include <__functional/operations.h> #include <__utility/swap.h> #include #include diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index 3c61446378b533..3f2afc898d2c66 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -16,7 +16,9 @@ #include <__format/format_error.h> #include <__format/format_fwd.h> #include <__format/format_parse_context.h> +#include <__functional/invoke.h> #include <__memory/addressof.h> +#include <__utility/forward.h> #include <__utility/unreachable.h> #include <__variant/monostate.h> #include diff --git a/libcxx/include/experimental/coroutine b/libcxx/include/experimental/coroutine index 9fc9d19276fddd..111073bb6d6785 100644 --- a/libcxx/include/experimental/coroutine +++ b/libcxx/include/experimental/coroutine @@ -47,6 +47,7 @@ template struct hash>; #include <__assert> // all public C++ headers provide the assertion handler #include <__functional/hash.h> +#include <__functional/operations.h> #include #include #include // for hash diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index ccc82ca8e33647..92572c08c24cc3 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -206,6 +206,7 @@ template #include <__hash_table> #include #include +#include #include #include diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index a4aff6ece60edb..eb619394066828 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -197,6 +197,7 @@ template #include <__hash_table> #include #include +#include #if defined(__DEPRECATED) && __DEPRECATED #if defined(_LIBCPP_WARNING) diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 
e98372527cc026..30c9a10139ffbc 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -724,12 +724,6 @@ template constexpr const E* data(initializer_list il) noexcept; #include // TODO: remove these headers -#include <__functional/binary_function.h> -#include <__functional/invoke.h> -#include <__functional/operations.h> -#include <__functional/reference_wrapper.h> -#include <__functional/unary_function.h> -#include <__functional/weak_result_type.h> #include <__memory/allocator_arg_t.h> #include <__memory/uses_allocator.h> #include diff --git a/libcxx/include/map b/libcxx/include/map index c6c8e8c09c4f3b..d48f3a25646ed9 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -532,7 +532,9 @@ erase_if(multimap& c, Predicate pred); // C++20 #include <__algorithm/lexicographical_compare.h> #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__functional/binary_function.h> #include <__functional/is_transparent.h> +#include <__functional/operations.h> #include <__iterator/iterator_traits.h> #include <__node_handle> #include <__tree> diff --git a/libcxx/include/memory b/libcxx/include/memory index ffc674c2a3fbcc..1360f9da8be89f 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -872,14 +872,6 @@ template #include #include -// TODO: remove these headers -#include <__functional/binary_function.h> -#include <__functional/invoke.h> -#include <__functional/operations.h> -#include <__functional/reference_wrapper.h> -#include <__functional/unary_function.h> -#include <__functional/weak_result_type.h> - #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) # include <__memory/auto_ptr.h> #endif diff --git a/libcxx/include/queue b/libcxx/include/queue index bd3a39f49436b0..da5d63077b26ba 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -222,6 +222,7 @@ template #include <__algorithm/push_heap.h> #include <__assert> // all public C++ headers provide the 
assertion handler #include <__config> +#include <__functional/operations.h> #include <__iterator/iterator_traits.h> #include <__memory/uses_allocator.h> #include <__utility/forward.h> diff --git a/libcxx/include/set b/libcxx/include/set index 6aeddc862b1479..be7af622880dec 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -476,6 +476,7 @@ erase_if(multiset& c, Predicate pred); // C++20 #include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__functional/is_transparent.h> +#include <__functional/operations.h> #include <__iterator/iterator_traits.h> #include <__node_handle> #include <__tree> diff --git a/libcxx/include/string b/libcxx/include/string index 1ccf4862bb50f7..e3fe06b50d2b99 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -524,6 +524,7 @@ basic_string operator "" s( const char32_t *str, size_t len ); #include <__config> #include <__debug> #include <__format/enable_insertable.h> +#include <__functional/unary_function.h> #include <__ios/fpos.h> #include <__iterator/wrap_iter.h> #include <__memory/allocate_at_least.h> @@ -547,12 +548,6 @@ basic_string operator "" s( const char32_t *str, size_t len ); #include // TODO: remove these headers -#include <__functional/binary_function.h> -#include <__functional/invoke.h> -#include <__functional/operations.h> -#include <__functional/reference_wrapper.h> -#include <__functional/unary_function.h> -#include <__functional/weak_result_type.h> #include #include diff --git a/libcxx/include/string_view b/libcxx/include/string_view index bd31b8739d4820..776baf291202d4 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -199,6 +199,7 @@ namespace std { #include <__algorithm/min.h> #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__functional/unary_function.h> #include <__fwd/string_view.h> #include <__ranges/concepts.h> #include <__ranges/data.h> diff --git 
a/libcxx/include/tuple b/libcxx/include/tuple index 76297dd76e5b25..cc08d8c2af3264 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -185,12 +185,6 @@ template #include // TODO: remove these headers -#include <__functional/binary_function.h> -#include <__functional/invoke.h> -#include <__functional/operations.h> -#include <__functional/reference_wrapper.h> -#include <__functional/unary_function.h> -#include <__functional/weak_result_type.h> #include #include #include diff --git a/libcxx/include/typeindex b/libcxx/include/typeindex index 0b61f28b96eba6..8da8f58c53b5a5 100644 --- a/libcxx/include/typeindex +++ b/libcxx/include/typeindex @@ -52,11 +52,6 @@ struct hash #include // TODO: remove these headers -#include <__functional/binary_function.h> -#include <__functional/invoke.h> -#include <__functional/operations.h> -#include <__functional/reference_wrapper.h> -#include <__functional/weak_result_type.h> #include <__memory/allocator_arg_t.h> #include <__memory/uses_allocator.h> #include diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index e5741b054660d6..16b843b944aef5 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -519,6 +519,7 @@ template #include <__config> #include <__debug> #include <__functional/is_transparent.h> +#include <__functional/operations.h> #include <__hash_table> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index de0e12197d16a1..890475f36f7a86 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -464,6 +464,7 @@ template #include <__config> #include <__debug> #include <__functional/is_transparent.h> +#include <__functional/operations.h> #include <__hash_table> #include <__memory/addressof.h> #include <__node_handle> diff --git a/libcxx/include/variant b/libcxx/include/variant index b1800ed63b965e..16fcede8756ffa 100644 --- a/libcxx/include/variant +++ 
b/libcxx/include/variant @@ -221,10 +221,6 @@ namespace std { #include // TODO: remove these headers -#include <__functional/binary_function.h> -#include <__functional/invoke.h> -#include <__functional/reference_wrapper.h> -#include <__functional/weak_result_type.h> #include <__memory/allocator_arg_t.h> #include <__memory/uses_allocator.h> #include diff --git a/libcxx/include/vector b/libcxx/include/vector index 5238ac58481db9..d327e2827938e0 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -285,6 +285,7 @@ erase_if(vector& c, Predicate pred); // C++20 #include <__debug> #include <__format/enable_insertable.h> #include <__functional/hash.h> +#include <__functional/unary_function.h> #include <__iterator/iterator_traits.h> #include <__iterator/wrap_iter.h> #include <__memory/allocate_at_least.h> @@ -305,12 +306,6 @@ erase_if(vector& c, Predicate pred); // C++20 #include // TODO: remove these headers -#include <__functional/binary_function.h> -#include <__functional/invoke.h> -#include <__functional/operations.h> -#include <__functional/reference_wrapper.h> -#include <__functional/unary_function.h> -#include <__functional/weak_result_type.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/test/libcxx/algorithms/debug_less.pass.cpp b/libcxx/test/libcxx/algorithms/debug_less.pass.cpp index 05ba0f19643593..6caad3ad1176ba 100644 --- a/libcxx/test/libcxx/algorithms/debug_less.pass.cpp +++ b/libcxx/test/libcxx/algorithms/debug_less.pass.cpp @@ -17,8 +17,9 @@ // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG=1 #include -#include #include +#include +#include #include "test_macros.h" #include "check_assertion.h" diff --git a/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp b/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp index 54563a924c24d3..8b3ec2b6865244 100644 --- a/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp +++ b/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp @@ -16,6 +16,7 @@ #include #include 
#include +#include #include #include diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp index 45ec2f4258812b..6eb5c247a211e0 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" diff --git a/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp index f5a00f86d9ac82..9b9bac27174e0f 100644 --- a/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp +++ b/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp index 4d546054e6d9af..bfd20751861d12 100644 --- a/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp +++ b/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp index 589ae66f4364d5..7142c8afe77eb5 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp @@ -15,6 +15,7 @@ #include 
#include #include +#include #include "test_macros.h" #include "min_allocator.h" diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp index ce0e1efa3cf665..bfadd6febc017e 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp @@ -11,9 +11,10 @@ // template void unique(BinaryPredicate binary_pred); // C++17 and before // template size_type unique(BinaryPredicate binary_pred); // C++20 and after +#include #include +#include #include -#include #include "test_macros.h" #include "min_allocator.h" diff --git a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp index 75f8c7b12571c4..8139ddd36c192e 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp @@ -13,6 +13,7 @@ #include #include +#include #include "test_macros.h" #include "min_allocator.h" diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp index 06ada6a545766d..0fd56b3c754482 100644 --- a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp +++ b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "test_macros.h" diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp 
b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp index f262b4d29eebe4..5676564a8446f3 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/types.pass.cpp @@ -24,6 +24,7 @@ // using reference = iter_rvalue_reference_t; // Until C++20, value_type&& // }; +#include #include #include diff --git a/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp index a51519ed71db37..347d3755a07cf0 100644 --- a/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" diff --git a/libcxx/test/std/ranges/range.adaptors/range.join.view/sentinel/eq.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.join.view/sentinel/eq.pass.cpp index 7f1ce0b45250c6..c85f52e92adc33 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.join.view/sentinel/eq.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.join.view/sentinel/eq.pass.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include "../types.h" diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/types.h b/libcxx/test/std/ranges/range.adaptors/range.zip/types.h index 662b87577675d4..47c88e9787feab 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/types.h @@ -9,6 +9,7 @@ #ifndef TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_ZIP_TYPES_H #define TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_ZIP_TYPES_H +#include #include #include "test_macros.h" diff --git a/libcxx/test/support/deduction_guides_sfinae_checks.h b/libcxx/test/support/deduction_guides_sfinae_checks.h index f1f31c8e8c3848..a4910a1e8ad634 100644 --- 
a/libcxx/test/support/deduction_guides_sfinae_checks.h +++ b/libcxx/test/support/deduction_guides_sfinae_checks.h @@ -9,6 +9,7 @@ #ifndef TEST_SUPPORT_DEDUCTION_GUIDES_SFINAE_CHECKS_H #define TEST_SUPPORT_DEDUCTION_GUIDES_SFINAE_CHECKS_H +#include #include #include #include From 30c37fb89cb7d6cc02e92a7e81abcec5a1a4a38a Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 26 May 2022 11:49:01 +0200 Subject: [PATCH 757/908] [libc++] Granularize more of Reviewed By: ldionne, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D126244 --- libcxx/include/CMakeLists.txt | 28 + libcxx/include/__type_traits/add_const.h | 30 + libcxx/include/__type_traits/add_cv.h | 30 + .../__type_traits/add_lvalue_reference.h | 33 + .../__type_traits/add_rvalue_reference.h | 33 + libcxx/include/__type_traits/add_volatile.h | 30 + libcxx/include/__type_traits/extent.h | 55 ++ libcxx/include/__type_traits/is_abstract.h | 31 + libcxx/include/__type_traits/is_aggregate.h | 33 + libcxx/include/__type_traits/is_arithmetic.h | 34 + .../include/__type_traits/is_bounded_array.h | 35 + libcxx/include/__type_traits/is_class.h | 48 ++ libcxx/include/__type_traits/is_compound.h | 47 ++ libcxx/include/__type_traits/is_empty.h | 57 ++ libcxx/include/__type_traits/is_enum.h | 55 ++ libcxx/include/__type_traits/is_final.h | 36 ++ libcxx/include/__type_traits/is_fundamental.h | 53 ++ .../include/__type_traits/is_member_pointer.h | 45 ++ libcxx/include/__type_traits/is_object.h | 52 ++ libcxx/include/__type_traits/is_pointer.h | 59 ++ libcxx/include/__type_traits/is_scalar.h | 62 ++ libcxx/include/__type_traits/is_signed.h | 55 ++ .../__type_traits/is_unbounded_array.h | 34 + libcxx/include/__type_traits/is_union.h | 42 ++ libcxx/include/__type_traits/is_unsigned.h | 59 ++ libcxx/include/__type_traits/rank.h | 36 ++ .../__type_traits/remove_all_extents.h | 34 + libcxx/include/__type_traits/remove_pointer.h | 32 + libcxx/include/__type_traits/type_identity.h | 33 + 
libcxx/include/module.modulemap | 28 + libcxx/include/type_traits | 600 +----------------- libcxx/test/libcxx/private_headers.verify.cpp | 28 + 32 files changed, 1295 insertions(+), 572 deletions(-) create mode 100644 libcxx/include/__type_traits/add_const.h create mode 100644 libcxx/include/__type_traits/add_cv.h create mode 100644 libcxx/include/__type_traits/add_lvalue_reference.h create mode 100644 libcxx/include/__type_traits/add_rvalue_reference.h create mode 100644 libcxx/include/__type_traits/add_volatile.h create mode 100644 libcxx/include/__type_traits/extent.h create mode 100644 libcxx/include/__type_traits/is_abstract.h create mode 100644 libcxx/include/__type_traits/is_aggregate.h create mode 100644 libcxx/include/__type_traits/is_arithmetic.h create mode 100644 libcxx/include/__type_traits/is_bounded_array.h create mode 100644 libcxx/include/__type_traits/is_class.h create mode 100644 libcxx/include/__type_traits/is_compound.h create mode 100644 libcxx/include/__type_traits/is_empty.h create mode 100644 libcxx/include/__type_traits/is_enum.h create mode 100644 libcxx/include/__type_traits/is_final.h create mode 100644 libcxx/include/__type_traits/is_fundamental.h create mode 100644 libcxx/include/__type_traits/is_member_pointer.h create mode 100644 libcxx/include/__type_traits/is_object.h create mode 100644 libcxx/include/__type_traits/is_pointer.h create mode 100644 libcxx/include/__type_traits/is_scalar.h create mode 100644 libcxx/include/__type_traits/is_signed.h create mode 100644 libcxx/include/__type_traits/is_unbounded_array.h create mode 100644 libcxx/include/__type_traits/is_union.h create mode 100644 libcxx/include/__type_traits/is_unsigned.h create mode 100644 libcxx/include/__type_traits/rank.h create mode 100644 libcxx/include/__type_traits/remove_all_extents.h create mode 100644 libcxx/include/__type_traits/remove_pointer.h create mode 100644 libcxx/include/__type_traits/type_identity.h diff --git a/libcxx/include/CMakeLists.txt 
b/libcxx/include/CMakeLists.txt index f1ff3111f6ee8a..bf1c41e05c2b56 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -440,33 +440,61 @@ set(files __threading_support __tree __tuple + __type_traits/add_const.h + __type_traits/add_cv.h + __type_traits/add_lvalue_reference.h __type_traits/add_pointer.h + __type_traits/add_rvalue_reference.h + __type_traits/add_volatile.h __type_traits/conditional.h __type_traits/decay.h __type_traits/enable_if.h + __type_traits/extent.h __type_traits/integral_constant.h + __type_traits/is_abstract.h + __type_traits/is_aggregate.h + __type_traits/is_arithmetic.h __type_traits/is_array.h __type_traits/is_base_of.h + __type_traits/is_bounded_array.h __type_traits/is_callable.h + __type_traits/is_class.h + __type_traits/is_compound.h __type_traits/is_const.h __type_traits/is_convertible.h + __type_traits/is_empty.h + __type_traits/is_enum.h + __type_traits/is_final.h __type_traits/is_floating_point.h __type_traits/is_function.h + __type_traits/is_fundamental.h __type_traits/is_integral.h __type_traits/is_member_function_pointer.h __type_traits/is_member_object_pointer.h + __type_traits/is_member_pointer.h __type_traits/is_null_pointer.h + __type_traits/is_object.h + __type_traits/is_pointer.h __type_traits/is_reference.h __type_traits/is_reference_wrapper.h __type_traits/is_referenceable.h __type_traits/is_same.h + __type_traits/is_scalar.h + __type_traits/is_signed.h + __type_traits/is_unbounded_array.h + __type_traits/is_union.h + __type_traits/is_unsigned.h __type_traits/is_void.h __type_traits/is_volatile.h + __type_traits/rank.h + __type_traits/remove_all_extents.h __type_traits/remove_const.h __type_traits/remove_cv.h __type_traits/remove_extent.h + __type_traits/remove_pointer.h __type_traits/remove_reference.h __type_traits/remove_volatile.h + __type_traits/type_identity.h __undef_macros __utility/as_const.h __utility/auto_cast.h diff --git a/libcxx/include/__type_traits/add_const.h 
b/libcxx/include/__type_traits/add_const.h new file mode 100644 index 00000000000000..c616a13f840800 --- /dev/null +++ b/libcxx/include/__type_traits/add_const.h @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_ADD_CONST_H +#define _LIBCPP___TYPE_TRAITS_ADD_CONST_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS add_const { + typedef _LIBCPP_NODEBUG const _Tp type; +}; + +#if _LIBCPP_STD_VER > 11 +template using add_const_t = typename add_const<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_ADD_CONST_H diff --git a/libcxx/include/__type_traits/add_cv.h b/libcxx/include/__type_traits/add_cv.h new file mode 100644 index 00000000000000..8a85353f16c047 --- /dev/null +++ b/libcxx/include/__type_traits/add_cv.h @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_ADD_CV_H +#define _LIBCPP___TYPE_TRAITS_ADD_CV_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS add_cv { + typedef _LIBCPP_NODEBUG const volatile _Tp type; +}; + +#if _LIBCPP_STD_VER > 11 +template using add_cv_t = typename add_cv<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_ADD_CV_H diff --git a/libcxx/include/__type_traits/add_lvalue_reference.h b/libcxx/include/__type_traits/add_lvalue_reference.h new file mode 100644 index 00000000000000..0d1ee4185c8cdb --- /dev/null +++ b/libcxx/include/__type_traits/add_lvalue_reference.h @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_ADD_LVALUE_REFERENCE_H +#define _LIBCPP___TYPE_TRAITS_ADD_LVALUE_REFERENCE_H + +#include <__config> +#include <__type_traits/is_referenceable.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template ::value> struct __add_lvalue_reference_impl { typedef _LIBCPP_NODEBUG _Tp type; }; +template struct __add_lvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG _Tp& type; }; + +template struct _LIBCPP_TEMPLATE_VIS add_lvalue_reference +{typedef _LIBCPP_NODEBUG typename __add_lvalue_reference_impl<_Tp>::type type;}; + +#if _LIBCPP_STD_VER > 11 +template using add_lvalue_reference_t = typename add_lvalue_reference<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_ADD_LVALUE_REFERENCE_H diff --git a/libcxx/include/__type_traits/add_rvalue_reference.h b/libcxx/include/__type_traits/add_rvalue_reference.h new file mode 100644 index 00000000000000..b2a1428095d89a --- /dev/null +++ b/libcxx/include/__type_traits/add_rvalue_reference.h @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_ADD_RVALUE_REFERENCE_H +#define _LIBCPP___TYPE_TRAITS_ADD_RVALUE_REFERENCE_H + +#include <__config> +#include <__type_traits/is_referenceable.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template ::value> struct __add_rvalue_reference_impl { typedef _LIBCPP_NODEBUG _Tp type; }; +template struct __add_rvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG _Tp&& type; }; + +template struct _LIBCPP_TEMPLATE_VIS add_rvalue_reference +{typedef _LIBCPP_NODEBUG typename __add_rvalue_reference_impl<_Tp>::type type;}; + +#if _LIBCPP_STD_VER > 11 +template using add_rvalue_reference_t = typename add_rvalue_reference<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_ADD_RVALUE_REFERENCE_H diff --git a/libcxx/include/__type_traits/add_volatile.h b/libcxx/include/__type_traits/add_volatile.h new file mode 100644 index 00000000000000..73b1cb8418fe16 --- /dev/null +++ b/libcxx/include/__type_traits/add_volatile.h @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_ADD_VOLATILE_H +#define _LIBCPP___TYPE_TRAITS_ADD_VOLATILE_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS add_volatile { + typedef _LIBCPP_NODEBUG volatile _Tp type; +}; + +#if _LIBCPP_STD_VER > 11 +template using add_volatile_t = typename add_volatile<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_ADD_VOLATILE_H diff --git a/libcxx/include/__type_traits/extent.h b/libcxx/include/__type_traits/extent.h new file mode 100644 index 00000000000000..935ec4937c0ae8 --- /dev/null +++ b/libcxx/include/__type_traits/extent.h @@ -0,0 +1,55 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_EXTENT_H +#define _LIBCPP___TYPE_TRAITS_EXTENT_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__array_extent) + +template +struct _LIBCPP_TEMPLATE_VIS extent + : integral_constant { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr size_t extent_v = __array_extent(_Tp, _Ip); +#endif + +#else // __has_keyword(__array_extent) + +template struct _LIBCPP_TEMPLATE_VIS extent + : public integral_constant {}; +template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[], 0> + : public integral_constant {}; +template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[], _Ip> + : public integral_constant::value> {}; +template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[_Np], 0> + : public integral_constant {}; +template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[_Np], _Ip> + : public integral_constant::value> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr size_t extent_v = extent<_Tp, _Ip>::value; +#endif + +#endif // __has_keyword(__array_extent) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_EXTENT_H diff --git a/libcxx/include/__type_traits/is_abstract.h b/libcxx/include/__type_traits/is_abstract.h new file mode 100644 index 00000000000000..0480118b37616a --- /dev/null +++ b/libcxx/include/__type_traits/is_abstract.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_ABSTRACT_H +#define _LIBCPP___TYPE_TRAITS_IS_ABSTRACT_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS is_abstract + : public integral_constant {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_abstract_v = is_abstract<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_ABSTRACT_H diff --git a/libcxx/include/__type_traits/is_aggregate.h b/libcxx/include/__type_traits/is_aggregate.h new file mode 100644 index 00000000000000..f32aec8ac543b8 --- /dev/null +++ b/libcxx/include/__type_traits/is_aggregate.h @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_AGGREGATE_H +#define _LIBCPP___TYPE_TRAITS_IS_AGGREGATE_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +template struct _LIBCPP_TEMPLATE_VIS +is_aggregate : public integral_constant {}; + +template +inline constexpr bool is_aggregate_v = is_aggregate<_Tp>::value; + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_AGGREGATE_H diff --git a/libcxx/include/__type_traits/is_arithmetic.h b/libcxx/include/__type_traits/is_arithmetic.h new file mode 100644 index 00000000000000..6d631f41c7d4a5 --- /dev/null +++ b/libcxx/include/__type_traits/is_arithmetic.h @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_ARITHMETIC_H +#define _LIBCPP___TYPE_TRAITS_IS_ARITHMETIC_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_floating_point.h> +#include <__type_traits/is_integral.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS is_arithmetic + : public integral_constant::value || + is_floating_point<_Tp>::value> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_ARITHMETIC_H diff --git a/libcxx/include/__type_traits/is_bounded_array.h b/libcxx/include/__type_traits/is_bounded_array.h new file mode 100644 index 00000000000000..f6e800d723bd10 --- /dev/null +++ b/libcxx/include/__type_traits/is_bounded_array.h @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_BOUNDED_ARRAY_H +#define _LIBCPP___TYPE_TRAITS_IS_BOUNDED_ARRAY_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +template struct _LIBCPP_TEMPLATE_VIS is_bounded_array : false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_bounded_array<_Tp[_Np]> : true_type {}; + +template +inline constexpr +bool is_bounded_array_v = is_bounded_array<_Tp>::value; + +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_BOUNDED_ARRAY_H diff --git a/libcxx/include/__type_traits/is_class.h b/libcxx/include/__type_traits/is_class.h new file mode 100644 index 00000000000000..523dc493e8e7fd --- /dev/null +++ b/libcxx/include/__type_traits/is_class.h @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_CLASS_H +#define _LIBCPP___TYPE_TRAITS_IS_CLASS_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_union.h> +#include <__type_traits/remove_cv.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_feature(is_class) || defined(_LIBCPP_COMPILER_GCC) + +template struct _LIBCPP_TEMPLATE_VIS is_class + : public integral_constant {}; + +#else + +namespace __is_class_imp +{ +template char __test(int _Tp::*); +template __two __test(...); +} + +template struct _LIBCPP_TEMPLATE_VIS is_class + : public integral_constant(0)) == 1 && !is_union<_Tp>::value> {}; + +#endif + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_class_v = is_class<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_CLASS_H diff --git a/libcxx/include/__type_traits/is_compound.h b/libcxx/include/__type_traits/is_compound.h new file mode 100644 index 00000000000000..f055155b9f54f4 --- /dev/null +++ b/libcxx/include/__type_traits/is_compound.h @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_COMPOUND_H +#define _LIBCPP___TYPE_TRAITS_IS_COMPOUND_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_fundamental.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// >= 11 because in C++03 nullptr isn't actually nullptr +#if __has_keyword(__is_compound) && !defined(_LIBCPP_CXX03_LANG) + +template +struct _LIBCPP_TEMPLATE_VIS is_compound : _BoolConstant<__is_compound(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_compound_v = __is_compound(_Tp); +#endif + +#else // __has_keyword(__is_compound) + +template struct _LIBCPP_TEMPLATE_VIS is_compound + : public integral_constant::value> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_compound_v = is_compound<_Tp>::value; +#endif + +#endif // __has_keyword(__is_compound) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_COMPOUND_H diff --git a/libcxx/include/__type_traits/is_empty.h b/libcxx/include/__type_traits/is_empty.h new file mode 100644 index 00000000000000..e81c6f7e2fa400 --- /dev/null +++ b/libcxx/include/__type_traits/is_empty.h @@ -0,0 +1,57 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_EMPTY_H +#define _LIBCPP___TYPE_TRAITS_IS_EMPTY_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_feature(is_empty) || defined(_LIBCPP_COMPILER_GCC) + +template +struct _LIBCPP_TEMPLATE_VIS is_empty + : public integral_constant {}; + +#else // __has_feature(is_empty) + +template +struct __is_empty1 + : public _Tp +{ + double __lx; +}; + +struct __is_empty2 +{ + double __lx; +}; + +template ::value> +struct __libcpp_empty : public integral_constant) == sizeof(__is_empty2)> {}; + +template struct __libcpp_empty<_Tp, false> : public false_type {}; + +template struct _LIBCPP_TEMPLATE_VIS is_empty : public __libcpp_empty<_Tp> {}; + +#endif // __has_feature(is_empty) + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_empty_v = is_empty<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_EMPTY_H diff --git a/libcxx/include/__type_traits/is_enum.h b/libcxx/include/__type_traits/is_enum.h new file mode 100644 index 00000000000000..ac160da84e9919 --- /dev/null +++ b/libcxx/include/__type_traits/is_enum.h @@ -0,0 +1,55 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_ENUM_H +#define _LIBCPP___TYPE_TRAITS_IS_ENUM_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/remove_cv.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_feature(is_enum) || defined(_LIBCPP_COMPILER_GCC) + +template struct _LIBCPP_TEMPLATE_VIS is_enum + : public integral_constant {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_enum_v = __is_enum(_Tp); +#endif + +#else + +template struct _LIBCPP_TEMPLATE_VIS is_enum + : public integral_constant::value && + !is_integral<_Tp>::value && + !is_floating_point<_Tp>::value && + !is_array<_Tp>::value && + !is_pointer<_Tp>::value && + !is_reference<_Tp>::value && + !is_member_pointer<_Tp>::value && + !is_union<_Tp>::value && + !is_class<_Tp>::value && + !is_function<_Tp>::value > {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_enum_v = is_enum<_Tp>::value; +#endif + +#endif // __has_feature(is_enum) || defined(_LIBCPP_COMPILER_GCC) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_ENUM_H diff --git a/libcxx/include/__type_traits/is_final.h b/libcxx/include/__type_traits/is_final.h new file mode 100644 index 00000000000000..f2d9b5b3c8f49c --- /dev/null +++ b/libcxx/include/__type_traits/is_final.h @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_FINAL_H +#define _LIBCPP___TYPE_TRAITS_IS_FINAL_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS +__libcpp_is_final : public integral_constant {}; + +#if _LIBCPP_STD_VER > 11 +template struct _LIBCPP_TEMPLATE_VIS +is_final : public integral_constant {}; +#endif + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_final_v = is_final<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_FINAL_H diff --git a/libcxx/include/__type_traits/is_fundamental.h b/libcxx/include/__type_traits/is_fundamental.h new file mode 100644 index 00000000000000..859cfde39bfa40 --- /dev/null +++ b/libcxx/include/__type_traits/is_fundamental.h @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_FUNDAMENTAL_H +#define _LIBCPP___TYPE_TRAITS_IS_FUNDAMENTAL_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_null_pointer.h> +#include <__type_traits/is_void.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// Before Clang 10, __is_fundamental didn't work for nullptr_t. +// In C++03 nullptr_t is library-provided but must still count as "fundamental." 
+#if __has_keyword(__is_fundamental) && \ + !(defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1000) && \ + !defined(_LIBCPP_CXX03_LANG) + +template +struct _LIBCPP_TEMPLATE_VIS is_fundamental : _BoolConstant<__is_fundamental(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_fundamental_v = __is_fundamental(_Tp); +#endif + +#else // __has_keyword(__is_fundamental) + +template struct _LIBCPP_TEMPLATE_VIS is_fundamental + : public integral_constant::value || + __is_nullptr_t<_Tp>::value || + is_arithmetic<_Tp>::value> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value; +#endif + +#endif // __has_keyword(__is_fundamental) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_FUNDAMENTAL_H diff --git a/libcxx/include/__type_traits/is_member_pointer.h b/libcxx/include/__type_traits/is_member_pointer.h new file mode 100644 index 00000000000000..76595dfdab7f0f --- /dev/null +++ b/libcxx/include/__type_traits/is_member_pointer.h @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_MEMBER_POINTER_H +#define _LIBCPP___TYPE_TRAITS_IS_MEMBER_POINTER_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_member_pointer) + +template +struct _LIBCPP_TEMPLATE_VIS is_member_pointer : _BoolConstant<__is_member_pointer(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_member_pointer_v = __is_member_pointer(_Tp); +#endif + +#else // __has_keyword(__is_member_pointer) + +template struct _LIBCPP_TEMPLATE_VIS is_member_pointer + : public _BoolConstant< __libcpp_is_member_pointer::type>::__is_member > {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value; +#endif + +#endif // __has_keyword(__is_member_pointer) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_MEMBER_POINTER_H diff --git a/libcxx/include/__type_traits/is_object.h b/libcxx/include/__type_traits/is_object.h new file mode 100644 index 00000000000000..0d8339d19492c0 --- /dev/null +++ b/libcxx/include/__type_traits/is_object.h @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_OBJECT_H +#define _LIBCPP___TYPE_TRAITS_IS_OBJECT_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_array.h> +#include <__type_traits/is_class.h> +#include <__type_traits/is_scalar.h> +#include <__type_traits/is_union.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_object) + +template +struct _LIBCPP_TEMPLATE_VIS is_object : _BoolConstant<__is_object(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_object_v = __is_object(_Tp); +#endif + +#else // __has_keyword(__is_object) + +template struct _LIBCPP_TEMPLATE_VIS is_object + : public integral_constant::value || + is_array<_Tp>::value || + is_union<_Tp>::value || + is_class<_Tp>::value > {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_object_v = is_object<_Tp>::value; +#endif + +#endif // __has_keyword(__is_object) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_OBJECT_H diff --git a/libcxx/include/__type_traits/is_pointer.h b/libcxx/include/__type_traits/is_pointer.h new file mode 100644 index 00000000000000..ab66989ef86d6f --- /dev/null +++ b/libcxx/include/__type_traits/is_pointer.h @@ -0,0 +1,59 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_POINTER_H +#define _LIBCPP___TYPE_TRAITS_IS_POINTER_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/remove_cv.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// Before AppleClang 12.0.5, __is_pointer didn't work for Objective-C types. +#if __has_keyword(__is_pointer) && \ + !(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1205) + +template +struct _LIBCPP_TEMPLATE_VIS is_pointer : _BoolConstant<__is_pointer(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_pointer_v = __is_pointer(_Tp); +#endif + +#else // __has_keyword(__is_pointer) + +template struct __libcpp_is_pointer : public false_type {}; +template struct __libcpp_is_pointer<_Tp*> : public true_type {}; + +template struct __libcpp_remove_objc_qualifiers { typedef _Tp type; }; +#if defined(_LIBCPP_HAS_OBJC_ARC) +template struct __libcpp_remove_objc_qualifiers<_Tp __strong> { typedef _Tp type; }; +template struct __libcpp_remove_objc_qualifiers<_Tp __weak> { typedef _Tp type; }; +template struct __libcpp_remove_objc_qualifiers<_Tp __autoreleasing> { typedef _Tp type; }; +template struct __libcpp_remove_objc_qualifiers<_Tp __unsafe_unretained> { typedef _Tp type; }; +#endif + +template struct _LIBCPP_TEMPLATE_VIS is_pointer + : public __libcpp_is_pointer::type>::type> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_pointer_v = is_pointer<_Tp>::value; +#endif + +#endif // __has_keyword(__is_pointer) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_POINTER_H diff --git a/libcxx/include/__type_traits/is_scalar.h b/libcxx/include/__type_traits/is_scalar.h new file mode 100644 index 00000000000000..f745bf5e363892 --- /dev/null +++ 
b/libcxx/include/__type_traits/is_scalar.h @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_SCALAR_H +#define _LIBCPP___TYPE_TRAITS_IS_SCALAR_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_arithmetic.h> +#include <__type_traits/is_enum.h> +#include <__type_traits/is_member_pointer.h> +#include <__type_traits/is_pointer.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// In C++03 nullptr_t is library-provided but must still count as "scalar." +#if __has_keyword(__is_scalar) && !defined(_LIBCPP_CXX03_LANG) + +template +struct _LIBCPP_TEMPLATE_VIS is_scalar : _BoolConstant<__is_scalar(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_scalar_v = __is_scalar(_Tp); +#endif + +#else // __has_keyword(__is_scalar) + +template struct __is_block : false_type {}; +#if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) +template struct __is_block<_Rp (^)(_Args...)> : true_type {}; +#endif + +template struct _LIBCPP_TEMPLATE_VIS is_scalar + : public integral_constant::value || + is_member_pointer<_Tp>::value || + is_pointer<_Tp>::value || + __is_nullptr_t<_Tp>::value || + __is_block<_Tp>::value || + is_enum<_Tp>::value > {}; + +template <> struct _LIBCPP_TEMPLATE_VIS is_scalar : public true_type {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_scalar_v = is_scalar<_Tp>::value; +#endif + +#endif // __has_keyword(__is_scalar) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_SCALAR_H diff --git 
a/libcxx/include/__type_traits/is_signed.h b/libcxx/include/__type_traits/is_signed.h new file mode 100644 index 00000000000000..241d6f551a4ba5 --- /dev/null +++ b/libcxx/include/__type_traits/is_signed.h @@ -0,0 +1,55 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_SIGNED_H +#define _LIBCPP___TYPE_TRAITS_IS_SIGNED_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_keyword(__is_signed) + +template +struct _LIBCPP_TEMPLATE_VIS is_signed : _BoolConstant<__is_signed(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_signed_v = __is_signed(_Tp); +#endif + +#else // __has_keyword(__is_signed) + +template ::value> +struct __libcpp_is_signed_impl : public _BoolConstant<(_Tp(-1) < _Tp(0))> {}; + +template +struct __libcpp_is_signed_impl<_Tp, false> : public true_type {}; // floating point + +template ::value> +struct __libcpp_is_signed : public __libcpp_is_signed_impl<_Tp> {}; + +template struct __libcpp_is_signed<_Tp, false> : public false_type {}; + +template struct _LIBCPP_TEMPLATE_VIS is_signed : public __libcpp_is_signed<_Tp> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_signed_v = is_signed<_Tp>::value; +#endif + +#endif // __has_keyword(__is_signed) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_SIGNED_H diff --git a/libcxx/include/__type_traits/is_unbounded_array.h b/libcxx/include/__type_traits/is_unbounded_array.h new file mode 100644 index 00000000000000..9e857533c6fd99 --- /dev/null +++ 
b/libcxx/include/__type_traits/is_unbounded_array.h @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_UNBOUNDED_ARRAY_H +#define _LIBCPP___TYPE_TRAITS_IS_UNBOUNDED_ARRAY_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +template struct _LIBCPP_TEMPLATE_VIS is_unbounded_array : false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_unbounded_array<_Tp[]> : true_type {}; + +template +inline constexpr +bool is_unbounded_array_v = is_unbounded_array<_Tp>::value; + +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_UNBOUNDED_ARRAY_H diff --git a/libcxx/include/__type_traits/is_union.h b/libcxx/include/__type_traits/is_union.h new file mode 100644 index 00000000000000..d02931fc01d342 --- /dev/null +++ b/libcxx/include/__type_traits/is_union.h @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_UNION_H +#define _LIBCPP___TYPE_TRAITS_IS_UNION_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include <__type_traits/remove_cv.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if __has_feature(is_union) || defined(_LIBCPP_COMPILER_GCC) + +template struct _LIBCPP_TEMPLATE_VIS is_union + : public integral_constant {}; + +#else + +template struct __libcpp_union : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_union + : public __libcpp_union::type> {}; + +#endif + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_union_v = is_union<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_UNION_H diff --git a/libcxx/include/__type_traits/is_unsigned.h b/libcxx/include/__type_traits/is_unsigned.h new file mode 100644 index 00000000000000..3c25205e6417a6 --- /dev/null +++ b/libcxx/include/__type_traits/is_unsigned.h @@ -0,0 +1,59 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_IS_UNSIGNED_H +#define _LIBCPP___TYPE_TRAITS_IS_UNSIGNED_H + +#include <__config> +#include <__type_traits/integral_constant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// Before Clang 13, __is_unsigned returned true for enums with signed underlying type. 
+// No currently-released version of AppleClang contains the fixed intrinsic. +#if __has_keyword(__is_unsigned) && \ + !(defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1300) && \ + !defined(_LIBCPP_APPLE_CLANG_VER) + +template +struct _LIBCPP_TEMPLATE_VIS is_unsigned : _BoolConstant<__is_unsigned(_Tp)> { }; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_unsigned_v = __is_unsigned(_Tp); +#endif + +#else // __has_keyword(__is_unsigned) + +template ::value> +struct __libcpp_is_unsigned_impl : public _BoolConstant<(_Tp(0) < _Tp(-1))> {}; + +template +struct __libcpp_is_unsigned_impl<_Tp, false> : public false_type {}; // floating point + +template ::value> +struct __libcpp_is_unsigned : public __libcpp_is_unsigned_impl<_Tp> {}; + +template struct __libcpp_is_unsigned<_Tp, false> : public false_type {}; + +template struct _LIBCPP_TEMPLATE_VIS is_unsigned : public __libcpp_is_unsigned<_Tp> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr bool is_unsigned_v = is_unsigned<_Tp>::value; +#endif + +#endif // __has_keyword(__is_unsigned) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_IS_UNSIGNED_H diff --git a/libcxx/include/__type_traits/rank.h b/libcxx/include/__type_traits/rank.h new file mode 100644 index 00000000000000..193d3fd129a151 --- /dev/null +++ b/libcxx/include/__type_traits/rank.h @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_RANK_H +#define _LIBCPP___TYPE_TRAITS_RANK_H + +#include <__config> +#include <__type_traits/integral_constant.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS rank + : public integral_constant {}; +template struct _LIBCPP_TEMPLATE_VIS rank<_Tp[]> + : public integral_constant::value + 1> {}; +template struct _LIBCPP_TEMPLATE_VIS rank<_Tp[_Np]> + : public integral_constant::value + 1> {}; + +#if _LIBCPP_STD_VER > 14 +template +inline constexpr size_t rank_v = rank<_Tp>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_RANK_H diff --git a/libcxx/include/__type_traits/remove_all_extents.h b/libcxx/include/__type_traits/remove_all_extents.h new file mode 100644 index 00000000000000..075e3acabb4471 --- /dev/null +++ b/libcxx/include/__type_traits/remove_all_extents.h @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_REMOVE_ALL_EXTENTS_H +#define _LIBCPP___TYPE_TRAITS_REMOVE_ALL_EXTENTS_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS remove_all_extents + {typedef _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_all_extents<_Tp[]> + {typedef typename remove_all_extents<_Tp>::type type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_all_extents<_Tp[_Np]> + {typedef typename remove_all_extents<_Tp>::type type;}; + +#if _LIBCPP_STD_VER > 11 +template using remove_all_extents_t = typename remove_all_extents<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_REMOVE_ALL_EXTENTS_H diff --git a/libcxx/include/__type_traits/remove_pointer.h b/libcxx/include/__type_traits/remove_pointer.h new file mode 100644 index 00000000000000..50cde3829470b3 --- /dev/null +++ b/libcxx/include/__type_traits/remove_pointer.h @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_REMOVE_POINTER_H +#define _LIBCPP___TYPE_TRAITS_REMOVE_POINTER_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct _LIBCPP_TEMPLATE_VIS remove_pointer {typedef _LIBCPP_NODEBUG _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp*> {typedef _LIBCPP_NODEBUG _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const> {typedef _LIBCPP_NODEBUG _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* volatile> {typedef _LIBCPP_NODEBUG _Tp type;}; +template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const volatile> {typedef _LIBCPP_NODEBUG _Tp type;}; + +#if _LIBCPP_STD_VER > 11 +template using remove_pointer_t = typename remove_pointer<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_REMOVE_POINTER_H diff --git a/libcxx/include/__type_traits/type_identity.h b/libcxx/include/__type_traits/type_identity.h new file mode 100644 index 00000000000000..42e52b16725b16 --- /dev/null +++ b/libcxx/include/__type_traits/type_identity.h @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___TYPE_TRAITS_TYPE_IDENTITY_H +#define _LIBCPP___TYPE_TRAITS_TYPE_IDENTITY_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct __type_identity { typedef _Tp type; }; + +template +using __type_identity_t _LIBCPP_NODEBUG = typename __type_identity<_Tp>::type; + +#if _LIBCPP_STD_VER > 17 +template struct type_identity { typedef _Tp type; }; +template using type_identity_t = typename type_identity<_Tp>::type; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_TYPE_IDENTITY_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index d186ebe1c34576..ba1d0ffd49f6cc 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -987,33 +987,61 @@ module std [system] { export functional.__functional.unwrap_ref export * + module add_const { private header "__type_traits/add_const.h" } + module add_cv { private header "__type_traits/add_cv.h" } + module add_lvalue_reference { private header "__type_traits/add_lvalue_reference.h" } module add_pointer { private header "__type_traits/add_pointer.h" } + module add_rvalue_reference { private header "__type_traits/add_rvalue_reference.h" } + module add_volatile { private header "__type_traits/add_volatile.h" } module conditional { private header "__type_traits/conditional.h" } module decay { private header "__type_traits/decay.h" } module enable_if { private header "__type_traits/enable_if.h" } + module extent { private header "__type_traits/extent.h" } module integral_constant { private header "__type_traits/integral_constant.h" } + module is_abstract { private header "__type_traits/is_abstract.h" } + module is_aggregate { private header "__type_traits/is_aggregate.h" } + module 
is_arithmetic { private header "__type_traits/is_arithmetic.h" } module is_array { private header "__type_traits/is_array.h" } module is_base_of { private header "__type_traits/is_base_of.h" } + module is_bounded_array { private header "__type_traits/is_bounded_array.h" } module is_callable { private header "__type_traits/is_callable.h" } + module is_class { private header "__type_traits/is_class.h" } + module is_compound { private header "__type_traits/is_compound.h" } module is_const { private header "__type_traits/is_const.h" } module is_convertible { private header "__type_traits/is_convertible.h" } + module is_empty { private header "__type_traits/is_empty.h" } + module is_enum { private header "__type_traits/is_enum.h" } + module is_final { private header "__type_traits/is_final.h" } module is_floating_point { private header "__type_traits/is_floating_point.h" } module is_function { private header "__type_traits/is_function.h" } + module is_fundamental { private header "__type_traits/is_fundamental.h" } module is_integral { private header "__type_traits/is_integral.h" } module is_member_function_pointer { private header "__type_traits/is_member_function_pointer.h" } module is_member_object_pointer { private header "__type_traits/is_member_object_pointer.h" } + module is_member_pointer { private header "__type_traits/is_member_pointer.h" } module is_null_pointer { private header "__type_traits/is_null_pointer.h" } + module is_object { private header "__type_traits/is_object.h" } + module is_pointer { private header "__type_traits/is_pointer.h" } module is_reference { private header "__type_traits/is_reference.h" } module is_reference_wrapper { private header "__type_traits/is_reference_wrapper.h" } module is_referenceable { private header "__type_traits/is_referenceable.h" } module is_same { private header "__type_traits/is_same.h" } + module is_scalar { private header "__type_traits/is_scalar.h" } + module is_signed { private header 
"__type_traits/is_signed.h" } + module is_unbounded_array { private header "__type_traits/is_unbounded_array.h" } + module is_union { private header "__type_traits/is_union.h" } + module is_unsigned { private header "__type_traits/is_unsigned.h" } module is_void { private header "__type_traits/is_void.h" } module is_volatile { private header "__type_traits/is_volatile.h" } + module rank { private header "__type_traits/rank.h" } + module remove_all_extents { private header "__type_traits/remove_all_extents.h" } module remove_const { private header "__type_traits/remove_const.h" } module remove_cv { private header "__type_traits/remove_cv.h" } module remove_extent { private header "__type_traits/remove_extent.h" } + module remove_pointer { private header "__type_traits/remove_pointer.h" } module remove_reference { private header "__type_traits/remove_reference.h" } module remove_volatile { private header "__type_traits/remove_volatile.h" } + module type_identity { private header "__type_traits/type_identity.h" } } module typeindex { header "typeindex" diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 5908850cba5edd..852aed161f086b 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -418,33 +418,61 @@ namespace std */ #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__type_traits/add_const.h> +#include <__type_traits/add_cv.h> +#include <__type_traits/add_lvalue_reference.h> #include <__type_traits/add_pointer.h> +#include <__type_traits/add_rvalue_reference.h> +#include <__type_traits/add_volatile.h> #include <__type_traits/conditional.h> #include <__type_traits/decay.h> #include <__type_traits/enable_if.h> +#include <__type_traits/extent.h> #include <__type_traits/integral_constant.h> +#include <__type_traits/is_abstract.h> +#include <__type_traits/is_aggregate.h> +#include <__type_traits/is_arithmetic.h> #include <__type_traits/is_array.h> #include 
<__type_traits/is_base_of.h> +#include <__type_traits/is_bounded_array.h> #include <__type_traits/is_callable.h> +#include <__type_traits/is_class.h> +#include <__type_traits/is_compound.h> #include <__type_traits/is_const.h> #include <__type_traits/is_convertible.h> +#include <__type_traits/is_empty.h> +#include <__type_traits/is_enum.h> +#include <__type_traits/is_final.h> #include <__type_traits/is_floating_point.h> #include <__type_traits/is_function.h> +#include <__type_traits/is_fundamental.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_member_function_pointer.h> #include <__type_traits/is_member_object_pointer.h> +#include <__type_traits/is_member_pointer.h> #include <__type_traits/is_null_pointer.h> +#include <__type_traits/is_object.h> +#include <__type_traits/is_pointer.h> #include <__type_traits/is_reference.h> #include <__type_traits/is_reference_wrapper.h> #include <__type_traits/is_referenceable.h> #include <__type_traits/is_same.h> +#include <__type_traits/is_scalar.h> +#include <__type_traits/is_signed.h> +#include <__type_traits/is_unbounded_array.h> +#include <__type_traits/is_union.h> +#include <__type_traits/is_unsigned.h> #include <__type_traits/is_void.h> #include <__type_traits/is_volatile.h> +#include <__type_traits/rank.h> +#include <__type_traits/remove_all_extents.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_cv.h> #include <__type_traits/remove_extent.h> +#include <__type_traits/remove_pointer.h> #include <__type_traits/remove_reference.h> #include <__type_traits/remove_volatile.h> +#include <__type_traits/type_identity.h> #include <__utility/declval.h> #include #include @@ -558,327 +586,6 @@ template <> struct __libcpp_is_unsigned_integer : public tru template <> struct __libcpp_is_unsigned_integer<__uint128_t> : public true_type {}; #endif -// is_pointer - -// Before AppleClang 12.0.5, __is_pointer didn't work for Objective-C types. 
-#if __has_keyword(__is_pointer) && \ - !(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1205) - -template -struct _LIBCPP_TEMPLATE_VIS is_pointer : _BoolConstant<__is_pointer(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_pointer_v = __is_pointer(_Tp); -#endif - -#else // __has_keyword(__is_pointer) - -template struct __libcpp_is_pointer : public false_type {}; -template struct __libcpp_is_pointer<_Tp*> : public true_type {}; - -template struct __libcpp_remove_objc_qualifiers { typedef _Tp type; }; -#if defined(_LIBCPP_HAS_OBJC_ARC) -template struct __libcpp_remove_objc_qualifiers<_Tp __strong> { typedef _Tp type; }; -template struct __libcpp_remove_objc_qualifiers<_Tp __weak> { typedef _Tp type; }; -template struct __libcpp_remove_objc_qualifiers<_Tp __autoreleasing> { typedef _Tp type; }; -template struct __libcpp_remove_objc_qualifiers<_Tp __unsafe_unretained> { typedef _Tp type; }; -#endif - -template struct _LIBCPP_TEMPLATE_VIS is_pointer - : public __libcpp_is_pointer::type>::type> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_pointer_v = is_pointer<_Tp>::value; -#endif - -#endif // __has_keyword(__is_pointer) - -// is_union - -#if __has_feature(is_union) || defined(_LIBCPP_COMPILER_GCC) - -template struct _LIBCPP_TEMPLATE_VIS is_union - : public integral_constant {}; - -#else - -template struct __libcpp_union : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_union - : public __libcpp_union::type> {}; - -#endif - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_union_v = is_union<_Tp>::value; -#endif - -// is_class - -#if __has_feature(is_class) || defined(_LIBCPP_COMPILER_GCC) - -template struct _LIBCPP_TEMPLATE_VIS is_class - : public integral_constant {}; - -#else - -namespace __is_class_imp -{ -template char __test(int _Tp::*); -template __two __test(...); -} - -template struct _LIBCPP_TEMPLATE_VIS is_class - : public integral_constant(0)) == 1 && 
!is_union<_Tp>::value> {}; - -#endif - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_class_v = is_class<_Tp>::value; -#endif - -// is_member_pointer - -#if __has_keyword(__is_member_pointer) - -template -struct _LIBCPP_TEMPLATE_VIS is_member_pointer : _BoolConstant<__is_member_pointer(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_member_pointer_v = __is_member_pointer(_Tp); -#endif - -#else // __has_keyword(__is_member_pointer) - -template struct _LIBCPP_TEMPLATE_VIS is_member_pointer - : public _BoolConstant< __libcpp_is_member_pointer::type>::__is_member > {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value; -#endif - -#endif // __has_keyword(__is_member_pointer) - -// is_enum - -#if __has_feature(is_enum) || defined(_LIBCPP_COMPILER_GCC) - -template struct _LIBCPP_TEMPLATE_VIS is_enum - : public integral_constant {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_enum_v = __is_enum(_Tp); -#endif - -#else - -template struct _LIBCPP_TEMPLATE_VIS is_enum - : public integral_constant::value && - !is_integral<_Tp>::value && - !is_floating_point<_Tp>::value && - !is_array<_Tp>::value && - !is_pointer<_Tp>::value && - !is_reference<_Tp>::value && - !is_member_pointer<_Tp>::value && - !is_union<_Tp>::value && - !is_class<_Tp>::value && - !is_function<_Tp>::value > {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_enum_v = is_enum<_Tp>::value; -#endif - -#endif // __has_feature(is_enum) || defined(_LIBCPP_COMPILER_GCC) - -// is_arithmetic - - -template struct _LIBCPP_TEMPLATE_VIS is_arithmetic - : public integral_constant::value || - is_floating_point<_Tp>::value> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value; -#endif - -// is_fundamental - -// Before Clang 10, __is_fundamental didn't work for nullptr_t. 
-// In C++03 nullptr_t is library-provided but must still count as "fundamental." -#if __has_keyword(__is_fundamental) && \ - !(defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1000) && \ - !defined(_LIBCPP_CXX03_LANG) - -template -struct _LIBCPP_TEMPLATE_VIS is_fundamental : _BoolConstant<__is_fundamental(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_fundamental_v = __is_fundamental(_Tp); -#endif - -#else // __has_keyword(__is_fundamental) - -template struct _LIBCPP_TEMPLATE_VIS is_fundamental - : public integral_constant::value || - __is_nullptr_t<_Tp>::value || - is_arithmetic<_Tp>::value> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value; -#endif - -#endif // __has_keyword(__is_fundamental) - -// is_scalar - -// In C++03 nullptr_t is library-provided but must still count as "scalar." -#if __has_keyword(__is_scalar) && !defined(_LIBCPP_CXX03_LANG) - -template -struct _LIBCPP_TEMPLATE_VIS is_scalar : _BoolConstant<__is_scalar(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_scalar_v = __is_scalar(_Tp); -#endif - -#else // __has_keyword(__is_scalar) - -template struct __is_block : false_type {}; -#if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) -template struct __is_block<_Rp (^)(_Args...)> : true_type {}; -#endif - -template struct _LIBCPP_TEMPLATE_VIS is_scalar - : public integral_constant::value || - is_member_pointer<_Tp>::value || - is_pointer<_Tp>::value || - __is_nullptr_t<_Tp>::value || - __is_block<_Tp>::value || - is_enum<_Tp>::value > {}; - -template <> struct _LIBCPP_TEMPLATE_VIS is_scalar : public true_type {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_scalar_v = is_scalar<_Tp>::value; -#endif - -#endif // __has_keyword(__is_scalar) - -// is_object - -#if __has_keyword(__is_object) - -template -struct _LIBCPP_TEMPLATE_VIS is_object : _BoolConstant<__is_object(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline 
constexpr bool is_object_v = __is_object(_Tp); -#endif - -#else // __has_keyword(__is_object) - -template struct _LIBCPP_TEMPLATE_VIS is_object - : public integral_constant::value || - is_array<_Tp>::value || - is_union<_Tp>::value || - is_class<_Tp>::value > {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_object_v = is_object<_Tp>::value; -#endif - -#endif // __has_keyword(__is_object) - -// is_compound - -// >= 11 because in C++03 nullptr isn't actually nullptr -#if __has_keyword(__is_compound) && !defined(_LIBCPP_CXX03_LANG) - -template -struct _LIBCPP_TEMPLATE_VIS is_compound : _BoolConstant<__is_compound(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_compound_v = __is_compound(_Tp); -#endif - -#else // __has_keyword(__is_compound) - -template struct _LIBCPP_TEMPLATE_VIS is_compound - : public integral_constant::value> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_compound_v = is_compound<_Tp>::value; -#endif - -#endif // __has_keyword(__is_compound) - -// add_const - -template struct _LIBCPP_TEMPLATE_VIS add_const { - typedef _LIBCPP_NODEBUG const _Tp type; -}; - -#if _LIBCPP_STD_VER > 11 -template using add_const_t = typename add_const<_Tp>::type; -#endif - -// add_volatile - -template struct _LIBCPP_TEMPLATE_VIS add_volatile { - typedef _LIBCPP_NODEBUG volatile _Tp type; -}; - -#if _LIBCPP_STD_VER > 11 -template using add_volatile_t = typename add_volatile<_Tp>::type; -#endif - -// add_cv -template struct _LIBCPP_TEMPLATE_VIS add_cv { - typedef _LIBCPP_NODEBUG const volatile _Tp type; -}; - -#if _LIBCPP_STD_VER > 11 -template using add_cv_t = typename add_cv<_Tp>::type; -#endif - -// add_lvalue_reference - -template ::value> struct __add_lvalue_reference_impl { typedef _LIBCPP_NODEBUG _Tp type; }; -template struct __add_lvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG _Tp& type; }; - -template struct _LIBCPP_TEMPLATE_VIS add_lvalue_reference -{typedef _LIBCPP_NODEBUG typename 
__add_lvalue_reference_impl<_Tp>::type type;}; - -#if _LIBCPP_STD_VER > 11 -template using add_lvalue_reference_t = typename add_lvalue_reference<_Tp>::type; -#endif - -template ::value> struct __add_rvalue_reference_impl { typedef _LIBCPP_NODEBUG _Tp type; }; -template struct __add_rvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG _Tp&& type; }; - -template struct _LIBCPP_TEMPLATE_VIS add_rvalue_reference -{typedef _LIBCPP_NODEBUG typename __add_rvalue_reference_impl<_Tp>::type type;}; - -#if _LIBCPP_STD_VER > 11 -template using add_rvalue_reference_t = typename add_rvalue_reference<_Tp>::type; -#endif - template struct __unconstref { typedef _LIBCPP_NODEBUG typename remove_const::type>::type type; @@ -908,221 +615,6 @@ struct __any __any(...); }; -// remove_pointer - -template struct _LIBCPP_TEMPLATE_VIS remove_pointer {typedef _LIBCPP_NODEBUG _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp*> {typedef _LIBCPP_NODEBUG _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const> {typedef _LIBCPP_NODEBUG _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* volatile> {typedef _LIBCPP_NODEBUG _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const volatile> {typedef _LIBCPP_NODEBUG _Tp type;}; - -#if _LIBCPP_STD_VER > 11 -template using remove_pointer_t = typename remove_pointer<_Tp>::type; -#endif - -// add_pointer - -// type_identity - -template -struct __type_identity { typedef _Tp type; }; - -template -using __type_identity_t _LIBCPP_NODEBUG = typename __type_identity<_Tp>::type; - -#if _LIBCPP_STD_VER > 17 -template struct type_identity { typedef _Tp type; }; -template using type_identity_t = typename type_identity<_Tp>::type; -#endif - -// is_signed - -#if __has_keyword(__is_signed) - -template -struct _LIBCPP_TEMPLATE_VIS is_signed : _BoolConstant<__is_signed(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_signed_v = __is_signed(_Tp); -#endif - -#else 
// __has_keyword(__is_signed) - -template ::value> -struct __libcpp_is_signed_impl : public _BoolConstant<(_Tp(-1) < _Tp(0))> {}; - -template -struct __libcpp_is_signed_impl<_Tp, false> : public true_type {}; // floating point - -template ::value> -struct __libcpp_is_signed : public __libcpp_is_signed_impl<_Tp> {}; - -template struct __libcpp_is_signed<_Tp, false> : public false_type {}; - -template struct _LIBCPP_TEMPLATE_VIS is_signed : public __libcpp_is_signed<_Tp> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_signed_v = is_signed<_Tp>::value; -#endif - -#endif // __has_keyword(__is_signed) - -// is_unsigned - -// Before Clang 13, __is_unsigned returned true for enums with signed underlying type. -// No currently-released version of AppleClang contains the fixed intrinsic. -#if __has_keyword(__is_unsigned) && \ - !(defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1300) && \ - !defined(_LIBCPP_APPLE_CLANG_VER) - -template -struct _LIBCPP_TEMPLATE_VIS is_unsigned : _BoolConstant<__is_unsigned(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_unsigned_v = __is_unsigned(_Tp); -#endif - -#else // __has_keyword(__is_unsigned) - -template ::value> -struct __libcpp_is_unsigned_impl : public _BoolConstant<(_Tp(0) < _Tp(-1))> {}; - -template -struct __libcpp_is_unsigned_impl<_Tp, false> : public false_type {}; // floating point - -template ::value> -struct __libcpp_is_unsigned : public __libcpp_is_unsigned_impl<_Tp> {}; - -template struct __libcpp_is_unsigned<_Tp, false> : public false_type {}; - -template struct _LIBCPP_TEMPLATE_VIS is_unsigned : public __libcpp_is_unsigned<_Tp> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_unsigned_v = is_unsigned<_Tp>::value; -#endif - -#endif // __has_keyword(__is_unsigned) - -// rank - -template struct _LIBCPP_TEMPLATE_VIS rank - : public integral_constant {}; -template struct _LIBCPP_TEMPLATE_VIS rank<_Tp[]> - : public integral_constant::value + 1> {}; -template 
struct _LIBCPP_TEMPLATE_VIS rank<_Tp[_Np]> - : public integral_constant::value + 1> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr size_t rank_v = rank<_Tp>::value; -#endif - -// extent - -#if __has_keyword(__array_extent) - -template -struct _LIBCPP_TEMPLATE_VIS extent - : integral_constant { }; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr size_t extent_v = __array_extent(_Tp, _Ip); -#endif - -#else // __has_keyword(__array_extent) - -template struct _LIBCPP_TEMPLATE_VIS extent - : public integral_constant {}; -template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[], 0> - : public integral_constant {}; -template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[], _Ip> - : public integral_constant::value> {}; -template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[_Np], 0> - : public integral_constant {}; -template struct _LIBCPP_TEMPLATE_VIS extent<_Tp[_Np], _Ip> - : public integral_constant::value> {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr size_t extent_v = extent<_Tp, _Ip>::value; -#endif - -#endif // __has_keyword(__array_extent) - -// remove_all_extents - -template struct _LIBCPP_TEMPLATE_VIS remove_all_extents - {typedef _Tp type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_all_extents<_Tp[]> - {typedef typename remove_all_extents<_Tp>::type type;}; -template struct _LIBCPP_TEMPLATE_VIS remove_all_extents<_Tp[_Np]> - {typedef typename remove_all_extents<_Tp>::type type;}; - -#if _LIBCPP_STD_VER > 11 -template using remove_all_extents_t = typename remove_all_extents<_Tp>::type; -#endif - -#if _LIBCPP_STD_VER > 17 -// is_bounded_array - -template struct _LIBCPP_TEMPLATE_VIS is_bounded_array : false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_bounded_array<_Tp[_Np]> : true_type {}; - -template -inline constexpr -bool is_bounded_array_v = is_bounded_array<_Tp>::value; - -// is_unbounded_array - -template struct _LIBCPP_TEMPLATE_VIS is_unbounded_array : false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_unbounded_array<_Tp[]> : 
true_type {}; - -template -inline constexpr -bool is_unbounded_array_v = is_unbounded_array<_Tp>::value; -#endif - -// is_abstract - -template struct _LIBCPP_TEMPLATE_VIS is_abstract - : public integral_constant {}; - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_abstract_v = is_abstract<_Tp>::value; -#endif - -// is_final - -template struct _LIBCPP_TEMPLATE_VIS -__libcpp_is_final : public integral_constant {}; - -#if _LIBCPP_STD_VER > 11 -template struct _LIBCPP_TEMPLATE_VIS -is_final : public integral_constant {}; -#endif - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_final_v = is_final<_Tp>::value; -#endif - -// is_aggregate -#if _LIBCPP_STD_VER > 14 - -template struct _LIBCPP_TEMPLATE_VIS -is_aggregate : public integral_constant {}; - -template -inline constexpr bool is_aggregate_v = is_aggregate<_Tp>::value; - -#endif // _LIBCPP_STD_VER > 14 - // __is_core_convertible // [conv.general]/3 says "E is convertible to T" whenever "T t=E;" is well-formed. 
@@ -1164,42 +656,6 @@ inline constexpr bool is_nothrow_convertible_v = is_nothrow_convertible<_Fm, _To #endif // _LIBCPP_STD_VER > 17 -// is_empty - -#if __has_feature(is_empty) || defined(_LIBCPP_COMPILER_GCC) - -template -struct _LIBCPP_TEMPLATE_VIS is_empty - : public integral_constant {}; - -#else // __has_feature(is_empty) - -template -struct __is_empty1 - : public _Tp -{ - double __lx; -}; - -struct __is_empty2 -{ - double __lx; -}; - -template ::value> -struct __libcpp_empty : public integral_constant) == sizeof(__is_empty2)> {}; - -template struct __libcpp_empty<_Tp, false> : public false_type {}; - -template struct _LIBCPP_TEMPLATE_VIS is_empty : public __libcpp_empty<_Tp> {}; - -#endif // __has_feature(is_empty) - -#if _LIBCPP_STD_VER > 14 -template -inline constexpr bool is_empty_v = is_empty<_Tp>::value; -#endif - // is_polymorphic #if __has_feature(is_polymorphic) || defined(_LIBCPP_COMPILER_MSVC) diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index c0debf7b8b2257..121e4d9d98778d 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -450,33 +450,61 @@ END-SCRIPT #include <__thread/poll_with_backoff.h> // expected-error@*:* {{use of private header from outside its module: '__thread/poll_with_backoff.h'}} #include <__thread/timed_backoff_policy.h> // expected-error@*:* {{use of private header from outside its module: '__thread/timed_backoff_policy.h'}} #include <__tuple> // expected-error@*:* {{use of private header from outside its module: '__tuple'}} +#include <__type_traits/add_const.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/add_const.h'}} +#include <__type_traits/add_cv.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/add_cv.h'}} +#include <__type_traits/add_lvalue_reference.h> // expected-error@*:* {{use of private header from outside its module: 
'__type_traits/add_lvalue_reference.h'}} #include <__type_traits/add_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/add_pointer.h'}} +#include <__type_traits/add_rvalue_reference.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/add_rvalue_reference.h'}} +#include <__type_traits/add_volatile.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/add_volatile.h'}} #include <__type_traits/conditional.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/conditional.h'}} #include <__type_traits/decay.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/decay.h'}} #include <__type_traits/enable_if.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/enable_if.h'}} +#include <__type_traits/extent.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/extent.h'}} #include <__type_traits/integral_constant.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/integral_constant.h'}} +#include <__type_traits/is_abstract.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_abstract.h'}} +#include <__type_traits/is_aggregate.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_aggregate.h'}} +#include <__type_traits/is_arithmetic.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_arithmetic.h'}} #include <__type_traits/is_array.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_array.h'}} #include <__type_traits/is_base_of.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_base_of.h'}} +#include <__type_traits/is_bounded_array.h> // expected-error@*:* {{use of private header from 
outside its module: '__type_traits/is_bounded_array.h'}} #include <__type_traits/is_callable.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_callable.h'}} +#include <__type_traits/is_class.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_class.h'}} +#include <__type_traits/is_compound.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_compound.h'}} #include <__type_traits/is_const.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_const.h'}} #include <__type_traits/is_convertible.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_convertible.h'}} +#include <__type_traits/is_empty.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_empty.h'}} +#include <__type_traits/is_enum.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_enum.h'}} +#include <__type_traits/is_final.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_final.h'}} #include <__type_traits/is_floating_point.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_floating_point.h'}} #include <__type_traits/is_function.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_function.h'}} +#include <__type_traits/is_fundamental.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_fundamental.h'}} #include <__type_traits/is_integral.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_integral.h'}} #include <__type_traits/is_member_function_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_member_function_pointer.h'}} #include <__type_traits/is_member_object_pointer.h> // 
expected-error@*:* {{use of private header from outside its module: '__type_traits/is_member_object_pointer.h'}} +#include <__type_traits/is_member_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_member_pointer.h'}} #include <__type_traits/is_null_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_null_pointer.h'}} +#include <__type_traits/is_object.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_object.h'}} +#include <__type_traits/is_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_pointer.h'}} #include <__type_traits/is_reference.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_reference.h'}} #include <__type_traits/is_reference_wrapper.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_reference_wrapper.h'}} #include <__type_traits/is_referenceable.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_referenceable.h'}} #include <__type_traits/is_same.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_same.h'}} +#include <__type_traits/is_scalar.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_scalar.h'}} +#include <__type_traits/is_signed.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_signed.h'}} +#include <__type_traits/is_unbounded_array.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_unbounded_array.h'}} +#include <__type_traits/is_union.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_union.h'}} +#include <__type_traits/is_unsigned.h> // expected-error@*:* {{use of private header from outside its module: 
'__type_traits/is_unsigned.h'}} #include <__type_traits/is_void.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_void.h'}} #include <__type_traits/is_volatile.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/is_volatile.h'}} +#include <__type_traits/rank.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/rank.h'}} +#include <__type_traits/remove_all_extents.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_all_extents.h'}} #include <__type_traits/remove_const.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_const.h'}} #include <__type_traits/remove_cv.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_cv.h'}} #include <__type_traits/remove_extent.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_extent.h'}} +#include <__type_traits/remove_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_pointer.h'}} #include <__type_traits/remove_reference.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_reference.h'}} #include <__type_traits/remove_volatile.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/remove_volatile.h'}} +#include <__type_traits/type_identity.h> // expected-error@*:* {{use of private header from outside its module: '__type_traits/type_identity.h'}} #include <__utility/as_const.h> // expected-error@*:* {{use of private header from outside its module: '__utility/as_const.h'}} #include <__utility/auto_cast.h> // expected-error@*:* {{use of private header from outside its module: '__utility/auto_cast.h'}} #include <__utility/cmp.h> // expected-error@*:* {{use of private header from outside its module: '__utility/cmp.h'}} 
From 936e9bf4bdc8a879766233888d62a12f56bf26c9 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sat, 28 May 2022 08:16:52 +0000 Subject: [PATCH 758/908] [gn build] Port 30c37fb89cb7 --- .../gn/secondary/libcxx/include/BUILD.gn | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 9f63c5a87396e9..44a7905692615f 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -496,33 +496,61 @@ if (current_toolchain == default_toolchain) { "__threading_support", "__tree", "__tuple", + "__type_traits/add_const.h", + "__type_traits/add_cv.h", + "__type_traits/add_lvalue_reference.h", "__type_traits/add_pointer.h", + "__type_traits/add_rvalue_reference.h", + "__type_traits/add_volatile.h", "__type_traits/conditional.h", "__type_traits/decay.h", "__type_traits/enable_if.h", + "__type_traits/extent.h", "__type_traits/integral_constant.h", + "__type_traits/is_abstract.h", + "__type_traits/is_aggregate.h", + "__type_traits/is_arithmetic.h", "__type_traits/is_array.h", "__type_traits/is_base_of.h", + "__type_traits/is_bounded_array.h", "__type_traits/is_callable.h", + "__type_traits/is_class.h", + "__type_traits/is_compound.h", "__type_traits/is_const.h", "__type_traits/is_convertible.h", + "__type_traits/is_empty.h", + "__type_traits/is_enum.h", + "__type_traits/is_final.h", "__type_traits/is_floating_point.h", "__type_traits/is_function.h", + "__type_traits/is_fundamental.h", "__type_traits/is_integral.h", "__type_traits/is_member_function_pointer.h", "__type_traits/is_member_object_pointer.h", + "__type_traits/is_member_pointer.h", "__type_traits/is_null_pointer.h", + "__type_traits/is_object.h", + "__type_traits/is_pointer.h", "__type_traits/is_reference.h", "__type_traits/is_reference_wrapper.h", "__type_traits/is_referenceable.h", "__type_traits/is_same.h", + "__type_traits/is_scalar.h", + 
"__type_traits/is_signed.h", + "__type_traits/is_unbounded_array.h", + "__type_traits/is_union.h", + "__type_traits/is_unsigned.h", "__type_traits/is_void.h", "__type_traits/is_volatile.h", + "__type_traits/rank.h", + "__type_traits/remove_all_extents.h", "__type_traits/remove_const.h", "__type_traits/remove_cv.h", "__type_traits/remove_extent.h", + "__type_traits/remove_pointer.h", "__type_traits/remove_reference.h", "__type_traits/remove_volatile.h", + "__type_traits/type_identity.h", "__undef_macros", "__utility/as_const.h", "__utility/auto_cast.h", From 65af17c7a00df3f8906cded34b63b309e687cf61 Mon Sep 17 00:00:00 2001 From: Peixin-Qiao Date: Sat, 28 May 2022 16:34:26 +0800 Subject: [PATCH 759/908] [flang][OpenMP][OpenACC] Fix exit of a region The stop statement is allowed in OpenMP/OpenACC block region. Reviewed By: kiranchandramohan, shraiysh Differential Revision: https://reviews.llvm.org/D126471 --- flang/lib/Semantics/check-directive-structure.h | 1 - flang/test/Semantics/OpenACC/acc-branch.f90 | 9 +++------ flang/test/Semantics/omp-parallel02.f90 | 1 - flang/test/Semantics/omp-task01.f90 | 1 - 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/flang/lib/Semantics/check-directive-structure.h b/flang/lib/Semantics/check-directive-structure.h index 4dc653071ef8eb..6444e839967dfd 100644 --- a/flang/lib/Semantics/check-directive-structure.h +++ b/flang/lib/Semantics/check-directive-structure.h @@ -58,7 +58,6 @@ template class NoBranchingEnforce { CheckConstructNameBranching("EXIT"); } } - void Post(const parser::StopStmt &) { EmitBranchOutError("STOP"); } void Post(const parser::CycleStmt &cycleStmt) { if (const auto &cycleName{cycleStmt.v}) { CheckConstructNameBranching("CYCLE", cycleName.value()); diff --git a/flang/test/Semantics/OpenACC/acc-branch.f90 b/flang/test/Semantics/OpenACC/acc-branch.f90 index 06c3b4da92921a..ec6280b88d5842 100644 --- a/flang/test/Semantics/OpenACC/acc-branch.f90 +++ b/flang/test/Semantics/OpenACC/acc-branch.f90 @@ -93,8 
+93,7 @@ program openacc_clause_validity do i = 1, N a(i) = 3.14 if(i == N-1) THEN - !ERROR: STOP statement is not allowed in a PARALLEL construct - stop 999 + stop 999 ! no error end if end do !$acc end parallel @@ -120,8 +119,7 @@ program openacc_clause_validity do i = 1, N a(i) = 3.14 if(i == N-1) THEN - !ERROR: STOP statement is not allowed in a KERNELS construct - stop 999 + stop 999 ! no error end if end do !$acc end kernels @@ -163,8 +161,7 @@ program openacc_clause_validity do i = 1, N a(i) = 3.14 if(i == N-1) THEN - !ERROR: STOP statement is not allowed in a SERIAL construct - stop 999 + stop 999 ! no error end if end do !$acc end serial diff --git a/flang/test/Semantics/omp-parallel02.f90 b/flang/test/Semantics/omp-parallel02.f90 index 59032e17c6ae0e..eff0e7c70d1a0c 100644 --- a/flang/test/Semantics/omp-parallel02.f90 +++ b/flang/test/Semantics/omp-parallel02.f90 @@ -15,7 +15,6 @@ program omp_parallel do j = 1, 10 print *, "Hello" !CHECK: In the enclosing PARALLEL directive branched into - !CHECK: STOP statement is not allowed in a PARALLEL construct 10 stop end do end do diff --git a/flang/test/Semantics/omp-task01.f90 b/flang/test/Semantics/omp-task01.f90 index 1206a889e8375e..4dc80d6d70e052 100644 --- a/flang/test/Semantics/omp-task01.f90 +++ b/flang/test/Semantics/omp-task01.f90 @@ -17,7 +17,6 @@ recursive subroutine traverse ( P ) !$omp task call traverse(P%left) !CHECK: In the enclosing TASK directive branched into - !CHECK: STOP statement is not allowed in a TASK construct 10 stop !$omp end task endif From 66073306d888acd9b3e8acbbb4bda54ba25bc7e9 Mon Sep 17 00:00:00 2001 From: Peixin-Qiao Date: Sat, 28 May 2022 16:41:14 +0800 Subject: [PATCH 760/908] [flang][OpenMP] Fix pointer variables in atomic read/write For pointer variables, using getSymbolAddress cannot get the correct address for atomic read/write operands. Use genExprAddr to fix it. 
Reviewed By: shraiysh, NimishMishra Differential Revision: https://reviews.llvm.org/D125793 --- flang/lib/Lower/OpenMP.cpp | 55 ++++++++---------------- flang/test/Lower/OpenMP/atomic-read.f90 | 32 ++++++++++++++ flang/test/Lower/OpenMP/atomic-write.f90 | 27 ++++++++++++ 3 files changed, 77 insertions(+), 37 deletions(-) diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index b42fb95d50ffee..535ffac82d9e5b 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -841,11 +841,7 @@ genOmpAtomicWrite(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpAtomicWrite &atomicWrite) { auto &firOpBuilder = converter.getFirOpBuilder(); auto currentLocation = converter.getCurrentLocation(); - mlir::Value address; - // If no hint clause is specified, the effect is as if - // hint(omp_sync_hint_none) had been specified. - mlir::IntegerAttr hint = nullptr; - mlir::omp::ClauseMemoryOrderKindAttr memory_order = nullptr; + // Get the value and address of atomic write operands. 
const Fortran::parser::OmpAtomicClauseList &rightHandClauseList = std::get<2>(atomicWrite.t); const Fortran::parser::OmpAtomicClauseList &leftHandClauseList = @@ -855,16 +851,14 @@ genOmpAtomicWrite(Fortran::lower::AbstractConverter &converter, const auto &assignmentStmtVariable = std::get( std::get<3>(atomicWrite.t).statement.t); Fortran::lower::StatementContext stmtCtx; - auto value = fir::getBase(converter.genExprValue( + mlir::Value value = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(assignmentStmtExpr), stmtCtx)); - if (auto varDesignator = std::get_if< - Fortran::common::Indirection>( - &assignmentStmtVariable.u)) { - if (const auto *name = getDesignatorNameIfDataRef(varDesignator->value())) { - address = converter.getSymbolAddress(*name->symbol); - } - } - + mlir::Value address = fir::getBase(converter.genExprAddr( + *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx)); + // If no hint clause is specified, the effect is as if + // hint(omp_sync_hint_none) had been specified. + mlir::IntegerAttr hint = nullptr; + mlir::omp::ClauseMemoryOrderKindAttr memory_order = nullptr; genOmpAtomicHintAndMemoryOrderClauses(converter, leftHandClauseList, hint, memory_order); genOmpAtomicHintAndMemoryOrderClauses(converter, rightHandClauseList, hint, @@ -878,12 +872,7 @@ static void genOmpAtomicRead(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpAtomicRead &atomicRead) { auto &firOpBuilder = converter.getFirOpBuilder(); auto currentLocation = converter.getCurrentLocation(); - mlir::Value to_address; - mlir::Value from_address; - // If no hint clause is specified, the effect is as if - // hint(omp_sync_hint_none) had been specified. - mlir::IntegerAttr hint = nullptr; - mlir::omp::ClauseMemoryOrderKindAttr memory_order = nullptr; + // Get the address of atomic read operands. 
const Fortran::parser::OmpAtomicClauseList &rightHandClauseList = std::get<2>(atomicRead.t); const Fortran::parser::OmpAtomicClauseList &leftHandClauseList = @@ -892,23 +881,15 @@ static void genOmpAtomicRead(Fortran::lower::AbstractConverter &converter, std::get(std::get<3>(atomicRead.t).statement.t); const auto &assignmentStmtVariable = std::get( std::get<3>(atomicRead.t).statement.t); - if (auto exprDesignator = std::get_if< - Fortran::common::Indirection>( - &assignmentStmtExpr.u)) { - if (const auto *name = - getDesignatorNameIfDataRef(exprDesignator->value())) { - from_address = converter.getSymbolAddress(*name->symbol); - } - } - - if (auto varDesignator = std::get_if< - Fortran::common::Indirection>( - &assignmentStmtVariable.u)) { - if (const auto *name = getDesignatorNameIfDataRef(varDesignator->value())) { - to_address = converter.getSymbolAddress(*name->symbol); - } - } - + Fortran::lower::StatementContext stmtCtx; + mlir::Value from_address = fir::getBase(converter.genExprAddr( + *Fortran::semantics::GetExpr(assignmentStmtExpr), stmtCtx)); + mlir::Value to_address = fir::getBase(converter.genExprAddr( + *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx)); + // If no hint clause is specified, the effect is as if + // hint(omp_sync_hint_none) had been specified. + mlir::IntegerAttr hint = nullptr; + mlir::omp::ClauseMemoryOrderKindAttr memory_order = nullptr; genOmpAtomicHintAndMemoryOrderClauses(converter, leftHandClauseList, hint, memory_order); genOmpAtomicHintAndMemoryOrderClauses(converter, rightHandClauseList, hint, diff --git a/flang/test/Lower/OpenMP/atomic-read.f90 b/flang/test/Lower/OpenMP/atomic-read.f90 index 36ad61ff0e9015..0f8a042be40aeb 100644 --- a/flang/test/Lower/OpenMP/atomic-read.f90 +++ b/flang/test/Lower/OpenMP/atomic-read.f90 @@ -44,3 +44,35 @@ program OmpAtomic g = h end program OmpAtomic +! Test lowering atomic read for pointer variables. +! Please notice to use %[[VAL_4]] and %[[VAL_1]] for operands of atomic +! 
operation, instead of %[[VAL_3]] and %[[VAL_0]]. + +!CHECK-LABEL: func.func @_QPatomic_read_pointer() { +!CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box> {bindc_name = "x", uniq_name = "_QFatomic_read_pointerEx"} +!CHECK: %[[VAL_1:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFatomic_read_pointerEx.addr"} +!CHECK: %[[VAL_2:.*]] = fir.zero_bits !fir.ptr +!CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref> +!CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box> {bindc_name = "y", uniq_name = "_QFatomic_read_pointerEy"} +!CHECK: %[[VAL_4:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFatomic_read_pointerEy.addr"} +!CHECK: %[[VAL_5:.*]] = fir.zero_bits !fir.ptr +!CHECK: fir.store %[[VAL_5]] to %[[VAL_4]] : !fir.ref> +!CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]] : !fir.ref> +!CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref> +!CHECK: omp.atomic.read %[[VAL_7]] = %[[VAL_6]] : !fir.ptr +!CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_4]] : !fir.ref> +!CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]] : !fir.ptr +!CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_1]] : !fir.ref> +!CHECK: fir.store %[[VAL_9]] to %[[VAL_10]] : !fir.ptr +!CHECK: return +!CHECK: } + +subroutine atomic_read_pointer() + integer, pointer :: x, y + + !$omp atomic read + y = x + + x = y +end + diff --git a/flang/test/Lower/OpenMP/atomic-write.f90 b/flang/test/Lower/OpenMP/atomic-write.f90 index 1a9c264403563d..ed8209092ec262 100644 --- a/flang/test/Lower/OpenMP/atomic-write.f90 +++ b/flang/test/Lower/OpenMP/atomic-write.f90 @@ -36,3 +36,30 @@ program OmpAtomicWrite y = 10*x + z/2 end program OmpAtomicWrite +! Test lowering atomic write for pointer variables. +! Please notice to use %[[VAL_1]] for operands of atomic operation, instead +! of %[[VAL_0]]. 
+ +!CHECK-LABEL: func.func @_QPatomic_write_pointer() { +!CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box> {bindc_name = "x", uniq_name = "_QFatomic_write_pointerEx"} +!CHECK: %[[VAL_1:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFatomic_write_pointerEx.addr"} +!CHECK: %[[VAL_2:.*]] = fir.zero_bits !fir.ptr +!CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref> +!CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 +!CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]] : !fir.ref> +!CHECK: omp.atomic.write %[[VAL_4]] = %[[VAL_3]] : !fir.ptr, i32 +!CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +!CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]] : !fir.ref> +!CHECK: fir.store %[[VAL_5]] to %[[VAL_6]] : !fir.ptr +!CHECK: return +!CHECK: } + +subroutine atomic_write_pointer() + integer, pointer :: x + + !$omp atomic write + x = 1 + + x = 2 +end + From b86771a2f780accf327ecfe163a3bf0f4814b628 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Sat, 22 May 2021 06:39:33 -0700 Subject: [PATCH 761/908] [libc++] Minor emscripten changes from downstream Differential Revision: https://reviews.llvm.org/D126583 --- libcxx/include/__config | 5 +++-- libcxx/include/__locale | 3 +-- libcxx/include/locale | 2 +- libcxx/src/include/config_elast.h | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 62b367167637f9..14e6850ebdc108 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -353,7 +353,7 @@ #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || \ defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) # define _LIBCPP_USING_ARC4_RANDOM -#elif defined(__wasi__) +#elif defined(__wasi__) || defined(__EMSCRIPTEN__) # define _LIBCPP_USING_GETENTROPY #elif defined(__Fuchsia__) # define _LIBCPP_USING_FUCHSIA_CPRNG @@ -1028,7 +1028,8 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( defined(__APPLE__) || \ defined(__sun__) || \ defined(__MVS__) || \ - defined(_AIX) + 
defined(_AIX) || \ + defined(__EMSCRIPTEN__) # define _LIBCPP_HAS_THREAD_API_PTHREAD # elif defined(__Fuchsia__) // TODO(44575): Switch to C11 thread API when possible. diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 9fd707c7651caf..5198178aa758e9 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -33,8 +33,7 @@ # include <__support/newlib/xlocale.h> #elif defined(__OpenBSD__) # include <__support/openbsd/xlocale.h> -#elif (defined(__APPLE__) || defined(__FreeBSD__) \ - || defined(__EMSCRIPTEN__) || defined(__IBMCPP__)) +#elif (defined(__APPLE__) || defined(__FreeBSD__) || defined(__IBMCPP__)) # include #elif defined(__Fuchsia__) # include <__support/fuchsia/xlocale.h> diff --git a/libcxx/include/locale b/libcxx/include/locale index 879f4d9820c965..3bed47c071e5b0 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -211,7 +211,7 @@ template class messages_byname; #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) // Most unix variants have catopen. These are the specific ones that don't. 
-# if !defined(__BIONIC__) && !defined(_NEWLIB_VERSION) +# if !defined(__BIONIC__) && !defined(_NEWLIB_VERSION) && !defined(__EMSCRIPTEN__) # define _LIBCPP_HAS_CATOPEN 1 # include # endif diff --git a/libcxx/src/include/config_elast.h b/libcxx/src/include/config_elast.h index 0ed53a3b20d12c..bef26ec5019ecc 100644 --- a/libcxx/src/include/config_elast.h +++ b/libcxx/src/include/config_elast.h @@ -29,6 +29,8 @@ // No _LIBCPP_ELAST needed on Fuchsia #elif defined(__wasi__) // No _LIBCPP_ELAST needed on WASI +#elif defined(__EMSCRIPTEN__) +// No _LIBCPP_ELAST needed on Emscripten #elif defined(__linux__) || defined(_LIBCPP_HAS_MUSL_LIBC) #define _LIBCPP_ELAST 4095 #elif defined(__APPLE__) From 0a2d2eed4321daa5e43698e0d379c583d151d27a Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Fri, 27 May 2022 18:03:30 +0900 Subject: [PATCH 762/908] [docs] Update the label name for new contributors The `beginner` label is deprecated and the `good first issue` label is now preferred. Differential Revision: https://reviews.llvm.org/D126526 --- llvm/docs/Contributing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/docs/Contributing.rst b/llvm/docs/Contributing.rst index 65b53c686b6662..c0f82e526104ec 100644 --- a/llvm/docs/Contributing.rst +++ b/llvm/docs/Contributing.rst @@ -26,7 +26,7 @@ about it. Please let us know and follow the instructions in Bug Fixes --------- If you are interested in contributing code to LLVM, bugs labeled with the -`beginner`_ keyword in the `bug tracker`_ are a good way to get familiar with +`good first issue`_ keyword in the `bug tracker`_ are a good way to get familiar with the code base. If you are interested in fixing a bug please comment on it to let people know you are working on it. @@ -148,7 +148,7 @@ of LLVM's high-level design, as well as its internals: .. _Forum: https://discourse.llvm.org .. _irc.oftc.net: irc://irc.oftc.net/llvm -.. 
_beginner: https://github.com/llvm/llvm-project/issues?q=is%3Aopen+is%3Aissue+label%3Abeginner +.. _good first issue: https://github.com/llvm/llvm-project/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22 .. _bug tracker: https://github.com/llvm/llvm-project/issues .. _clang-format-diff.py: https://reviews.llvm.org/source/llvm-github/browse/main/clang/tools/clang-format/clang-format-diff.py .. _git-clang-format: https://reviews.llvm.org/source/llvm-github/browse/main/clang/tools/clang-format/git-clang-format From 73506256bff646a2283121b8497750ec74a016cd Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Tue, 24 May 2022 15:06:12 -0700 Subject: [PATCH 763/908] [flang] Avoid spurious warnings from reading module files When processing the literal constants of the various kinds of INTEGER that are too large by 1 (e.g., 2147483648_4) in expression analysis, emit a portability warning rather than a fatal error if the literal constant appears as the operand to a unary minus, since the folded result will be in range. And don't emit any warning if the negated literal is coming from a module file -- f18 wrote the module file and the warning would simply be confusing, especially to the programmer that wrote (-2147483647_4-1) in the first place. Further, emit portability warnings for the canonical expressions for infinities and NaN (-1./0., 0./0., & 1./0.), but not when they appear in a module file, for the same reason. The Fortran language has no syntax for these special values so we have to emit expressions that fold to them. Fixes LLVM bugs https://github.com/llvm/llvm-project/issues/55086 and https://github.com/llvm/llvm-project/issues/55081. 
Differential Revision: https://reviews.llvm.org/D126584 --- flang/docs/Extensions.md | 5 +++ flang/include/flang/Evaluate/common.h | 6 ++++ flang/include/flang/Semantics/expression.h | 5 +-- flang/include/flang/Semantics/semantics.h | 2 ++ flang/lib/Evaluate/fold-implementation.h | 17 ++++++++- flang/lib/Semantics/expression.cpp | 41 +++++++++++++++++----- flang/lib/Semantics/mod-file.cpp | 4 +++ flang/lib/Semantics/semantics.cpp | 10 ++++++ flang/test/Semantics/dosemantics03.f90 | 6 ++-- 9 files changed, 81 insertions(+), 15 deletions(-) diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 568a222bde72d6..e8381b319f56f0 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -74,6 +74,11 @@ end `CFI_section`, `CFI_setpointer` or `CFI_allocate`, the lower bound on that dimension will be set to 1 for consistency with the `LBOUND()` intrinsic function. +* `-2147483648_4` is, strictly speaking, a non-conforming literal + constant on a machine with 32-bit two's-complement integers as + kind 4, because the grammar of Fortran expressions parses it as a + negation of a literal constant, not a negative literal constant. + This compiler accepts it with a portability warning. 
## Extensions, deletions, and legacy features supported by default diff --git a/flang/include/flang/Evaluate/common.h b/flang/include/flang/Evaluate/common.h index 4feb823045d1ff..e3c07056addf10 100644 --- a/flang/include/flang/Evaluate/common.h +++ b/flang/include/flang/Evaluate/common.h @@ -259,6 +259,11 @@ class FoldingContext { std::size_t maxAlignment() const { return maxAlignment_; } const semantics::DerivedTypeSpec *pdtInstance() const { return pdtInstance_; } const IntrinsicProcTable &intrinsics() const { return intrinsics_; } + bool inModuleFile() const { return inModuleFile_; } + FoldingContext &set_inModuleFile(bool yes = true) { + inModuleFile_ = yes; + return *this; + } ConstantSubscript &StartImpliedDo(parser::CharBlock, ConstantSubscript = 1); std::optional GetImpliedDo(parser::CharBlock) const; @@ -282,6 +287,7 @@ class FoldingContext { static constexpr bool bigEndian_{false}; // TODO: configure for target static constexpr std::size_t maxAlignment_{8}; // TODO: configure for target const semantics::DerivedTypeSpec *pdtInstance_{nullptr}; + bool inModuleFile_{false}; std::map impliedDos_; }; diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h index 1cf5506748b1c9..e6a88ec6f16ea8 100644 --- a/flang/include/flang/Semantics/expression.h +++ b/flang/include/flang/Semantics/expression.h @@ -255,7 +255,7 @@ class ExpressionAnalyzer { int IntegerTypeSpecKind(const parser::IntegerTypeSpec &); private: - MaybeExpr Analyze(const parser::IntLiteralConstant &); + MaybeExpr Analyze(const parser::IntLiteralConstant &, bool negated = false); MaybeExpr Analyze(const parser::RealLiteralConstant &); MaybeExpr Analyze(const parser::ComplexPart &); MaybeExpr Analyze(const parser::ComplexLiteralConstant &); @@ -308,7 +308,8 @@ class ExpressionAnalyzer { const std::optional &, int defaultKind); template MaybeExpr ExprOrVariable(const PARSED &, parser::CharBlock source); - template MaybeExpr IntLiteralConstant(const PARSED 
&); + template + MaybeExpr IntLiteralConstant(const PARSED &, bool negated = false); MaybeExpr AnalyzeString(std::string &&, int kind); std::optional> AsSubscript(MaybeExpr &&); std::optional> TripletPart( diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index bb96099dee4d08..cc57577f53c74d 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -170,6 +170,8 @@ class SemanticsContext { const Scope &FindScope(parser::CharBlock) const; Scope &FindScope(parser::CharBlock); + bool IsInModuleFile(parser::CharBlock) const; + const ConstructStack &constructStack() const { return constructStack_; } template void PushConstruct(const N &node) { constructStack_.emplace_back(&node); diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h index 04295d31c619ae..97abfc688d6398 100644 --- a/flang/lib/Evaluate/fold-implementation.h +++ b/flang/lib/Evaluate/fold-implementation.h @@ -1823,7 +1823,22 @@ Expr FoldOperation(FoldingContext &context, Divide &&x) { return Expr{Constant{quotAndRem.quotient}}; } else { auto quotient{folded->first.Divide(folded->second, context.rounding())}; - RealFlagWarnings(context, quotient.flags, "division"); + // Don't warn about -1./0., 0./0., or 1./0. from a module file + // they are interpreted as canonical Fortran representations of -Inf, + // NaN, and Inf respectively. 
+ bool isCanonicalNaNOrInf{false}; + if constexpr (T::category == TypeCategory::Real) { + if (folded->second.IsZero() && context.inModuleFile()) { + using IntType = typename T::Scalar::Word; + auto intNumerator{folded->first.template ToInteger()}; + isCanonicalNaNOrInf = intNumerator.flags == RealFlags{} && + intNumerator.value >= IntType{-1} && + intNumerator.value <= IntType{1}; + } + } + if (!isCanonicalNaNOrInf) { + RealFlagWarnings(context, quotient.flags, "division"); + } if (context.flushSubnormalsToZero()) { quotient.value = quotient.value.FlushSubnormalToZero(); } diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 4a100e420e3450..be295408279bce 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -423,8 +423,21 @@ struct IntTypeVisitor { template Result Test() { if (T::kind >= kind) { const char *p{digits.begin()}; - auto value{T::Scalar::Read(p, 10, true /*signed*/)}; - if (!value.overflow) { + using Int = typename T::Scalar; + typename Int::ValueWithOverflow num{0, false}; + if (isNegated) { + auto unsignedNum{Int::Read(p, 10, false /*unsigned*/)}; + num.value = unsignedNum.value.Negate().value; + num.overflow = unsignedNum.overflow || num.value > Int{0}; + if (!num.overflow && num.value.Negate().overflow && + !analyzer.context().IsInModuleFile(digits)) { + analyzer.Say(digits, + "negated maximum INTEGER(KIND=%d) literal"_port_en_US, T::kind); + } + } else { + num = Int::Read(p, 10, true /*signed*/); + } + if (!num.overflow) { if (T::kind > kind) { if (!isDefaultKind || !analyzer.context().IsEnabled(LanguageFeature::BigIntLiterals)) { @@ -438,7 +451,7 @@ struct IntTypeVisitor { } } return Expr{ - Expr{Expr{Constant{std::move(value.value)}}}}; + Expr{Expr{Constant{std::move(num.value)}}}}; } } return std::nullopt; @@ -447,17 +460,19 @@ struct IntTypeVisitor { parser::CharBlock digits; int kind; bool isDefaultKind; + bool isNegated; }; template -MaybeExpr 
ExpressionAnalyzer::IntLiteralConstant(const PARSED &x) { +MaybeExpr ExpressionAnalyzer::IntLiteralConstant( + const PARSED &x, bool isNegated) { const auto &kindParam{std::get>(x.t)}; bool isDefaultKind{!kindParam}; int kind{AnalyzeKindParam(kindParam, GetDefaultKind(TypeCategory::Integer))}; if (CheckIntrinsicKind(TypeCategory::Integer, kind)) { auto digits{std::get(x.t)}; if (MaybeExpr result{common::SearchTypes( - IntTypeVisitor{*this, digits, kind, isDefaultKind})}) { + IntTypeVisitor{*this, digits, kind, isDefaultKind, isNegated})}) { return result; } else if (isDefaultKind) { Say(digits, @@ -471,10 +486,11 @@ MaybeExpr ExpressionAnalyzer::IntLiteralConstant(const PARSED &x) { return std::nullopt; } -MaybeExpr ExpressionAnalyzer::Analyze(const parser::IntLiteralConstant &x) { +MaybeExpr ExpressionAnalyzer::Analyze( + const parser::IntLiteralConstant &x, bool isNegated) { auto restorer{ GetContextualMessages().SetLocation(std::get(x.t))}; - return IntLiteralConstant(x); + return IntLiteralConstant(x, isNegated); } MaybeExpr ExpressionAnalyzer::Analyze( @@ -2595,6 +2611,13 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::Expr::UnaryPlus &x) { } MaybeExpr ExpressionAnalyzer::Analyze(const parser::Expr::Negate &x) { + if (const auto *litConst{ + std::get_if(&x.v.value().u)}) { + if (const auto *intConst{ + std::get_if(&litConst->u)}) { + return Analyze(*intConst, true); + } + } return NumericUnaryHelper(*this, NumericOperator::Subtract, x); } @@ -3462,10 +3485,10 @@ bool ArgumentAnalyzer::OkLogicalIntegerAssignment( std::optional msg; if (lhs == TypeCategory::Integer && rhs == TypeCategory::Logical) { // allow assignment to LOGICAL from INTEGER as a legacy extension - msg = "nonstandard usage: assignment of LOGICAL to INTEGER"_port_en_US; + msg = "assignment of LOGICAL to INTEGER"_port_en_US; } else if (lhs == TypeCategory::Logical && rhs == TypeCategory::Integer) { // ... 
and assignment to LOGICAL from INTEGER - msg = "nonstandard usage: assignment of INTEGER to LOGICAL"_port_en_US; + msg = "assignment of INTEGER to LOGICAL"_port_en_US; } else { return false; } diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 466a49f9ab4d70..2ba8918f884731 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -1018,9 +1018,13 @@ Scope *ModFileReader::Read(const SourceName &name, if (!pair.second) { return nullptr; } + // Process declarations from the module file Symbol &modSymbol{*pair.first->second}; modSymbol.set(Symbol::Flag::ModFile); + bool wasInModuleFile{context_.foldingContext().inModuleFile()}; + context_.foldingContext().set_inModuleFile(true); ResolveNames(context_, parseTree, topScope); + context_.foldingContext().set_inModuleFile(wasInModuleFile); CHECK(modSymbol.has()); CHECK(modSymbol.test(Symbol::Flag::ModFile)); if (isIntrinsic.value_or(false)) { diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index c409bb59ee0619..762877d671dab6 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -356,6 +356,16 @@ Scope &SemanticsContext::FindScope(parser::CharBlock source) { } } +bool SemanticsContext::IsInModuleFile(parser::CharBlock source) const { + for (const Scope *scope{&FindScope(source)}; !scope->IsGlobal(); + scope = &scope->parent()) { + if (scope->IsModuleFile()) { + return true; + } + } + return false; +} + void SemanticsContext::PopConstruct() { CHECK(!constructStack_.empty()); constructStack_.pop_back(); diff --git a/flang/test/Semantics/dosemantics03.f90 b/flang/test/Semantics/dosemantics03.f90 index 0d2664062cbb07..89e46a6ca516a8 100644 --- a/flang/test/Semantics/dosemantics03.f90 +++ b/flang/test/Semantics/dosemantics03.f90 @@ -215,7 +215,7 @@ END FUNCTION ifunc ! 
Invalid initial expression !ERROR: Integer literal is too large for INTEGER(KIND=4) - DO ivar = -2147483648_4, 10, 3 + DO ivar = -2147483649_4, 10, 3 PRINT *, "ivar is: ", ivar END DO @@ -257,7 +257,7 @@ END FUNCTION ifunc ! Invalid final expression !ERROR: Integer literal is too large for INTEGER(KIND=4) - DO ivar = 1, -2147483648_4, 3 + DO ivar = 1, -2147483649_4, 3 PRINT *, "ivar is: ", ivar END DO @@ -299,7 +299,7 @@ END FUNCTION ifunc ! Invalid step expression !ERROR: Integer literal is too large for INTEGER(KIND=4) - DO ivar = 1, 10, -2147483648_4 + DO ivar = 1, 10, -2147483649_4 PRINT *, "ivar is: ", ivar END DO From bc08a16d828bd589a4ec832200e97e6c96a6ef41 Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Fri, 27 May 2022 19:21:52 +0900 Subject: [PATCH 764/908] Remove `deplibs` keyword completely D102763 removed almost all support for `deplibs`, but it seems `kw_deplibs` was missed. This patch removes it. Differential Revision: https://reviews.llvm.org/D126527 --- llvm/include/llvm/AsmParser/LLToken.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 715401f126be8e..11a7a6768e91c2 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -88,7 +88,6 @@ enum Kind { kw_triple, kw_source_filename, kw_unwind, - kw_deplibs, // FIXME: Remove in 4.0 kw_datalayout, kw_volatile, kw_atomic, From 3142c761da974e6f963cebca34a7880392203347 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 26 May 2022 11:01:19 -0700 Subject: [PATCH 765/908] [nfc][flang] Fix spelling errors and usage in an error message Differential Revision: https://reviews.llvm.org/D126490 --- flang/lib/Semantics/check-call.cpp | 2 +- flang/test/Semantics/call02.f90 | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 19edc232fede93..10d178b312c72f 100644 --- 
a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -865,7 +865,7 @@ static bool CheckElementalConformance(parser::ContextualMessages &messages, dummy.GetIntent() == common::Intent::InOut) && hasArrayArg) { messages.Say( - "In an elemental procedure with at least one array arugment, actual argument %s that corresponds to an INTENT(OUT) or INTENT(INOUT) dummay argument must be an array"_err_en_US, + "In an elemental procedure reference with at least one array argument, actual argument %s that corresponds to an INTENT(OUT) or INTENT(INOUT) dummy argument must be an array"_err_en_US, expr->AsFortran()); } } diff --git a/flang/test/Semantics/call02.f90 b/flang/test/Semantics/call02.f90 index db10472f1a4e5a..8ef10415be80ff 100644 --- a/flang/test/Semantics/call02.f90 +++ b/flang/test/Semantics/call02.f90 @@ -126,9 +126,8 @@ subroutine p04 subroutine p05 integer :: a1(2), a2, a3 - - !ERROR: In an elemental procedure with at least one array arugment, actual argument a2 that corresponds to an INTENT(OUT) or INTENT(INOUT) dummay argument must be an array - !ERROR: In an elemental procedure with at least one array arugment, actual argument a3 that corresponds to an INTENT(OUT) or INTENT(INOUT) dummay argument must be an array + !ERROR: In an elemental procedure reference with at least one array argument, actual argument a2 that corresponds to an INTENT(OUT) or INTENT(INOUT) dummy argument must be an array + !ERROR: In an elemental procedure reference with at least one array argument, actual argument a3 that corresponds to an INTENT(OUT) or INTENT(INOUT) dummy argument must be an array call s1(a1, a2, a3) contains elemental subroutine s1(a, b, c) @@ -139,4 +138,3 @@ elemental subroutine s1(a, b, c) c = a end end - From 0c190575ebfc81e117d95e2eca25789610192125 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 26 May 2022 16:56:27 -0700 Subject: [PATCH 766/908] [flang] Make generic resolution conform to 15.5.5.2 w/r/t host association When 
two or more generic interfaces are available by declaration or by USE association at different scoping levels, we need to search the outer generic interfaces as well as the inner ones, but only after the inner ones have failed to produce a specific procedure that matches a given set of actual arguments. This means that it is possible for a specific procedure of a generic interface of an inner scope to override a conflicting specific procedure of a generic interface of an outer scope. Also cope with forward references to derived types when a generic interface is also in scope. Fixes LLVM bug https://github.com/llvm/llvm-project/issues/55240 and LLVM bug https://github.com/llvm/llvm-project/issues/55300. Differential Revision: https://reviews.llvm.org/D126587 --- flang/include/flang/Semantics/expression.h | 4 +- flang/include/flang/Semantics/tools.h | 5 + flang/lib/Semantics/expression.cpp | 159 ++++++++++++--------- flang/lib/Semantics/resolve-names.cpp | 99 ++++++------- flang/lib/Semantics/tools.cpp | 2 +- flang/test/Semantics/generic01.f90 | 84 +++++++++++ flang/test/Semantics/resolve22.f90 | 15 ++ 7 files changed, 247 insertions(+), 121 deletions(-) create mode 100644 flang/test/Semantics/generic01.f90 diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h index e6a88ec6f16ea8..6c0d385a266bc9 100644 --- a/flang/include/flang/Semantics/expression.h +++ b/flang/include/flang/Semantics/expression.h @@ -336,7 +336,7 @@ class ExpressionAnalyzer { }; std::optional AnalyzeProcedureComponentRef( - const parser::ProcComponentRef &, ActualArguments &&); + const parser::ProcComponentRef &, ActualArguments &&, bool isSubroutine); std::optional CheckCall( parser::CharBlock, const ProcedureDesignator &, ActualArguments &); using AdjustActuals = @@ -344,7 +344,7 @@ class ExpressionAnalyzer { bool ResolveForward(const Symbol &); std::pair ResolveGeneric(const Symbol &, const ActualArguments &, const AdjustActuals &, - bool 
mightBeStructureConstructor = false); + bool isSubroutine, bool mightBeStructureConstructor = false); void EmitGenericResolutionError(const Symbol &, bool dueToNullActuals); const Symbol &AccessSpecific( const Symbol &originalGeneric, const Symbol &specific); diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 27c9f36727d919..58e789ba85ab7a 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -614,5 +614,10 @@ bool HasDefinedIo( const Symbol *FindUnsafeIoDirectComponent( GenericKind::DefinedIo, const DerivedTypeSpec &, const Scope * = nullptr); +// Some intrinsic operators have more than one name (e.g. `operator(.eq.)` and +// `operator(==)`). GetAllNames() returns them all, including symbolName. +std::forward_list GetAllNames( + const SemanticsContext &, const SourceName &); + } // namespace Fortran::semantics #endif // FORTRAN_SEMANTICS_TOOLS_H_ diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index be295408279bce..b2330db5ef5d66 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -9,6 +9,7 @@ #include "flang/Semantics/expression.h" #include "check-call.h" #include "pointer-assignment.h" +#include "resolve-names-utils.h" #include "resolve-names.h" #include "flang/Common/Fortran.h" #include "flang/Common/idioms.h" @@ -1911,8 +1912,8 @@ static const Symbol *GetBindingResolution( } auto ExpressionAnalyzer::AnalyzeProcedureComponentRef( - const parser::ProcComponentRef &pcr, ActualArguments &&arguments) - -> std::optional { + const parser::ProcComponentRef &pcr, ActualArguments &&arguments, + bool isSubroutine) -> std::optional { const parser::StructureComponent &sc{pcr.v.thing}; if (MaybeExpr base{Analyze(sc.base)}) { if (const Symbol * sym{sc.component.symbol}) { @@ -1935,7 +1936,7 @@ auto ExpressionAnalyzer::AnalyzeProcedureComponentRef( } return true; }}; - auto pair{ResolveGeneric(*sym, arguments, 
adjustment)}; + auto pair{ResolveGeneric(*sym, arguments, adjustment, isSubroutine)}; sym = pair.first; if (sym) { // re-resolve the name to the specific binding @@ -2060,67 +2061,94 @@ bool ExpressionAnalyzer::ResolveForward(const Symbol &symbol) { // adjustActuals is called on procedure bindings to handle pass arg. std::pair ExpressionAnalyzer::ResolveGeneric( const Symbol &symbol, const ActualArguments &actuals, - const AdjustActuals &adjustActuals, bool mightBeStructureConstructor) { + const AdjustActuals &adjustActuals, bool isSubroutine, + bool mightBeStructureConstructor) { const Symbol *elemental{nullptr}; // matching elemental specific proc const Symbol *nonElemental{nullptr}; // matching non-elemental specific - const auto &details{symbol.GetUltimate().get()}; - bool anyBareNullActual{ - std::find_if(actuals.begin(), actuals.end(), [](auto iter) { - return IsBareNullPointer(iter->UnwrapExpr()); - }) != actuals.end()}; - for (const Symbol &specific : details.specificProcs()) { - if (!ResolveForward(specific)) { - continue; - } - if (std::optional procedure{ - characteristics::Procedure::Characterize( - ProcedureDesignator{specific}, context_.foldingContext())}) { - ActualArguments localActuals{actuals}; - if (specific.has()) { - if (!adjustActuals.value()(specific, localActuals)) { - continue; - } - } - if (semantics::CheckInterfaceForGeneric(*procedure, localActuals, - GetFoldingContext(), false /* no integer conversions */) && - CheckCompatibleArguments(*procedure, localActuals)) { - if ((procedure->IsElemental() && elemental) || - (!procedure->IsElemental() && nonElemental)) { - // 16.9.144(6): a bare NULL() is not allowed as an actual - // argument to a generic procedure if the specific procedure - // cannot be unambiguously distinguished - return {nullptr, true /* due to NULL actuals */}; + const Symbol &ultimate{symbol.GetUltimate()}; + // Check for a match with an explicit INTRINSIC + if (ultimate.attrs().test(semantics::Attr::INTRINSIC)) { + 
parser::Messages buffer; + auto restorer{foldingContext_.messages().SetMessages(buffer)}; + ActualArguments localActuals{actuals}; + if (context_.intrinsics().Probe( + CallCharacteristics{ultimate.name().ToString(), isSubroutine}, + localActuals, foldingContext_) && + !buffer.AnyFatalError()) { + return {&ultimate, false}; + } + } + if (const auto *details{ultimate.detailsIf()}) { + bool anyBareNullActual{ + std::find_if(actuals.begin(), actuals.end(), [](auto iter) { + return IsBareNullPointer(iter->UnwrapExpr()); + }) != actuals.end()}; + for (const Symbol &specific : details->specificProcs()) { + if (!ResolveForward(specific)) { + continue; + } + if (std::optional procedure{ + characteristics::Procedure::Characterize( + ProcedureDesignator{specific}, context_.foldingContext())}) { + ActualArguments localActuals{actuals}; + if (specific.has()) { + if (!adjustActuals.value()(specific, localActuals)) { + continue; + } } - if (!procedure->IsElemental()) { - // takes priority over elemental match - nonElemental = &specific; - if (!anyBareNullActual) { - break; // unambiguous case + if (semantics::CheckInterfaceForGeneric(*procedure, localActuals, + GetFoldingContext(), false /* no integer conversions */) && + CheckCompatibleArguments(*procedure, localActuals)) { + if ((procedure->IsElemental() && elemental) || + (!procedure->IsElemental() && nonElemental)) { + // 16.9.144(6): a bare NULL() is not allowed as an actual + // argument to a generic procedure if the specific procedure + // cannot be unambiguously distinguished + return {nullptr, true /* due to NULL actuals */}; + } + if (!procedure->IsElemental()) { + // takes priority over elemental match + nonElemental = &specific; + if (!anyBareNullActual) { + break; // unambiguous case + } + } else { + elemental = &specific; } - } else { - elemental = &specific; } } } - } - if (nonElemental) { - return {&AccessSpecific(symbol, *nonElemental), false}; - } else if (elemental) { - return {&AccessSpecific(symbol, 
*elemental), false}; - } - // Check parent derived type - if (const auto *parentScope{symbol.owner().GetDerivedTypeParent()}) { - if (const Symbol * extended{parentScope->FindComponent(symbol.name())}) { - if (extended->GetUltimate().has()) { - auto pair{ResolveGeneric(*extended, actuals, adjustActuals, false)}; + if (nonElemental) { + return {&AccessSpecific(symbol, *nonElemental), false}; + } else if (elemental) { + return {&AccessSpecific(symbol, *elemental), false}; + } + // Check parent derived type + if (const auto *parentScope{symbol.owner().GetDerivedTypeParent()}) { + if (const Symbol * extended{parentScope->FindComponent(symbol.name())}) { + auto pair{ResolveGeneric( + *extended, actuals, adjustActuals, isSubroutine, false)}; if (pair.first) { return pair; } } } + if (mightBeStructureConstructor && details->derivedType()) { + return {details->derivedType(), false}; + } } - if (mightBeStructureConstructor && details.derivedType()) { - return {details.derivedType(), false}; + // Check for generic or explicit INTRINSIC of the same name in outer scopes. + // See 15.5.5.2 for details. 
+ if (!symbol.owner().IsGlobal() && !symbol.owner().IsDerivedType()) { + for (const std::string &n : GetAllNames(context_, symbol.name())) { + if (const Symbol * outer{symbol.owner().parent().FindSymbol(n)}) { + auto pair{ResolveGeneric(*outer, actuals, adjustActuals, isSubroutine, + mightBeStructureConstructor)}; + if (pair.first) { + return pair; + } + } + } } return {nullptr, false}; } @@ -2179,7 +2207,8 @@ auto ExpressionAnalyzer::GetCalleeAndArguments( isSubroutine, mightBeStructureConstructor); }, [&](const parser::ProcComponentRef &pcr) { - return AnalyzeProcedureComponentRef(pcr, std::move(arguments)); + return AnalyzeProcedureComponentRef( + pcr, std::move(arguments), isSubroutine); }, }, pd.u); @@ -2196,28 +2225,26 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name, CheckForBadRecursion(name.source, ultimate); bool dueToNullActual{false}; bool isGenericInterface{ultimate.has()}; + bool isExplicitIntrinsic{ultimate.attrs().test(semantics::Attr::INTRINSIC)}; const Symbol *resolution{nullptr}; - if (isGenericInterface) { + if (isGenericInterface || isExplicitIntrinsic) { ExpressionAnalyzer::AdjustActuals noAdjustment; - auto pair{ResolveGeneric( - *symbol, arguments, noAdjustment, mightBeStructureConstructor)}; + auto pair{ResolveGeneric(*symbol, arguments, noAdjustment, isSubroutine, + mightBeStructureConstructor)}; resolution = pair.first; dueToNullActual = pair.second; if (resolution) { // re-resolve name to the specific procedure name.symbol = const_cast(resolution); } + } else { + resolution = symbol; } - if (!resolution) { + if (!resolution || resolution->attrs().test(semantics::Attr::INTRINSIC)) { // Not generic, or no resolution; may be intrinsic - bool isIntrinsic{symbol->attrs().test(semantics::Attr::INTRINSIC)}; - if (!isIntrinsic && !isGenericInterface) { - resolution = symbol; - } else if (std::optional specificCall{ - context_.intrinsics().Probe( - CallCharacteristics{ - ultimate.name().ToString(), isSubroutine}, - 
arguments, GetFoldingContext())}) { + if (std::optional specificCall{context_.intrinsics().Probe( + CallCharacteristics{ultimate.name().ToString(), isSubroutine}, + arguments, GetFoldingContext())}) { CheckBadExplicitType(*specificCall, *symbol); return CalleeAndArguments{ ProcedureDesignator{std::move(specificCall->specificIntrinsic)}, @@ -3507,7 +3534,7 @@ std::optional ArgumentAnalyzer::GetDefinedAssignmentProc() { const auto &scope{context_.context().FindScope(source_)}; if (const Symbol * symbol{scope.FindSymbol(oprName)}) { ExpressionAnalyzer::AdjustActuals noAdjustment; - auto pair{context_.ResolveGeneric(*symbol, actuals_, noAdjustment)}; + auto pair{context_.ResolveGeneric(*symbol, actuals_, noAdjustment, true)}; if (pair.first) { proc = pair.first; } else { @@ -3615,7 +3642,7 @@ const Symbol *ArgumentAnalyzer::FindBoundOp( [&](const Symbol &proc, ActualArguments &) { return passIndex == GetPassIndex(proc); }}; - auto pair{context_.ResolveGeneric(*symbol, actuals_, adjustment)}; + auto pair{context_.ResolveGeneric(*symbol, actuals_, adjustment, false)}; if (!pair.first) { context_.EmitGenericResolutionError(*symbol, pair.second); } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index f03d3fa24b990f..4c34392ecdc848 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -593,6 +593,10 @@ class ScopeHandler : public ImplicitRulesVisitor { derivedType = &currScope().MakeSymbol(name, attrs, std::move(details)); d->set_derivedType(*derivedType); + } else if (derivedType->CanReplaceDetails(details)) { + // was forward-referenced + derivedType->attrs() |= attrs; + derivedType->set_details(std::move(details)); } else { SayAlreadyDeclared(name, *derivedType); } @@ -4048,11 +4052,13 @@ bool DeclarationVisitor::Pre(const parser::IntentStmt &x) { bool DeclarationVisitor::Pre(const parser::IntrinsicStmt &x) { HandleAttributeStmt(Attr::INTRINSIC, x.v); for (const auto &name : x.v) { + 
if (!IsIntrinsic(name.source, std::nullopt)) { + Say(name.source, "'%s' is not a known intrinsic procedure"_err_en_US); + } auto &symbol{DEREF(FindSymbol(name))}; if (symbol.has()) { // Generic interface is extending intrinsic; ok - } else if (!symbol.has() && - !ConvertToProcEntity(symbol)) { + } else if (!ConvertToProcEntity(symbol)) { SayWithDecl( name, symbol, "INTRINSIC attribute not allowed on '%s'"_err_en_US); } else if (symbol.attrs().test(Attr::EXTERNAL)) { // C840 @@ -4096,25 +4102,6 @@ bool DeclarationVisitor::HandleAttributeStmt( } Symbol &DeclarationVisitor::HandleAttributeStmt( Attr attr, const parser::Name &name) { - if (attr == Attr::INTRINSIC) { - if (!IsIntrinsic(name.source, std::nullopt)) { - Say(name.source, "'%s' is not a known intrinsic procedure"_err_en_US); - } else if (currScope().kind() == Scope::Kind::Subprogram || - currScope().kind() == Scope::Kind::Block) { - if (auto *symbol{FindSymbol(name)}) { - if (symbol->GetUltimate().has() && - symbol->owner() != currScope()) { - // Declaring a name INTRINSIC when there is a generic - // interface of the same name in the host scope. - // Host-associate the generic and mark it INTRINSIC - // rather than completely overriding the generic. - symbol = &MakeHostAssocSymbol(name, *symbol); - symbol->attrs().set(Attr::INTRINSIC); - return *symbol; - } - } - } - } auto *symbol{FindInScope(name)}; if (attr == Attr::ASYNCHRONOUS || attr == Attr::VOLATILE) { // these can be set on a symbol that is host-assoc or use-assoc @@ -5626,13 +5613,28 @@ void DeclarationVisitor::SetType( std::optional DeclarationVisitor::ResolveDerivedType( const parser::Name &name) { - Symbol *symbol{FindSymbol(NonDerivedTypeScope(), name)}; - if (!symbol || symbol->has()) { + Scope &outer{NonDerivedTypeScope()}; + Symbol *symbol{FindSymbol(outer, name)}; + Symbol *ultimate{symbol ? &symbol->GetUltimate() : nullptr}; + auto *generic{ultimate ? 
ultimate->detailsIf() : nullptr}; + if (generic) { + if (Symbol * genDT{generic->derivedType()}) { + symbol = genDT; + generic = nullptr; + } + } + if (!symbol || symbol->has() || + (generic && &ultimate->owner() == &outer)) { if (allowForwardReferenceToDerivedType()) { if (!symbol) { - symbol = &MakeSymbol(InclusiveScope(), name.source, Attrs{}); + symbol = &MakeSymbol(outer, name.source, Attrs{}); Resolve(name, *symbol); - }; + } else if (generic) { + // forward ref to type with later homonymous generic + symbol = &outer.MakeSymbol(name.source, Attrs{}, UnknownDetails{}); + generic->set_derivedType(*symbol); + name.symbol = symbol; + } DerivedTypeDetails details; details.set_isForwardReferenced(true); symbol->set_details(std::move(details)); @@ -5645,11 +5647,6 @@ std::optional DeclarationVisitor::ResolveDerivedType( return std::nullopt; } symbol = &symbol->GetUltimate(); - if (auto *details{symbol->detailsIf()}) { - if (details->derivedType()) { - symbol = &details->derivedType()->GetUltimate(); - } - } if (symbol->has()) { return DerivedTypeSpec{name.source, *symbol}; } else { @@ -7056,39 +7053,37 @@ void ResolveNamesVisitor::CreateGeneric(const parser::GenericSpec &x) { Symbol *existing{nullptr}; // Check all variants of names, e.g. "operator(.ne.)" for "operator(/=)" for (const std::string &n : GetAllNames(context(), symbolName)) { - existing = currScope().FindSymbol(n); - if (existing) { + if (auto iter{currScope().find(n)}; iter != currScope().end()) { + existing = &*iter->second; break; } } if (existing) { Symbol &ultimate{existing->GetUltimate()}; if (const auto *existingGeneric{ultimate.detailsIf()}) { - if (&ultimate.owner() != &currScope()) { - // Create a local copy of a host or use associated generic so that + if (const auto *existingUse{existing->detailsIf()}) { + // Create a local copy of a use associated generic so that // it can be locally extended without corrupting the original. 
genericDetails.CopyFrom(*existingGeneric); - if (const auto *use{existing->detailsIf()}) { - AddGenericUse(genericDetails, existing->name(), use->symbol()); - EraseSymbol(*existing); - } - existing = &MakeSymbol(symbolName, Attrs{}, std::move(genericDetails)); - } - info.Resolve(existing); - return; - } - if (&existing->owner() == &currScope()) { - if (ultimate.has() || - ultimate.has()) { - genericDetails.set_specific(ultimate); - } else if (ultimate.has()) { - genericDetails.set_derivedType(ultimate); - } else { - SayAlreadyDeclared(symbolName, *existing); + AddGenericUse(genericDetails, existing->name(), existingUse->symbol()); + } else if (existing == &ultimate) { + // Extending an extant generic in the same scope + info.Resolve(existing); return; + } else { + // Host association of a generic is handled in ResolveGeneric() + CHECK(existing->has()); } - EraseSymbol(*existing); + } else if (ultimate.has() || + ultimate.has()) { + genericDetails.set_specific(ultimate); + } else if (ultimate.has()) { + genericDetails.set_derivedType(ultimate); + } else { + SayAlreadyDeclared(symbolName, *existing); + return; } + EraseSymbol(*existing); } info.Resolve(&MakeSymbol(symbolName, Attrs{}, std::move(genericDetails))); } diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 807c0152eb9a10..848bf2dca32a8f 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -668,7 +668,7 @@ bool HasImpureFinal(const DerivedTypeSpec &derived) { derived.typeSymbol().detailsIf()}) { const auto &finals{details->finals()}; return std::any_of(finals.begin(), finals.end(), - [](const auto &x) { return !x.second->attrs().test(Attr::PURE); }); + [](const auto &x) { return !IsPureProcedure(*x.second); }); } else { return false; } diff --git a/flang/test/Semantics/generic01.f90 b/flang/test/Semantics/generic01.f90 new file mode 100644 index 00000000000000..21132f92d090a5 --- /dev/null +++ b/flang/test/Semantics/generic01.f90 @@ -0,0 +1,84 @@ +! 
RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s +! Tests rules of 15.5.5.2 for generics and explicit intrinsics +! competing at various scoping levels. +module m1 + private + public abs + interface abs + module procedure :: abs_int_redef, abs_noargs + end interface +contains + integer function abs_int_redef(j) + integer, intent(in) :: j + abs_int_redef = j + end function + integer function abs_noargs() + abs_noargs = 0 + end function +end module + +module m2 + private + public abs + interface abs + module procedure abs_real_redef + end interface +contains + real function abs_real_redef(x) + real, intent(in) :: x + abs_real_redef = x + end function +end module + +module m3 + use m1, only: abs + implicit none +contains + subroutine test1 + use m2, only: abs + !CHECK: abs_int_redef( + print *, abs(1) + !CHECK: abs_real_redef( + print *, abs(1.) + !CHECK: 1.41421353816986083984375_4 + print *, abs((1,1)) + !CHECK: abs_noargs( + print *, abs() + end subroutine + subroutine test2 + intrinsic abs ! override some of module's use of m1 + block + use m2, only: abs + !CHECK: 1_4 + print *, abs(1) + !CHECK: abs_real_redef( + print *, abs(1.) + !CHECK: 1.41421353816986083984375_4 + print *, abs((1,1)) + !CHECK: abs_noargs( + print *, abs() + end block + end subroutine + subroutine test3 + interface abs + module procedure abs_int_redef2 ! override module's use of m1 + end interface + !CHECK: abs_int_redef2( + print *, abs(1) + !CHECK: 1._4 + print *, abs(1.) + !CHECK: 1.41421353816986083984375_4 + print *, abs((1,1)) + !CHECK: abs_noargs( + print *, abs() + block + use m1, only: abs ! 
override the override + !CHECK: abs_int_redef( + print *, abs(1) + end block + end subroutine + integer function abs_int_redef2(j) + integer, intent(in) :: j + abs_int_redef2 = j + end function +end module diff --git a/flang/test/Semantics/resolve22.f90 b/flang/test/Semantics/resolve22.f90 index 6f2d00958d372b..8009a32a0f0d51 100644 --- a/flang/test/Semantics/resolve22.f90 +++ b/flang/test/Semantics/resolve22.f90 @@ -30,3 +30,18 @@ subroutine s3 type(t) :: x x = t() end subroutine + +module m4 + type t1 + class(t2), pointer :: p => null() + end type + type t2 + end type + interface t2 + procedure ctor + end interface + contains + function ctor() + type(t2) ctor + end function +end module From 50f2e49924566330ea9aa731908f2864603bf4fb Mon Sep 17 00:00:00 2001 From: Daniel Hannon Date: Sat, 28 May 2022 10:10:10 -0700 Subject: [PATCH 767/908] [clang][cmake] Fixed typo in hmaptool CMakeLists.txt There was a typo in the CMakeLists.txt for hmap tool that installed it to the wrong directory https://github.com/llvm/llvm-project/issues/55753 Reviewed By: keith Differential Revision: https://reviews.llvm.org/D126598 --- clang/utils/hmaptool/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/utils/hmaptool/CMakeLists.txt b/clang/utils/hmaptool/CMakeLists.txt index 320a24da346d63..375d7b04488297 100644 --- a/clang/utils/hmaptool/CMakeLists.txt +++ b/clang/utils/hmaptool/CMakeLists.txt @@ -2,7 +2,7 @@ add_custom_command(OUTPUT "${LLVM_TOOLS_BINARY_DIR}/hmaptool" COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/hmaptool" "${LLVM_TOOLS_BINARY_DIR}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/hmaptool") -install(PROGRAMS hmaptool DESTINATION "${LLVM_UTILS_INSTALL_DIR}}" COMPONENT hmaptool) +install(PROGRAMS hmaptool DESTINATION "${LLVM_UTILS_INSTALL_DIR}" COMPONENT hmaptool) add_custom_target(hmaptool ALL DEPENDS "${LLVM_TOOLS_BINARY_DIR}/hmaptool") set_target_properties(hmaptool PROPERTIES FOLDER "Utils") From 
f1983feaa1962b37e590dbf7875481210ace6f17 Mon Sep 17 00:00:00 2001 From: Emil Kieri Date: Sat, 28 May 2022 00:27:59 +0200 Subject: [PATCH 768/908] [flang] Make extension explicit: exponent-letter matching kind-param As an extension for REAL literals, we allow an exponent letter which matches an explicit kind-param. The standard requires the exponent to be 'E' if a kind-param is present. This patch - documents this extension in Extensions.md - enables a portability warning if it is used with -pedantic The test case for this, kinds05.f90, needs D125804, which makes test_errors.py test warnings as well, to actually test the warnings. I include it already now to keep things together, it will do no harm (I hope ...). We also add WARNING-directives to the test kinds04.f90 in preparation for D125804. As the exponent-letter 'Q' does not imply the same kind on all platforms, the emitted warnings are platform-dependent. Therefore, the test is duplicated into two variants which are run conditionally. Finally, we promote the portability warning for when the exponent letter is neither 'E' nor matching the kind-param to a standard warning. Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D126459 --- flang/docs/Extensions.md | 1 + flang/include/flang/Common/Fortran-features.h | 16 ++++----- flang/lib/Semantics/expression.cpp | 18 +++++++--- flang/test/Semantics/kinds04_q10.f90 | 36 +++++++++++++++++++ .../{kinds04.f90 => kinds04_q16.f90} | 23 +++++++----- flang/test/Semantics/kinds05.f90 | 18 ++++++++++ 6 files changed, 91 insertions(+), 21 deletions(-) create mode 100644 flang/test/Semantics/kinds04_q10.f90 rename flang/test/Semantics/{kinds04.f90 => kinds04_q16.f90} (61%) create mode 100644 flang/test/Semantics/kinds05.f90 diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index e8381b319f56f0..484aee5cfcc208 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -95,6 +95,7 @@ end "not yet implemented" message. 
* Structure field access with `.field` * `BYTE` as synonym for `INTEGER(KIND=1)`; but not when spelled `TYPE(BYTE)`. +* When kind-param is used for REAL literals, allow a matching exponent letter * Quad precision REAL literals with `Q` * `X` prefix/suffix as synonym for `Z` on hexadecimal literals * `B`, `O`, `Z`, and `X` accepted as suffixes as well as prefixes diff --git a/flang/include/flang/Common/Fortran-features.h b/flang/include/flang/Common/Fortran-features.h index 2a095791835421..2411e7cd02932d 100644 --- a/flang/include/flang/Common/Fortran-features.h +++ b/flang/include/flang/Common/Fortran-features.h @@ -19,14 +19,14 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines, FixedFormContinuationWithColumn1Ampersand, LogicalAbbreviations, XOROperator, PunctuationInNames, OptionalFreeFormSpace, BOZExtensions, EmptyStatement, AlternativeNE, ExecutionPartNamelist, DECStructures, - DoubleComplex, Byte, StarKind, QuadPrecision, SlashInitialization, - TripletInArrayConstructor, MissingColons, SignedComplexLiteral, - OldStyleParameter, ComplexConstructor, PercentLOC, SignedPrimary, FileName, - Carriagecontrol, Convert, Dispose, IOListLeadingComma, - AbbreviatedEditDescriptor, ProgramParentheses, PercentRefAndVal, - OmitFunctionDummies, CrayPointer, Hollerith, ArithmeticIF, Assign, - AssignedGOTO, Pause, OpenACC, OpenMP, CruftAfterAmpersand, ClassicCComments, - AdditionalFormats, BigIntLiterals, RealDoControls, + DoubleComplex, Byte, StarKind, ExponentMatchingKindParam, QuadPrecision, + SlashInitialization, TripletInArrayConstructor, MissingColons, + SignedComplexLiteral, OldStyleParameter, ComplexConstructor, PercentLOC, + SignedPrimary, FileName, Carriagecontrol, Convert, Dispose, + IOListLeadingComma, AbbreviatedEditDescriptor, ProgramParentheses, + PercentRefAndVal, OmitFunctionDummies, CrayPointer, Hollerith, ArithmeticIF, + Assign, AssignedGOTO, Pause, OpenACC, OpenMP, CruftAfterAmpersand, + ClassicCComments, AdditionalFormats, BigIntLiterals, 
RealDoControls, EquivalenceNumericWithCharacter, EquivalenceNonDefaultNumeric, EquivalenceSameNonSequence, AdditionalIntrinsics, AnonymousParents, OldLabelDoEndStatements, LogicalIntegerAssignment, EmptySourceFile, diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index b2330db5ef5d66..d8b7729a59d37a 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -569,12 +569,20 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::RealLiteralConstant &x) { if (letterKind) { defaultKind = *letterKind; } - // C716 requires 'E' as an exponent, but this is more useful + // C716 requires 'E' as an exponent. + // Extension: allow exponent-letter matching the kind-param. auto kind{AnalyzeKindParam(x.kind, defaultKind)}; - if (letterKind && kind != *letterKind && expoLetter != 'e') { - Say("Explicit kind parameter on real constant disagrees with " - "exponent letter '%c'"_port_en_US, - expoLetter); + if (letterKind && expoLetter != 'e') { + if (kind != *letterKind) { + Say("Explicit kind parameter on real constant disagrees with " + "exponent letter '%c'"_warn_en_US, + expoLetter); + } else if (x.kind && + context_.ShouldWarn( + common::LanguageFeature::ExponentMatchingKindParam)) { + Say("Explicit kind parameter together with non-'E' exponent letter " + "is not standard"_port_en_US); + } } auto result{common::SearchTypes( RealTypeVisitor{kind, x.real.source, GetFoldingContext()})}; diff --git a/flang/test/Semantics/kinds04_q10.f90 b/flang/test/Semantics/kinds04_q10.f90 new file mode 100644 index 00000000000000..2dbbcbda0f384f --- /dev/null +++ b/flang/test/Semantics/kinds04_q10.f90 @@ -0,0 +1,36 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! C716 If both kind-param and exponent-letter appear, exponent-letter +! shall be E. (As an extension we also allow an exponent-letter which matches +! the kind-param) +! C717 The value of kind-param shall specify an approximation method that +! 
exists on the processor. +! +! This test is for x86_64, where exponent-letter 'q' is for +! 10-byte extended precision +! REQUIRES: x86-registered-target +subroutine s(var) + real :: realvar1 = 4.0E6_4 + real :: realvar2 = 4.0D6 + real :: realvar3 = 4.0Q6 + real :: realvar4 = 4.0D6_8 + real :: realvar5 = 4.0Q6_10 + !WARNING: Explicit kind parameter on real constant disagrees with exponent letter 'q' + real :: realvar6 = 4.0Q6_16 + real :: realvar7 = 4.0E6_8 + real :: realvar8 = 4.0E6_10 + real :: realvar9 = 4.0E6_16 + !ERROR: Unsupported REAL(KIND=32) + real :: realvar10 = 4.0E6_32 + + double precision :: doublevar1 = 4.0E6_4 + double precision :: doublevar2 = 4.0D6 + double precision :: doublevar3 = 4.0Q6 + double precision :: doublevar4 = 4.0D6_8 + !WARNING: Explicit kind parameter on real constant disagrees with exponent letter 'q' + double precision :: doublevar5 = 4.0Q6_16 + double precision :: doublevar6 = 4.0E6_8 + double precision :: doublevar7 = 4.0E6_10 + double precision :: doublevar8 = 4.0E6_16 + !ERROR: Unsupported REAL(KIND=32) + double precision :: doublevar9 = 4.0E6_32 +end subroutine s diff --git a/flang/test/Semantics/kinds04.f90 b/flang/test/Semantics/kinds04_q16.f90 similarity index 61% rename from flang/test/Semantics/kinds04.f90 rename to flang/test/Semantics/kinds04_q16.f90 index 26957d9cfc1dc5..527cbe9aff1221 100644 --- a/flang/test/Semantics/kinds04.f90 +++ b/flang/test/Semantics/kinds04_q16.f90 @@ -1,19 +1,26 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -! C716 If both kind-param and exponent-letter appear, exponent-letter -! shall be E. -! C717 The value of kind-param shall specify an approximation method that +! C716 If both kind-param and exponent-letter appear, exponent-letter +! shall be E. (As an extension we also allow an exponent-letter which matches +! the kind-param) +! C717 The value of kind-param shall specify an approximation method that ! exists on the processor. +! +! 
This test is for non-x86_64, where exponent-letter 'q' is for +! 16-byte quadruple precision +! UNSUPPORTED: x86-registered-target subroutine s(var) real :: realvar1 = 4.0E6_4 real :: realvar2 = 4.0D6 real :: realvar3 = 4.0Q6 real :: realvar4 = 4.0D6_8 - real :: realvar5 = 4.0Q6_16 - real :: realvar6 = 4.0E6_8 - real :: realvar7 = 4.0E6_10 - real :: realvar8 = 4.0E6_16 + !WARNING: Explicit kind parameter on real constant disagrees with exponent letter 'q' + real :: realvar5 = 4.0Q6_10 + real :: realvar6 = 4.0Q6_16 + real :: realvar7 = 4.0E6_8 + real :: realvar8 = 4.0E6_10 + real :: realvar9 = 4.0E6_16 !ERROR: Unsupported REAL(KIND=32) - real :: realvar9 = 4.0E6_32 + real :: realvar10 = 4.0E6_32 double precision :: doublevar1 = 4.0E6_4 double precision :: doublevar2 = 4.0D6 diff --git a/flang/test/Semantics/kinds05.f90 b/flang/test/Semantics/kinds05.f90 new file mode 100644 index 00000000000000..31953393ef3788 --- /dev/null +++ b/flang/test/Semantics/kinds05.f90 @@ -0,0 +1,18 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic +! Check that we get portability warnings for the extensions +! - exponent-letter 'Q' +! - matching but non-'E' exponent letter together with kind-param + +subroutine s + real :: realvar1 = 4.0 + real :: realvar2 = 4.0D6 + real :: realvar3 = 4.0_8 + real :: realvar4 = 4.0E6_4 + real :: realvar5 = 4.0E6_8 + !WARNING: nonstandard usage: Q exponent + real :: realvar6 = 4.0Q6 + !WARNING: Explicit kind parameter together with non-'E' exponent letter is not standard + real :: realvar7 = 4.0D6_8 + !WARNING: Explicit kind parameter on real constant disagrees with exponent letter 'd' + real :: realvar8 = 4.0D6_4 +end subroutine s From 2f23abd6c692134c69b6d19f726d51e11200d8e4 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Sat, 28 May 2022 13:09:36 -0700 Subject: [PATCH 769/908] Increase the default maximum stack walk lldb will only backtrace a fixed number of stack frames, as a last-ditch attempt to avoid a runaway looping backtrace. 
It's unusual that anyone ends up depending on this final safety net in years. The original number of 300000 was picked by seeing how many stack frames I could make in a small recursive function on Darwin systems before using up the default stack space. Checking again today on a modern system, I can exceed this limit & lldb will not show the original invocation of the recursing call. Double the old value to cover this larger maximum possible stack frame count, as a default value. (`target.process.thread.max-backtrace-depth`) --- lldb/source/Target/TargetProperties.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Target/TargetProperties.td b/lldb/source/Target/TargetProperties.td index 62d2c866bba99c..1ceb9db82013d0 100644 --- a/lldb/source/Target/TargetProperties.td +++ b/lldb/source/Target/TargetProperties.td @@ -277,6 +277,6 @@ let Definition = "thread" in { DefaultFalse, Desc<"If true, this thread will single-step and log execution.">; def MaxBacktraceDepth: Property<"max-backtrace-depth", "UInt64">, - DefaultUnsignedValue<300000>, + DefaultUnsignedValue<600000>, Desc<"Maximum number of frames to backtrace.">; } From 6abce17fc285747fb9b5522db2acab5fc06cabe6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 28 May 2022 21:16:05 +0100 Subject: [PATCH 770/908] [VPlan] Use Exiting-block instead of Exit-block terminology (NFC). In LLVM's common loop terminology, an exit block is a block outside a loop with a predecessor inside the loop. An exiting block is a block inside the loop which branches to an exit block outside the loop. This patch updates a few places where VPlan was using ExitBlock for a block exiting a region. Those instances have been updated to use ExitingBlock. 
Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D126173 --- .../Transforms/Vectorize/LoopVectorize.cpp | 26 +++++----- llvm/lib/Transforms/Vectorize/VPlan.cpp | 30 ++++++------ llvm/lib/Transforms/Vectorize/VPlan.h | 49 ++++++++++--------- .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 2 +- .../Transforms/Vectorize/VPlanPredicator.cpp | 4 +- .../Transforms/Vectorize/VPlanVerifier.cpp | 37 +++++++------- 6 files changed, 75 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1a0dea02b340d7..579d2dfb357dde 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3694,7 +3694,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); - VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitBasicBlock(); + VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock(); Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); if (Cost->requiresScalarEpilogue(VF)) { // No edge from the middle block to the unique exit block has been inserted @@ -3905,7 +3905,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, Type *PhiTy = OrigPhi->getType(); VPBasicBlock *LatchVPBB = - PhiR->getParent()->getEnclosingLoopRegion()->getExitBasicBlock(); + PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock(); BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB]; // If tail is folded by masking, the vector value to leave the loop should be // a Select choosing between the vectorized LoopExitInst and vectorized Phi, @@ -8684,7 +8684,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, {CanonicalIVPHI}, DL); CanonicalIVPHI->addOperand(CanonicalIVIncrement); - VPBasicBlock *EB = TopRegion->getExitBasicBlock(); + VPBasicBlock *EB = 
TopRegion->getExitingBasicBlock(); if (IsVPlanNative) EB->setCondBit(nullptr); EB->appendRecipe(CanonicalIVIncrement); @@ -8978,7 +8978,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi()); // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(cast(TopRegion->getExit()), Plan, + adjustRecipesForReductions(cast(TopRegion->getExiting()), Plan, RecipeBuilder, Range.Start); // Introduce a recipe to combine the incoming and previous values of a @@ -9066,7 +9066,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // Fold Exit block into its predecessor if possible. // TODO: Fold block earlier once all VPlan transforms properly maintain a // VPBasicBlock as exit. - VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExit()); + VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExiting()); assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; @@ -9110,8 +9110,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // code-generation. VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); VPBasicBlock *Preheader = LoopRegion->getEntryBasicBlock(); - VPBasicBlock *Exit = LoopRegion->getExitBasicBlock(); - VPBlockBase *Latch = Exit->getSinglePredecessor(); + VPBasicBlock *Exiting = LoopRegion->getExitingBasicBlock(); + VPBlockBase *Latch = Exiting->getSinglePredecessor(); VPBlockBase *Header = Preheader->getSingleSuccessor(); // 1. Move preheader block out of main vector loop. @@ -9120,16 +9120,16 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { VPBlockUtils::connectBlocks(Preheader, LoopRegion); Plan->setEntry(Preheader); - // 2. Disconnect backedge and exit block. + // 2. Disconnect backedge and exiting block. VPBlockUtils::disconnectBlocks(Latch, Header); - VPBlockUtils::disconnectBlocks(Latch, Exit); + VPBlockUtils::disconnectBlocks(Latch, Exiting); - // 3. 
Update entry and exit of main vector loop region. + // 3. Update entry and exiting of main vector loop region. LoopRegion->setEntry(Header); - LoopRegion->setExit(Latch); + LoopRegion->setExiting(Latch); - // 4. Remove exit block. - delete Exit; + // 4. Remove exiting block. + delete Exiting; addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(), true, true); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 17df347e0866f9..d2e844fb095253 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -158,25 +158,25 @@ void VPBlockBase::setPlan(VPlan *ParentPlan) { } /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. -const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { +const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const { const VPBlockBase *Block = this; while (const VPRegionBlock *Region = dyn_cast(Block)) - Block = Region->getExit(); + Block = Region->getExiting(); return cast(Block); } -VPBasicBlock *VPBlockBase::getExitBasicBlock() { +VPBasicBlock *VPBlockBase::getExitingBasicBlock() { VPBlockBase *Block = this; while (VPRegionBlock *Region = dyn_cast(Block)) - Block = Region->getExit(); + Block = Region->getExiting(); return cast(Block); } VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { if (!Successors.empty() || !Parent) return this; - assert(Parent->getExit() == this && - "Block w/o successors not the exit of its parent."); + assert(Parent->getExiting() == this && + "Block w/o successors not the exiting block of its parent."); return Parent->getEnclosingBlockWithSuccessors(); } @@ -261,7 +261,7 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { // Hook up the new basic block to its predecessors. 
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { - VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); + VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); auto &PredVPSuccessors = PredVPBB->getSuccessors(); BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; @@ -298,14 +298,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { "Trying to reset an existing successor block."); PredBBTerminator->setSuccessor(idx, NewBB); } else { - // PredVPBB is the exit block of a loop region. Connect its successor + // PredVPBB is the exiting block of a loop region. Connect its successor // outside the region. auto *LoopRegion = cast(PredVPBB->getParent()); assert(!LoopRegion->isReplicator() && "predecessor must be in a loop region"); assert(PredVPSuccessors.empty() && - LoopRegion->getExitBasicBlock() == PredVPBB && - "PredVPBB must be the exit block of its parent region"); + LoopRegion->getExitingBasicBlock() == PredVPBB && + "PredVPBB must be the exiting block of its parent region"); assert(this == LoopRegion->getSingleSuccessor() && "the current block must be the single successor of the region"); PredBBTerminator->setSuccessor(0, NewBB); @@ -334,7 +334,7 @@ void VPBasicBlock::execute(VPTransformState *State) { State->CFG.PrevBB = NewBB; } else if (PrevVPBB && /* A */ !((SingleHPred = getSingleHierarchicalPredecessor()) && - SingleHPred->getExitBasicBlock() == PrevVPBB && + SingleHPred->getExitingBasicBlock() == PrevVPBB && PrevVPBB->getSingleHierarchicalSuccessor() && (SingleHPred->getParent() == getEnclosingLoopRegion() && !IsLoopRegion(SingleHPred))) && /* B */ @@ -345,7 +345,7 @@ void VPBasicBlock::execute(VPTransformState *State) { // is PrevVPBB and the latter has a single (hierarchical) successor which // both are in the same non-replicator region; and // C. 
when the current VPBB is an entry of a region replica - where PrevVPBB - // is the exit of this region from a previous instance, or the + // is the exiting VPBB of this region from a previous instance, or the // predecessor of this region. NewBB = createEmptyBasicBlock(State->CFG); @@ -986,7 +986,7 @@ void VPlan::execute(VPTransformState *State) { } } - VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitBasicBlock(); + VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; // Fix the latch value of canonical, reduction and first-order recurrences @@ -1187,8 +1187,8 @@ void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, const Twine &Label) { // Due to "dot" we print an edge between two regions as an edge between the - // exit basic block and the entry basic of the respective regions. - const VPBlockBase *Tail = From->getExitBasicBlock(); + // exiting basic block and the entry basic of the respective regions. + const VPBlockBase *Tail = From->getExitingBasicBlock(); const VPBlockBase *Head = To->getEntryBasicBlock(); OS << Indent << getUID(Tail) << " -> " << getUID(Head); OS << " [ label=\"" << Label << '\"'; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 7072deb6a7860e..876092dc88461c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -493,11 +493,11 @@ class VPBlockBase { const VPBasicBlock *getEntryBasicBlock() const; VPBasicBlock *getEntryBasicBlock(); - /// \return the VPBasicBlock that is the exit of this VPBlockBase, + /// \return the VPBasicBlock that is the exiting this VPBlockBase, /// recursively, if the latter is a VPRegionBlock. Otherwise, if this /// VPBlockBase is a VPBasicBlock, it is returned. 
- const VPBasicBlock *getExitBasicBlock() const; - VPBasicBlock *getExitBasicBlock(); + const VPBasicBlock *getExitingBasicBlock() const; + VPBasicBlock *getExitingBasicBlock(); const VPBlocksTy &getSuccessors() const { return Successors; } VPBlocksTy &getSuccessors() { return Successors; } @@ -2109,7 +2109,7 @@ class VPBasicBlock : public VPBlockBase { }; /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks -/// which form a Single-Entry-Single-Exit subgraph of the output IR CFG. +/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. /// A VPRegionBlock may indicate that its contents are to be replicated several /// times. This is designed to support predicated scalarization, in which a /// scalar if-then code structure needs to be generated VF * UF times. Having @@ -2120,25 +2120,26 @@ class VPRegionBlock : public VPBlockBase { /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. VPBlockBase *Entry; - /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock. - VPBlockBase *Exit; + /// Hold the Single Exiting block of the SESE region modelled by the + /// VPRegionBlock. + VPBlockBase *Exiting; /// An indicator whether this region is to generate multiple replicated /// instances of output IR corresponding to its VPBlockBases. 
bool IsReplicator; public: - VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit, + VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name = "", bool IsReplicator = false) - : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit), + : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting), IsReplicator(IsReplicator) { assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); - assert(Exit->getSuccessors().empty() && "Exit block has successors."); + assert(Exiting->getSuccessors().empty() && "Exit block has successors."); Entry->setParent(this); - Exit->setParent(this); + Exiting->setParent(this); } VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) - : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr), + : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr), IsReplicator(IsReplicator) {} ~VPRegionBlock() override { @@ -2172,22 +2173,22 @@ class VPRegionBlock : public VPBlockBase { // DominatorTreeBase representing the Graph type. VPBlockBase &front() const { return *Entry; } - const VPBlockBase *getExit() const { return Exit; } - VPBlockBase *getExit() { return Exit; } + const VPBlockBase *getExiting() const { return Exiting; } + VPBlockBase *getExiting() { return Exiting; } - /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p - /// ExitBlock must have no successors. - void setExit(VPBlockBase *ExitBlock) { - assert(ExitBlock->getSuccessors().empty() && + /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p + /// ExitingBlock must have no successors. + void setExiting(VPBlockBase *ExitingBlock) { + assert(ExitingBlock->getSuccessors().empty() && "Exit block cannot have successors."); - Exit = ExitBlock; - ExitBlock->setParent(this); + Exiting = ExitingBlock; + ExitingBlock->setParent(this); } /// Returns the pre-header VPBasicBlock of the loop region. 
VPBasicBlock *getPreheaderVPBB() { assert(!isReplicator() && "should only get pre-header of loop regions"); - return getSinglePredecessor()->getExitBasicBlock(); + return getSinglePredecessor()->getExitingBasicBlock(); } /// An indicator whether this region is to generate multiple replicated @@ -2321,11 +2322,11 @@ struct GraphTraits> using nodes_iterator = df_iterator; static NodeRef getEntryNode(Inverse N) { - return N.Graph->getExit(); + return N.Graph->getExiting(); } static nodes_iterator nodes_begin(GraphRef N) { - return nodes_iterator::begin(N->getExit()); + return nodes_iterator::begin(N->getExiting()); } static nodes_iterator nodes_end(GraphRef N) { @@ -2871,8 +2872,8 @@ class VPBlockUtils { R.moveBefore(*PredVPBB, PredVPBB->end()); VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); auto *ParentRegion = cast(Block->getParent()); - if (ParentRegion->getExit() == Block) - ParentRegion->setExit(PredVPBB); + if (ParentRegion->getExiting() == Block) + ParentRegion->setExiting(PredVPBB); SmallVector Successors(Block->successors()); for (auto *Succ : Successors) { VPBlockUtils::disconnectBlocks(Block, Succ); diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 57f98ea20d2baa..c6d5fbd6ad4246 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -329,7 +329,7 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { // 5. Final Top Region setup. Set outermost loop pre-header and single exit as // Top Region entry and exit. 
TopRegion->setEntry(PreheaderVPBB); - TopRegion->setExit(LoopExitVPBB); + TopRegion->setExiting(LoopExitVPBB); return TopRegion; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 40960906510c98..d6ba1e9820ba3f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -130,9 +130,9 @@ VPlanPredicator::getEdgeTypeBetween(VPBlockBase *FromBlock, // predecessor blocks. void VPlanPredicator::createOrPropagatePredicates(VPBlockBase *CurrBlock, VPRegionBlock *Region) { - // Blocks that dominate region exit inherit the predicate from the region. + // Blocks that dominate region exiting inherit the predicate from the region. // Return after setting the predicate. - if (VPDomTree.dominates(CurrBlock, Region->getExit())) { + if (VPDomTree.dominates(CurrBlock, Region->getExiting())) { VPValue *RegionBP = Region->getPredicate(); CurrBlock->setPredicate(RegionBP); return; diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index fe9eb77901cf42..bc6a9c973ba70c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -43,9 +43,9 @@ static bool hasDuplicates(const SmallVectorImpl &VPBlockVec) { /// \p Region. Checks in this function are generic for VPBlockBases. They are /// not specific for VPBasicBlocks or VPRegionBlocks. static void verifyBlocksInRegion(const VPRegionBlock *Region) { - for (const VPBlockBase *VPB : - make_range(df_iterator::begin(Region->getEntry()), - df_iterator::end(Region->getExit()))) { + for (const VPBlockBase *VPB : make_range( + df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExiting()))) { // Check block's parent. 
assert(VPB->getParent() == Region && "VPBlockBase has wrong parent"); @@ -94,13 +94,14 @@ static void verifyBlocksInRegion(const VPRegionBlock *Region) { /// VPBlockBases. Do not recurse inside nested VPRegionBlocks. static void verifyRegion(const VPRegionBlock *Region) { const VPBlockBase *Entry = Region->getEntry(); - const VPBlockBase *Exit = Region->getExit(); + const VPBlockBase *Exiting = Region->getExiting(); - // Entry and Exit shouldn't have any predecessor/successor, respectively. + // Entry and Exiting shouldn't have any predecessor/successor, respectively. assert(!Entry->getNumPredecessors() && "Region entry has predecessors."); - assert(!Exit->getNumSuccessors() && "Region exit has successors."); + assert(!Exiting->getNumSuccessors() && + "Region exiting block has successors."); (void)Entry; - (void)Exit; + (void)Exiting; verifyBlocksInRegion(Region); } @@ -111,9 +112,9 @@ static void verifyRegionRec(const VPRegionBlock *Region) { verifyRegion(Region); // Recurse inside nested regions. 
- for (const VPBlockBase *VPB : - make_range(df_iterator::begin(Region->getEntry()), - df_iterator::end(Region->getExit()))) { + for (const VPBlockBase *VPB : make_range( + df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExiting()))) { if (const auto *SubRegion = dyn_cast(VPB)) verifyRegionRec(SubRegion); } @@ -170,19 +171,19 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { return false; } - const VPBasicBlock *Exit = dyn_cast(TopRegion->getExit()); - if (!Exit) { - errs() << "VPlan exit block is not a VPBasicBlock\n"; + const VPBasicBlock *Exiting = dyn_cast(TopRegion->getExiting()); + if (!Exiting) { + errs() << "VPlan exiting block is not a VPBasicBlock\n"; return false; } - if (Exit->empty()) { - errs() << "VPlan vector loop exit must end with BranchOnCount " + if (Exiting->empty()) { + errs() << "VPlan vector loop exiting block must end with BranchOnCount " "VPInstruction but is empty\n"; return false; } - auto *LastInst = dyn_cast(std::prev(Exit->end())); + auto *LastInst = dyn_cast(std::prev(Exiting->end())); if (!LastInst || LastInst->getOpcode() != VPInstruction::BranchOnCount) { errs() << "VPlan vector loop exit must end with BranchOnCount " "VPInstruction\n"; @@ -197,8 +198,8 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { errs() << "region entry block has predecessors\n"; return false; } - if (Region->getExit()->getNumSuccessors() != 0) { - errs() << "region exit block has successors\n"; + if (Region->getExiting()->getNumSuccessors() != 0) { + errs() << "region exiting block has successors\n"; return false; } } From dac27da7b941c16f12f439b08da5a4500b268fd9 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sun, 29 May 2022 02:06:11 +0530 Subject: [PATCH 771/908] [MLIR][Presburger] Add applyDomain/Range to IntegerRelation This patch adds support for applying a relation on domain/range of a relation. 
Reviewed By: arjunp, ftynse Differential Revision: https://reviews.llvm.org/D126339 --- .../Analysis/Presburger/IntegerRelation.h | 38 ++++++++++++++++-- .../Analysis/Presburger/IntegerRelation.cpp | 40 +++++++++++++++++-- .../Presburger/IntegerRelationTest.cpp | 31 ++++++++++++++ 3 files changed, 103 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index 3481d71f6d5c4a..4a866c17dd3b12 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -459,10 +459,16 @@ class IntegerRelation { void removeDuplicateDivs(); /// Converts identifiers of kind srcKind in the range [idStart, idLimit) to - /// variables of kind dstKind and placed after all the other variables of kind - /// dstKind. The internal ordering among the moved variables is preserved. + /// variables of kind dstKind. If `pos` is given, the variables are placed at + /// position `pos` of dstKind, otherwise they are placed after all the other + /// variables of kind dstKind. The internal ordering among the moved variables + /// is preserved. void convertIdKind(IdKind srcKind, unsigned idStart, unsigned idLimit, - IdKind dstKind); + IdKind dstKind, unsigned pos); + void convertIdKind(IdKind srcKind, unsigned idStart, unsigned idLimit, + IdKind dstKind) { + convertIdKind(srcKind, idStart, idLimit, dstKind, getNumIdKind(dstKind)); + } void convertToLocal(IdKind kind, unsigned idStart, unsigned idLimit) { convertIdKind(kind, idStart, idLimit, IdKind::Local); } @@ -523,6 +529,32 @@ class IntegerRelation { /// modifies R to be B -> A. void inverse(); + /// Let the relation `this` be R1, and the relation `rel` be R2. Modifies R1 + /// to be the composition of R1 and R2: R1;R2. 
+ /// + /// Formally, if R1: A -> B, and R2: B -> C, then this function returns a + /// relation R3: A -> C such that a point (a, c) belongs to R3 iff there + /// exists b such that (a, b) is in R1 and, (b, c) is in R2. + void compose(const IntegerRelation &rel); + + /// Given a relation `rel`, apply the relation to the domain of this relation. + /// + /// R1: i -> j : (0 <= i < 2, j = i) + /// R2: i -> k : (k = i floordiv 2) + /// R3: k -> j : (0 <= k < 1, 2k <= j <= 2k + 1) + /// + /// R1 = {(0, 0), (1, 1)}. R2 maps both 0 and 1 to 0. + /// So R3 = {(0, 0), (0, 1)}. + /// + /// Formally, R1.applyDomain(R2) = R2.inverse().compose(R1). + void applyDomain(const IntegerRelation &rel); + + /// Given a relation `rel`, apply the relation to the range of this relation. + /// + /// Formally, R1.applyRange(R2) is the same as R1.compose(R2) but we provide + /// this for uniformity with `applyDomain`. + void applyRange(const IntegerRelation &rel); + void print(raw_ostream &os) const; void dump() const; diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index d6337cbac8fea0..6e5eff78e49a50 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -1184,16 +1184,16 @@ void IntegerRelation::removeRedundantLocalVars() { } void IntegerRelation::convertIdKind(IdKind srcKind, unsigned idStart, - unsigned idLimit, IdKind dstKind) { + unsigned idLimit, IdKind dstKind, + unsigned pos) { assert(idLimit <= getNumIdKind(srcKind) && "Invalid id range"); if (idStart >= idLimit) return; // Append new local variables corresponding to the dimensions to be converted. - unsigned newIdsBegin = getIdKindEnd(dstKind); unsigned convertCount = idLimit - idStart; - appendId(dstKind, convertCount); + unsigned newIdsBegin = insertId(dstKind, pos, convertCount); // Swap the new local variables with dimensions. 
// @@ -2137,6 +2137,40 @@ void IntegerRelation::inverse() { convertIdKind(IdKind::Range, 0, numRangeIds, IdKind::Domain); } +void IntegerRelation::compose(const IntegerRelation &rel) { + assert(getRangeSet().getSpace().isCompatible(rel.getDomainSet().getSpace()) && + "Range of `this` should be compatible with Domain of `rel`"); + + IntegerRelation copyRel = rel; + + // Let relation `this` be R1: A -> B, and `rel` be R2: B -> C. + // We convert R1 to A -> (B X C), and R2 to B X C then intersect the range of + // R1 with R2. After this, we get R1: A -> C, by projecting out B. + // TODO: Using nested spaces here would help, since we could directly + // intersect the range with another relation. + unsigned numBIds = getNumRangeIds(); + + // Convert R1 from A -> B to A -> (B X C). + appendId(IdKind::Range, copyRel.getNumRangeIds()); + + // Convert R2 to B X C. + copyRel.convertIdKind(IdKind::Domain, 0, numBIds, IdKind::Range, 0); + + // Intersect R2 to range of R1. + intersectRange(IntegerPolyhedron(copyRel)); + + // Project out B in R1. 
+ convertIdKind(IdKind::Range, 0, numBIds, IdKind::Local); +} + +void IntegerRelation::applyDomain(const IntegerRelation &rel) { + inverse(); + compose(rel); + inverse(); +} + +void IntegerRelation::applyRange(const IntegerRelation &rel) { compose(rel); } + void IntegerRelation::printSpace(raw_ostream &os) const { space.print(os); os << getNumConstraints() << " constraints\n"; diff --git a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp index d23900b38f54a3..eda5bfecdcba32 100644 --- a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp +++ b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp @@ -91,3 +91,34 @@ TEST(IntegerRelationTest, intersectDomainAndRange) { EXPECT_TRUE(copyRel.isEqual(expectedRel)); } } + +TEST(IntegerRelationTest, applyDomainAndRange) { + + { + IntegerRelation map1 = parseRelationFromSet( + "(x, y, a, b)[N] : (a - x - N == 0, b - y + N == 0)", 2); + IntegerRelation map2 = + parseRelationFromSet("(x, y, a)[N] : (a - x - y == 0)", 2); + + map1.applyRange(map2); + + IntegerRelation map3 = + parseRelationFromSet("(x, y, a)[N] : (a - x - y == 0)", 2); + + EXPECT_TRUE(map1.isEqual(map3)); + } + + { + IntegerRelation map1 = parseRelationFromSet( + "(x, y, a, b)[N] : (a - x + N == 0, b - y - N == 0)", 2); + IntegerRelation map2 = + parseRelationFromSet("(x, y, a, b)[N] : (a - N == 0, b - N == 0)", 2); + + IntegerRelation map3 = + parseRelationFromSet("(x, y, a, b)[N] : (x - N == 0, y - N == 0)", 2); + + map1.applyDomain(map2); + + EXPECT_TRUE(map1.isEqual(map3)); + } +} From 41a054ef78856b789acfafca92eb8f79b8129ac0 Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Sun, 29 May 2022 01:26:46 +0900 Subject: [PATCH 772/908] [clang] Remove `rm` script which is no longer necessary 8b4fa2c98e07997469f53bee30c0d24a61dc7c8c added this to remove left-over files on January. Now we could assume they're cleaned up and this `rm` script is no longer necessary as FIXME states. 
Differential Revision: https://reviews.llvm.org/D126597 --- clang/test/Sema/test-wunaligned-access.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/test/Sema/test-wunaligned-access.c b/clang/test/Sema/test-wunaligned-access.c index 74945a8539eb82..909cda45f489b0 100644 --- a/clang/test/Sema/test-wunaligned-access.c +++ b/clang/test/Sema/test-wunaligned-access.c @@ -1,6 +1,3 @@ -// FIXME: Remove rm after a few days. -// RUN: rm -f %S/test-wunaligned-access.ll - // RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm -o %t // REQUIRES: arm-registered-target // From 5ff27fe1ff03d5aeaf8567c97618170f0cef8f58 Mon Sep 17 00:00:00 2001 From: Purva-Chaudhari Date: Sun, 29 May 2022 04:55:10 +0000 Subject: [PATCH 773/908] [clang-repl] Recover the lookup tables of the primary context. Before this patch, there was a re-declaration error if an error was encountered in the same line. The recovery support acted only if this type of error was encountered in the first line of the program and not in subsequent lines. 
For example: ``` clang-repl> int i=9; clang-repl> int j=9; err; input_line_3:1:5: error: redefinition of 'j' int j = 9; ``` Differential revision: https://reviews.llvm.org/D123674 --- clang/lib/Interpreter/IncrementalParser.cpp | 2 +- clang/test/Interpreter/execute.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index 0f1ef3233a2a1b..87687ea6906d7a 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -188,7 +188,7 @@ IncrementalParser::ParseOrWrapTopLevelDecl() { S.TUScope->setEntity(PreviousTU); // Clean up the lookup table - if (StoredDeclsMap *Map = PreviousTU->getLookupPtr()) { + if (StoredDeclsMap *Map = PreviousTU->getPrimaryContext()->getLookupPtr()) { for (auto I = Map->begin(); I != Map->end(); ++I) { StoredDeclsList &List = I->second; DeclContextLookupResult R = List.getLookupResult(); diff --git a/clang/test/Interpreter/execute.cpp b/clang/test/Interpreter/execute.cpp index 298046c068c37f..5f3fe3e837d9a6 100644 --- a/clang/test/Interpreter/execute.cpp +++ b/clang/test/Interpreter/execute.cpp @@ -1,3 +1,4 @@ +// RUN: clang-repl "int x = 10;" "int y=7; err;" "int y = 10;" // RUN: clang-repl "int i = 10;" 'extern "C" int printf(const char*,...);' \ // RUN: 'auto r1 = printf("i = %d\n", i);' | FileCheck --check-prefix=CHECK-DRIVER %s // REQUIRES: host-supports-jit From ebb6f3ce370c69fad51859ebe22fd82f25749161 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sun, 29 May 2022 11:11:32 +0200 Subject: [PATCH 774/908] [libc++] Adds missing includes. This fixes the broken Apple builds. 
This has been tested in D121530 (https://buildkite.com/llvm-project/libcxx-ci/builds/11113) --- libcxx/include/__type_traits/is_unsigned.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/include/__type_traits/is_unsigned.h b/libcxx/include/__type_traits/is_unsigned.h index 3c25205e6417a6..b0522812aad393 100644 --- a/libcxx/include/__type_traits/is_unsigned.h +++ b/libcxx/include/__type_traits/is_unsigned.h @@ -11,6 +11,8 @@ #include <__config> #include <__type_traits/integral_constant.h> +#include <__type_traits/is_arithmetic.h> +#include <__type_traits/is_integral.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header From c7283c263c0ae56d031d16b330a9cc9f22d58888 Mon Sep 17 00:00:00 2001 From: Javed Absar Date: Sat, 28 May 2022 20:32:13 +0100 Subject: [PATCH 775/908] [mlir][NFC] Trivial : Fix typo Reviewed By: JohnTitor Differential Revision: https://reviews.llvm.org/D126601 --- mlir/test/lib/Transforms/TestInlining.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/lib/Transforms/TestInlining.cpp b/mlir/test/lib/Transforms/TestInlining.cpp index 4bf85c075b5ff3..3fb70285cef457 100644 --- a/mlir/test/lib/Transforms/TestInlining.cpp +++ b/mlir/test/lib/Transforms/TestInlining.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // TODO: This pass is only necessary because the main inlining pass -// has no abstracted away the call+callee relationship. When the inlining +// has not abstracted away the call+callee relationship. When the inlining // interface has this support, this pass should be removed. 
// //===----------------------------------------------------------------------===// From 7e69bd9bf00ece8ac63aab91f73673d0d731bc24 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sat, 28 May 2022 15:22:25 +0200 Subject: [PATCH 776/908] [libc++] Use __enable_if_t and is_integral in cstddef Reviewed By: ldionne, #libc Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D126469 --- libcxx/include/__type_traits/enable_if.h | 1 - libcxx/include/cstddef | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__type_traits/enable_if.h b/libcxx/include/__type_traits/enable_if.h index d7ccd6fd0368fb..c5e8df45ea5830 100644 --- a/libcxx/include/__type_traits/enable_if.h +++ b/libcxx/include/__type_traits/enable_if.h @@ -10,7 +10,6 @@ #define _LIBCPP___TYPE_TRAITS_ENABLE_IF_H #include <__config> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/cstddef b/libcxx/include/cstddef index 2b0659ab8ee7db..8daba076d7db21 100644 --- a/libcxx/include/cstddef +++ b/libcxx/include/cstddef @@ -35,6 +35,8 @@ Types: #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__type_traits/enable_if.h> +#include <__type_traits/integral_constant.h> #include <__type_traits/is_integral.h> #include #include @@ -60,11 +62,6 @@ namespace std // purposefully not versioned { enum class byte : unsigned char {}; - -template struct __enable_if_integral_imp {}; -template <> struct __enable_if_integral_imp { using type = byte; }; -template using _EnableByteOverload = typename __enable_if_integral_imp<__libcpp_is_integral<_Tp>::value>::type; - constexpr byte operator| (byte __lhs, byte __rhs) noexcept { return static_cast( @@ -105,6 +102,10 @@ constexpr byte operator~ (byte __b) noexcept ~static_cast(__b) )); } + +template +using _EnableByteOverload = __enable_if_t::value, byte>; + template constexpr _EnableByteOverload<_Integer> & 
operator<<=(byte& __lhs, _Integer __shift) noexcept From 75d12e49c729e65f77bfdfe65b16df8c39944d35 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Tue, 3 May 2022 02:53:05 +0200 Subject: [PATCH 777/908] [libclang] Fall back to getMainExecutable when dladdr fails musl-libc doesn't support dladdr in statically linked binaries: > Are you using static or dynamic linking? If static, dladdr is just a > stub that always fails. It could be implemented to work under some > conditions, but it would be highly dependent on what options you > compile the binary with, since by default static binaries do not > contain the bloat that would be needed to perform introspection. Source: https://www.openwall.com/lists/musl/2013/01/15/25 (in response to a bug report). Libclang unfortunately uses dladdr to find the ResourcesPath so will fail if it is linked statically on Alpine Linux. This patch fixes this issue by falling back to getMainExecutable if dladdr returns an error. Reference: https://github.com/llvm/llvm-project/issues/40641#issuecomment-981011427 Differential Revision: https://reviews.llvm.org/D124815 --- clang/tools/libclang/CIndexer.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/clang/tools/libclang/CIndexer.cpp b/clang/tools/libclang/CIndexer.cpp index c7baab3a2c4708..41f06a83553ad3 100644 --- a/clang/tools/libclang/CIndexer.cpp +++ b/clang/tools/libclang/CIndexer.cpp @@ -125,13 +125,23 @@ const std::string &CIndexer::getClangResourcesPath() { #elif defined(_AIX) getClangResourcesPathImplAIX(LibClangPath); #else - // This silly cast below avoids a C++ warning. Dl_info info; - if (dladdr((void *)(uintptr_t)clang_createTranslationUnit, &info) == 0) - llvm_unreachable("Call to dladdr() failed"); + std::string Path; + // This silly cast below avoids a C++ warning. + if (dladdr((void *)(uintptr_t)clang_createTranslationUnit, &info) != 0) { + // We now have the CIndex directory, locate clang relative to it. 
+ LibClangPath += info.dli_fname; + } else if (!(Path = llvm::sys::fs::getMainExecutable(nullptr, nullptr)).empty()) { + // If we can't get the path using dladdr, try to get the main executable + // path. This may be needed when we're statically linking libclang with + // musl libc, for example. + LibClangPath += Path; + } else { + // It's rather unlikely we end up here. But it could happen, so report an + // error instead of crashing. + llvm::report_fatal_error("Could not locate Clang resource path"); + } - // We now have the CIndex directory, locate clang relative to it. - LibClangPath += info.dli_fname; #endif // Cache our result. From 0bd645d3707dce452663d4634495155321a6fd1c Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 29 May 2022 13:42:22 +0200 Subject: [PATCH 778/908] [libclang] Fix error message capitalization This was a review suggestion from MaskRay that I forgot to incorporate in the patch. See: https://reviews.llvm.org/D124815 --- clang/tools/libclang/CIndexer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/libclang/CIndexer.cpp b/clang/tools/libclang/CIndexer.cpp index 41f06a83553ad3..dab3fc4e201d06 100644 --- a/clang/tools/libclang/CIndexer.cpp +++ b/clang/tools/libclang/CIndexer.cpp @@ -139,7 +139,7 @@ const std::string &CIndexer::getClangResourcesPath() { } else { // It's rather unlikely we end up here. But it could happen, so report an // error instead of crashing. - llvm::report_fatal_error("Could not locate Clang resource path"); + llvm::report_fatal_error("could not locate Clang resource path"); } #endif From 4cb184ce1c53f6e355d328390a9ec2d36da6e880 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sun, 15 May 2022 15:17:46 +0200 Subject: [PATCH 779/908] [libc++] Adds __format_string as nasty macro. Both D121530 and D125606 had issues with this macro. 
Reviewed By: #libc, philnik Differential Revision: https://reviews.llvm.org/D125629 --- libcxx/test/libcxx/nasty_macros.compile.pass.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp index f6e04d2dbd75b5..0874deeec9d45e 100644 --- a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp +++ b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp @@ -81,6 +81,7 @@ # define __bound NASTY_MACRO # define __deallocate NASTY_MACRO # define __deref NASTY_MACRO +# define __format_string NASTY_MACRO # define __full NASTY_MACRO # define __in NASTY_MACRO # define __inout NASTY_MACRO From 773c6e4358da80573b86234c0a2e1096aed78d1c Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sun, 15 May 2022 15:29:03 +0200 Subject: [PATCH 780/908] [libc++][doc] Clarify wording on the status page. Reviewed By: philnik, #libc Differential Revision: https://reviews.llvm.org/D125630 --- libcxx/docs/Status/Cxx20.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libcxx/docs/Status/Cxx20.rst b/libcxx/docs/Status/Cxx20.rst index 43744077638ffc..3872308e85773d 100644 --- a/libcxx/docs/Status/Cxx20.rst +++ b/libcxx/docs/Status/Cxx20.rst @@ -41,7 +41,9 @@ Paper Status .. note:: .. [#note-P0600] P0600: The missing bits in P0600 are in |sect|\ [mem.res.class] and |sect|\ [mem.poly.allocator.class]. - .. [#note-P0645] P0645: The paper is implemented but still marked as an incomplete feature. Not yet implemented LWG-issues will cause API and ABI breakage. + .. [#note-P0645] P0645: The paper is implemented but still marked as an incomplete feature + (the feature-test macro is not set and the library is only available when built with ``LIBCXX_ENABLE_INCOMPLETE_FEATURES``). + Not yet implemented LWG-issues will cause API and ABI breakage. .. [#note-P0966] P0966: It was previously erroneously marked as complete in version 8.0. See `bug 45368 `__. .. 
[#note-P0619] P0619: Only sections D.8, D.9, D.10 and D.13 are implemented. Sections D.4, D.7, D.11, D.12, and D.14 remain undone. .. [#note-P0883] P0883: shared_ptr and floating-point changes weren't applied as they themselves aren't implemented yet. From 9080e21906a162cb2aabcd214d62fa3e463b7675 Mon Sep 17 00:00:00 2001 From: "chenglin.bi" Date: Mon, 30 May 2022 00:30:56 +0800 Subject: [PATCH 781/908] [InstCombine] Add baseline tests for shift+and transforms; NFC --- llvm/test/Transforms/InstCombine/and.ll | 140 ++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index 07f0ae62ca396e..c7a63d7fc5280c 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -passes=instcombine -S | FileCheck %s declare void @use8(i8) +declare void @use16(i16) declare void @use32(i32) ; There should be no 'and' instructions left in any test. 
@@ -1620,3 +1621,142 @@ define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) { %r = and i8 %not, %y ret i8 %r } + +define i16 @shl_lshr_pow2_const(i16 %x) { +; CHECK-LABEL: @shl_lshr_pow2_const( +; CHECK-NEXT: [[SHL:%.*]] = shl i16 4, [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr i16 [[SHL]], 6 +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 8 +; CHECK-NEXT: ret i16 [[R]] +; + %shl = shl i16 4, %x + %lshr = lshr i16 %shl, 6 + %r = and i16 %lshr, 8 + ret i16 %r +} + +define i16 @shl_lshr_pow2_const_negative_oneuse(i16 %x) { +; CHECK-LABEL: @shl_lshr_pow2_const_negative_oneuse( +; CHECK-NEXT: [[SHL:%.*]] = shl i16 4, [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr i16 [[SHL]], 6 +; CHECK-NEXT: call void @use16(i16 [[LSHR]]) +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 8 +; CHECK-NEXT: ret i16 [[R]] +; + %shl = shl i16 4, %x + %lshr = lshr i16 %shl, 6 + call void @use16(i16 %lshr) + %r = and i16 %lshr, 8 + ret i16 %r +} + + +define i16 @shl_lshr_pow2_const_negative_nopow2_1(i16 %x) { +; CHECK-LABEL: @shl_lshr_pow2_const_negative_nopow2_1( +; CHECK-NEXT: [[SHL:%.*]] = shl i16 3, [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr i16 [[SHL]], 6 +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 8 +; CHECK-NEXT: ret i16 [[R]] +; + %shl = shl i16 3, %x + %lshr = lshr i16 %shl, 6 + %r = and i16 %lshr, 8 + ret i16 %r +} + +define i16 @shl_lshr_pow2_const_negative_nopow2_2(i16 %x) { +; CHECK-LABEL: @shl_lshr_pow2_const_negative_nopow2_2( +; CHECK-NEXT: [[SHL:%.*]] = shl i16 3, [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr i16 [[SHL]], 6 +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 7 +; CHECK-NEXT: ret i16 [[R]] +; + %shl = shl i16 3, %x + %lshr = lshr i16 %shl, 6 + %r = and i16 %lshr, 7 + ret i16 %r +} + +define i16 @shl_lshr_pow2_const_negative_overflow1(i16 %x) { +; CHECK-LABEL: @shl_lshr_pow2_const_negative_overflow1( +; CHECK-NEXT: ret i16 0 +; + %shl = shl i16 4096, %x + %lshr = lshr i16 %shl, 6 + %r = and i16 %lshr, 8 + ret i16 %r +} + +define i16 @shl_lshr_pow2_const_negative_overflow2(i16 
%x) { +; CHECK-LABEL: @shl_lshr_pow2_const_negative_overflow2( +; CHECK-NEXT: ret i16 0 +; + %shl = shl i16 8, %x + %lshr = lshr i16 %shl, 6 + %r = and i16 %lshr, 32768 + ret i16 %r +} + +define i16 @lshr_lshr_pow2_const(i16 %x) { +; CHECK-LABEL: @lshr_lshr_pow2_const( +; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 2048, [[X:%.*]] +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 +; CHECK-NEXT: ret i16 [[R]] +; + %lshr1 = lshr i16 2048, %x + %lshr2 = lshr i16 %lshr1, 6 + %r = and i16 %lshr2, 4 + ret i16 %r +} + +define i16 @lshr_lshr_pow2_const_negative_oneuse(i16 %x) { +; CHECK-LABEL: @lshr_lshr_pow2_const_negative_oneuse( +; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 2048, [[X:%.*]] +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: call void @use16(i16 [[LSHR2]]) +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 +; CHECK-NEXT: ret i16 [[R]] +; + %lshr1 = lshr i16 2048, %x + %lshr2 = lshr i16 %lshr1, 6 + call void @use16(i16 %lshr2) + %r = and i16 %lshr2, 4 + ret i16 %r +} + +define i16 @lshr_lshr_pow2_const_negative_nopow2_1(i16 %x) { +; CHECK-LABEL: @lshr_lshr_pow2_const_negative_nopow2_1( +; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 2047, [[X:%.*]] +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 +; CHECK-NEXT: ret i16 [[R]] +; + %lshr1 = lshr i16 2047, %x + %lshr2 = lshr i16 %lshr1, 6 + %r = and i16 %lshr2, 4 + ret i16 %r +} + +define i16 @lshr_lshr_pow2_const_negative_nopow2_2(i16 %x) { +; CHECK-LABEL: @lshr_lshr_pow2_const_negative_nopow2_2( +; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 8192, [[X:%.*]] +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 3 +; CHECK-NEXT: ret i16 [[R]] +; + %lshr1 = lshr i16 8192, %x + %lshr2 = lshr i16 %lshr1, 6 + %r = and i16 %lshr2, 3 + ret i16 %r +} + +define i16 @lshr_lshr_pow2_const_negative_overflow(i16 %x) { +; CHECK-LABEL: @lshr_lshr_pow2_const_negative_overflow( +; CHECK-NEXT: 
ret i16 0 +; + %lshr1 = lshr i16 32768, %x + %lshr2 = lshr i16 %lshr1, 15 + %r = and i16 %lshr2, 4 + ret i16 %r +} From c99690462ef7d510134ee3d58cdae87b26c23cb7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 29 May 2022 17:55:39 +0100 Subject: [PATCH 782/908] [X86] Adjust vector shift costs to match SoG (Issue #54889) znver1/2 models were incorrectly modelling the fpupipe (should be pipe2 for shift-by-scalar-amount and pipe1 for shift-by-element-amount) and znver1 ymm variants also require double pumping. Now matches AMD SoG, Agner and instlatx64 numbers. Thanks to @fabian-r for the report --- llvm/lib/Target/X86/X86ScheduleZnver1.td | 18 +- llvm/lib/Target/X86/X86ScheduleZnver2.td | 16 +- .../llvm-mca/X86/Znver1/resources-avx1.s | 34 ++-- .../llvm-mca/X86/Znver1/resources-avx2.s | 178 +++++++++--------- .../tools/llvm-mca/X86/Znver1/resources-mmx.s | 98 +++++----- .../llvm-mca/X86/Znver1/resources-sse2.s | 34 ++-- .../llvm-mca/X86/Znver2/resources-avx1.s | 34 ++-- .../llvm-mca/X86/Znver2/resources-avx2.s | 114 +++++------ .../tools/llvm-mca/X86/Znver2/resources-mmx.s | 98 +++++----- .../llvm-mca/X86/Znver2/resources-sse2.s | 34 ++-- .../X86/Znver3/reg-move-elimination-mmx.s | 1 + .../X86/Znver3/reg-move-elimination-x87.s | 1 + 12 files changed, 329 insertions(+), 331 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index c1be7141dfb6c5..2e226e81c95225 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -398,14 +398,17 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; +defm 
: ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -444,11 +447,6 @@ defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; -// Vector Shift Operations -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : X86WriteResPairUnsupported; - // Vector insert/extract operations. defm : ZnWriteResFpuPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 0c28830074aaf2..7805a5d78e758f 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -393,14 +393,17 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -439,11 +442,6 @@ defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; -// Vector Shift Operations -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : X86WriteResPairUnsupported; - // Vector insert/extract operations. 
defm : Zn2WriteResFpuPair; diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s index 77e9c01139584d..7b67829550cc4e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s @@ -1562,30 +1562,30 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsignw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpsignw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpslld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 1.00 vpslldq $1, %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsllq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsllw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsllw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsllw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsrad $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrad %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrad (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsraw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsraw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsraw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsraw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsrld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrldq $1, %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsrlq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %xmm1, %xmm2 -# 
CHECK-NEXT: 1 1 0.25 vpsrlw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrlw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsubb %xmm0, %xmm1, %xmm2 @@ -1738,7 +1738,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 175.00 175.00 - - - - - 146.58 171.08 198.25 527.08 - +# CHECK-NEXT: 175.00 175.00 - - - - - 144.58 169.08 204.25 525.08 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2274,30 +2274,30 @@ vzeroupper # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsignw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsignw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpslld $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpslld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpslldq $1, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsllq $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsllq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsllw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsllw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrad $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: 
- - - - - - - - - 1.00 - - vpsrad %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrad (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsraw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsraw $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsraw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsraw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrld $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrldq $1, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlq $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrlq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsubb %xmm0, %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s index 6bd2baaa3e2378..19e411dfaef608 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s @@ -681,52 +681,52 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsignw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpsignw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpslld $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpslld %xmm0, %ymm1, %ymm2 
-# CHECK-NEXT: 1 9 1.00 * vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpslld $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpslld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vpslldq $1, %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsllq $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsllq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsllq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsllw $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsllw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsllw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrad $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrad %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrad (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsraw $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsraw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsraw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrld $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsllq $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsllq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsllvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 4.00 vpsllvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 4.00 
* vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsllvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 4.00 vpsllvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 4.00 * vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsllw $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsllw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrad $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrad %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsravd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 4.00 vpsravd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 4.00 * vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsraw $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsraw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrld $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vpsrldq $1, %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrlq $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrlq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrlq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrlw $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrlw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrlq $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrlq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpsrlq (%rax), 
%ymm1, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsrlvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 4.00 vpsrlvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 4.00 * vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsrlvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 4.00 vpsrlvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 4.00 * vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrlw $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 1 2.00 vpsrlw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsubb %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpsubb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsubd %ymm0, %ymm1, %ymm2 @@ -778,7 +778,7 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 67.00 67.00 - - - - - 73.17 75.67 85.00 43.17 - +# CHECK-NEXT: 67.00 67.00 - - - - - 71.17 123.67 105.00 41.17 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1003,52 +1003,52 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsignw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsignw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpslld $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpslld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpslld $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpslld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 2.00 - - vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpslldq $1, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - 
vpsllq $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsllq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsllvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsllvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsllvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsllvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsllw $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsllw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsllw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrad $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrad %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrad (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsravd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsravd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsraw $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsraw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsraw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrld $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 
2.00 - - vpsllq $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsllq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 2.00 - - vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - vpsllvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 - - - vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 - - - vpsllvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 4.00 - - - vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - vpsllvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 - - - vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 - - - vpsllvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 4.00 - - - vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsllw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsllw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 2.00 - - vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrad $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrad %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 2.00 - - vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - vpsravd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 - - - vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 - - - vpsravd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 4.00 - - - vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsraw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsraw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 2.00 - - vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrld $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 2.00 - - vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrldq $1, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 
0.25 0.25 0.25 0.25 - vpsrlq $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrlq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsrlvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsrlvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsrlvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpsrlvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlw $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - vpsrlw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrlq $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrlq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 2.00 - - vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - vpsrlvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 - - - vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 - - - vpsrlvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 4.00 - - - vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - vpsrlvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 - - - vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 - - - vpsrlvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 4.00 - - - vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrlw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - vpsrlw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 
2.00 - - vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsubb %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsubb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpsubd %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s index a7985768ab8dbc..cddb647b16041e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-mmx.s @@ -217,30 +217,30 @@ pxor (%rax), %mm2 # CHECK-NEXT: 1 11 1.00 * pmullw (%rax), %mm2 # CHECK-NEXT: 1 1 0.25 por %mm0, %mm2 # CHECK-NEXT: 1 8 0.50 * por (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 pslld $1, %mm2 -# CHECK-NEXT: 1 1 0.25 pslld %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * pslld (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psllq $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psllq %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * psllq (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psllw $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psllw %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * psllw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrad $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrad %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * psrad (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psraw $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psraw %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * psraw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrld $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrld %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * psrld (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrlq $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrlq %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * psrlq (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrlw $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrlw %mm0, %mm2 -# CHECK-NEXT: 1 8 0.50 * psrlw (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 pslld $1, %mm2 +# CHECK-NEXT: 1 1 1.00 pslld %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * pslld (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psllq $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psllq %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psllq (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psllw 
$1, %mm2 +# CHECK-NEXT: 1 1 1.00 psllw %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psllw (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrad $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrad %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrad (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psraw $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psraw %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psraw (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrld $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrld %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrld (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrlq $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrlq %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrlq (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrlw $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrlw %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrlw (%rax), %mm2 # CHECK-NEXT: 1 1 0.25 psubb %mm0, %mm2 # CHECK-NEXT: 1 8 0.50 * psubb (%rax), %mm2 # CHECK-NEXT: 1 1 0.25 psubd %mm0, %mm2 @@ -286,7 +286,7 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 24.00 24.00 - - - - - 27.25 24.25 28.25 21.25 - +# CHECK-NEXT: 24.00 24.00 - - - - - 21.25 18.25 46.25 15.25 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -343,30 +343,30 @@ pxor (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - pmullw (%rax), %mm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - por %mm0, %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - por (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - pslld $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - pslld %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - pslld (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psllq $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psllq %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psllq (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psllw $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 
0.25 0.25 0.25 - psllw %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psllw (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrad $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrad %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psrad (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psraw $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psraw %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psraw (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrld $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrld %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psrld (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrlq $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrlq %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psrlq (%rax), %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrlw $1, %mm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrlw %mm0, %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psrlw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - pslld $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - pslld %mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - pslld (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psllq $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psllq %mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psllq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psllw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psllw %mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psllw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrad $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrad %mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrad (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psraw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psraw 
%mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psraw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrld $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrld %mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrld (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrlq $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrlq %mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrlq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrlw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrlw %mm0, %mm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrlw (%rax), %mm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psubb %mm0, %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - psubb (%rax), %mm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psubd %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s index e5ba24e3a3bcc0..e9374b7feb5063 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s @@ -594,30 +594,30 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 pshuflw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pshuflw $1, (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 pslld $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2 # CHECK-NEXT: 1 1 1.00 pslld %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * pslld (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pslldq $1, %xmm2 -# CHECK-NEXT: 1 1 0.25 psllq $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psllq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psllq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psllw $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psllw $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psllw %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psllw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psrad $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrad $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrad %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * 
psrad (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psraw $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psraw $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psraw %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psraw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psrld $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrld %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrld (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 psrldq $1, %xmm2 -# CHECK-NEXT: 1 1 0.25 psrlq $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrlq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrlq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psrlw $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrlw $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrlw %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrlw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 psubb %xmm0, %xmm2 @@ -691,7 +691,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 66.50 66.50 - - - - - 56.92 48.42 79.75 157.92 - +# CHECK-NEXT: 66.50 66.50 - - - - - 54.92 46.42 85.75 155.92 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -882,30 +882,30 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - pshuflw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - pshuflw $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - pslld $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - pslld $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - pslld %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - pslld (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - pslldq $1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psllq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psllq $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psllq %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psllq 
(%rax), %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psllw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psllw $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psllw %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psllw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrad $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrad $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psrad %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrad (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psraw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psraw $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psraw %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psraw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrld $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrld $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psrld %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrld (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psrldq $1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrlq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrlq $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psrlq %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrlq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psrlw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - psrlw $1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - psrlw %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 - - psrlw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - psubb %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s index d2565a2d967e41..0ccc0219941a72 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s @@ -1562,30 +1562,30 @@ vzeroupper # CHECK-NEXT: 1 8 0.33 * 
vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsignw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.33 * vpsignw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpslld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 1.00 vpslldq $1, %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsllq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsllw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsllw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsllw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsrad $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrad %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrad (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsraw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsraw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsraw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsraw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsrld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 1.00 vpsrldq $1, %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsrlq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vpsrlw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrlw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsubb %xmm0, %xmm1, %xmm2 @@ -1739,7 +1739,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] 
[9] [10] [11] [12] -# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 137.92 165.42 194.25 471.42 - +# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 135.92 163.42 200.25 469.42 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -2275,30 +2275,30 @@ vzeroupper # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsignw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vpsignw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpslld $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpslld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpslldq $1, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsllq $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsllq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsllw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsllw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrad $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrad %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsrad (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsraw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsraw $1, 
%xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsraw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsraw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrld $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrldq $1, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlq $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsrlq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsubb %xmm0, %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s index 01831f5416c94c..142d3e89bff6ed 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s @@ -681,50 +681,50 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.33 * vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsignw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.33 * vpsignw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpslld $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpslld $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vpslldq $1, %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsllq $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 
1.00 vpsllq $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 0.50 vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 0.50 * vpsllvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 0.50 vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 0.50 * vpsllvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 0.50 vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 0.50 * vpsllvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 0.50 vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 0.50 * vpsllvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsllw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsllvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 2.00 vpsllvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 2.00 * vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsllvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 2.00 vpsllvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 2.00 * vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsllw $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpsllw %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vpsllw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrad $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrad $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpsrad %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vpsrad (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 0.50 vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 0.50 * vpsravd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 0.50 vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 0.50 * vpsravd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsraw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsravd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 2.00 vpsravd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 2.00 * vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsraw $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpsraw %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * 
vpsraw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrld $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrld $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vpsrldq $1, %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrlq $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrlq $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 0.50 vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 0.50 * vpsrlvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 0.50 vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 0.50 * vpsrlvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 0.50 vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 0.50 * vpsrlvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 0.50 vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 0.50 * vpsrlvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.25 vpsrlw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsrlvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 2.00 vpsrlvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 2.00 * vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 2.00 vpsrlvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 2.00 * vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 2.00 vpsrlvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 2.00 * vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrlw $1, %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 vpsrlw %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 1.00 * vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsubb %ymm0, %ymm1, %ymm2 @@ -779,7 +779,7 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 44.67 44.67 44.67 - - - - - 70.17 75.17 85.00 42.67 - +# CHECK-NEXT: 44.67 44.67 44.67 - - - - - 68.17 103.17 81.00 40.67 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] 
Instructions: @@ -1004,50 +1004,50 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsignw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vpsignw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpslld $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpslld $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpslld %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpslldq $1, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsllq $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllq $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllq %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsllq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsllvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsllvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsllvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsllvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsllw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsllvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsllvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsllvd (%rax), 
%ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsllvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsllvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllw $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsllw %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsllw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrad $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrad $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrad %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsrad (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsravd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsravd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsraw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsravd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsravd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsraw $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsraw %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsraw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrld $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrld $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrld %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 
0.33 - - - - - - - 1.00 - - vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrldq $1, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlq $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlq $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlq %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsrlq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsrlvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsrlvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsrlvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpsrlvq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpsrlw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsrlvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsrlvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsrlvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - vpsrlvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlw $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - vpsrlw %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - 
0.25 0.25 0.25 0.25 - vpsubb %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-mmx.s index e6b0113907fb78..15561172b6bb56 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-mmx.s @@ -217,30 +217,30 @@ pxor (%rax), %mm2 # CHECK-NEXT: 1 11 1.00 * pmullw (%rax), %mm2 # CHECK-NEXT: 1 1 0.25 por %mm0, %mm2 # CHECK-NEXT: 1 8 0.33 * por (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 pslld $1, %mm2 -# CHECK-NEXT: 1 1 0.25 pslld %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * pslld (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psllq $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psllq %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * psllq (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psllw $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psllw %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * psllw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrad $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrad %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * psrad (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psraw $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psraw %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * psraw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrld $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrld %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * psrld (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrlq $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrlq %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * psrlq (%rax), %mm2 -# CHECK-NEXT: 1 1 0.25 psrlw $1, %mm2 -# CHECK-NEXT: 1 1 0.25 psrlw %mm0, %mm2 -# CHECK-NEXT: 1 8 0.33 * psrlw (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 pslld $1, %mm2 +# CHECK-NEXT: 1 1 1.00 pslld %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * pslld (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psllq $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psllq %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psllq (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psllw $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psllw %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psllw (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrad $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrad %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrad (%rax), %mm2 +# 
CHECK-NEXT: 1 1 1.00 psraw $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psraw %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psraw (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrld $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrld %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrld (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrlq $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrlq %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrlq (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 psrlw $1, %mm2 +# CHECK-NEXT: 1 1 1.00 psrlw %mm0, %mm2 +# CHECK-NEXT: 1 8 1.00 * psrlw (%rax), %mm2 # CHECK-NEXT: 1 1 0.25 psubb %mm0, %mm2 # CHECK-NEXT: 1 8 0.33 * psubb (%rax), %mm2 # CHECK-NEXT: 1 1 0.25 psubd %mm0, %mm2 @@ -287,7 +287,7 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 16.00 16.00 16.00 - - - - - 27.25 24.25 28.25 21.25 - +# CHECK-NEXT: 16.00 16.00 16.00 - - - - - 21.25 18.25 46.25 15.25 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -344,30 +344,30 @@ pxor (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - pmullw (%rax), %mm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - por %mm0, %mm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - por (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - pslld $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - pslld %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - pslld (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psllq $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psllq %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psllq (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psllw $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psllw %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psllw (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 
- psrad $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrad %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psrad (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psraw $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psraw %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psraw (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrld $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrld %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psrld (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrlq $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrlq %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psrlq (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrlw $1, %mm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrlw %mm0, %mm2 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psrlw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - pslld $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - pslld %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - pslld (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psllq $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psllq %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psllq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psllw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psllw %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psllw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrad $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrad %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrad (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psraw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psraw %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psraw (%rax), 
%mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrld $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrld %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrld (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlq $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlq %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrlq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlw %mm0, %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrlw (%rax), %mm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psubb %mm0, %mm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - psubb (%rax), %mm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psubd %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s index 60eea89f7f5386..e9d04ea092b5ee 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s @@ -594,30 +594,30 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.33 * pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 pshuflw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * pshuflw $1, (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 pslld $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2 # CHECK-NEXT: 1 1 1.00 pslld %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * pslld (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 pslldq $1, %xmm2 -# CHECK-NEXT: 1 1 0.25 psllq $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psllq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psllq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psllw $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psllw $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psllw %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psllw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psrad $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrad $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrad %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrad (%rax), %xmm2 -# CHECK-NEXT: 1 
1 0.25 psraw $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psraw $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psraw %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psraw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psrld $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrld %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrld (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 psrldq $1, %xmm2 -# CHECK-NEXT: 1 1 0.25 psrlq $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrlq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrlq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 psrlw $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrlw $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrlw %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrlw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 psubb %xmm0, %xmm2 @@ -692,7 +692,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 55.92 49.92 81.25 156.92 - +# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 53.92 47.92 87.25 154.92 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -883,30 +883,30 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - pshuflw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - pshuflw $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - pslld $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - pslld $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - pslld %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - pslld (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - pslldq $1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psllq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psllq $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psllq %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 
1.00 - - psllq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psllw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psllw $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psllw %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psllw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrad $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrad $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psrad %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrad (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psraw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psraw $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psraw %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psraw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrld $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrld $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psrld %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrld (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psrldq $1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrlq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlq $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlq %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrlq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psrlw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlw $1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - psrlw %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 - - psrlw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - psubb %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s index ae84c48bbf6c9c..b556fd61f07788 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s @@ -173,6 +173,7 @@ movq %mm7, %mm0 # CHECK-NEXT: [9,2] . . . D==============================================================eER . movq %mm2, %mm3 # CHECK-NEXT: [9,3] . . . D===============================================================eER. movq %mm3, %mm4 # CHECK-NEXT: [9,4] . . . D===============================================================eER movq %mm4, %mm5 +# CHECK-NEXT: Truncated display due to cycle limit # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s index 1bbfb9ba120e44..de59edf2352ef0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s @@ -183,6 +183,7 @@ fxch %st(0) # CHECK-NEXT: [8,5] . . . D================================================================eER fxch %st(5) # CHECK-NEXT: [8,6] . . . D===============================================================eER fxch %st(6) # CHECK-NEXT: [8,7] . . . 
D===============================================================eER fxch %st(7) +# CHECK-NEXT: Truncated display due to cycle limit # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions From e091721fdced304673caca67051091ab2e2178ba Mon Sep 17 00:00:00 2001 From: "chenglin.bi" Date: Mon, 30 May 2022 01:01:37 +0800 Subject: [PATCH 783/908] [InstCombine] Add baseline tests for shift+and+icmp transforms; NFC --- .../Transforms/InstCombine/icmp-and-shift.ll | 458 ++++++++++++++++++ 1 file changed, 458 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/icmp-and-shift.ll diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll new file mode 100644 index 00000000000000..6193692976a1ba --- /dev/null +++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll @@ -0,0 +1,458 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +declare void @use(i8) + +define i32 @icmp_eq_and_pow2_shl1(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_shl1( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0:%.*]], 4 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 1, %0 + %and = and i32 %shl, 16 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_eq_and_pow2_shl1_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_shl1_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp eq <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_ne_and_pow2_shl1(i32 %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_shl1( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0:%.*]], 4 +; CHECK-NEXT: 
[[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 1, %0 + %and = and i32 %shl, 16 + %cmp = icmp ne i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_ne_and_pow2_shl1_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_shl1_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp ne <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4 +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %shl = shl i32 2, %0 + %and = and i32 %shl, 16 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp eq <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4 +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 +; CHECK-NEXT: ret i32 [[AND_LOBIT]] +; + %shl = shl i32 2, %0 + %and = and i32 %shl, 16 + %cmp = icmp ne 
i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp ne <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_eq_and_pow2_shl_pow2_negative1(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_negative1( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 11, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4 +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %shl = shl i32 11, %0 + %and = and i32 %shl, 16 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @icmp_eq_and_pow2_shl_pow2_negative2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_negative2( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], 14 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 2, %0 + %and = and i32 %shl, 14 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @icmp_eq_and_pow2_shl_pow2_negative3(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_negative3( +; CHECK-NEXT: ret i32 1 +; + %shl = shl i32 32, %0 + %and = and i32 %shl, 16 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + + +define i32 @icmp_eq_and_pow2_minus1_shl1(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl1( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] 
to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 1, %0 + %and = and i32 %shl, 15 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_eq_and_pow2_minus1_shl1_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl1_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp eq <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_ne_and_pow2_minus1_shl1(i32 %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl1( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0:%.*]], 4 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 1, %0 + %and = and i32 %shl, 15 + %cmp = icmp ne i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_ne_and_pow2_minus1_shl1_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl1_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp ne <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_eq_and_pow2_minus1_shl_pow2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl_pow2( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], 14 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 2, %0 + %and = and i32 %shl, 15 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_eq_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) { +; 
CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl_pow2_vec( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[SHL]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], zeroinitializer +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp eq <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_ne_and_pow2_minus1_shl_pow2(i32 %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl_pow2( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], 14 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 2, %0 + %and = and i32 %shl, 15 + %cmp = icmp ne i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_ne_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl_pow2_vec( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[SHL]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] +; + %shl = shl <2 x i32> , %0 + %and = and <2 x i32> %shl, + %cmp = icmp ne <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_eq_and_pow2_minus1_shl1_negative1(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl1_negative1( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 3, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %shl = shl i32 3, %0 + %and = and i32 %shl, 15 + %cmp = icmp eq i32 %and, 0 + %conv = 
zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @icmp_eq_and_pow2_minus1_shl1_negative2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl1_negative2( +; CHECK-NEXT: ret i32 1 +; + %shl = shl i32 32, %0 + %and = and i32 %shl, 15 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + + +define i32 @icmp_eq_and1_lshr_pow2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and1_lshr_pow2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %lshr = lshr i32 8, %0 + %and = and i32 %lshr, 1 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_eq_and1_lshr_pow2_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_eq_and1_lshr_pow2_vec( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] +; + %lshr = lshr <2 x i32> , %0 + %and = and <2 x i32> %lshr, + %cmp = icmp eq <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_ne_and1_lshr_pow2(i32 %0) { +; CHECK-LABEL: @icmp_ne_and1_lshr_pow2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %lshr = lshr i32 8, %0 + %and = and i32 %lshr, 1 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[LSHR]], +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] +; + %lshr = lshr <2 x i32> , %0 + %and = and <2 x i32> %lshr, + %cmp = icmp ne <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 
@icmp_eq_and_pow2_lshr_pow2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2( +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[LSHR]], 2 +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %lshr = lshr i32 8, %0 + %and = and i32 %lshr, 4 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_case2( +; CHECK-NEXT: ret i32 1 +; + %lshr = lshr i32 4, %0 + %and = and i32 %lshr, 8 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[LSHR]], +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %lshr = lshr <2 x i32> , %0 + %and = and <2 x i32> %lshr, + %cmp = icmp eq <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_ne_and_pow2_lshr_pow2(i32 %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2( +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[LSHR]], 2 +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %lshr = lshr i32 8, %0 + %and = and i32 %lshr, 4 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_case2( +; CHECK-NEXT: ret i32 1 +; + %lshr = lshr i32 4, %0 + %and = and i32 %lshr, 8 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + 
ret i32 %conv +} + +define <2 x i32> @icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) { +; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[LSHR]], +; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] +; + %lshr = lshr <2 x i32> , %0 + %and = and <2 x i32> %lshr, + %cmp = icmp ne <2 x i32> %and, + %conv = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %conv +} + +define i32 @icmp_eq_and1_lshr_pow2_negative1(i32 %0) { +; CHECK-LABEL: @icmp_eq_and1_lshr_pow2_negative1( +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 7, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[LSHR]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %lshr = lshr i32 7, %0 + %and = and i32 %lshr, 1 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @icmp_eq_and1_lshr_pow2_negative2(i32 %0) { +; CHECK-LABEL: @icmp_eq_and1_lshr_pow2_negative2( +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8, [[TMP0:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[LSHR]], 3 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %lshr = lshr i32 8, %0 + %and = and i32 %lshr, 3 + %cmp = icmp eq i32 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} From 7f1e048041f5df3fd9d799cf13f20d5bce5967e1 Mon Sep 17 00:00:00 2001 From: Joe Loser Date: Sat, 28 May 2022 17:08:18 -0600 Subject: [PATCH 784/908] [libc++][test] Remove Clang <= 3.7 workaround in is_default_constructible test Clang 3.7 and below is not actively used or supported in the test suite now, so remove the workaround in the test. 
Differential Revision: https://reviews.llvm.org/D126603 --- .../meta.unary.prop/is_default_constructible.pass.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_default_constructible.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_default_constructible.pass.cpp index ce2ec95618e5ff..72cd97e7a32d32 100644 --- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_default_constructible.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_default_constructible.pass.cpp @@ -106,21 +106,11 @@ int main(int, char**) #if TEST_STD_VER >= 11 test_is_not_default_constructible(); test_is_not_default_constructible(); - -// TODO: Remove this workaround once Clang <= 3.7 are no longer used regularly. -// In those compiler versions the __is_constructible builtin gives the wrong -// results for abominable function types. -#if (defined(TEST_APPLE_CLANG_VER) && TEST_APPLE_CLANG_VER < 703) \ - || (defined(TEST_CLANG_VER) && TEST_CLANG_VER < 308) -#define WORKAROUND_CLANG_BUG -#endif -#if !defined(WORKAROUND_CLANG_BUG) test_is_not_default_constructible(); test_is_not_default_constructible (); test_is_not_default_constructible (); test_is_not_default_constructible (); test_is_not_default_constructible (); -#endif #endif return 0; From 9a3144d078389c19b269b8dd94b9f5306754c039 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 29 May 2022 19:42:13 +0100 Subject: [PATCH 785/908] [AArch64] Reuse larger DUP if available If both a v2i32 DUP(x) and a v4i32 DUP(x) node exists, we can re-use the larger node using a vector extract to obtain the smaller. This comes up in the smull/smlal code, but needs a small fixup to allow the smull2 code in tryExtendDUPToExtractHigh/performAddSubLongCombine to still match smull2 extracts. 
Differential Revision: https://reviews.llvm.org/D126449 --- .../Target/AArch64/AArch64ISelLowering.cpp | 44 ++++++++++++++----- .../AArch64/aarch64-matrix-umull-smull.ll | 30 ++++++------- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d31008496ea4f2..2652e98fb30b2b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15147,6 +15147,11 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold // similarly here. static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { + MVT VT = N.getSimpleValueType(); + if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N.getConstantOperandVal(1) == 0) + N = N.getOperand(0); + switch (N.getOpcode()) { case AArch64ISD::DUP: case AArch64ISD::DUPLANE8: @@ -15167,18 +15172,19 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { return SDValue(); } - MVT NarrowTy = N.getSimpleValueType(); - if (!NarrowTy.is64BitVector()) + if (!VT.is64BitVector()) return SDValue(); - MVT ElementTy = NarrowTy.getVectorElementType(); - unsigned NumElems = NarrowTy.getVectorNumElements(); - MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2); + SDLoc DL(N); + unsigned NumElems = VT.getVectorNumElements(); + if (N.getValueType().is64BitVector()) { + MVT ElementTy = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2); + N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops()); + } - SDLoc dl(N); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, - DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()), - DAG.getConstant(NumElems, dl, MVT::i64)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N, + DAG.getConstant(NumElems, DL, MVT::i64)); } static bool isEssentiallyExtractHighSubvector(SDValue N) { @@ -18225,6 +18231,24 @@ static 
SDValue performSelectCombine(SDNode *N, return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); } +static SDValue performDUPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + // If "v2i32 DUP(x)" and "v4i32 DUP(x)" both exist, use an extract from the + // 128bit vector version. + if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) { + EVT LVT = VT.getDoubleNumVectorElementsVT(*DCI.DAG.getContext()); + if (SDNode *LN = DCI.DAG.getNodeIfExists( + N->getOpcode(), DCI.DAG.getVTList(LVT), {N->getOperand(0)})) { + SDLoc DL(N); + return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0), + DCI.DAG.getConstant(0, DL, MVT::i64)); + } + } + + return performPostLD1Combine(N, DCI, false); +} + /// Get rid of unnecessary NVCASTs (that don't change the type). static SDValue performNVCASTCombine(SDNode *N) { if (N->getValueType(0) == N->getOperand(0).getValueType()) @@ -18948,7 +18972,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case AArch64ISD::CSEL: return performCSELCombine(N, DCI, DAG); case AArch64ISD::DUP: - return performPostLD1Combine(N, DCI, false); + return performDUPCombine(N, DCI); case AArch64ISD::NVCAST: return performNVCASTCombine(N); case AArch64ISD::SPLICE: diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index 12b451f509f730..0ad7b8b15ad33a 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -201,22 +201,21 @@ define void @larger_smull(i16* nocapture noundef readonly %x, i16 noundef %y, i3 ; CHECK-NEXT: b .LBB3_6 ; CHECK-NEXT: .LBB3_3: // %vector.ph ; CHECK-NEXT: and x10, x9, #0xfffffff0 -; CHECK-NEXT: dup v0.4h, w8 ; CHECK-NEXT: add x11, x2, #32 ; CHECK-NEXT: add x12, x0, #16 ; CHECK-NEXT: mov x13, x10 -; CHECK-NEXT: dup v1.8h, w8 +; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: .LBB3_4: // %vector.body ; CHECK-NEXT: 
// =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q2, q3, [x12, #-16] +; CHECK-NEXT: ldp q1, q2, [x12, #-16] ; CHECK-NEXT: subs x13, x13, #16 ; CHECK-NEXT: add x12, x12, #32 -; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h +; CHECK-NEXT: smull2 v3.4s, v0.8h, v1.8h +; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h +; CHECK-NEXT: smull2 v4.4s, v0.8h, v2.8h ; CHECK-NEXT: smull v2.4s, v0.4h, v2.4h -; CHECK-NEXT: smull2 v5.4s, v1.8h, v3.8h -; CHECK-NEXT: smull v3.4s, v0.4h, v3.4h -; CHECK-NEXT: stp q2, q4, [x11, #-32] -; CHECK-NEXT: stp q3, q5, [x11], #64 +; CHECK-NEXT: stp q1, q3, [x11, #-32] +; CHECK-NEXT: stp q2, q4, [x11], #64 ; CHECK-NEXT: b.ne .LBB3_4 ; CHECK-NEXT: // %bb.5: // %middle.block ; CHECK-NEXT: cmp x10, x9 @@ -314,22 +313,21 @@ define void @larger_umull(i16* nocapture noundef readonly %x, i16 noundef %y, i3 ; CHECK-NEXT: b .LBB4_6 ; CHECK-NEXT: .LBB4_3: // %vector.ph ; CHECK-NEXT: and x10, x9, #0xfffffff0 -; CHECK-NEXT: dup v0.4h, w8 ; CHECK-NEXT: add x11, x2, #32 ; CHECK-NEXT: add x12, x0, #16 ; CHECK-NEXT: mov x13, x10 -; CHECK-NEXT: dup v1.8h, w8 +; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: .LBB4_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q2, q3, [x12, #-16] +; CHECK-NEXT: ldp q1, q2, [x12, #-16] ; CHECK-NEXT: subs x13, x13, #16 ; CHECK-NEXT: add x12, x12, #32 -; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h +; CHECK-NEXT: umull2 v3.4s, v0.8h, v1.8h +; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h +; CHECK-NEXT: umull2 v4.4s, v0.8h, v2.8h ; CHECK-NEXT: umull v2.4s, v0.4h, v2.4h -; CHECK-NEXT: umull2 v5.4s, v1.8h, v3.8h -; CHECK-NEXT: umull v3.4s, v0.4h, v3.4h -; CHECK-NEXT: stp q2, q4, [x11, #-32] -; CHECK-NEXT: stp q3, q5, [x11], #64 +; CHECK-NEXT: stp q1, q3, [x11, #-32] +; CHECK-NEXT: stp q2, q4, [x11], #64 ; CHECK-NEXT: b.ne .LBB4_4 ; CHECK-NEXT: // %bb.5: // %middle.block ; CHECK-NEXT: cmp x10, x9 From 0776c48f9b7e69fa447bee57c7c0985caa856be9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 29 May 2022 21:57:14 +0100 
Subject: [PATCH 786/908] Recommit "[LICM] Only create load in ph when promoting load or store doesn't exec." This reverts the revert commit ad95255b9215a. The updated version also creates a load when the store may not execute. In those cases, we still need to introduce a load in a function where there may not have been one before, so this doesn't completely resolve issue #51248. Original message: When only a store is sunk, there is no need to create a load in the pre-header, as the result of the load will never get used. The dead load can introduce UB, if the function is marked as writeonly. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D123473 --- llvm/lib/Transforms/Scalar/LICM.cpp | 44 ++++++++++++--------- llvm/test/Transforms/LICM/scalar-promote.ll | 11 ++---- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 74ae5977a36639..da079e42a8c49b 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1959,6 +1959,7 @@ bool llvm::promoteLoopAccessesToScalars( bool DereferenceableInPH = false; bool SafeToInsertStore = false; + bool StoreIsGuanteedToExecute = false; bool FoundLoadToPromote = false; SmallVector LoopUses; @@ -2039,10 +2040,12 @@ bool llvm::promoteLoopAccessesToScalars( // alignment than any other guaranteed stores, in which case we can // raise the alignment on the promoted store. 
Align InstAlignment = Store->getAlign(); - + bool GuaranteedToExecute = + SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop); + StoreIsGuanteedToExecute |= GuaranteedToExecute; if (!DereferenceableInPH || !SafeToInsertStore || (InstAlignment > Alignment)) { - if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) { + if (GuaranteedToExecute) { DereferenceableInPH = true; SafeToInsertStore = true; Alignment = std::max(Alignment, InstAlignment); @@ -2156,21 +2159,26 @@ bool llvm::promoteLoopAccessesToScalars( // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. - LoadInst *PreheaderLoad = new LoadInst( - AccessTy, SomePtr, SomePtr->getName() + ".promoted", - Preheader->getTerminator()); - if (SawUnorderedAtomic) - PreheaderLoad->setOrdering(AtomicOrdering::Unordered); - PreheaderLoad->setAlignment(Alignment); - PreheaderLoad->setDebugLoc(DebugLoc()); - if (AATags) - PreheaderLoad->setAAMetadata(AATags); - SSA.AddAvailableValue(Preheader, PreheaderLoad); - - MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB( - PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); - MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); - MSSAU.insertUse(NewMemUse, /*RenameUses=*/true); + LoadInst *PreheaderLoad = nullptr; + if (FoundLoadToPromote || !StoreIsGuanteedToExecute) { + PreheaderLoad = + new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted", + Preheader->getTerminator()); + if (SawUnorderedAtomic) + PreheaderLoad->setOrdering(AtomicOrdering::Unordered); + PreheaderLoad->setAlignment(Alignment); + PreheaderLoad->setDebugLoc(DebugLoc()); + if (AATags) + PreheaderLoad->setAAMetadata(AATags); + + MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB( + PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); + MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); + MSSAU.insertUse(NewMemUse, /*RenameUses=*/true); + 
SSA.AddAvailableValue(Preheader, PreheaderLoad); + } else { + SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy)); + } if (VerifyMemorySSA) MSSAU.getMemorySSA()->verifyMemorySSA(); @@ -2181,7 +2189,7 @@ bool llvm::promoteLoopAccessesToScalars( if (VerifyMemorySSA) MSSAU.getMemorySSA()->verifyMemorySSA(); // If the SSAUpdater didn't use the load in the preheader, just zap it now. - if (PreheaderLoad->use_empty()) + if (PreheaderLoad && PreheaderLoad->use_empty()) eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU); return true; diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll index ba274835d05f8c..8304479f4f238d 100644 --- a/llvm/test/Transforms/LICM/scalar-promote.ll +++ b/llvm/test/Transforms/LICM/scalar-promote.ll @@ -644,10 +644,9 @@ define void @test_sink_store_only() writeonly { ; CHECK: Function Attrs: writeonly ; CHECK-LABEL: @test_sink_store_only( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GLB_PROMOTED:%.*]] = load i8, i8* @glb, align 1 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[GLB_PROMOTED]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ poison, [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] @@ -683,10 +682,9 @@ define void @test_sink_store_to_local_object_only_loop_must_execute() writeonly ; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_must_execute( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, i8* [[A]], align 1 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[DIV1:%.*]] = phi i8 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[DIV1:%.*]] 
= phi i8 [ poison, [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP_LATCH]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] @@ -842,17 +840,16 @@ define void @sink_store_lcssa_phis(i32* %ptr, i1 %c) { ; CHECK: loop.2.header: ; CHECK-NEXT: br i1 false, label [[LOOP_3_HEADER_PREHEADER:%.*]], label [[LOOP_1_LATCH:%.*]] ; CHECK: loop.3.header.preheader: -; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP_3_HEADER:%.*]] ; CHECK: loop.3.header: -; CHECK-NEXT: [[I_11:%.*]] = phi i32 [ [[I_1:%.*]], [[LOOP_3_LATCH:%.*]] ], [ [[PTR_PROMOTED]], [[LOOP_3_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[I_11:%.*]] = phi i32 [ [[I_1:%.*]], [[LOOP_3_LATCH:%.*]] ], [ poison, [[LOOP_3_HEADER_PREHEADER]] ] ; CHECK-NEXT: [[I_1]] = phi i32 [ 1, [[LOOP_3_LATCH]] ], [ 0, [[LOOP_3_HEADER_PREHEADER]] ] ; CHECK-NEXT: br i1 true, label [[LOOP_3_LATCH]], label [[LOOP_2_LATCH:%.*]] ; CHECK: loop.3.latch: ; CHECK-NEXT: br label [[LOOP_3_HEADER]] ; CHECK: loop.2.latch: ; CHECK-NEXT: [[I_11_LCSSA:%.*]] = phi i32 [ [[I_11]], [[LOOP_3_HEADER]] ] -; CHECK-NEXT: store i32 [[I_11_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[I_11_LCSSA]], i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP_2_HEADER]] ; CHECK: loop.1.latch: ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP_1_HEADER]], label [[EXIT:%.*]] From e642d0ea215897993f925661d53b9091fa2bcc6e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 29 May 2022 18:03:25 -0700 Subject: [PATCH 787/908] [RISCV] Add test cases showing missed opportunity to use shXadd.uw. NFC The tests here show the codegen for something like this C code. unsigned diff = ptr1 - ptr2; return ptr3[diff]; The pointer difference is truncated to 32-bits before being used again as an index. In SelectionDAG this appears as an AND between a SRL and a SHL. 
DAGCombiner will remove the shifts leaving only an AND. The Mask now has 1,2, or 3 trailing zeros and 31, 30, or 29 leading zeros. We end up falling back to constant materialization to create this mask. We could instead use srli followed by slli.uw. Or since we have an add, we can use srli followed by shXadd.uw. Differential Revision: https://reviews.llvm.org/D126589 --- llvm/test/CodeGen/RISCV/rv64zba.ll | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 87dd4655bb7f40..c35d781932e6ff 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -1163,3 +1163,56 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind { %1 = ashr i32 %ext, 9 ret i32 %1 } + +; This the IR you get from InstCombine if take the difference of 2 pointers and +; cast is to unsigned before using as an index. +define signext i16 @sh1adduw_ptrdiff(i64 %diff, i16* %baseptr) { +; CHECK-LABEL: sh1adduw_ptrdiff: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: slli a2, a2, 33 +; CHECK-NEXT: addi a2, a2, -2 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: lh a0, 0(a0) +; CHECK-NEXT: ret + %ptrdiff = lshr exact i64 %diff, 1 + %cast = and i64 %ptrdiff, 4294967295 + %ptr = getelementptr inbounds i16, i16* %baseptr, i64 %cast + %res = load i16, i16* %ptr + ret i16 %res +} + +define signext i32 @sh2adduw_ptrdiff(i64 %diff, i32* %baseptr) { +; CHECK-LABEL: sh2adduw_ptrdiff: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: slli a2, a2, 34 +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: lw a0, 0(a0) +; CHECK-NEXT: ret + %ptrdiff = lshr exact i64 %diff, 2 + %cast = and i64 %ptrdiff, 4294967295 + %ptr = getelementptr inbounds i32, i32* %baseptr, i64 %cast + %res = load i32, i32* %ptr + ret i32 %res +} + +define i64 @sh3adduw_ptrdiff(i64 %diff, i64* %baseptr) { +; 
CHECK-LABEL: sh3adduw_ptrdiff: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: slli a2, a2, 35 +; CHECK-NEXT: addi a2, a2, -8 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: ld a0, 0(a0) +; CHECK-NEXT: ret + %ptrdiff = lshr exact i64 %diff, 3 + %cast = and i64 %ptrdiff, 4294967295 + %ptr = getelementptr inbounds i64, i64* %baseptr, i64 %cast + %res = load i64, i64* %ptr + ret i64 %res +} From 6a6cf2e28db5ef2451c117c7896208091da5d2a0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 29 May 2022 18:05:06 -0700 Subject: [PATCH 788/908] [RISCV] isel (add (and X, 0x1FFFFFFFE), Y) as (SH1ADD (SRLI X, 1), Y) This pattern is what we get after DAG combine for C code like this. short *ptr1, *ptr2, *ptr3; unsigned diff = ptr1 - ptr2; return ptr3[diff]; Reviewed By: reames Differential Revision: https://reviews.llvm.org/D126588 --- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 8 +++ llvm/test/CodeGen/RISCV/rv64zba.ll | 75 +++++++++++++++-------- 2 files changed, 56 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index a1e795819b1a84..ea9f5f150925c8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -1174,6 +1174,14 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)) (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)), (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; + +// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. 
+def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)), + (SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>; +def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)), + (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>; +def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)), + (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; } // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbcOrZbkc] in { diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index c35d781932e6ff..5fe872ee0f8739 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -1167,15 +1167,22 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind { ; This the IR you get from InstCombine if take the difference of 2 pointers and ; cast is to unsigned before using as an index. define signext i16 @sh1adduw_ptrdiff(i64 %diff, i16* %baseptr) { -; CHECK-LABEL: sh1adduw_ptrdiff: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 1 -; CHECK-NEXT: slli a2, a2, 33 -; CHECK-NEXT: addi a2, a2, -2 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: lh a0, 0(a0) -; CHECK-NEXT: ret +; RV64I-LABEL: sh1adduw_ptrdiff: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: slli a2, a2, 33 +; RV64I-NEXT: addi a2, a2, -2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lh a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh1adduw_ptrdiff: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srli a0, a0, 1 +; RV64ZBA-NEXT: sh1add.uw a0, a0, a1 +; RV64ZBA-NEXT: lh a0, 0(a0) +; RV64ZBA-NEXT: ret %ptrdiff = lshr exact i64 %diff, 1 %cast = and i64 %ptrdiff, 4294967295 %ptr = getelementptr inbounds i16, i16* %baseptr, i64 %cast @@ -1184,15 +1191,22 @@ define signext i16 @sh1adduw_ptrdiff(i64 %diff, i16* %baseptr) { } define signext i32 @sh2adduw_ptrdiff(i64 %diff, i32* %baseptr) { -; CHECK-LABEL: sh2adduw_ptrdiff: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 1 -; CHECK-NEXT: slli a2, a2, 34 -; CHECK-NEXT: addi 
a2, a2, -4 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: lw a0, 0(a0) -; CHECK-NEXT: ret +; RV64I-LABEL: sh2adduw_ptrdiff: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: slli a2, a2, 34 +; RV64I-NEXT: addi a2, a2, -4 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh2adduw_ptrdiff: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srli a0, a0, 2 +; RV64ZBA-NEXT: sh2add.uw a0, a0, a1 +; RV64ZBA-NEXT: lw a0, 0(a0) +; RV64ZBA-NEXT: ret %ptrdiff = lshr exact i64 %diff, 2 %cast = and i64 %ptrdiff, 4294967295 %ptr = getelementptr inbounds i32, i32* %baseptr, i64 %cast @@ -1201,15 +1215,22 @@ define signext i32 @sh2adduw_ptrdiff(i64 %diff, i32* %baseptr) { } define i64 @sh3adduw_ptrdiff(i64 %diff, i64* %baseptr) { -; CHECK-LABEL: sh3adduw_ptrdiff: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 1 -; CHECK-NEXT: slli a2, a2, 35 -; CHECK-NEXT: addi a2, a2, -8 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: ld a0, 0(a0) -; CHECK-NEXT: ret +; RV64I-LABEL: sh3adduw_ptrdiff: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: slli a2, a2, 35 +; RV64I-NEXT: addi a2, a2, -8 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh3adduw_ptrdiff: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srli a0, a0, 3 +; RV64ZBA-NEXT: sh3add.uw a0, a0, a1 +; RV64ZBA-NEXT: ld a0, 0(a0) +; RV64ZBA-NEXT: ret %ptrdiff = lshr exact i64 %diff, 3 %cast = and i64 %ptrdiff, 4294967295 %ptr = getelementptr inbounds i64, i64* %baseptr, i64 %cast From 967ef4ad0afc29376ab608795f328feb12f5e407 Mon Sep 17 00:00:00 2001 From: Lian Wang Date: Fri, 27 May 2022 06:20:21 +0000 Subject: [PATCH 789/908] [NFC][VP] Fix llvm.vp.merge intrinsic Expansion in LangRef Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D126457 --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index aae95fb83d9a20..cf97401afb3616 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -17994,7 +17994,7 @@ Example: ;; Lanes at and above %pivot are taken from %on_false %atfirst = insertelement <4 x i32> undef, i32 %pivot, i32 0 %splat = shufflevector <4 x i32> %atfirst, <4 x i32> poison, <4 x i32> zeroinitializer - %pivotmask = icmp ult <4 x i32> %splat, <4 x i32> + %pivotmask = icmp ult <4 x i32> , <4 x i32> %splat %mergemask = and <4 x i1> %cond, <4 x i1> %pivotmask %also.r = select <4 x i1> %mergemask, <4 x i32> %on_true, <4 x i32> %on_false From c98b3a8cd9856f2fcc7ba1f9ed9896b291bbab7b Mon Sep 17 00:00:00 2001 From: Sockke Date: Mon, 30 May 2022 09:43:27 +0800 Subject: [PATCH 790/908] Fix `performance-unnecessary-value-param` for template specialization The checker missed a check for parameter type of primary template of specialization template and this could cause build breakages. Reviewed By: aaron.ballman, flx Differential Revision: https://reviews.llvm.org/D116593 --- .../UnnecessaryValueParamCheck.cpp | 17 +++---------- clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ ...rmance-unnecessary-value-param-delayed.cpp | 2 +- .../performance-unnecessary-value-param.cpp | 25 +++++++++++++++++-- 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp index ba2b89c291214e..00f4b00f53819d 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp @@ -51,18 +51,6 @@ bool hasLoopStmtAncestor(const DeclRefExpr &DeclRef, const Decl &Decl, return Matches.empty(); } -bool isExplicitTemplateSpecialization(const FunctionDecl &Function) { - if (const auto *SpecializationInfo = Function.getTemplateSpecializationInfo()) - if 
(SpecializationInfo->getTemplateSpecializationKind() == - TSK_ExplicitSpecialization) - return true; - if (const auto *Method = llvm::dyn_cast(&Function)) - if (Method->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization && - Method->getMemberSpecializationInfo()->isExplicitSpecialization()) - return true; - return false; -} - } // namespace UnnecessaryValueParamCheck::UnnecessaryValueParamCheck( @@ -147,11 +135,12 @@ void UnnecessaryValueParamCheck::check(const MatchFinder::MatchResult &Result) { // 2. the function is virtual as it might break overrides // 3. the function is referenced outside of a call expression within the // compilation unit as the signature change could introduce build errors. - // 4. the function is an explicit template specialization. + // 4. the function is a primary template or an explicit template + // specialization. const auto *Method = llvm::dyn_cast(Function); if (Param->getBeginLoc().isMacroID() || (Method && Method->isVirtual()) || isReferencedOutsideOfCallExpr(*Function, *Result.Context) || - isExplicitTemplateSpecialization(*Function)) + (Function->getTemplatedKind() != FunctionDecl::TK_NonTemplate)) return; for (const auto *FunctionDecl = Function; FunctionDecl != nullptr; FunctionDecl = FunctionDecl->getPreviousDecl()) { diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 787f535dedb641..5196c53291d19b 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -202,6 +202,10 @@ Changes in existing checks ` to simplify expressions using DeMorgan's Theorem. +- Fixed a crash in :doc:`performance-unnecessary-value-param + ` when the specialization + template has an unnecessary value paramter. Removed the fix for a template. 
+ Removed checks ^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param-delayed.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param-delayed.cpp index 623bf8b30829e1..53ec8713be3389 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param-delayed.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param-delayed.cpp @@ -69,7 +69,7 @@ struct PositiveConstValueConstructor { template void templateWithNonTemplatizedParameter(const ExpensiveToCopyType S, T V) { // CHECK-MESSAGES: [[@LINE-1]]:90: warning: the const qualified parameter 'S' - // CHECK-FIXES: template void templateWithNonTemplatizedParameter(const ExpensiveToCopyType& S, T V) { + // CHECK-FIXES-NOT: template void templateWithNonTemplatizedParameter(const ExpensiveToCopyType& S, T V) { } void instantiated() { diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param.cpp index f801494cf0ff51..d578eedd94a390 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-value-param.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s performance-unnecessary-value-param %t +// RUN: %check_clang_tidy %s performance-unnecessary-value-param %t -- -- -fno-delayed-template-parsing // CHECK-FIXES: #include @@ -109,7 +109,7 @@ struct PositiveConstValueConstructor { template void templateWithNonTemplatizedParameter(const ExpensiveToCopyType S, T V) { // CHECK-MESSAGES: [[@LINE-1]]:90: warning: the const qualified parameter 'S' - // CHECK-FIXES: template void templateWithNonTemplatizedParameter(const ExpensiveToCopyType& S, T V) { + // CHECK-FIXES-NOT: template void templateWithNonTemplatizedParameter(const ExpensiveToCopyType& S, T V) { } void 
instantiated() { @@ -381,3 +381,24 @@ void templateFunction(ExpensiveToCopyType E) { // CHECK-FIXES: void templateFunction(ExpensiveToCopyType E) { E.constReference(); } + +template +T templateSpecializationFunction(ExpensiveToCopyType E) { + // CHECK-MESSAGES: [[@LINE-1]]:54: warning: the parameter 'E' is copied + // CHECK-FIXES-NOT: T templateSpecializationFunction(const ExpensiveToCopyType& E) { + return T(); +} + +template <> +bool templateSpecializationFunction(ExpensiveToCopyType E) { + // CHECK-MESSAGES: [[@LINE-1]]:57: warning: the parameter 'E' is copied + // CHECK-FIXES-NOT: bool templateSpecializationFunction(const ExpensiveToCopyType& E) { + return true; +} + +template <> +int templateSpecializationFunction(ExpensiveToCopyType E) { + // CHECK-MESSAGES: [[@LINE-1]]:56: warning: the parameter 'E' is copied + // CHECK-FIXES-NOT: int templateSpecializationFunction(const ExpensiveToCopyType& E) { + return 0; +} From ef256ed58ebf4689ec8d8fffaee529063f0b8729 Mon Sep 17 00:00:00 2001 From: Chenbing Zheng Date: Mon, 30 May 2022 10:16:32 +0800 Subject: [PATCH 791/908] [InstCombine] bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest) Only solve dest type is vector to avoid inverse transform in visitBitCast. 
Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D125951 --- .../InstCombine/InstCombineCasts.cpp | 19 +++++++++++++------ .../InstCombine/bitcast-inseltpoison.ll | 7 +------ llvm/test/Transforms/InstCombine/bitcast.ll | 10 ++-------- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 9fb266bd60c75f..c3fb0b542e5c7c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2363,13 +2363,20 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, // The bitcast must be to a vectorizable type, otherwise we can't make a new // type to extract from. Type *DestType = BitCast.getType(); - if (!VectorType::isValidElementType(DestType)) - return nullptr; + VectorType *VecType = cast(VecOp->getType()); + if (VectorType::isValidElementType(DestType)) { + auto *NewVecType = VectorType::get(DestType, VecType); + auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc"); + return ExtractElementInst::Create(NewBC, Index); + } - auto *NewVecType = - VectorType::get(DestType, cast(VecOp->getType())); - auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc"); - return ExtractElementInst::Create(NewBC, Index); + // Only solve DestType is vector to avoid inverse transform in visitBitCast. + // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest) + auto *FixedVType = dyn_cast(VecType); + if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1) + return CastInst::Create(Instruction::BitCast, VecOp, DestType); + + return nullptr; } /// Change the type of a bitwise logic operation if we can eliminate a bitcast. 
diff --git a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll index 95edb21f7340bd..c987c2885115d0 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll @@ -353,14 +353,9 @@ define i64 @bitcast_extelt2(<4 x float> %A) { ret i64 %bc2 } -; TODO: This should return %A. - define <2 x i32> @bitcast_extelt3(<2 x i32> %A) { ; CHECK-LABEL: @bitcast_extelt3( -; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i32> [[A:%.*]] to <1 x i64> -; CHECK-NEXT: [[EXT:%.*]] = extractelement <1 x i64> [[BC1]], i64 0 -; CHECK-NEXT: [[BC2:%.*]] = bitcast i64 [[EXT]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[BC2]] +; CHECK-NEXT: ret <2 x i32> [[A:%.*]] ; %bc1 = bitcast <2 x i32> %A to <1 x i64> %ext = extractelement <1 x i64> %bc1, i32 0 diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index 68193558280798..1346885f698801 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -402,14 +402,9 @@ define i64 @bitcast_extelt2(<4 x float> %A) { ret i64 %bc2 } -; TODO: This should return %A. 
- define <2 x i32> @bitcast_extelt3(<2 x i32> %A) { ; CHECK-LABEL: @bitcast_extelt3( -; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i32> [[A:%.*]] to <1 x i64> -; CHECK-NEXT: [[EXT:%.*]] = extractelement <1 x i64> [[BC1]], i64 0 -; CHECK-NEXT: [[BC2:%.*]] = bitcast i64 [[EXT]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[BC2]] +; CHECK-NEXT: ret <2 x i32> [[A:%.*]] ; %bc1 = bitcast <2 x i32> %A to <1 x i64> %ext = extractelement <1 x i64> %bc1, i32 0 @@ -433,8 +428,7 @@ define double @bitcast_extelt4(i128 %A) { define <2 x i32> @bitcast_extelt5(<1 x i64> %A) { ; CHECK-LABEL: @bitcast_extelt5( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <1 x i64> [[A:%.*]], i64 0 -; CHECK-NEXT: [[BC:%.*]] = bitcast i64 [[EXT]] to <2 x i32> +; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[BC]] ; %ext = extractelement <1 x i64> %A, i32 0 From 99eca8353808f63670c647aeae03e0ce66eb21e0 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 30 May 2022 10:08:39 +0800 Subject: [PATCH 792/908] [Driver] Enable to use C++20 standalne by -fcxx-modules This patch allows users to use C++20 modules via -fcxx-modules. Previously, we could only use them under -std=c++20. Given that users can already use C++20 coroutines standalone via -fcoroutines-ts, it makes sense to offer an option to use C++20 modules without enabling C++20. 
Reviewed By: iains, MaskRay Differential Revision: https://reviews.llvm.org/D120540 --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 6 ++++++ clang/test/Driver/modules.cpp | 11 +++++++++++ clang/test/Modules/cxx-modules.cppm | 7 +++++++ 4 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 clang/test/Modules/cxx-modules.cppm diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e19baac5860aab..0343e48c1a9d11 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1410,7 +1410,7 @@ defm async_exceptions: BoolFOption<"async-exceptions", PosFlag, NegFlag>; defm cxx_modules : BoolFOption<"cxx-modules", LangOpts<"CPlusPlusModules">, Default, - NegFlag, PosFlag, + NegFlag, PosFlag, BothFlags<[NoXarchOption], " modules for C++">>, ShouldParseIf; def fdebug_pass_arguments : Flag<["-"], "fdebug-pass-arguments">, Group; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 936682e49299e6..5106c0e327b87f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3627,6 +3627,12 @@ static void RenderModulesOptions(Compilation &C, const Driver &D, HaveModules = true; } + if (Args.hasFlag(options::OPT_fcxx_modules, options::OPT_fno_cxx_modules, + false)) { + CmdArgs.push_back("-fcxx-modules"); + HaveModules = true; + } + // -fmodule-maps enables implicit reading of module map files. By default, // this is enabled if we are using Clang's flavor of precompiled modules. 
if (Args.hasFlag(options::OPT_fimplicit_module_maps, diff --git a/clang/test/Driver/modules.cpp b/clang/test/Driver/modules.cpp index 87b6cc640cb0de..bac9f715f13a45 100644 --- a/clang/test/Driver/modules.cpp +++ b/clang/test/Driver/modules.cpp @@ -73,3 +73,14 @@ import "foo.h"; // CHECK-HEADER-UNIT-USE: BAR; FOO; #endif + +// Check the independent use of -fcxx-modules +// +// RUN: %clang++ -fcxx-modules -std=c++17 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX17-MODULES +// CHECK-CXX17-MODULES: "-fcxx-modules" +// RUN: %clang++ -fcxx-modules -std=c++14 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX14-MODULES +// CHECK-CXX14-MODULES: "-fcxx-modules" +// RUN: %clang++ -fcxx-modules -std=c++11 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX11-MODULES +// CHECK-CXX11-MODULES: "-fcxx-modules" +// RUN: %clang++ -fcxx-modules -std=c++03 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX03-MODULES +// CHECK-CXX03-MODULES: "-fcxx-modules" diff --git a/clang/test/Modules/cxx-modules.cppm b/clang/test/Modules/cxx-modules.cppm new file mode 100644 index 00000000000000..be17f85f8d17d4 --- /dev/null +++ b/clang/test/Modules/cxx-modules.cppm @@ -0,0 +1,7 @@ +// This tests that we could use C++20 modules standalone. +// RUN: %clang -std=c++03 -fcxx-modules -fsyntax-only -Xclang -verify %s +// RUN: %clang -std=c++11 -fcxx-modules -fsyntax-only -Xclang -verify %s +// RUN: %clang -std=c++14 -fcxx-modules -fsyntax-only -Xclang -verify %s +// RUN: %clang -std=c++17 -fcxx-modules -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics +export module M; From 42c3c70a9e3f0a697b8794f4e8b603091ffb12a4 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 30 May 2022 10:43:13 +0800 Subject: [PATCH 793/908] Revert "[Driver] Enable to use C++20 standalne by -fcxx-modules" This reverts commit 99eca8353808f63670c647aeae03e0ce66eb21e0. Since it would cause clang-tools-extra fail. 
--- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 6 ------ clang/test/Driver/modules.cpp | 11 ----------- clang/test/Modules/cxx-modules.cppm | 7 ------- 4 files changed, 1 insertion(+), 25 deletions(-) delete mode 100644 clang/test/Modules/cxx-modules.cppm diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0343e48c1a9d11..e19baac5860aab 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1410,7 +1410,7 @@ defm async_exceptions: BoolFOption<"async-exceptions", PosFlag, NegFlag>; defm cxx_modules : BoolFOption<"cxx-modules", LangOpts<"CPlusPlusModules">, Default, - NegFlag, PosFlag, + NegFlag, PosFlag, BothFlags<[NoXarchOption], " modules for C++">>, ShouldParseIf; def fdebug_pass_arguments : Flag<["-"], "fdebug-pass-arguments">, Group; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 5106c0e327b87f..936682e49299e6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3627,12 +3627,6 @@ static void RenderModulesOptions(Compilation &C, const Driver &D, HaveModules = true; } - if (Args.hasFlag(options::OPT_fcxx_modules, options::OPT_fno_cxx_modules, - false)) { - CmdArgs.push_back("-fcxx-modules"); - HaveModules = true; - } - // -fmodule-maps enables implicit reading of module map files. By default, // this is enabled if we are using Clang's flavor of precompiled modules. 
if (Args.hasFlag(options::OPT_fimplicit_module_maps, diff --git a/clang/test/Driver/modules.cpp b/clang/test/Driver/modules.cpp index bac9f715f13a45..87b6cc640cb0de 100644 --- a/clang/test/Driver/modules.cpp +++ b/clang/test/Driver/modules.cpp @@ -73,14 +73,3 @@ import "foo.h"; // CHECK-HEADER-UNIT-USE: BAR; FOO; #endif - -// Check the independent use of -fcxx-modules -// -// RUN: %clang++ -fcxx-modules -std=c++17 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX17-MODULES -// CHECK-CXX17-MODULES: "-fcxx-modules" -// RUN: %clang++ -fcxx-modules -std=c++14 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX14-MODULES -// CHECK-CXX14-MODULES: "-fcxx-modules" -// RUN: %clang++ -fcxx-modules -std=c++11 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX11-MODULES -// CHECK-CXX11-MODULES: "-fcxx-modules" -// RUN: %clang++ -fcxx-modules -std=c++03 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX03-MODULES -// CHECK-CXX03-MODULES: "-fcxx-modules" diff --git a/clang/test/Modules/cxx-modules.cppm b/clang/test/Modules/cxx-modules.cppm deleted file mode 100644 index be17f85f8d17d4..00000000000000 --- a/clang/test/Modules/cxx-modules.cppm +++ /dev/null @@ -1,7 +0,0 @@ -// This tests that we could use C++20 modules standalone. 
-// RUN: %clang -std=c++03 -fcxx-modules -fsyntax-only -Xclang -verify %s -// RUN: %clang -std=c++11 -fcxx-modules -fsyntax-only -Xclang -verify %s -// RUN: %clang -std=c++14 -fcxx-modules -fsyntax-only -Xclang -verify %s -// RUN: %clang -std=c++17 -fcxx-modules -fsyntax-only -Xclang -verify %s -// expected-no-diagnostics -export module M; From 083798e2702f9b12828ba9d3ee4d1486340a2cb5 Mon Sep 17 00:00:00 2001 From: Ping Deng Date: Mon, 30 May 2022 02:52:40 +0000 Subject: [PATCH 794/908] [LegalizeTypes][VP] Add integer promotion support for vp.fptosi/vp.fptoui Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D125760 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 20 ++++++++++++++--- .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 22 +++++++++++++++++++ .../RISCV/rvv/fixed-vectors-fptosi-vp.ll | 13 +++++++++++ .../RISCV/rvv/fixed-vectors-fptoui-vp.ll | 13 +++++++++++ llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll | 22 +++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll | 13 +++++++++++ llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll | 13 +++++++++++ 7 files changed, 113 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 686de3933c5f9b..123ccfd3550f93 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -135,6 +135,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::VP_FPTOSI: + case ISD::VP_FPTOUI: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: @@ -669,6 +671,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) NewOpc = ISD::STRICT_FP_TO_SINT; + if (N->getOpcode() == ISD::VP_FPTOUI && + !TLI.isOperationLegal(ISD::VP_FPTOUI, 
NVT) && + TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT)) + NewOpc = ISD::VP_FPTOSI; + SDValue Res; if (N->isStrictFPOpcode()) { Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, @@ -676,8 +683,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else + } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) { + Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1), + N->getOperand(2)}); + } else { Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + } // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -687,8 +698,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || - N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? - ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + N->getOpcode() == ISD::STRICT_FP_TO_UINT || + N->getOpcode() == ISD::VP_FPTOUI) + ? 
ISD::AssertZext + : ISD::AssertSext, + dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index 566fcdbc2ca9a4..5a7edd2734ff2f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -44,6 +44,28 @@ define <2 x i1> @fp2si_v2f32_v2i1(<2 x float> %x) { ret <2 x i1> %z } +define <2 x i15> @fp2si_v2f32_v2i15(<2 x float> %x) { +; CHECK-LABEL: fp2si_v2f32_v2i15: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %z = fptosi <2 x float> %x to <2 x i15> + ret <2 x i15> %z +} + +define <2 x i15> @fp2ui_v2f32_v2i15(<2 x float> %x) { +; CHECK-LABEL: fp2ui_v2f32_v2i15: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %z = fptoui <2 x float> %x to <2 x i15> + ret <2 x i15> %z +} + define <2 x i1> @fp2ui_v2f32_v2i1(<2 x float> %x) { ; CHECK-LABEL: fp2ui_v2f32_v2i1: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll index d833523079e378..423e123ce30eb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -4,6 +4,19 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh \ ; RUN: -riscv-v-vector-bits-min=128 < %s | FileCheck %s +declare <4 x i7> @llvm.vp.fptosi.v4i7.v4f16(<4 x half>, <4 x i1>, i32) + +define <4 x i7> @vfptosi_v4i7_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfptosi_v4i7_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call 
<4 x i7> @llvm.vp.fptosi.v4i7.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) + ret <4 x i7> %v +} + declare <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half>, <4 x i1>, i32) define <4 x i8> @vfptosi_v4i8_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll index 0f96789c6cc93d..2f97d38b2a0dfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -4,6 +4,19 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh \ ; RUN: -riscv-v-vector-bits-min=128 < %s | FileCheck %s +declare <4 x i7> @llvm.vp.fptoui.v4i7.v4f16(<4 x half>, <4 x i1>, i32) + +define <4 x i7> @vfptoui_v4i7_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfptoui_v4i7_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i7> @llvm.vp.fptoui.v4i7.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) + ret <4 x i7> %v +} + declare <4 x i8> @llvm.vp.fptoui.v4i8.v4f16(<4 x half>, <4 x i1>, i32) define <4 x i8> @vfptoui_v4i8_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll index 680811037399b1..d73b6f6f9c9d9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll @@ -16,6 +16,28 @@ define @vfptosi_nxv1f16_nxv1i1( %va) { ret %evec } +define @vfptosi_nxv1f16_nxv1i7( %va) { +; CHECK-LABEL: vfptosi_nxv1f16_nxv1i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = fptosi %va to + ret %evec +} + +define @vfptoui_nxv1f16_nxv1i7( %va) { +; CHECK-LABEL: vfptoui_nxv1f16_nxv1i7: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = fptoui %va to + ret %evec +} + define @vfptoui_nxv1f16_nxv1i1( %va) { ; CHECK-LABEL: vfptoui_nxv1f16_nxv1i1: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll index 9686bc1850afff..72d0d400217f38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll @@ -2,6 +2,19 @@ ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s +declare @llvm.vp.fptosi.v4i7.v4f16(, , i32) + +define @vfptosi_v4i7_v4f16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfptosi_v4i7_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.fptosi.v4i7.v4f16( %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.fptosi.nxv2i8.nxv2f16(, , i32) define @vfptosi_nxv2i8_nxv2f16( %va, %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll index 9a64cd8d9e3b68..1d1995a6c29a81 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -2,6 +2,19 @@ ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s +declare @llvm.vp.fptoui.v4i7.v4f16(, , i32) + +define @vfptoui_v4i7_v4f16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfptoui_v4i7_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.fptoui.v4i7.v4f16( %va, %m, i32 %evl) + ret %v +} + 
declare @llvm.vp.fptoui.nxv2i8.nxv2f16(, , i32) define @vfptoui_nxv2i8_nxv2f16( %va, %m, i32 zeroext %evl) { From 88af539c0eaa5a282e6c1632797d33dfc0ebcb65 Mon Sep 17 00:00:00 2001 From: Ping Deng Date: Mon, 30 May 2022 02:54:23 +0000 Subject: [PATCH 795/908] [RISCV] Support VP_REDUCE_MUL mask operation Reviewed By: reames Differential Revision: https://reviews.llvm.org/D126520 --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5 + .../rvv/fixed-vectors-reduction-mask-vp.ll | 118 +++++++++++++++++ .../CodeGen/RISCV/rvv/vreductions-mask-vp.ll | 119 ++++++++++++++++++ 3 files changed, 242 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 09c9d4179ae13a..755ae2ac2361ba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8864,6 +8864,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT.isVector() && VT.getVectorElementType() == MVT::i1) Opcode = ISD::VP_AND; break; + case ISD::VP_REDUCE_MUL: + // If it is VP_REDUCE_MUL mask operation then turn it to VP_REDUCE_AND + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_AND; + break; case ISD::VP_REDUCE_ADD: // If it is VP_REDUCE_ADD mask operation then turn it to VP_REDUCE_XOR if (VT == MVT::i1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll index 547799ddbda552..c9265477dba414 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -890,3 +890,121 @@ define signext i1 @vpreduce_umin_nxv64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %r = call i1 @llvm.vp.reduce.umin.nxv64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r } + +declare i1 @llvm.vp.reduce.mul.v1i1(i1, <1 x i1>, <1 x i1>, i32) + +define i1 @vpreduce_mul_v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vpreduce_mul_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.v2i1(i1, <2 x i1>, <2 x i1>, i32) + +define signext i1 @vpreduce_mul_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.v4i1(i1, <4 x i1>, <4 x i1>, i32) + +define signext i1 @vpreduce_mul_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.v8i1(i1, <8 x i1>, <8 x i1>, i32) + +define signext i1 @vpreduce_mul_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call 
i1 @llvm.vp.reduce.mul.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.v16i1(i1, <16 x i1>, <16 x i1>, i32) + +define signext i1 @vpreduce_mul_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.v32i1(i1, <32 x i1>, <32 x i1>, i32) + +define signext i1 @vpreduce_mul_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.v64i1(i1, <64 x i1>, <64 x i1>, i32) + +define signext i1 @vpreduce_mul_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) + ret i1 %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll index 9b4464000b1270..850990ffd7fd10 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -1029,3 +1029,122 @@ define signext i1 @vpreduce_umin_nxv64i1(i1 signext %s, %v, < %r = call i1 @llvm.vp.reduce.umin.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } + +declare i1 @llvm.vp.reduce.mul.nxv1i1(i1, , , i32) + +define signext i1 @vpreduce_mul_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.nxv1i1(i1 %s, %v, %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.nxv2i1(i1, , , i32) + +define signext i1 @vpreduce_mul_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.nxv2i1(i1 %s, %v, %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.nxv4i1(i1, , , i32) + +define signext i1 @vpreduce_mul_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.nxv4i1(i1 %s, %v, %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.nxv8i1(i1, , , i32) + +define signext i1 @vpreduce_mul_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vpreduce_mul_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.nxv8i1(i1 %s, %v, %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.nxv16i1(i1, , , i32) + +define signext i1 @vpreduce_mul_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.nxv16i1(i1 %s, %v, %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.nxv32i1(i1, , , i32) + +define signext i1 @vpreduce_mul_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.nxv32i1(i1 %s, %v, %m, i32 %evl) + ret i1 %r +} + +declare i1 @llvm.vp.reduce.mul.nxv64i1(i1, , , i32) + +define signext i1 @vpreduce_mul_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_mul_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmnot.m v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %r = call i1 @llvm.vp.reduce.mul.nxv64i1(i1 %s, %v, %m, i32 %evl) + ret i1 %r +} From 
3f3a235aa2e610b5ba393228a666d55a8135ef4a Mon Sep 17 00:00:00 2001 From: Sockke Date: Mon, 30 May 2022 13:02:25 +0800 Subject: [PATCH 796/908] [clang-apply-replacements] Added an option to ignore insert conflict. If two different texts are inserted at the same offset, clang-apply-replacements prints the conflict error and discards all fixes. This patch adds support for adjusting conflict offset and keeps running to continually fix them. https://godbolt.org/z/P938EGoxj doesn't have any fixes when I run run-clang-tidy.py to generate a YAML file with clang-tidy and fix them with clang-apply-replacements. The YAML file has two different header texts insertions at the same offset, unlike clang-tidy with '-fix', clang-apply-replacements does not adjust for this conflict. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D123924 --- .../Tooling/ApplyReplacements.h | 3 ++- .../lib/Tooling/ApplyReplacements.cpp | 21 ++++++++++++++-- .../tool/ClangApplyReplacementsMain.cpp | 7 +++++- .../clang-tidy/tool/run-clang-tidy.py | 1 + .../Inputs/ignore-conflict/file1.yaml | 24 +++++++++++++++++++ .../ignore-conflict/ignore-conflict.cpp | 4 ++++ .../ignore-conflict.cpp | 5 ++++ .../clang/Tooling/Refactoring/AtomicChange.h | 2 ++ 8 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/file1.yaml create mode 100644 clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp create mode 100644 clang-tools-extra/test/clang-apply-replacements/ignore-conflict.cpp diff --git a/clang-tools-extra/clang-apply-replacements/include/clang-apply-replacements/Tooling/ApplyReplacements.h b/clang-tools-extra/clang-apply-replacements/include/clang-apply-replacements/Tooling/ApplyReplacements.h index e37768277c5a47..b0cf7317c0ed89 100644 --- a/clang-tools-extra/clang-apply-replacements/include/clang-apply-replacements/Tooling/ApplyReplacements.h +++ 
b/clang-tools-extra/clang-apply-replacements/include/clang-apply-replacements/Tooling/ApplyReplacements.h @@ -87,7 +87,8 @@ std::error_code collectReplacementsFromDirectory( /// \li false If there were conflicts. bool mergeAndDeduplicate(const TUReplacements &TUs, const TUDiagnostics &TUDs, FileToChangesMap &FileChanges, - clang::SourceManager &SM); + clang::SourceManager &SM, + bool IgnoreInsertConflict = false); /// Apply \c AtomicChange on File and rewrite it. /// diff --git a/clang-tools-extra/clang-apply-replacements/lib/Tooling/ApplyReplacements.cpp b/clang-tools-extra/clang-apply-replacements/lib/Tooling/ApplyReplacements.cpp index b13b83cf3f0001..e25c8e2449dc81 100644 --- a/clang-tools-extra/clang-apply-replacements/lib/Tooling/ApplyReplacements.cpp +++ b/clang-tools-extra/clang-apply-replacements/lib/Tooling/ApplyReplacements.cpp @@ -202,7 +202,7 @@ groupReplacements(const TUReplacements &TUs, const TUDiagnostics &TUDs, bool mergeAndDeduplicate(const TUReplacements &TUs, const TUDiagnostics &TUDs, FileToChangesMap &FileChanges, - clang::SourceManager &SM) { + clang::SourceManager &SM, bool IgnoreInsertConflict) { auto GroupedReplacements = groupReplacements(TUs, TUDs, SM); bool ConflictDetected = false; @@ -231,7 +231,24 @@ bool mergeAndDeduplicate(const TUReplacements &TUs, const TUDiagnostics &TUDs, // For now, printing directly the error reported by `AtomicChange` is // the easiest solution. 
errs() << llvm::toString(std::move(Err)) << "\n"; - ConflictDetected = true; + if (IgnoreInsertConflict) { + tooling::Replacements &Replacements = FileChange.getReplacements(); + unsigned NewOffset = + Replacements.getShiftedCodePosition(R.getOffset()); + unsigned NewLength = Replacements.getShiftedCodePosition( + R.getOffset() + R.getLength()) - + NewOffset; + if (NewLength == R.getLength()) { + tooling::Replacement RR = tooling::Replacement( + R.getFilePath(), NewOffset, NewLength, R.getReplacementText()); + Replacements = Replacements.merge(tooling::Replacements(RR)); + } else { + llvm::errs() + << "Can't resolve conflict, skipping the replacement.\n"; + ConflictDetected = true; + } + } else + ConflictDetected = true; } } FileChanges.try_emplace(Entry, diff --git a/clang-tools-extra/clang-apply-replacements/tool/ClangApplyReplacementsMain.cpp b/clang-tools-extra/clang-apply-replacements/tool/ClangApplyReplacementsMain.cpp index 52ef725352d0b7..68e7feaa3baedc 100644 --- a/clang-tools-extra/clang-apply-replacements/tool/ClangApplyReplacementsMain.cpp +++ b/clang-tools-extra/clang-apply-replacements/tool/ClangApplyReplacementsMain.cpp @@ -42,6 +42,11 @@ static cl::opt RemoveTUReplacementFiles( "merging/replacing."), cl::init(false), cl::cat(ReplacementCategory)); +static cl::opt IgnoreInsertConflict( + "ignore-insert-conflict", + cl::desc("Ignore insert conflict and keep running to fix."), + cl::init(false), cl::cat(ReplacementCategory)); + static cl::opt DoFormat( "format", cl::desc("Enable formatting of code changed by applying replacements.\n" @@ -131,7 +136,7 @@ int main(int argc, char **argv) { SourceManager SM(Diagnostics, Files); FileToChangesMap Changes; - if (!mergeAndDeduplicate(TURs, TUDs, Changes, SM)) + if (!mergeAndDeduplicate(TURs, TUDs, Changes, SM, IgnoreInsertConflict)) return 1; tooling::ApplyChangesSpec Spec; diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index 
fdfe5a07bc551b..821b941d4c3837 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -180,6 +180,7 @@ def find_binary(arg, name, build_path): def apply_fixes(args, clang_apply_replacements_binary, tmpdir): """Calls clang-apply-fixes on a given directory.""" invocation = [clang_apply_replacements_binary] + invocation.append('-ignore-insert-conflict') if args.format: invocation.append('-format') if args.style: diff --git a/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/file1.yaml b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/file1.yaml new file mode 100644 index 00000000000000..a80cf82b2bf331 --- /dev/null +++ b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/file1.yaml @@ -0,0 +1,24 @@ +--- +MainSourceFile: ignore-conflict.cpp +Diagnostics: + - DiagnosticName: test-ignore-conflict-insertion + DiagnosticMessage: + Message: Fix + FilePath: $(path)/ignore-conflict.cpp + FileOffset: 0 + Replacements: + - FilePath: $(path)/ignore-conflict.cpp + Offset: 0 + Length: 0 + ReplacementText: "#include \n" + - DiagnosticName: test-ignore-conflict-insertion + DiagnosticMessage: + Message: Fix + FilePath: $(path)/ignore-conflict.cpp + FileOffset: 0 + Replacements: + - FilePath: $(path)/ignore-conflict.cpp + Offset: 0 + Length: 0 + ReplacementText: "#include \n" +... 
diff --git a/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp new file mode 100644 index 00000000000000..8791dd952319d4 --- /dev/null +++ b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp @@ -0,0 +1,4 @@ +class MyType {}; +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: class MyType {}; diff --git a/clang-tools-extra/test/clang-apply-replacements/ignore-conflict.cpp b/clang-tools-extra/test/clang-apply-replacements/ignore-conflict.cpp new file mode 100644 index 00000000000000..4e681dd15f0dbc --- /dev/null +++ b/clang-tools-extra/test/clang-apply-replacements/ignore-conflict.cpp @@ -0,0 +1,5 @@ +// RUN: mkdir -p %T/Inputs/ignore-conflict +// RUN: grep -Ev "// *[A-Z-]+:" %S/Inputs/ignore-conflict/ignore-conflict.cpp > %T/Inputs/ignore-conflict/ignore-conflict.cpp +// RUN: sed "s#\$(path)#%/T/Inputs/ignore-conflict#" %S/Inputs/ignore-conflict/file1.yaml > %T/Inputs/ignore-conflict/file1.yaml +// RUN: clang-apply-replacements --ignore-insert-conflict %T/Inputs/ignore-conflict +// RUN: FileCheck -input-file=%T/Inputs/ignore-conflict/ignore-conflict.cpp %S/Inputs/ignore-conflict/ignore-conflict.cpp diff --git a/clang/include/clang/Tooling/Refactoring/AtomicChange.h b/clang/include/clang/Tooling/Refactoring/AtomicChange.h index 3945a7c9fefbde..92f322ef7d80b5 100644 --- a/clang/include/clang/Tooling/Refactoring/AtomicChange.h +++ b/clang/include/clang/Tooling/Refactoring/AtomicChange.h @@ -116,6 +116,8 @@ class AtomicChange { /// Returns a const reference to existing replacements. 
const Replacements &getReplacements() const { return Replaces; } + Replacements &getReplacements() { return Replaces; } + llvm::ArrayRef getInsertedHeaders() const { return InsertedHeaders; } From 503d5771b6c5e3544a9fa3be6b8d085ffbbd4057 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 30 May 2022 12:48:10 +0700 Subject: [PATCH 797/908] [JumpThreading][NFCI] Reuse existing DT instead of recomputation This whole part with recomputation of BPI and BFI looks redundant, and we tried to get rid of it in D124439. Unfortunately, it causes some hard-to-reproduce failures due to invalid state of analysis. Until this is investigated and fixed, let's try to reuse at least part of the available analyses. DT is available at this point, and there is no need to recompute it. Please revert if you see it causing *any* behavior changes. --- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index a9e5b811f15d46..b4b2b6cacd1936 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -317,7 +317,7 @@ bool JumpThreading::runOnFunction(Function &F) { std::unique_ptr BFI; std::unique_ptr BPI; if (F.hasProfileData()) { - LoopInfo LI{DominatorTree(F)}; + LoopInfo LI{*DT}; BPI.reset(new BranchProbabilityInfo(F, LI, TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } From a544710cd43ba9f7729a613c58729f146e792a8e Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 30 May 2022 10:08:39 +0800 Subject: [PATCH 798/908] [Driver] Enable to use C++20 standalne by -fcxx-modules This patch allows users to use C++20 modules via -fcxx-modules. Previously, we could only use them under -std=c++20. Given that users can already use C++20 coroutines standalone via -fcoroutines-ts, it makes sense to offer an option to use C++20 modules without enabling C++20. 
Reviewed By: iains, MaskRay Differential Revision: https://reviews.llvm.org/D120540 --- clang-tools-extra/test/pp-trace/pp-trace-modules.cpp | 2 +- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 6 ++++++ clang/test/Driver/modules.cpp | 11 +++++++++++ clang/test/Modules/cxx-modules.cppm | 7 +++++++ 5 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 clang/test/Modules/cxx-modules.cppm diff --git a/clang-tools-extra/test/pp-trace/pp-trace-modules.cpp b/clang-tools-extra/test/pp-trace/pp-trace-modules.cpp index 32d6c48e5d448f..028c967a4e0d8e 100644 --- a/clang-tools-extra/test/pp-trace/pp-trace-modules.cpp +++ b/clang-tools-extra/test/pp-trace/pp-trace-modules.cpp @@ -1,5 +1,5 @@ // RUN: rm -rf %t -// RUN: pp-trace -callbacks '*,-FileChanged,-MacroDefined' %s -- -x objective-c++ -undef -target x86_64 -std=c++11 -fmodules -fcxx-modules -fmodules-cache-path=%t -I%S -I%S/Input | FileCheck --strict-whitespace %s +// RUN: pp-trace -callbacks '*,-FileChanged,-MacroDefined' %s -- -x objective-c++ -undef -target x86_64 -std=c++11 -fmodules -fmodules-cache-path=%t -I%S -I%S/Input | FileCheck --strict-whitespace %s // CHECK: --- diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e19baac5860aab..0343e48c1a9d11 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1410,7 +1410,7 @@ defm async_exceptions: BoolFOption<"async-exceptions", PosFlag, NegFlag>; defm cxx_modules : BoolFOption<"cxx-modules", LangOpts<"CPlusPlusModules">, Default, - NegFlag, PosFlag, + NegFlag, PosFlag, BothFlags<[NoXarchOption], " modules for C++">>, ShouldParseIf; def fdebug_pass_arguments : Flag<["-"], "fdebug-pass-arguments">, Group; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 936682e49299e6..5106c0e327b87f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ 
-3627,6 +3627,12 @@ static void RenderModulesOptions(Compilation &C, const Driver &D, HaveModules = true; } + if (Args.hasFlag(options::OPT_fcxx_modules, options::OPT_fno_cxx_modules, + false)) { + CmdArgs.push_back("-fcxx-modules"); + HaveModules = true; + } + // -fmodule-maps enables implicit reading of module map files. By default, // this is enabled if we are using Clang's flavor of precompiled modules. if (Args.hasFlag(options::OPT_fimplicit_module_maps, diff --git a/clang/test/Driver/modules.cpp b/clang/test/Driver/modules.cpp index 87b6cc640cb0de..bac9f715f13a45 100644 --- a/clang/test/Driver/modules.cpp +++ b/clang/test/Driver/modules.cpp @@ -73,3 +73,14 @@ import "foo.h"; // CHECK-HEADER-UNIT-USE: BAR; FOO; #endif + +// Check the independent use of -fcxx-modules +// +// RUN: %clang++ -fcxx-modules -std=c++17 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX17-MODULES +// CHECK-CXX17-MODULES: "-fcxx-modules" +// RUN: %clang++ -fcxx-modules -std=c++14 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX14-MODULES +// CHECK-CXX14-MODULES: "-fcxx-modules" +// RUN: %clang++ -fcxx-modules -std=c++11 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX11-MODULES +// CHECK-CXX11-MODULES: "-fcxx-modules" +// RUN: %clang++ -fcxx-modules -std=c++03 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX03-MODULES +// CHECK-CXX03-MODULES: "-fcxx-modules" diff --git a/clang/test/Modules/cxx-modules.cppm b/clang/test/Modules/cxx-modules.cppm new file mode 100644 index 00000000000000..be17f85f8d17d4 --- /dev/null +++ b/clang/test/Modules/cxx-modules.cppm @@ -0,0 +1,7 @@ +// This tests that we could use C++20 modules standalone. 
+// RUN: %clang -std=c++03 -fcxx-modules -fsyntax-only -Xclang -verify %s +// RUN: %clang -std=c++11 -fcxx-modules -fsyntax-only -Xclang -verify %s +// RUN: %clang -std=c++14 -fcxx-modules -fsyntax-only -Xclang -verify %s +// RUN: %clang -std=c++17 -fcxx-modules -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics +export module M; From 751c7be5b20ff515267d977f96d63458dc5e0897 Mon Sep 17 00:00:00 2001 From: Sheng Date: Mon, 30 May 2022 14:26:38 +0800 Subject: [PATCH 799/908] [TableGen] Remove code beads Code beads is useless since the only user, M68k, has moved on to a new encoding/decoding infrastructure. Reviewed By: myhsu Differential Revision: https://reviews.llvm.org/D126349 --- llvm/lib/Target/M68k/CMakeLists.txt | 1 - llvm/lib/Target/M68k/M68kInstrInfo.cpp | 53 ++- .../M68k/MCTargetDesc/M68kMCCodeEmitter.cpp | 337 +----------------- .../M68k/is-pcrel-register-operand-legal.mir | 10 + llvm/utils/TableGen/CMakeLists.txt | 1 - llvm/utils/TableGen/CodeBeadsGen.cpp | 135 ------- llvm/utils/TableGen/TableGen.cpp | 6 - llvm/utils/TableGen/TableGenBackends.h | 1 - 8 files changed, 37 insertions(+), 507 deletions(-) create mode 100644 llvm/test/CodeGen/M68k/is-pcrel-register-operand-legal.mir delete mode 100644 llvm/utils/TableGen/CodeBeadsGen.cpp diff --git a/llvm/lib/Target/M68k/CMakeLists.txt b/llvm/lib/Target/M68k/CMakeLists.txt index 5e398217c181ca..86f785f301aee4 100644 --- a/llvm/lib/Target/M68k/CMakeLists.txt +++ b/llvm/lib/Target/M68k/CMakeLists.txt @@ -7,7 +7,6 @@ tablegen(LLVM M68kGenRegisterInfo.inc -gen-register-info) tablegen(LLVM M68kGenRegisterBank.inc -gen-register-bank) tablegen(LLVM M68kGenInstrInfo.inc -gen-instr-info) tablegen(LLVM M68kGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM M68kGenMCCodeBeads.inc -gen-code-beads) tablegen(LLVM M68kGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM M68kGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM M68kGenDAGISel.inc -gen-dag-isel) diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp 
b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index 105c816f988548..b33469529ca55a 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Regex.h" #include @@ -601,40 +602,26 @@ bool M68kInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { bool M68kInstrInfo::isPCRelRegisterOperandLegal( const MachineOperand &MO) const { assert(MO.isReg()); - const auto *MI = MO.getParent(); - const uint8_t *Beads = M68k::getMCInstrBeads(MI->getOpcode()); - assert(*Beads); - - // Only addressing mode k has (non-pc) register with PCRel - // So we're looking for EA Beads equal to - // `3Bits<011>_1Bit<1>_2Bits<11>` - // FIXME: There is an important caveat and two assumptions - // here: The caveat is that EA encoding always sit on the LSB. - // Where the assumptions are that if there are more than one - // operands, the EA encoding for the source operand always sit - // on the LSB. At the same time, k addressing mode can not be used - // on destination operand. - // The last assumption is kinda dirty so we need to find a way around - // it - const uint8_t EncEAk[3] = {0b011, 0b1, 0b11}; - for (const uint8_t Pat : EncEAk) { - uint8_t Bead = *(Beads++); - if (!Bead) - return false; - switch (Bead & 0xF) { - default: - return false; - case M68kBeads::Bits1: - case M68kBeads::Bits2: - case M68kBeads::Bits3: { - uint8_t Val = (Bead & 0xF0) >> 4; - if (Val != Pat) - return false; - } - } - } - return true; + // Check whether this MO belongs to an instruction with addressing mode 'k', + // Refer to TargetInstrInfo.h for more information about this function. 
+ + const MachineInstr *MI = MO.getParent(); + const unsigned NameIndices = M68kInstrNameIndices[MI->getOpcode()]; + StringRef InstrName(&M68kInstrNameData[NameIndices]); + const unsigned OperandNo = MI->getOperandNo(&MO); + + // If this machine operand is the 2nd operand, then check + // whether the instruction has destination addressing mode 'k'. + if (OperandNo == 1) + return Regex("[A-Z]+(8|16|32)k[a-z](_TC)?$").match(InstrName); + + // If this machine operand is the last one, then check + // whether the instruction has source addressing mode 'k'. + if (OperandNo == MI->getNumExplicitOperands() - 1) + return Regex("[A-Z]+(8|16|32)[a-z]k(_TC)?$").match(InstrName); + + return false; } void M68kInstrInfo::copyPhysReg(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp index 34777063f3db07..f43673e4f14d11 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp @@ -65,26 +65,6 @@ class M68kMCCodeEmitter : public MCCodeEmitter { ~M68kMCCodeEmitter() override {} - // TableGen'erated function - const uint8_t *getGenInstrBeads(const MCInst &MI) const { - return M68k::getMCInstrBeads(MI.getOpcode()); - } - - unsigned encodeBits(unsigned ThisByte, uint8_t Bead, const MCInst &MI, - const MCInstrDesc &Desc, uint64_t &Buffer, - unsigned Offset, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned encodeReg(unsigned ThisByte, uint8_t Bead, const MCInst &MI, - const MCInstrDesc &Desc, uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned encodeImm(unsigned ThisByte, uint8_t Bead, const MCInst &MI, - const MCInstrDesc &Desc, uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo 
&STI) const override; @@ -237,246 +217,6 @@ void M68kMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &Op, } } -unsigned M68kMCCodeEmitter::encodeBits(unsigned ThisByte, uint8_t Bead, - const MCInst &MI, - const MCInstrDesc &Desc, - uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - unsigned Num = 0; - switch (Bead & 0xF) { - case M68kBeads::Bits1: - Num = 1; - break; - case M68kBeads::Bits2: - Num = 2; - break; - case M68kBeads::Bits3: - Num = 3; - break; - case M68kBeads::Bits4: - Num = 4; - break; - } - unsigned char Val = (Bead & 0xF0) >> 4; - - LLVM_DEBUG(dbgs() << "\tEncodeBits" - << " Num: " << Num << " Val: 0x"); - LLVM_DEBUG(dbgs().write_hex(Val) << "\n"); - - Buffer |= (Val << Offset); - - return Num; -} - -unsigned M68kMCCodeEmitter::encodeReg(unsigned ThisByte, uint8_t Bead, - const MCInst &MI, const MCInstrDesc &Desc, - uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - bool DA, Reg; - switch (Bead & 0xF) { - default: - llvm_unreachable("Unrecognized Bead code for register type"); - case M68kBeads::DAReg: - Reg = true; - DA = true; - break; - case M68kBeads::DA: - Reg = false; - DA = true; - break; - case M68kBeads::DReg: - case M68kBeads::Reg: - Reg = true; - DA = false; - break; - } - - unsigned Op = (Bead & 0x70) >> 4; - bool Alt = (Bead & 0x80); - LLVM_DEBUG(dbgs() << "\tEncodeReg" - << " Op: " << Op << ", DA: " << DA << ", Reg: " << Reg - << ", Alt: " << Alt << "\n"); - - auto MIOpIdx = M68k::getLogicalOperandIdx(MI.getOpcode(), Op); - bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL; - - MCOperand MCO; - if (M68kII::hasMultiMIOperands(MI.getOpcode(), Op)) { - if (IsPCRel) { - assert(Alt && - "PCRel addresses use Alt bead register encoding by default"); - MCO = MI.getOperand(MIOpIdx + M68k::PCRelIndex); - } else { - MCO = MI.getOperand(MIOpIdx + (Alt ? 
M68k::MemIndex : M68k::MemBase)); - } - } else { - assert(!Alt && "You cannot use Alt register with a simple operand"); - MCO = MI.getOperand(MIOpIdx); - } - - unsigned RegNum = MCO.getReg(); - auto RI = Ctx.getRegisterInfo(); - - unsigned Written = 0; - if (Reg) { - uint32_t Val = RI->getEncodingValue(RegNum); - Buffer |= (Val & 7) << Offset; - Offset += 3; - Written += 3; - } - - if (DA) { - Buffer |= (uint64_t)M68kII::isAddressRegister(RegNum) << Offset; - Written++; - } - - return Written; -} - -static unsigned EmitConstant(uint64_t Val, unsigned Size, unsigned Pad, - uint64_t &Buffer, unsigned Offset) { - assert(Size + Offset <= 64 && isUIntN(Size, Val) && "Value does not fit"); - - // Writing Value in host's endianness - Buffer |= (Val & ((1ULL << Size) - 1)) << Offset; - return Size + Pad; -} - -unsigned M68kMCCodeEmitter::encodeImm(unsigned ThisByte, uint8_t Bead, - const MCInst &MI, const MCInstrDesc &Desc, - uint64_t &Buffer, unsigned Offset, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - unsigned ThisWord = ThisByte / 2; - unsigned Size = 0; - unsigned Pad = 0; - unsigned FixOffset = 0; - int64_t Addendum = 0; - bool NoExpr = false; - - unsigned Type = Bead & 0xF; - unsigned Op = (Bead & 0x70) >> 4; - bool Alt = (Bead & 0x80); - - auto MIOpIdx = M68k::getLogicalOperandIdx(MI.getOpcode(), Op); - bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL; - - // The PC value upon instruction reading of a short jump will point to the - // next instruction, thus we need to compensate 2 bytes, which is the diff - // between the patch point and the PC. - if (IsPCRel && ThisWord == 0) - Addendum -= 2; - - switch (Type) { - // ??? what happens if it is not byte aligned - // ??? 
is it even possible - case M68kBeads::Disp8: - Size = 8; - Pad = 0; - FixOffset = ThisByte + 1; - Addendum += 1; - break; - case M68kBeads::Imm8: - Size = 8; - Pad = 8; - FixOffset = ThisByte; - break; - case M68kBeads::Imm16: - Size = 16; - Pad = 0; - FixOffset = ThisByte; - break; - case M68kBeads::Imm32: - Size = 32; - Pad = 0; - FixOffset = ThisByte; - break; - case M68kBeads::Imm3: - Size = 3; - Pad = 0; - NoExpr = true; - break; - } - - LLVM_DEBUG(dbgs() << "\tEncodeImm" - << " Op: " << Op << ", Size: " << Size << ", Alt: " << Alt - << "\n"); - - MCOperand MCO; - if (M68kII::hasMultiMIOperands(MI.getOpcode(), Op)) { - - if (IsPCRel) { - assert(!Alt && "You cannot use ALT operand with PCRel"); - MCO = MI.getOperand(MIOpIdx + M68k::PCRelDisp); - } else { - MCO = MI.getOperand(MIOpIdx + (Alt ? M68k::MemOuter : M68k::MemDisp)); - } - - if (MCO.isExpr()) { - assert(!NoExpr && "Cannot use expression here"); - const MCExpr *Expr = MCO.getExpr(); - - // This only makes sense for PCRel instructions since PC points to the - // extension word and Disp8 for example is right justified and requires - // correction. E.g. R_68K_PC32 is calculated as S + A - P, P for Disp8 - // will be EXTENSION_WORD + 1 thus we need to have A equal to 1 to - // compensate. 
- // TODO count extension words - if (IsPCRel && Addendum != 0) { - Expr = MCBinaryExpr::createAdd( - Expr, MCConstantExpr::create(Addendum, Ctx), Ctx); - } - - Fixups.push_back(MCFixup::create( - FixOffset, Expr, getFixupForSize(Size, IsPCRel), MI.getLoc())); - // Write zeros - return EmitConstant(0, Size, Pad, Buffer, Offset); - } - - } else { - MCO = MI.getOperand(MIOpIdx); - if (MCO.isExpr()) { - assert(!NoExpr && "Cannot use expression here"); - const MCExpr *Expr = MCO.getExpr(); - - if (Addendum != 0) { - Expr = MCBinaryExpr::createAdd( - Expr, MCConstantExpr::create(Addendum, Ctx), Ctx); - } - - Fixups.push_back(MCFixup::create( - FixOffset, Expr, getFixupForSize(Size, IsPCRel), MI.getLoc())); - // Write zeros - return EmitConstant(0, Size, Pad, Buffer, Offset); - } - } - - int64_t I = MCO.getImm(); - - // Store 8 as 0, thus making range 1-8 - if (Type == M68kBeads::Imm3 && Alt) { - assert(I && "Cannot encode Alt Imm3 zero value"); - I %= 8; - } else { - assert(isIntN(Size, I)); - } - - uint64_t Imm = I; - - // 32 bit Imm requires HI16 first then LO16 - if (Size == 32) { - Offset += EmitConstant((Imm >> 16) & 0xFFFF, 16, Pad, Buffer, Offset); - EmitConstant(Imm & 0xFFFF, 16, Pad, Buffer, Offset); - return Size; - } - - return EmitConstant(Imm & ((1ULL << Size) - 1), Size, Pad, Buffer, Offset); -} - -#include "M68kGenMCCodeBeads.inc" - void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -490,79 +230,16 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, APInt EncodedInst(16, 0U); APInt Scratch(16, 0U); getBinaryCodeForInstr(MI, Fixups, EncodedInst, Scratch, STI); - if (EncodedInst.getBitWidth()) { - LLVM_DEBUG(dbgs() << "Instruction " << MCII.getName(Opcode) << "(" << Opcode - << ") is using the new code emitter\n"); - ArrayRef Data(EncodedInst.getRawData(), - EncodedInst.getNumWords()); - int64_t InstSize = EncodedInst.getBitWidth(); - for 
(uint64_t Word : Data) { - for (int i = 0; i < 4 && InstSize > 0; ++i, InstSize -= 16) { - support::endian::write(OS, static_cast(Word), - support::big); - Word >>= 16; - } - } - return; - } - - const uint8_t *Beads = getGenInstrBeads(MI); - if (!Beads || !*Beads) { - llvm_unreachable("*** Instruction does not have Beads defined"); - } - uint64_t Buffer = 0; - unsigned Offset = 0; - unsigned ThisByte = 0; - - for (uint8_t Bead = *Beads; Bead; Bead = *++Beads) { - // Check for control beads - if (!(Bead & 0xF)) { - switch (Bead >> 4) { - case M68kBeads::Ignore: - continue; - } - } - - switch (Bead & 0xF) { - default: - llvm_unreachable("Unknown Bead code"); - break; - case M68kBeads::Bits1: - case M68kBeads::Bits2: - case M68kBeads::Bits3: - case M68kBeads::Bits4: - Offset += - encodeBits(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI); - break; - case M68kBeads::DAReg: - case M68kBeads::DA: - case M68kBeads::DReg: - case M68kBeads::Reg: - Offset += - encodeReg(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI); - break; - case M68kBeads::Disp8: - case M68kBeads::Imm8: - case M68kBeads::Imm16: - case M68kBeads::Imm32: - case M68kBeads::Imm3: - Offset += - encodeImm(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI); - break; - } - - // Since M68k is Big Endian we need to rotate each instruction word - while (Offset / 16) { - support::endian::write(OS, Buffer, support::big); - Buffer >>= 16; - Offset -= 16; - ThisByte += 2; + ArrayRef Data(EncodedInst.getRawData(), EncodedInst.getNumWords()); + int64_t InstSize = EncodedInst.getBitWidth(); + for (uint64_t Word : Data) { + for (int i = 0; i < 4 && InstSize > 0; ++i, InstSize -= 16) { + support::endian::write(OS, static_cast(Word), + support::big); + Word >>= 16; } } - - assert(Offset == 0 && "M68k Instructions are % 2 bytes"); - assert((ThisByte && !(ThisByte % 2)) && "M68k Instructions are % 2 bytes"); } MCCodeEmitter *llvm::createM68kMCCodeEmitter(const MCInstrInfo &MCII, diff --git 
a/llvm/test/CodeGen/M68k/is-pcrel-register-operand-legal.mir b/llvm/test/CodeGen/M68k/is-pcrel-register-operand-legal.mir new file mode 100644 index 00000000000000..a755164937b160 --- /dev/null +++ b/llvm/test/CodeGen/M68k/is-pcrel-register-operand-legal.mir @@ -0,0 +1,10 @@ +# RUN: llc -O0 -mtriple=m68k -start-after=prologepilog -verify-machineinstrs %s -o - | FileCheck %s + +name: is-pcrel-register-operand-legal +body: | + bb.0.entry: + ; CHECK: move.l (0,%pc,%a0), (%a1) + ; CHECK: move.l (%a0), (0,%pc,%a1) + + MOV32jk $a1, 0, $a0, implicit-def $ccr + MOV32kj 0, $a1, $a0, implicit-def $ccr \ No newline at end of file diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt index af77744de4ce56..0974ee8eb244b3 100644 --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -8,7 +8,6 @@ add_tablegen(llvm-tblgen LLVM AsmWriterInst.cpp Attributes.cpp CallingConvEmitter.cpp - CodeBeadsGen.cpp CodeEmitterGen.cpp CodeGenDAGPatterns.cpp CodeGenHwModes.cpp diff --git a/llvm/utils/TableGen/CodeBeadsGen.cpp b/llvm/utils/TableGen/CodeBeadsGen.cpp deleted file mode 100644 index 512f3541fa1ff4..00000000000000 --- a/llvm/utils/TableGen/CodeBeadsGen.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//===---------- CodeBeadsGen.cpp - Code Beads Generator -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// CodeBeads are data fields carrying auxiliary information for instructions. -// -// Under the hood it's simply implemented by a `bits` field (with arbitrary -// length) in each TG instruction description, where this TG backend will -// generate a helper function to access it. 
-// -// This is especially useful for expressing variable length encoding -// instructions and complex addressing modes. Since in those cases each -// instruction is usually associated with large amount of information like -// addressing mode details used on a specific operand. Instead of retreating to -// ad-hoc methods to figure out these information when encoding an instruction, -// CodeBeads provide a clean table for the instruction encoder to lookup. -//===----------------------------------------------------------------------===// - -#include "CodeGenInstruction.h" -#include "CodeGenTarget.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" -#include -using namespace llvm; - -namespace { - -class CodeBeadsGen { - RecordKeeper &Records; - -public: - CodeBeadsGen(RecordKeeper &R) : Records(R) {} - void run(raw_ostream &OS); -}; - -void CodeBeadsGen::run(raw_ostream &OS) { - CodeGenTarget Target(Records); - std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); - - // For little-endian instruction bit encodings, reverse the bit order - Target.reverseBitsForLittleEndianEncoding(); - - ArrayRef NumberedInstructions = - Target.getInstructionsByEnumValue(); - - // Emit function declaration - OS << "const uint8_t *llvm::" << Target.getInstNamespace(); - OS << "::getMCInstrBeads(unsigned Opcode) {\n"; - - // First, get the maximum bit length among all beads. 
And do some - // simple validation - unsigned MaxBitLength = 0; - - for (const CodeGenInstruction *CGI : NumberedInstructions) { - Record *R = CGI->TheDef; - if (!R->getValue("Beads")) - continue; - - BitsInit *BI = R->getValueAsBitsInit("Beads"); - if (!BI->isComplete()) { - PrintFatalError(R->getLoc(), "Record `" + R->getName() + - "', bit field 'Beads' is not complete"); - } - - MaxBitLength = std::max(MaxBitLength, BI->getNumBits()); - } - - // Number of bytes - unsigned Parts = MaxBitLength / 8; - - // Emit instruction base values - OS << " static const uint8_t InstBits[][" << Parts << "] = {\n"; - for (const CodeGenInstruction *CGI : NumberedInstructions) { - Record *R = CGI->TheDef; - - if (R->getValueAsString("Namespace") == "TargetOpcode" || - !R->getValue("Beads")) { - OS << "\t{ 0x0 },\t// "; - if (R->getValueAsBit("isPseudo")) - OS << "(Pseudo) "; - OS << R->getName() << "\n"; - continue; - } - - BitsInit *BI = R->getValueAsBitsInit("Beads"); - - // Convert to byte array: - // [dcba] -> [a][b][c][d] - OS << "\t{"; - for (unsigned p = 0; p < Parts; ++p) { - unsigned Right = 8 * p; - unsigned Left = Right + 8; - - uint8_t Value = 0; - for (unsigned i = Right; i != Left; ++i) { - unsigned Shift = i % 8; - if (auto *B = dyn_cast(BI->getBit(i))) { - Value |= (static_cast(B->getValue()) << Shift); - } else { - PrintFatalError(R->getLoc(), "Record `" + R->getName() + - "', bit 'Beads[" + Twine(i) + - "]' is not defined"); - } - } - - if (p) - OS << ','; - OS << " 0x"; - OS.write_hex(Value); - OS << ""; - } - OS << " }," << '\t' << "// " << R->getName() << "\n"; - } - OS << "\t{ 0x0 }\n };\n"; - - // Emit initial function code - OS << " return InstBits[Opcode];\n" - << "}\n\n"; -} - -} // End anonymous namespace - -namespace llvm { - -void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Machine Code Beads", OS); - CodeBeadsGen(RK).run(OS); -} - -} // namespace llvm diff --git a/llvm/utils/TableGen/TableGen.cpp 
b/llvm/utils/TableGen/TableGen.cpp index cde49919f54fe5..e9279a64267005 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -25,7 +25,6 @@ enum ActionType { NullBackend, DumpJSON, GenEmitter, - GenCodeBeads, GenRegisterInfo, GenInstrInfo, GenInstrDocs, @@ -82,8 +81,6 @@ cl::opt Action( clEnumValN(DumpJSON, "dump-json", "Dump all records as machine-readable JSON"), clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), - clEnumValN(GenCodeBeads, "gen-code-beads", - "Generate machine code beads"), clEnumValN(GenRegisterInfo, "gen-register-info", "Generate registers and register classes info"), clEnumValN(GenInstrInfo, "gen-instr-info", @@ -164,9 +161,6 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenEmitter: EmitCodeEmitter(Records, OS); break; - case GenCodeBeads: - EmitCodeBeads(Records, OS); - break; case GenRegisterInfo: EmitRegisterInfo(Records, OS); break; diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index 224efa98bae16c..c53b71cad5996c 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -67,7 +67,6 @@ void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS); void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS); void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS); void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS); -void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS); void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS); void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS); void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS); From b16460bb48ca641cdaa1d8f6c3ff0940d387d0d6 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 30 May 2022 06:27:55 +0000 Subject: [PATCH 800/908] [gn build] Port 751c7be5b20f --- llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn 
b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index 09bf4d391eb13d..f9ff0de9ea1305 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -12,7 +12,6 @@ executable("llvm-tblgen") { "Attributes.cpp", "CTagsEmitter.cpp", "CallingConvEmitter.cpp", - "CodeBeadsGen.cpp", "CodeEmitterGen.cpp", "CodeGenDAGPatterns.cpp", "CodeGenHwModes.cpp", From 738c20e6df01217b6364c3b75d0ced6b6fb6277e Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 30 May 2022 14:38:04 +0800 Subject: [PATCH 801/908] [NFC] Use %clang instead of %clang++ in tests Previously the tests uses %clang++ instead of %clang, which cause the test fail in windows. --- clang/test/Driver/modules.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/Driver/modules.cpp b/clang/test/Driver/modules.cpp index bac9f715f13a45..472b07c6579927 100644 --- a/clang/test/Driver/modules.cpp +++ b/clang/test/Driver/modules.cpp @@ -76,11 +76,11 @@ FOO; // Check the independent use of -fcxx-modules // -// RUN: %clang++ -fcxx-modules -std=c++17 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX17-MODULES +// RUN: %clang -fcxx-modules -std=c++17 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX17-MODULES // CHECK-CXX17-MODULES: "-fcxx-modules" -// RUN: %clang++ -fcxx-modules -std=c++14 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX14-MODULES +// RUN: %clang -fcxx-modules -std=c++14 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX14-MODULES // CHECK-CXX14-MODULES: "-fcxx-modules" -// RUN: %clang++ -fcxx-modules -std=c++11 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX11-MODULES +// RUN: %clang -fcxx-modules -std=c++11 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX11-MODULES // CHECK-CXX11-MODULES: "-fcxx-modules" -// RUN: %clang++ -fcxx-modules -std=c++03 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX03-MODULES +// RUN: %clang -fcxx-modules -std=c++03 -### %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-CXX03-MODULES // CHECK-CXX03-MODULES: "-fcxx-modules" From a5ddd4a23858044beb3c04ebc6b965760eb7c125 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 30 May 2022 09:01:55 +0200 Subject: [PATCH 802/908] [pseudo] Remove an unnecessary nullable check diagnostic in the bnf grammar, NFC. This diagnostic has been handled in eliminateOptional. --- clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp index 7ca5bc1a90221d..f581adb3932ef9 100644 --- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp @@ -270,10 +270,6 @@ class GrammarBuilder { if (T.Rules[RID] == T.Rules[RID + 1]) Diagnostics.push_back( llvm::formatv("Duplicate rule: `{0}`", G.dumpRule(RID))); - // Warning for nullable nonterminals - if (T.Rules[RID].Size == 0) - Diagnostics.push_back( - llvm::formatv("Rule `{0}` has a nullable RHS", G.dumpRule(RID))); } // symbol-id -> used counts std::vector UseCounts(T.Nonterminals.size(), 0); From bcf3d5248678db3bc12eda9fbc212ab2c4fbcc0f Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Mon, 30 May 2022 08:32:01 +0200 Subject: [PATCH 803/908] [MLIR][GPU] Expose GpuParallelLoopMapping as non-test pass. 
Reviewed By: bondhugula, herhut Differential Revision: https://reviews.llvm.org/D126199 --- .../mlir/Dialect/GPU/ParallelLoopMapper.h | 8 --- mlir/include/mlir/Dialect/GPU/Passes.h | 7 +++ mlir/include/mlir/Dialect/GPU/Passes.td | 7 +++ .../GPU/Transforms/ParallelLoopMapper.cpp | 52 +++++++++++-------- mlir/test/Dialect/GPU/mapping.mlir | 2 +- mlir/test/lib/Dialect/GPU/CMakeLists.txt | 1 - .../GPU/TestGpuParallelLoopMapping.cpp | 47 ----------------- mlir/tools/mlir-opt/mlir-opt.cpp | 2 - 8 files changed, 46 insertions(+), 80 deletions(-) delete mode 100644 mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h index 07d92e82ab5afa..9ae3683298f681 100644 --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h @@ -60,13 +60,5 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor, LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayRef mapping); } // namespace gpu - -/// Maps the parallel loops found in the given function to workgroups. The first -/// loop encountered will be mapped to the global workgroup and the second loop -/// encountered to the local workgroup. Within each mapping, the first three -/// dimensions are mapped to x/y/z hardware ids and all following dimensions are -/// mapped to sequential loops. -void greedilyMapParallelSCFToGPU(Region ®ion); - } // namespace mlir #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h index b9b127c6f5dbf1..53f3f84efbaa5b 100644 --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -39,6 +39,13 @@ createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef()); /// Rewrites a function region so that GPU ops execute asynchronously. 
std::unique_ptr> createGpuAsyncRegionPass(); +/// Maps the parallel loops found in the given function to workgroups. The first +/// loop encountered will be mapped to the global workgroup and the second loop +/// encountered to the local workgroup. Within each mapping, the first three +/// dimensions are mapped to x/y/z hardware ids and all following dimensions are +/// mapped to sequential loops. +std::unique_ptr> createGpuMapParallelLoopsPass(); + /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect. void populateGpuAllReducePatterns(RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td index 0752839fe51541..f5786e877713b3 100644 --- a/mlir/include/mlir/Dialect/GPU/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Passes.td @@ -29,4 +29,11 @@ def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> { let dependentDialects = ["async::AsyncDialect"]; } +def GpuMapParallelLoopsPass + : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> { + let summary = "Greedily maps loops to GPU hardware dimensions."; + let constructor = "mlir::createGpuMapParallelLoopsPass()"; + let description = "Greedily maps loops to GPU hardware dimensions."; +} + #endif // MLIR_DIALECT_GPU_PASSES diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp index b032169188bcac..c7a1ef3994f55c 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp @@ -13,26 +13,25 @@ #include "mlir/Dialect/GPU/ParallelLoopMapper.h" +#include "PassDetail.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/IR/AffineMap.h" #include "mlir/Pass/Pass.h" -using namespace mlir; -using namespace mlir::gpu; -using namespace mlir::scf; - #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc" #include 
"mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc" + namespace mlir { -namespace gpu { -StringRef getMappingAttrName() { return "mapping"; } +using scf::ParallelOp; -ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor, - AffineMap map, - AffineMap bound) { +StringRef gpu::getMappingAttrName() { return "mapping"; } + +gpu::ParallelLoopDimMapping +gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map, + AffineMap bound) { MLIRContext *context = map.getContext(); OpBuilder builder(context); return ParallelLoopDimMapping::get( @@ -40,8 +39,8 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor, AffineMapAttr::get(map), AffineMapAttr::get(bound), context); } -LogicalResult setMappingAttr(scf::ParallelOp ploopOp, - ArrayRef mapping) { +LogicalResult gpu::setMappingAttr(ParallelOp ploopOp, + ArrayRef mapping) { // Verify that each processor is mapped to only once. llvm::DenseSet specifiedMappings; for (auto dimAttr : mapping) { @@ -56,20 +55,17 @@ LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs)); return success(); } -} // namespace gpu -} // namespace mlir +namespace gpu { namespace { - enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 }; +} // namespace static constexpr int kNumHardwareIds = 3; -} // namespace - /// Bounded increment on MappingLevel. Increments to the next /// level unless Sequential was already reached. -MappingLevel &operator++(MappingLevel &mappingLevel) { +static MappingLevel &operator++(MappingLevel &mappingLevel) { if (mappingLevel < Sequential) { mappingLevel = static_cast(mappingLevel + 1); } @@ -82,8 +78,7 @@ MappingLevel &operator++(MappingLevel &mappingLevel) { /// TODO: Make this use x for the inner-most loop that is /// distributed to map to x, the next innermost to y and the next innermost to /// z. 
-static gpu::Processor getHardwareIdForMapping(MappingLevel level, - int dimension) { +static Processor getHardwareIdForMapping(MappingLevel level, int dimension) { if (dimension >= kNumHardwareIds || level == Sequential) return Processor::Sequential; @@ -145,6 +140,21 @@ static void mapParallelOp(ParallelOp parallelOp, } } -void mlir::greedilyMapParallelSCFToGPU(Region &region) { - region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); +namespace { +struct GpuMapParallelLoopsPass + : public GpuMapParallelLoopsPassBase { + void runOnOperation() override { + for (Region &region : getOperation()->getRegions()) { + region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); + } + } +}; + +} // namespace +} // namespace gpu +} // namespace mlir + +std::unique_ptr> +mlir::createGpuMapParallelLoopsPass() { + return std::make_unique(); } diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir index ff5b07f1844c56..8c233648bca908 100644 --- a/mlir/test/Dialect/GPU/mapping.mlir +++ b/mlir/test/Dialect/GPU/mapping.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s +// RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) { diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt index 97fc6699f6fbf7..65f4780ea82e35 100644 --- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt @@ -3,7 +3,6 @@ add_mlir_library(MLIRGPUTestPasses TestConvertGPUKernelToCubin.cpp TestConvertGPUKernelToHsaco.cpp TestGpuMemoryPromotion.cpp - TestGpuParallelLoopMapping.cpp TestGpuRewrite.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp b/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp deleted file mode 100644 index f53abba6a21b12..00000000000000 ---
a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===- TestGPUParallelLoopMapping.cpp - Test pass for GPU loop mapping ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the pass testing the utilities for mapping parallel -// loops to gpu hardware ids. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/GPU/ParallelLoopMapper.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; - -namespace { -/// Simple pass for testing the mapping of parallel loops to hardware ids using -/// a greedy mapping strategy. -struct TestGpuGreedyParallelLoopMappingPass - : public PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( - TestGpuGreedyParallelLoopMappingPass) - - StringRef getArgument() const final { - return "test-gpu-greedy-parallel-loop-mapping"; - } - StringRef getDescription() const final { - return "Greedily maps all parallel loops to gpu hardware ids."; - } - void runOnOperation() override { - for (Region &region : getOperation()->getRegions()) - greedilyMapParallelSCFToGPU(region); - } -}; -} // namespace - -namespace mlir { -namespace test { -void registerTestGpuParallelLoopMappingPass() { - PassRegistration(); -} -} // namespace test -} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 78814df031e6e7..dcd8946d9c4045 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -79,7 +79,6 @@ void registerTestDynamicPipelinePass(); void registerTestExpandTanhPass(); void registerTestComposeSubView(); void registerTestMultiBuffering(); -void registerTestGpuParallelLoopMappingPass(); void
registerTestIRVisitorsPass(); void registerTestGenericIRVisitorsPass(); void registerTestGenericIRVisitorsInterruptPass(); @@ -176,7 +175,6 @@ void registerTestPasses() { mlir::test::registerTestExpandTanhPass(); mlir::test::registerTestComposeSubView(); mlir::test::registerTestMultiBuffering(); - mlir::test::registerTestGpuParallelLoopMappingPass(); mlir::test::registerTestIRVisitorsPass(); mlir::test::registerTestGenericIRVisitorsPass(); mlir::test::registerTestInterfaces(); From f5fa633b0955a8cee878b384801038fccef11fdc Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Fri, 27 May 2022 15:59:19 +0200 Subject: [PATCH 804/908] [mlir] Lower complex.sqrt and complex.atan2 to Arithmetic dialect. I don't see a point in the lit tests here, since sqrt, mul and other ops expand as well. I just added "smoke" tests to verify that the conversion works and does not create any illegal ops. I will create a patch that adds a simple integration test to mlir/test/Integration/Dialect/ComplexOps/ that will compare the values.
Differential Revision: https://reviews.llvm.org/D126539 --- .../ComplexToStandard/ComplexToStandard.cpp | 113 +++++++++++++++++- .../convert-to-standard.mlir | 47 +++++++- 2 files changed, 158 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index e1eca6181dff96..194e1669a86c9c 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -44,6 +44,49 @@ struct AbsOpConversion : public OpConversionPattern { } }; +// atan2(y,x) = -i * log((x + i * y)/sqrt(x**2+y**2)) +struct Atan2OpConversion : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(complex::Atan2Op op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); + + auto type = op.getType().cast(); + Type elementType = type.getElementType(); + + Value lhs = adaptor.getLhs(); + Value rhs = adaptor.getRhs(); + + Value rhsSquared = b.create(type, rhs, rhs); + Value lhsSquared = b.create(type, lhs, lhs); + Value rhsSquaredPlusLhsSquared = + b.create(type, rhsSquared, lhsSquared); + Value sqrtOfRhsSquaredPlusLhsSquared = + b.create(type, rhsSquaredPlusLhsSquared); + + Value zero = + b.create(elementType, b.getZeroAttr(elementType)); + Value one = b.create(elementType, + b.getFloatAttr(elementType, 1)); + Value i = b.create(type, zero, one); + Value iTimesLhs = b.create(i, lhs); + Value rhsPlusILhs = b.create(rhs, iTimesLhs); + + Value divResult = + b.create(rhsPlusILhs, sqrtOfRhsSquaredPlusLhsSquared); + Value logResult = b.create(divResult); + + Value negativeOne = b.create( + elementType, b.getFloatAttr(elementType, -1)); + Value negativeI = b.create(type, zero, negativeOne); + + rewriter.replaceOpWithNewOp(op, negativeI, logResult); + return success(); + } +}; + template struct 
ComparisonOpConversion : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -700,6 +743,72 @@ struct SinOpConversion : public TrigonometricOpConversion { } }; +// The algorithm is listed in https://dl.acm.org/doi/pdf/10.1145/363717.363780. +struct SqrtOpConversion : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(complex::SqrtOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); + + auto type = op.getType().cast(); + Type elementType = type.getElementType(); + Value arg = adaptor.getComplex(); + + Value zero = + b.create(elementType, b.getZeroAttr(elementType)); + + Value real = b.create(elementType, adaptor.getComplex()); + Value imag = b.create(elementType, adaptor.getComplex()); + + Value absLhs = b.create(real); + Value absArg = b.create(elementType, arg); + Value addAbs = b.create(absLhs, absArg); + Value sqrtAddAbs = b.create(addAbs); + Value sqrtAddAbsDivTwo = b.create( + sqrtAddAbs, b.create( + elementType, b.getFloatAttr(elementType, 2))); + + Value realIsNegative = + b.create(arith::CmpFPredicate::OLT, real, zero); + Value imagIsNegative = + b.create(arith::CmpFPredicate::OLT, imag, zero); + + Value resultReal = sqrtAddAbsDivTwo; + + Value imagDivTwoResultReal = b.create( + imag, b.create(resultReal, resultReal)); + + Value negativeResultReal = b.create(resultReal); + + Value resultImag = b.create( + realIsNegative, + b.create(imagIsNegative, negativeResultReal, + resultReal), + imagDivTwoResultReal); + + resultReal = b.create( + realIsNegative, + b.create( + imag, b.create(resultImag, resultImag)), + resultReal); + + Value realIsZero = + b.create(arith::CmpFPredicate::OEQ, real, zero); + Value imagIsZero = + b.create(arith::CmpFPredicate::OEQ, imag, zero); + Value argIsZero = b.create(realIsZero, imagIsZero); + + resultReal = b.create(argIsZero, zero, resultReal); + resultImag = 
b.create(argIsZero, zero, resultImag); + + rewriter.replaceOpWithNewOp(op, type, resultReal, + resultImag); + return success(); + } +}; + struct SignOpConversion : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -735,6 +844,7 @@ void mlir::populateComplexToStandardConversionPatterns( // clang-format off patterns.add< AbsOpConversion, + Atan2OpConversion, ComparisonOpConversion, ComparisonOpConversion, BinaryComplexOpConversion, @@ -748,7 +858,8 @@ void mlir::populateComplexToStandardConversionPatterns( MulOpConversion, NegOpConversion, SignOpConversion, - SinOpConversion>(patterns.getContext()); + SinOpConversion, + SqrtOpConversion>(patterns.getContext()); // clang-format on } diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index 6f57e722b520ef..bf410287185179 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard)" | FileCheck %s +// RUN: mlir-opt %s --convert-complex-to-standard --split-input-file | FileCheck %s // CHECK-LABEL: func @complex_abs // CHECK-SAME: %[[ARG:.*]]: complex @@ -14,6 +14,17 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK: %[[NORM:.*]] = math.sqrt %[[SQ_NORM]] : f32 // CHECK: return %[[NORM]] : f32 +// ----- + +// CHECK-LABEL: func @complex_atan2 +func.func @complex_atan2(%lhs: complex, + %rhs: complex) -> complex { + %atan2 = complex.atan2 %lhs, %rhs : complex + return %atan2 : complex +} + +// ----- + // CHECK-LABEL: func @complex_add // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_add(%lhs: complex, %rhs: complex) -> complex { @@ -29,6 +40,8 @@ func.func @complex_add(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : 
complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func @complex_cos // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_cos(%arg: complex) -> complex { @@ -50,6 +63,8 @@ func.func @complex_cos(%arg: complex) -> complex { // CHECK-DAG: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] +// ----- + // CHECK-LABEL: func @complex_div // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_div(%lhs: complex, %rhs: complex) -> complex { @@ -159,6 +174,8 @@ func.func @complex_div(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL_WITH_SPECIAL_CASES]], %[[RESULT_IMAG_WITH_SPECIAL_CASES]] : complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func @complex_eq // CHECK-SAME: %[[LHS:.*]]: complex, %[[RHS:.*]]: complex func.func @complex_eq(%lhs: complex, %rhs: complex) -> i1 { @@ -174,6 +191,8 @@ func.func @complex_eq(%lhs: complex, %rhs: complex) -> i1 { // CHECK: %[[EQUAL:.*]] = arith.andi %[[REAL_EQUAL]], %[[IMAG_EQUAL]] : i1 // CHECK: return %[[EQUAL]] : i1 +// ----- + // CHECK-LABEL: func @complex_exp // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_exp(%arg: complex) -> complex { @@ -190,6 +209,8 @@ func.func @complex_exp(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func.func @complex_expm1( // CHECK-SAME: %[[ARG:.*]]: complex) -> complex { func.func @complex_expm1(%arg: complex) -> complex { @@ -211,6 +232,8 @@ func.func @complex_expm1(%arg: complex) -> complex { // CHECK: %[[RES:.*]] = complex.create %[[REAL_M1]], %[[IMAG]] : complex // CHECK: return %[[RES]] : complex +// ----- + // CHECK-LABEL: func @complex_log // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_log(%arg: complex) -> complex { @@ -230,6 +253,8 @@ func.func @complex_log(%arg: 
complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func @complex_log1p // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_log1p(%arg: complex) -> complex { @@ -254,6 +279,8 @@ func.func @complex_log1p(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func @complex_mul // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_mul(%lhs: complex, %rhs: complex) -> complex { @@ -372,6 +399,8 @@ func.func @complex_mul(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[FINAL_REAL]], %[[FINAL_IMAG]] : complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func @complex_neg // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_neg(%arg: complex) -> complex { @@ -385,6 +414,8 @@ func.func @complex_neg(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[NEG_REAL]], %[[NEG_IMAG]] : complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func @complex_neq // CHECK-SAME: %[[LHS:.*]]: complex, %[[RHS:.*]]: complex func.func @complex_neq(%lhs: complex, %rhs: complex) -> i1 { @@ -400,6 +431,8 @@ func.func @complex_neq(%lhs: complex, %rhs: complex) -> i1 { // CHECK: %[[NOT_EQUAL:.*]] = arith.ori %[[REAL_NOT_EQUAL]], %[[IMAG_NOT_EQUAL]] : i1 // CHECK: return %[[NOT_EQUAL]] : i1 +// ----- + // CHECK-LABEL: func @complex_sin // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_sin(%arg: complex) -> complex { @@ -421,6 +454,8 @@ func.func @complex_sin(%arg: complex) -> complex { // CHECK-DAG: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] +// ----- + // CHECK-LABEL: func @complex_sign // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_sign(%arg: 
complex) -> complex { @@ -445,6 +480,8 @@ func.func @complex_sign(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = arith.select %[[IS_ZERO]], %[[ARG]], %[[SIGN]] : complex // CHECK: return %[[RESULT]] : complex +// ----- + // CHECK-LABEL: func @complex_sub // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_sub(%lhs: complex, %rhs: complex) -> complex { @@ -459,3 +496,11 @@ func.func @complex_sub(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT_IMAG:.*]] = arith.subf %[[IMAG_LHS]], %[[IMAG_RHS]] : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex + +// ----- + +// CHECK-LABEL: func @complex_sqrt +func.func @complex_sqrt(%arg: complex) -> complex { + %sqrt = complex.sqrt %arg : complex + return %sqrt : complex +} From 1721ff1dfd4592f1a391d9161627e2a68198a44e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 25 May 2022 16:37:38 +0200 Subject: [PATCH 805/908] [GVN] Enable enable-split-backedge-in-load-pre option by default This option was added in D89854. It prevents GVN from performing load PRE in a loop, if doing so would require critical edge splitting on the backedge. From the review: > I know that GVN Load PRE negatively impacts peeling, > loop predication, so the passes expecting that latch has > a conditional branch. In the PhaseOrdering test in this patch, splitting the backedge negatively affects vectorization: After critical edge splitting, the loop gets rotated, effectively peeling off the first loop iteration. The effect is that the first element is handled separately, then the bulk of the elements use a vectorized reduction (but using unaligned, off-by-one memory accesses) and then a tail of 15 elements is handled separately again. It's probably worth noting that the loop load PRE from D99926 is not affected by this change (as it does not need backedge splitting). 
This is about normal load PRE that happens to occur inside a loop. Differential Revision: https://reviews.llvm.org/D126382 --- llvm/lib/Transforms/Scalar/GVN.cpp | 2 +- .../PRE/load-pre-metadata-accsess-group.ll | 2 +- .../GVN/PRE/pre-loop-load-through-select.ll | 2 +- llvm/test/Transforms/GVN/PRE/rle.ll | 4 +- llvm/test/Transforms/GVN/PRE/volatile.ll | 2 +- .../GVN/condprop-memdep-invalidation.ll | 2 +- .../X86/vector-reduction-known-first-value.ll | 95 ++++--------------- 7 files changed, 23 insertions(+), 86 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 34c1483a37e04d..290702c15b1f43 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -107,7 +107,7 @@ static cl::opt GVNEnableLoadInLoopPRE("enable-load-in-loop-pre", cl::init(true)); static cl::opt GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", - cl::init(true)); + cl::init(false)); static cl::opt GVNEnableMemDep("enable-gvn-memdep", cl::init(true)); static cl::opt MaxNumDeps( diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll index 5cde83c1e21bd5..cfe61ce639a0a2 100644 --- a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll +++ b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loops -basic-aa -gvn -enable-load-pre -S | FileCheck %s +; RUN: opt < %s -loops -basic-aa -gvn -enable-split-backedge-in-load-pre -S | FileCheck %s define dso_local void @test1(i32* nocapture readonly %aa, i32* nocapture %bb) local_unnamed_addr { ; CHECK-LABEL: @test1( diff --git a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll index 56aa111bc13ff2..e3ab965e199dfb 100644 --- 
a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='require,loop(loop-simplifycfg),gvn' -S %s | FileCheck %s +; RUN: opt -passes='require,loop(loop-simplifycfg),gvn' -enable-split-backedge-in-load-pre -S %s | FileCheck %s define i32 @test_pointer_phi_select_same_object(i32* %ptr, i32* %end) { ; CHECK-LABEL: @test_pointer_phi_select_same_object( diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll index 4c9730b8e465f5..4722e151f72bcf 100644 --- a/llvm/test/Transforms/GVN/PRE/rle.ll +++ b/llvm/test/Transforms/GVN/PRE/rle.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,LE -; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,BE +; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -enable-split-backedge-in-load-pre -S -dce | FileCheck %s --check-prefixes=CHECK,LE +; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basic-aa -gvn -enable-split-backedge-in-load-pre -S -dce | FileCheck %s --check-prefixes=CHECK,BE ;; Trivial RLE test. 
define i32 @test0(i32 %V, i32* %P) { diff --git a/llvm/test/Transforms/GVN/PRE/volatile.ll b/llvm/test/Transforms/GVN/PRE/volatile.ll index 1a51388723f1df..86dd251e6b36e9 100644 --- a/llvm/test/Transforms/GVN/PRE/volatile.ll +++ b/llvm/test/Transforms/GVN/PRE/volatile.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Tests that check our handling of volatile instructions encountered ; when scanning for dependencies -; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s +; RUN: opt -basic-aa -gvn -enable-split-backedge-in-load-pre -S < %s | FileCheck %s ; Check that we can bypass a volatile load when searching ; for dependencies of a non-volatile load diff --git a/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll b/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll index 9ff0aef0a28382..69c997e1bb4fe7 100644 --- a/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll +++ b/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=gvn -S %s | FileCheck %s +; RUN: opt -passes=gvn -enable-split-backedge-in-load-pre -S %s | FileCheck %s ; Test case for PR31651. 
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll index 508a501d9e847f..e4ee7e7d190359 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll @@ -11,89 +11,26 @@ define i16 @test(ptr %ptr) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[FIRST:%.*]] = load i8, ptr [[PTR:%.*]], align 1 ; CHECK-NEXT: tail call void @use(i8 [[FIRST]]) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[VAL_EXT2:%.*]] = zext i8 [[FIRST]] to i16 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[VAL_EXT2]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ [[TMP0]], [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD6]] to <8 x i16> -; CHECK-NEXT: [[TMP5]] = add <8 x i16> [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[TMP6]] = add <8 x i16> [[VEC_PHI5]], [[TMP4]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds i8, ptr [[PTR]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i16> +; CHECK-NEXT: [[TMP4]] = add <8 x i16> [[VEC_PHI]], [[TMP2]] +; CHECK-NEXT: [[TMP5]] = add <8 x i16> [[VEC_PHI1]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008 -; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: loop.loop_crit_edge: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i16> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[BIN_RDX]]) -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1009 -; CHECK-NEXT: [[VAL_PRE:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT]], align 1 -; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i8 [[VAL_PRE]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT:%.*]] = add i16 [[TMP8]], [[VAL_EXT]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1010 -; CHECK-NEXT: [[VAL_PRE_1:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_1]], align 1 -; CHECK-NEXT: [[VAL_EXT_1:%.*]] = zext i8 [[VAL_PRE_1]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_1:%.*]] = add i16 [[ACCUM_NEXT]], [[VAL_EXT_1]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1011 -; CHECK-NEXT: [[VAL_PRE_2:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_2]], align 1 -; CHECK-NEXT: [[VAL_EXT_2:%.*]] = zext i8 [[VAL_PRE_2]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_2:%.*]] = add i16 [[ACCUM_NEXT_1]], [[VAL_EXT_2]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i8, ptr 
[[PTR]], i64 1012 -; CHECK-NEXT: [[VAL_PRE_3:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_3]], align 1 -; CHECK-NEXT: [[VAL_EXT_3:%.*]] = zext i8 [[VAL_PRE_3]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_3:%.*]] = add i16 [[ACCUM_NEXT_2]], [[VAL_EXT_3]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1013 -; CHECK-NEXT: [[VAL_PRE_4:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_4]], align 1 -; CHECK-NEXT: [[VAL_EXT_4:%.*]] = zext i8 [[VAL_PRE_4]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_4:%.*]] = add i16 [[ACCUM_NEXT_3]], [[VAL_EXT_4]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1014 -; CHECK-NEXT: [[VAL_PRE_5:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_5]], align 1 -; CHECK-NEXT: [[VAL_EXT_5:%.*]] = zext i8 [[VAL_PRE_5]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_5:%.*]] = add i16 [[ACCUM_NEXT_4]], [[VAL_EXT_5]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1015 -; CHECK-NEXT: [[VAL_PRE_6:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_6]], align 1 -; CHECK-NEXT: [[VAL_EXT_6:%.*]] = zext i8 [[VAL_PRE_6]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_6:%.*]] = add i16 [[ACCUM_NEXT_5]], [[VAL_EXT_6]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1016 -; CHECK-NEXT: [[VAL_PRE_7:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_7]], align 1 -; CHECK-NEXT: [[VAL_EXT_7:%.*]] = zext i8 [[VAL_PRE_7]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_7:%.*]] = add i16 [[ACCUM_NEXT_6]], [[VAL_EXT_7]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_8:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1017 -; CHECK-NEXT: [[VAL_PRE_8:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_8]], align 1 -; CHECK-NEXT: [[VAL_EXT_8:%.*]] = zext i8 [[VAL_PRE_8]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_8:%.*]] = add i16 [[ACCUM_NEXT_7]], [[VAL_EXT_8]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_9:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1018 -; CHECK-NEXT: [[VAL_PRE_9:%.*]] = load i8, 
ptr [[GEP_PHI_TRANS_INSERT_9]], align 1 -; CHECK-NEXT: [[VAL_EXT_9:%.*]] = zext i8 [[VAL_PRE_9]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_9:%.*]] = add i16 [[ACCUM_NEXT_8]], [[VAL_EXT_9]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_10:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1019 -; CHECK-NEXT: [[VAL_PRE_10:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_10]], align 1 -; CHECK-NEXT: [[VAL_EXT_10:%.*]] = zext i8 [[VAL_PRE_10]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_10:%.*]] = add i16 [[ACCUM_NEXT_9]], [[VAL_EXT_10]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1020 -; CHECK-NEXT: [[VAL_PRE_11:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_11]], align 1 -; CHECK-NEXT: [[VAL_EXT_11:%.*]] = zext i8 [[VAL_PRE_11]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_11:%.*]] = add i16 [[ACCUM_NEXT_10]], [[VAL_EXT_11]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_12:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1021 -; CHECK-NEXT: [[VAL_PRE_12:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_12]], align 1 -; CHECK-NEXT: [[VAL_EXT_12:%.*]] = zext i8 [[VAL_PRE_12]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_12:%.*]] = add i16 [[ACCUM_NEXT_11]], [[VAL_EXT_12]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_13:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1022 -; CHECK-NEXT: [[VAL_PRE_13:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_13]], align 1 -; CHECK-NEXT: [[VAL_EXT_13:%.*]] = zext i8 [[VAL_PRE_13]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_13:%.*]] = add i16 [[ACCUM_NEXT_12]], [[VAL_EXT_13]] -; CHECK-NEXT: [[GEP_PHI_TRANS_INSERT_14:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1023 -; CHECK-NEXT: [[VAL_PRE_14:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_14]], align 1 -; CHECK-NEXT: [[VAL_EXT_14:%.*]] = zext i8 [[VAL_PRE_14]] to i16 -; CHECK-NEXT: [[ACCUM_NEXT_14:%.*]] = add i16 [[ACCUM_NEXT_13]], [[VAL_EXT_14]] -; CHECK-NEXT: ret i16 [[ACCUM_NEXT_14]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i16> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[BIN_RDX]]) +; CHECK-NEXT: ret i16 [[TMP7]] ; entry: %first = load i8, ptr %ptr From 1956f280376a73013f442f7f5ccb0a9dfe0f6b27 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 30 May 2022 08:51:39 +0100 Subject: [PATCH 806/908] [X86] Adjust vector extend to ymm to match SoG (Issue #54889) znver1 ymm variants of VPMOVSX**/VPMOVZX** instructions require double pumping. Now matches AMD SoG, Agner and instlatx64 numbers. Thanks to @fabian-r for the report --- llvm/lib/Target/X86/X86ScheduleZnver1.td | 2 +- .../llvm-mca/X86/Znver1/resources-avx2.s | 98 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 2e226e81c95225..041d224d40ca9e 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -439,7 +439,7 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s index 19e411dfaef608..820728ef2d1d6a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s @@ -625,30 +625,30 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 2 2.00 vpmovmskb %ymm0, %ecx -# CHECK-NEXT: 2 1 0.50 vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: 2 8 
0.50 * vpmovsxbq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovsxbw (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovsxdq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovsxwd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovsxwq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovzxbq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovzxdq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovzxwd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: 2 8 0.50 * vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovsxbq %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovsxbw %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovsxdq %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovzxbq %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovzxbw %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovzxdq %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 
vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: 3 1 2.00 vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 3 8 2.00 * vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 1 4 1.00 vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 11 1.00 * vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmulhrsw %ymm0, %ymm1, %ymm2 @@ -778,7 +778,7 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 67.00 67.00 - - - - - 71.17 123.67 105.00 41.17 - +# CHECK-NEXT: 67.00 67.00 - - - - - 71.17 159.67 141.00 41.17 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -947,30 +947,30 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpminuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 2.00 - - vpmovmskb %ymm0, %ecx -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbw (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxdq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbd 
(%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbw (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxdq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxbq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxbw %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxdq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxbq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxbw %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - 
vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxdq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmulhrsw %ymm0, %ymm1, %ymm2 From f82967b7866efb6af6e9f1bb471f4180e9844a65 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 30 May 2022 08:59:49 +0100 Subject: [PATCH 807/908] [M68k] Remove unused variable to fix MSVC warning. NFC. --- llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp index f43673e4f14d11..6b093623a106d1 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp @@ -221,7 +221,6 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = MCII.get(Opcode); LLVM_DEBUG(dbgs() << "EncodeInstruction: " << MCII.getName(Opcode) << "(" << Opcode << ")\n"); From 544d6507baa2c708aa96b9d3c18c96838910b1c8 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Mon, 30 May 2022 09:18:23 +0200 Subject: [PATCH 808/908] [MLIR][NVVM] NFC: add labels to test functions. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126631 --- mlir/test/Dialect/LLVMIR/nvvm.mlir | 65 +++++++++++++++++++---------- mlir/test/Target/LLVMIR/nvvmir.mlir | 52 ++++++++++++++++------- 2 files changed, 78 insertions(+), 39 deletions(-) diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 728755d822bb00..9b28841c3c7810 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s -split-input-file -verify-diagnostics | FileCheck %s +// CHECK-LABEL: @nvvm_special_regs func.func @nvvm_special_regs() -> i32 { // CHECK: nvvm.read.ptx.sreg.tid.x : i32 %0 = nvvm.read.ptx.sreg.tid.x : i32 @@ -28,12 +29,14 @@ func.func @nvvm_special_regs() -> i32 { llvm.return %0 : i32 } -func.func @llvm.nvvm.barrier0() { +// CHECK-LABEL: @llvm_nvvm_barrier0 +func.func @llvm_nvvm_barrier0() { // CHECK: nvvm.barrier0 nvvm.barrier0 llvm.return } +// CHECK-LABEL: @nvvm_shfl func.func @nvvm_shfl( %arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : f32) -> i32 { @@ -50,6 +53,7 @@ func.func @nvvm_shfl( llvm.return %0 : i32 } +// CHECK-LABEL: @nvvm_shfl_pred func.func @nvvm_shfl_pred( %arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : f32) -> !llvm.struct<(i32, i1)> { @@ -60,6 +64,7 @@ func.func @nvvm_shfl_pred( llvm.return %0 : !llvm.struct<(i32, i1)> } +// CHECK-LABEL: @nvvm_vote( func.func @nvvm_vote(%arg0 : i32, %arg1 : i1) -> i32 { // CHECK: nvvm.vote.ballot.sync %{{.*}}, %{{.*}} : i32 %0 = nvvm.vote.ballot.sync %arg0, %arg1 : i32 @@ -77,19 +82,21 @@ func.func @nvvm_mma_m8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } +// CHECK-LABEL: @nvvm_mma_m8n8k4_f16_f16 func.func @nvvm_mma_m8n8k4_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>, %c2 : vector<2xf16>, %c3 : vector<2xf16>) { 
+ %c0 : vector<2xf16>, %c1 : vector<2xf16>, %c2 : vector<2xf16>, %c3 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}] %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - shape = {k = 4 : i32, m = 8 : i32, n = 8 : i32}} : (vector<2xf16>,vector<2xf16>,vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + shape = {k = 4 : i32, m = 8 : i32, n = 8 : i32}} : (vector<2xf16>,vector<2xf16>,vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m8n8k16_s8_s8 func.func @nvvm_mma_m8n8k16_s8_s8(%a0 : i32, %b0 : i32, - %c0 : i32, %c1 : i32) { - // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 8 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32)> + %c0 : i32, %c1 : i32) { + // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 8 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32)> %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, @@ -98,20 +105,22 @@ func.func @nvvm_mma_m8n8k16_s8_s8(%a0 : i32, %b0 : i32, llvm.return %0 : !llvm.struct<(i32, i32)> } -func.func @nvvm_mma_m16n8k8_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, +// CHECK-LABEL: @nvvm_mma_m16n8k8_f16_f16 +func.func 
@nvvm_mma_m16n8k8_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %c0 : vector<2xf16>, %c1 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[%{{.*}}, %{{.*}}] B[%{{.*}}] C[%{{.*}}, %{{.*}}] {{{.*}}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - shape = {k = 8 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + shape = {k = 8 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f16 func.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>) { + %c0 : vector<2xf16>, %c1 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -119,10 +128,11 @@ func.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f16 func.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>) { + %c0 : vector<2xf16>, %c1 : vector<2xf16>) { // CHECK: nvvm.mma.sync A[{{.*}}, 
{{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -130,10 +140,11 @@ func.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f32 func.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { + %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -141,10 +152,11 @@ func.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f32 func.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { + %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> 
!llvm.struct<(f32, f32, f32, f32)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -152,7 +164,8 @@ func.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } -func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k4_tf32_f32 +func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 4 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, f32) -> !llvm.struct<(f32, f32, f32, f32)> @@ -163,8 +176,9 @@ func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } -func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_s8 +func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32, + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -174,9 +188,10 @@ func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32, llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } -func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, - %b0 : i32, - %c0 : i32, %c1 : 
i32, %c2 : i32, %c3 : i32) { +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_u8 +func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, + %b0 : i32, + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 16 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -186,24 +201,26 @@ func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k256_b1_b1 func.func @nvvm_mma_m16n8k256_b1_b1(%a0 : i32, %a1 : i32, %a2 : i32, %a3 : i32, %b0 : i32, %b1 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 256 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, b1Op = #nvvm.mma_b1op, shape = {k = 256 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k128_b1_b1 func.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, %b0 : 
i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 128 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, b1Op = #nvvm.mma_b1op, shape = {k = 128 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> @@ -212,7 +229,7 @@ func.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, // CHECK-LABEL: @nvvm_mma_m8n8k128_b1_b1 func.func @nvvm_mma_m8n8k128_b1_b1(%a0 : i32, %b0 : i32, - %c0 : i32, %c1 : i32) { + %c0 : i32, %c1 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 128 : i32, m = 8 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32)> %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -224,7 +241,7 @@ func.func @nvvm_mma_m8n8k128_b1_b1(%a0 : i32, // CHECK-LABEL: @nvvm_mma_m16n8k32_s4_s4 func.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) { // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow, layoutA = #nvvm.mma_layout, layoutB = 
#nvvm.mma_layout, multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, shape = {k = 32 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)> %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -243,6 +260,7 @@ func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.stru llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> } +// CHECK-LABEL: @nvvm_wmma_mma func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : i32, %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) @@ -255,6 +273,7 @@ func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : llvm.return %r : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } +// CHECK-LABEL: @cp_async llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 nvvm.cp.async.shared.global %arg0, %arg1, 16 diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index f3bd013167ab4c..36239c3e19d4f8 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -1,5 +1,6 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s +// CHECK-LABEL: @nvvm_special_regs llvm.func @nvvm_special_regs() -> i32 { // CHECK: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %1 = nvvm.read.ptx.sreg.tid.x : i32 @@ -32,12 +33,14 @@ llvm.func @nvvm_special_regs() -> i32 { llvm.return %1 : i32 } +// CHECK-LABEL: @llvm_nvvm_barrier0 llvm.func @llvm_nvvm_barrier0() { // CHECK: call void @llvm.nvvm.barrier0() nvvm.barrier0 llvm.return } +// CHECK-LABEL: @nvvm_shfl llvm.func @nvvm_shfl( %0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : f32) -> i32 { @@ -60,6 +63,7 @@ llvm.func @nvvm_shfl( llvm.return %6 : i32 } +// CHECK-LABEL: @nvvm_shfl_pred llvm.func @nvvm_shfl_pred( %0 : i32, %1 : i32, %2 : i32, 
%3 : i32, %4 : f32) -> !llvm.struct<(i32, i1)> { @@ -82,6 +86,7 @@ llvm.func @nvvm_shfl_pred( llvm.return %6 : !llvm.struct<(i32, i1)> } +// CHECK-LABEL: @nvvm_vote llvm.func @nvvm_vote(%0 : i32, %1 : i1) -> i32 { // CHECK: call i32 @llvm.nvvm.vote.ballot.sync(i32 %{{.*}}, i1 %{{.*}}) %3 = nvvm.vote.ballot.sync %0, %1 : i32 @@ -94,11 +99,12 @@ llvm.func @nvvm_mma_mn8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2x %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> { // CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32 - %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7] + %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {m = 8 : i32, n = 8 : i32, k = 4 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f16 llvm.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, @@ -111,10 +117,11 @@ llvm.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, } // f32 return type, f16 accumulate type +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f16 llvm.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, - %c0 : vector<2xf16>, %c1 : vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32)> { + %c0 : vector<2xf16>, %c1 : vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32)> { // CHECK: call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16 %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, 
%b1] C[%c0, %c1] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -123,6 +130,7 @@ llvm.func @nvvm_mma_m16n8k16_f32_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>, } // f16 return type, f32 accumulate type +// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f32 llvm.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, @@ -135,55 +143,60 @@ llvm.func @nvvm_mma_m16n8k16_f16_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, } // f32 return type, f32 accumulate type +// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f32 llvm.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %a2 : vector<2xf16>, %a3 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> { - // CHECK: call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32 + // CHECK: call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32 %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {m = 16 : i32, n = 8 : i32, k = 16 : i32}} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32)> } -llvm.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, - %b0 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_s8 +llvm.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, + %b0 : i32, %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, 
intOverflowBehavior=#nvvm.mma_int_overflow, shape = {m = 16 : i32, n = 8 : i32, k = 16 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } -llvm.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, - %b0 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_u8 +llvm.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32, + %b0 : i32, %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, intOverflowBehavior=#nvvm.mma_int_overflow, shape = {m = 16 : i32, n = 8 : i32, k = 16 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } -llvm.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, +// CHECK-LABEL: @nvvm_mma_m16n8k128_b1_b1 +llvm.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, - multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, + multiplicandAPtxType = #nvvm.mma_type, multiplicandBPtxType = #nvvm.mma_type, b1Op = #nvvm.mma_b1op, shape = {k = 128 : i32, m = 16 : i32, n = 8 : i32}} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)> llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m16n8k32_s4_s4 llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : 
i32, %b0 : i32, - %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { + %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4 %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3] {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, @@ -193,16 +206,18 @@ llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)> } +// CHECK-LABEL: @nvvm_mma_m8n8k4_f64_f64 llvm.func @nvvm_mma_m8n8k4_f64_f64(%a0 : f64, - %b0 : f64, + %b0 : f64, %c0 : f64, %c1 : f64) -> !llvm.struct<(f64, f64)> { // CHECK: call { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64 %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1] - {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, + {layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, shape = {m = 8 : i32, n = 8 : i32, k = 4 : i32}} : (f64, f64, f64) -> !llvm.struct<(f64, f64)> llvm.return %0 : !llvm.struct<(f64, f64)> } +// CHECK-LABEL: @nvvm_mma_m16n8k4_tf32_f32 llvm.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> { @@ -228,6 +243,7 @@ llvm.func @gpu_wmma_load_op(%arg0: !llvm.ptr, %arg1: i32) { // The test below checks the correct mapping of the nvvm.wmma.*.store.* op to the correct intrinsic // in the LLVM NVPTX backend. +// CHECK-LABEL: @gpu_wmma_store_op llvm.func @gpu_wmma_store_op(%arg0: !llvm.ptr, %arg1: i32, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, %arg4: vector<2 xf16>, %arg5: vector<2 x f16>) { @@ -240,6 +256,7 @@ llvm.func @gpu_wmma_store_op(%arg0: !llvm.ptr, %arg1: i32, // The test below checks the correct mapping of the nvvm.wmma.*.mma.* op to the correct intrinsic // in the LLVM NVPTX backend. 
+// CHECK-LABEL: @gpu_wmma_mma_op llvm.func @gpu_wmma_mma_op(%arg0: vector<2 x f16>, %arg1: vector<2 x f16>, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, %arg4: vector<2 x f16>, %arg5: vector<2 x f16>, @@ -261,6 +278,7 @@ llvm.func @gpu_wmma_mma_op(%arg0: vector<2 x f16>, %arg1: vector<2 x f16>, llvm.return } +// CHECK-LABEL: @nvvm_wmma_load_tf32 llvm.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0i32(i32* %{{.*}}, i32 %{{.*}}) %0 = nvvm.wmma.load %arg0, %arg1 @@ -269,6 +287,7 @@ llvm.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) { llvm.return } +// CHECK-LABEL: @nvvm_wmma_mma llvm.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : i32, %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) { @@ -280,6 +299,7 @@ llvm.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : llvm.return } +// CHECK-LABEL: @cp_async llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // CHECK: call void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %{{.*}}, i8 addrspace(1)* %{{.*}}) nvvm.cp.async.shared.global %arg0, %arg1, 4 @@ -296,7 +316,7 @@ llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { llvm.return } -// CHECK-LABEL: @ld_matrix( +// CHECK-LABEL: @ld_matrix llvm.func @ld_matrix(%arg0: !llvm.ptr) { // CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3i32(i32 addrspace(3)* %{{.*}}) %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 From 402b8373021a69090aae505c7f39d9e750127ff2 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 30 May 2022 10:48:58 +0200 Subject: [PATCH 809/908] Revert "[mlir] Lower complex.sqrt and complex.atan2 to Arithmetic dialect." This reverts commit f5fa633b0955a8cee878b384801038fccef11fdc. Integration test sparse_complex_ops.mlir breaks because of it. 
--- .../ComplexToStandard/ComplexToStandard.cpp | 113 +----------------- .../convert-to-standard.mlir | 47 +------- 2 files changed, 2 insertions(+), 158 deletions(-) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 194e1669a86c9c..e1eca6181dff96 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -44,49 +44,6 @@ struct AbsOpConversion : public OpConversionPattern { } }; -// atan2(y,x) = -i * log((x + i * y)/sqrt(x**2+y**2)) -struct Atan2OpConversion : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(complex::Atan2Op op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); - - auto type = op.getType().cast(); - Type elementType = type.getElementType(); - - Value lhs = adaptor.getLhs(); - Value rhs = adaptor.getRhs(); - - Value rhsSquared = b.create(type, rhs, rhs); - Value lhsSquared = b.create(type, lhs, lhs); - Value rhsSquaredPlusLhsSquared = - b.create(type, rhsSquared, lhsSquared); - Value sqrtOfRhsSquaredPlusLhsSquared = - b.create(type, rhsSquaredPlusLhsSquared); - - Value zero = - b.create(elementType, b.getZeroAttr(elementType)); - Value one = b.create(elementType, - b.getFloatAttr(elementType, 1)); - Value i = b.create(type, zero, one); - Value iTimesLhs = b.create(i, lhs); - Value rhsPlusILhs = b.create(rhs, iTimesLhs); - - Value divResult = - b.create(rhsPlusILhs, sqrtOfRhsSquaredPlusLhsSquared); - Value logResult = b.create(divResult); - - Value negativeOne = b.create( - elementType, b.getFloatAttr(elementType, -1)); - Value negativeI = b.create(type, zero, negativeOne); - - rewriter.replaceOpWithNewOp(op, negativeI, logResult); - return success(); - } -}; - template struct ComparisonOpConversion : public OpConversionPattern { using 
OpConversionPattern::OpConversionPattern; @@ -743,72 +700,6 @@ struct SinOpConversion : public TrigonometricOpConversion { } }; -// The algorithm is listed in https://dl.acm.org/doi/pdf/10.1145/363717.363780. -struct SqrtOpConversion : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(complex::SqrtOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); - - auto type = op.getType().cast(); - Type elementType = type.getElementType(); - Value arg = adaptor.getComplex(); - - Value zero = - b.create(elementType, b.getZeroAttr(elementType)); - - Value real = b.create(elementType, adaptor.getComplex()); - Value imag = b.create(elementType, adaptor.getComplex()); - - Value absLhs = b.create(real); - Value absArg = b.create(elementType, arg); - Value addAbs = b.create(absLhs, absArg); - Value sqrtAddAbs = b.create(addAbs); - Value sqrtAddAbsDivTwo = b.create( - sqrtAddAbs, b.create( - elementType, b.getFloatAttr(elementType, 2))); - - Value realIsNegative = - b.create(arith::CmpFPredicate::OLT, real, zero); - Value imagIsNegative = - b.create(arith::CmpFPredicate::OLT, imag, zero); - - Value resultReal = sqrtAddAbsDivTwo; - - Value imagDivTwoResultReal = b.create( - imag, b.create(resultReal, resultReal)); - - Value negativeResultReal = b.create(resultReal); - - Value resultImag = b.create( - realIsNegative, - b.create(imagIsNegative, negativeResultReal, - resultReal), - imagDivTwoResultReal); - - resultReal = b.create( - realIsNegative, - b.create( - imag, b.create(resultImag, resultImag)), - resultReal); - - Value realIsZero = - b.create(arith::CmpFPredicate::OEQ, real, zero); - Value imagIsZero = - b.create(arith::CmpFPredicate::OEQ, imag, zero); - Value argIsZero = b.create(realIsZero, imagIsZero); - - resultReal = b.create(argIsZero, zero, resultReal); - resultImag = b.create(argIsZero, zero, resultImag); - - 
rewriter.replaceOpWithNewOp(op, type, resultReal, - resultImag); - return success(); - } -}; - struct SignOpConversion : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -844,7 +735,6 @@ void mlir::populateComplexToStandardConversionPatterns( // clang-format off patterns.add< AbsOpConversion, - Atan2OpConversion, ComparisonOpConversion, ComparisonOpConversion, BinaryComplexOpConversion, @@ -858,8 +748,7 @@ void mlir::populateComplexToStandardConversionPatterns( MulOpConversion, NegOpConversion, SignOpConversion, - SinOpConversion, - SqrtOpConversion>(patterns.getContext()); + SinOpConversion>(patterns.getContext()); // clang-format on } diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index bf410287185179..6f57e722b520ef 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --convert-complex-to-standard --split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard)" | FileCheck %s // CHECK-LABEL: func @complex_abs // CHECK-SAME: %[[ARG:.*]]: complex @@ -14,17 +14,6 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK: %[[NORM:.*]] = math.sqrt %[[SQ_NORM]] : f32 // CHECK: return %[[NORM]] : f32 -// ----- - -// CHECK-LABEL: func @complex_atan2 -func.func @complex_atan2(%lhs: complex, - %rhs: complex) -> complex { - %atan2 = complex.atan2 %lhs, %rhs : complex - return %atan2 : complex -} - -// ----- - // CHECK-LABEL: func @complex_add // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_add(%lhs: complex, %rhs: complex) -> complex { @@ -40,8 +29,6 @@ func.func @complex_add(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex 
-// ----- - // CHECK-LABEL: func @complex_cos // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_cos(%arg: complex) -> complex { @@ -63,8 +50,6 @@ func.func @complex_cos(%arg: complex) -> complex { // CHECK-DAG: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] -// ----- - // CHECK-LABEL: func @complex_div // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_div(%lhs: complex, %rhs: complex) -> complex { @@ -174,8 +159,6 @@ func.func @complex_div(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL_WITH_SPECIAL_CASES]], %[[RESULT_IMAG_WITH_SPECIAL_CASES]] : complex // CHECK: return %[[RESULT]] : complex -// ----- - // CHECK-LABEL: func @complex_eq // CHECK-SAME: %[[LHS:.*]]: complex, %[[RHS:.*]]: complex func.func @complex_eq(%lhs: complex, %rhs: complex) -> i1 { @@ -191,8 +174,6 @@ func.func @complex_eq(%lhs: complex, %rhs: complex) -> i1 { // CHECK: %[[EQUAL:.*]] = arith.andi %[[REAL_EQUAL]], %[[IMAG_EQUAL]] : i1 // CHECK: return %[[EQUAL]] : i1 -// ----- - // CHECK-LABEL: func @complex_exp // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_exp(%arg: complex) -> complex { @@ -209,8 +190,6 @@ func.func @complex_exp(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex -// ----- - // CHECK-LABEL: func.func @complex_expm1( // CHECK-SAME: %[[ARG:.*]]: complex) -> complex { func.func @complex_expm1(%arg: complex) -> complex { @@ -232,8 +211,6 @@ func.func @complex_expm1(%arg: complex) -> complex { // CHECK: %[[RES:.*]] = complex.create %[[REAL_M1]], %[[IMAG]] : complex // CHECK: return %[[RES]] : complex -// ----- - // CHECK-LABEL: func @complex_log // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_log(%arg: complex) -> complex { @@ -253,8 +230,6 @@ func.func @complex_log(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = 
complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex -// ----- - // CHECK-LABEL: func @complex_log1p // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_log1p(%arg: complex) -> complex { @@ -279,8 +254,6 @@ func.func @complex_log1p(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex -// ----- - // CHECK-LABEL: func @complex_mul // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_mul(%lhs: complex, %rhs: complex) -> complex { @@ -399,8 +372,6 @@ func.func @complex_mul(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[FINAL_REAL]], %[[FINAL_IMAG]] : complex // CHECK: return %[[RESULT]] : complex -// ----- - // CHECK-LABEL: func @complex_neg // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_neg(%arg: complex) -> complex { @@ -414,8 +385,6 @@ func.func @complex_neg(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = complex.create %[[NEG_REAL]], %[[NEG_IMAG]] : complex // CHECK: return %[[RESULT]] : complex -// ----- - // CHECK-LABEL: func @complex_neq // CHECK-SAME: %[[LHS:.*]]: complex, %[[RHS:.*]]: complex func.func @complex_neq(%lhs: complex, %rhs: complex) -> i1 { @@ -431,8 +400,6 @@ func.func @complex_neq(%lhs: complex, %rhs: complex) -> i1 { // CHECK: %[[NOT_EQUAL:.*]] = arith.ori %[[REAL_NOT_EQUAL]], %[[IMAG_NOT_EQUAL]] : i1 // CHECK: return %[[NOT_EQUAL]] : i1 -// ----- - // CHECK-LABEL: func @complex_sin // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_sin(%arg: complex) -> complex { @@ -454,8 +421,6 @@ func.func @complex_sin(%arg: complex) -> complex { // CHECK-DAG: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] -// ----- - // CHECK-LABEL: func @complex_sign // CHECK-SAME: %[[ARG:.*]]: complex func.func @complex_sign(%arg: complex) -> complex { @@ -480,8 +445,6 @@ func.func 
@complex_sign(%arg: complex) -> complex { // CHECK: %[[RESULT:.*]] = arith.select %[[IS_ZERO]], %[[ARG]], %[[SIGN]] : complex // CHECK: return %[[RESULT]] : complex -// ----- - // CHECK-LABEL: func @complex_sub // CHECK-SAME: (%[[LHS:.*]]: complex, %[[RHS:.*]]: complex) func.func @complex_sub(%lhs: complex, %rhs: complex) -> complex { @@ -496,11 +459,3 @@ func.func @complex_sub(%lhs: complex, %rhs: complex) -> complex { // CHECK: %[[RESULT_IMAG:.*]] = arith.subf %[[IMAG_LHS]], %[[IMAG_RHS]] : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex - -// ----- - -// CHECK-LABEL: func @complex_sqrt -func.func @complex_sqrt(%arg: complex) -> complex { - %sqrt = complex.sqrt %arg : complex - return %sqrt : complex -} From 14cc4674bf5ac848e4d2564c73aa68c28bbd5144 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 30 May 2022 09:50:08 +0100 Subject: [PATCH 810/908] [X86] Adjust vector fp test costs to match int test costs znver1/2 models were missing the vtestps/pd overrides to match the vptest integer equivalents. 
Noticed while investigating Issue #54889 --- llvm/lib/Target/X86/X86ScheduleZnver1.td | 4 +-- llvm/lib/Target/X86/X86ScheduleZnver2.td | 4 +-- .../llvm-mca/X86/Znver1/resources-avx1.s | 34 +++++++++---------- .../llvm-mca/X86/Znver2/resources-avx1.s | 34 +++++++++---------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 041d224d40ca9e..bfa5a1447f6a74 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -334,8 +334,8 @@ defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 7805a5d78e758f..7a98b943191d45 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -331,8 +331,8 @@ defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s index 7b67829550cc4e..905fbe09fae13b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s @@ -1683,14 +1683,14 @@ vzeroupper # CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vtestpd %xmm0, %xmm1 
-# CHECK-NEXT: 1 8 0.50 * vtestpd (%rax), %xmm1 -# CHECK-NEXT: 1 1 0.25 vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 1 8 0.50 * vtestpd (%rax), %ymm1 -# CHECK-NEXT: 1 1 0.25 vtestps %xmm0, %xmm1 -# CHECK-NEXT: 1 8 0.50 * vtestps (%rax), %xmm1 -# CHECK-NEXT: 1 1 0.25 vtestps %ymm0, %ymm1 -# CHECK-NEXT: 1 8 0.50 * vtestps (%rax), %ymm1 +# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 +# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1 +# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1 +# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1 # CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1 # CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1 # CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1 @@ -1738,7 +1738,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 175.00 175.00 - - - - - 144.58 169.08 204.25 525.08 - +# CHECK-NEXT: 175.00 175.00 - - - - - 142.58 175.08 210.25 523.08 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2395,14 +2395,14 @@ vzeroupper # CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vtestpd (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vtestpd (%rax), %ymm1 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vtestps %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vtestps (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 
- vtestps %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vtestps (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %xmm0, %xmm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %ymm1 # CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomisd %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 1.00 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomiss %xmm0, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s index 0ccc0219941a72..755a754a2f0a62 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s @@ -1683,14 +1683,14 @@ vzeroupper # CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.25 vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 1 8 0.33 * vtestpd (%rax), %xmm1 -# CHECK-NEXT: 1 1 0.25 vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 1 8 0.33 * vtestpd (%rax), %ymm1 -# CHECK-NEXT: 1 1 0.25 vtestps %xmm0, %xmm1 -# CHECK-NEXT: 1 8 0.33 * vtestps (%rax), %xmm1 -# CHECK-NEXT: 1 1 0.25 vtestps %ymm0, %ymm1 -# CHECK-NEXT: 1 8 0.33 * vtestps (%rax), %ymm1 +# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 +# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1 
+# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1 +# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1 # CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1 # CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1 # CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1 @@ -1739,7 +1739,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 135.92 163.42 200.25 469.42 - +# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 133.92 169.42 206.25 467.42 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -2396,14 +2396,14 @@ vzeroupper # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 0.50 0.50 - vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 0.50 0.50 - vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vtestpd (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vtestpd (%rax), %ymm1 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vtestps %xmm0, %xmm1 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vtestps (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vtestps %ymm0, %ymm1 -# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vtestps (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 1.00 - - vtestpd (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 1.00 - - vtestpd (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - vtestps %xmm0, %xmm1 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 1.00 - - 
vtestps (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - vtestps %ymm0, %ymm1 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 1.00 - - vtestps (%rax), %ymm1 # CHECK-NEXT: - - - - - - - - 0.50 0.50 1.00 - - vucomisd %xmm0, %xmm1 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 1.00 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - - - - 0.50 0.50 1.00 - - vucomiss %xmm0, %xmm1 From 082822b381ad5d8beff16dfe3d227661d7830182 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Mon, 30 May 2022 10:26:49 +0100 Subject: [PATCH 811/908] [AMDGPU][GFX9] Support base+soffset+offset SMEM stores. Reviewed By: dp Differential Revision: https://reviews.llvm.org/D126388 --- llvm/lib/Target/AMDGPU/SMInstructions.td | 34 +++++++++++++------ llvm/test/MC/AMDGPU/gfx9_asm_smem.s | 3 ++ .../MC/Disassembler/AMDGPU/gfx8_dasm_all.txt | 10 ++++++ .../MC/Disassembler/AMDGPU/gfx9_dasm_all.txt | 15 ++++++++ 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index aef940ad8e03bc..a1064e86440660 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -590,7 +590,7 @@ multiclass SM_Real_Loads_vi op, string ps> { } } -class SMEM_Real_Store_vi op, SM_Pseudo ps> : SMEM_Real_vi { +class SMEM_Real_Store_Probe_vi op, SM_Pseudo ps> : SMEM_Real_vi { // encoding bits<7> sdata; @@ -598,21 +598,33 @@ class SMEM_Real_Store_vi op, SM_Pseudo ps> : SMEM_Real_vi { let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); } -multiclass SM_Real_Stores_vi op, string ps, - SM_Store_Pseudo immPs = !cast(ps#_IMM), - SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> { - def _IMM_vi : SMEM_Real_Store_vi { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } +class SMEM_Real_Store_vi op, string ps, dag offsets> + : SMEM_Real_Store_Probe_vi (ps)> { + RegisterClass SrcClass = !cast(ps).SrcClass; + RegisterClass BaseClass = 
!cast(ps).BaseClass; + let InOperandList = !con((ins SrcClass:$sdata, BaseClass:$sbase), + offsets, (ins CPol:$cpol)); +} - def _SGPR_vi : SMEM_Real_Store_vi { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); +multiclass SM_Real_Stores_vi op, string ps> { + def _IMM_vi : SMEM_Real_Store_vi ; + def _SGPR_vi : SMEM_Real_Store_vi ; + let IsGFX9Specific = true in { + // The alternative GFX9 SGPR encoding using soffset to encode the + // offset register. Not available in assembler and goes to the GFX9 + // encoding family to avoid conflicts with the primary SGPR variant. + let SOffsetEn = 1, Offset = ?, Subtarget = SIEncodingFamily.GFX9, + AsmVariantName = "NonParsable" in + def _SGPR_alt_gfx9 : SMEM_Real_Store_vi ; + def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi < + op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>; } } multiclass SM_Real_Probe_vi op, string ps> { - def _IMM_vi : SMEM_Real_Store_vi (ps#_IMM)>; - def _SGPR_vi : SMEM_Real_Store_vi (ps#_SGPR)>; + def _IMM_vi : SMEM_Real_Store_Probe_vi (ps#_IMM)>; + def _SGPR_vi : SMEM_Real_Store_Probe_vi (ps#_SGPR)>; } defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">; diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s index 64918dcceb3e5c..66ef4cde4623c2 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s @@ -816,6 +816,9 @@ s_store_dword s1, s[4:5], m0 s_store_dword s1, s[4:5], 0x0 // CHECK: [0x42,0x00,0x42,0xc0,0x00,0x00,0x00,0x00] +s_store_dword s1, s[4:5], s7 offset:0x12345 +// CHECK: [0x42,0x40,0x42,0xc0,0x45,0x23,0x01,0x0e] + s_store_dword s1, s[4:5], s0 glc // CHECK: [0x42,0x00,0x41,0xc0,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt index 5a7fa762763308..10ac16ca3a8def 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt +++ 
b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt @@ -10264,6 +10264,11 @@ # CHECK: s_store_dword s1, s[4:5], m0 ; encoding: [0x42,0x00,0x40,0xc0,0x7c,0x00,0x00,0x00] 0x42,0x00,0x40,0xc0,0x7c,0x00,0x00,0x00 +# Make sure that raising the GFX9 soffset_en bit doesn't affect GFX8 +# decoding. +# CHECK: s_store_dword s1, s[4:5], m0 ; encoding: [0x42,0x00,0x40,0xc0,0x7c,0x00,0x00,0x00] +0x42,0x40,0x40,0xc0,0x7c,0x00,0x00,0x00 + # CHECK: s_store_dword s101, s[4:5], m0 ; encoding: [0x42,0x19,0x40,0xc0,0x7c,0x00,0x00,0x00] 0x42,0x19,0x40,0xc0,0x7c,0x00,0x00,0x00 @@ -10318,6 +10323,11 @@ # CHECK: s_store_dword s1, s[4:5], 0x7ffff ; encoding: [0x42,0x00,0x42,0xc0,0xff,0xff,0x07,0x00] 0x42,0x00,0x42,0xc0,0xff,0xff,0x07,0x00 +# Make sure that raising the GFX9 soffset_en bit doesn't affect GFX8 +# decoding. +# CHECK: s_store_dword s1, s[4:5], 0x7ffff ; encoding: [0x42,0x00,0x42,0xc0,0xff,0xff,0x07,0x00] +0x42,0x40,0x42,0xc0,0xff,0xff,0x07,0x00 + # CHECK: s_store_dword s1, s[4:5], m0 glc ; encoding: [0x42,0x00,0x41,0xc0,0x7c,0x00,0x00,0x00] 0x42,0x00,0x41,0xc0,0x7c,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt index f5346734706ded..79f6aae6b9e628 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -10547,6 +10547,12 @@ # CHECK: s_store_dword s1, s[4:5], s101 ; encoding: [0x42,0x00,0x40,0xc0,0x65,0x00,0x00,0x00] 0x42,0x00,0x40,0xc0,0x65,0x00,0x00,0x00 +# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1 +# and the offset register encoded in the soffset field with the offset +# field being disregarded. 
+# CHECK: s_store_dword s5, s[2:3], s64 ; encoding: [0x41,0x41,0x40,0xc0,0x00,0x00,0x00,0x80] +0x41,0x41,0x40,0xc0,0x65,0x00,0x00,0x80 + # CHECK: s_store_dword s1, s[4:5], flat_scratch_lo ; encoding: [0x42,0x00,0x40,0xc0,0x66,0x00,0x00,0x00] 0x42,0x00,0x40,0xc0,0x66,0x00,0x00,0x00 @@ -10565,6 +10571,15 @@ # CHECK: s_store_dword s1, s[4:5], 0x0 ; encoding: [0x42,0x00,0x42,0xc0,0x00,0x00,0x00,0x00] 0x42,0x00,0x42,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_store_dword s1, s[4:5], s7 offset:0x12345 ; encoding: [0x42,0x40,0x42,0xc0,0x45,0x23,0x01,0x0e] +0x42,0x40,0x42,0xc0,0x45,0x23,0x01,0x0e + +# SP3 prefers to decode instructions with imm=1 and soffset_en=1 to the +# form with the 'offset:' modifier, even if the offset is 0. +# CHECK: s_store_dword s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x41,0x42,0xc0,0x00,0x00,0x00,0x00] +0x41,0x41,0x42,0xc0,0x00,0x00,0x00,0x00 + + # CHECK: s_store_dword s1, s[4:5], s0 glc ; encoding: [0x42,0x00,0x41,0xc0,0x00,0x00,0x00,0x00] 0x42,0x00,0x41,0xc0,0x00,0x00,0x00,0x00 From b4dbcba3b7b7dbc5b03fd2c349785ce4a6ae6fed Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Mon, 30 May 2022 10:31:59 +0100 Subject: [PATCH 812/908] [AMDGPU][GFX9][NFC] Rename the base class for SMEM stores. 
--- llvm/lib/Target/AMDGPU/SMInstructions.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index a1064e86440660..1d74a6be1bdde6 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -590,7 +590,7 @@ multiclass SM_Real_Loads_vi op, string ps> { } } -class SMEM_Real_Store_Probe_vi op, SM_Pseudo ps> : SMEM_Real_vi { +class SMEM_Real_Store_Base_vi op, SM_Pseudo ps> : SMEM_Real_vi { // encoding bits<7> sdata; @@ -599,7 +599,7 @@ class SMEM_Real_Store_Probe_vi op, SM_Pseudo ps> : SMEM_Real_vi op, string ps, dag offsets> - : SMEM_Real_Store_Probe_vi (ps)> { + : SMEM_Real_Store_Base_vi (ps)> { RegisterClass SrcClass = !cast(ps).SrcClass; RegisterClass BaseClass = !cast(ps).BaseClass; let InOperandList = !con((ins SrcClass:$sdata, BaseClass:$sbase), @@ -623,8 +623,8 @@ multiclass SM_Real_Stores_vi op, string ps> { } multiclass SM_Real_Probe_vi op, string ps> { - def _IMM_vi : SMEM_Real_Store_Probe_vi (ps#_IMM)>; - def _SGPR_vi : SMEM_Real_Store_Probe_vi (ps#_SGPR)>; + def _IMM_vi : SMEM_Real_Store_Base_vi (ps#_IMM)>; + def _SGPR_vi : SMEM_Real_Store_Base_vi (ps#_SGPR)>; } defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">; From 5cde5a5739069a4be7f86a17bd20cc6e8f2daf68 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 24 May 2022 18:25:57 +0200 Subject: [PATCH 813/908] [mlir] add interchange, pad and scalarize to structured transform dialect Add ops to the structured transform extension of the transform dialect that perform interchange, padding and scalarization on structured ops. Along with tiling that is already defined, this provides a minimal set of transformations necessary to build vectorizable code for a single structured op. 
Define two helper traits: one that implements TransformOpInterface by applying a function to each payload op independently and another that provides a simple "functional-style" producer/consumer list of memory effects for the transform ops. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D126374 --- .../Linalg/TransformOps/LinalgTransformOps.h | 7 + .../Linalg/TransformOps/LinalgTransformOps.td | 75 ++++++++ .../Transform/IR/TransformInterfaces.h | 166 +++++++++++++++++ .../Transform/IR/TransformInterfaces.td | 9 + .../TransformOps/LinalgTransformOps.cpp | 173 ++++++++++++++++++ .../Linalg/transform-op-interchange.mlir | 60 ++++++ .../test/Dialect/Linalg/transform-op-pad.mlir | 133 ++++++++++++++ .../Linalg/transform-op-scalarize.mlir | 29 +++ .../Dialect/Linalg/transform-op-tile.mlir | 46 +++++ .../Dialect/Linalg/transform-ops-invalid.mlir | 39 ++++ mlir/test/Dialect/Linalg/transform-ops.mlir | 65 +++---- 11 files changed, 762 insertions(+), 40 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/transform-op-interchange.mlir create mode 100644 mlir/test/Dialect/Linalg/transform-op-pad.mlir create mode 100644 mlir/test/Dialect/Linalg/transform-op-scalarize.mlir create mode 100644 mlir/test/Dialect/Linalg/transform-op-tile.mlir create mode 100644 mlir/test/Dialect/Linalg/transform-ops-invalid.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h index 9213e0ddef7229..ddd9aa2aef0dd9 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h @@ -9,9 +9,16 @@ #ifndef MLIR_DIALECT_LINALG_TRANSFORMOPS_LINALGTRANSFORMOPS_H #define MLIR_DIALECT_LINALG_TRANSFORMOPS_LINALGTRANSFORMOPS_H +#include "mlir/Dialect/PDL/IR/PDLTypes.h" #include "mlir/Dialect/Transform/IR/TransformInterfaces.h" #include "mlir/IR/OpImplementation.h" +namespace mlir { 
+namespace linalg { +class LinalgOp; +} // namespace linalg +} // namespace mlir + //===----------------------------------------------------------------------===// // Linalg Transform Operations //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 34a5e9f7140a4b..3557f3323779bc 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -16,6 +16,81 @@ include "mlir/Dialect/PDL/IR/PDLTypes.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/IR/OpBase.td" +def InterchangeOp : Op { + let description = [{ + Interchanges the iterators of the operations pointed to by the target handle + using the iterator interchange attribute. + }]; + + let arguments = + (ins PDL_Operation:$target, + DefaultValuedAttr:$iterator_interchange); + let results = (outs PDL_Operation:$transformed); + + let assemblyFormat = "$target attr-dict"; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + ::mlir::FailureOr<::mlir::linalg::LinalgOp> applyToOne( + ::mlir::linalg::LinalgOp target); + }]; +} + +def PadOp : Op { + let description = [{ + Pads the operations pointed to by the target handle using the options + provided as operation attributes. 
+ }]; + + let arguments = + (ins PDL_Operation:$target, + DefaultValuedAttr:$padding_values, + DefaultValuedAttr:$padding_dimensions, + DefaultValuedAttr:$pack_paddings, + DefaultValuedAttr:$hoist_paddings, + DefaultValuedAttr< + TypedArrayAttrBase, + "{}">:$transpose_paddings); + let results = (outs PDL_Operation:$transformed); + + let assemblyFormat = "$target attr-dict"; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + ::mlir::FailureOr<::mlir::linalg::LinalgOp> applyToOne( + ::mlir::linalg::LinalgOp target); + }]; +} + +def ScalarizeOp : Op { + let description = [{ + Indicates that ops of a specific kind in the given function should be + scalarized (i.e. their dynamic dimensions tiled by 1). + + This operation returns the tiled op but not the loops. + + We make this design choice because it is hard to know ahead of time the + number of loops that will be produced (it depends on the number of dynamic + dimensions after multiple transformations have been applied). + }]; + + let arguments = (ins PDL_Operation:$target); + let results = (outs PDL_Operation:$result); + + let assemblyFormat = "$target attr-dict"; + + let extraClassDeclaration = [{ + ::mlir::FailureOr<::mlir::linalg::LinalgOp> applyToOne( + ::mlir::linalg::LinalgOp target); + }]; +} + def TileOp : Op, DeclareOpInterfaceMethods]> { diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h index d029c214b49e2a..d08267c72c4dfe 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h @@ -397,6 +397,31 @@ class PossibleTopLevelTransformOpTrait } }; +/// Trait implementing the TransformOpInterface for operations applying a +/// transformation to a single operation handle and producing a single operation +/// handle. 
The op must implement a method with one of the following signatures: +/// - FailureOr applyToOne(OpTy) +/// - LogicalResult applyToOne(OpTy) +/// to perform a transformation that is applied in turn to all payload IR +/// operations that correspond to the handle of the transform IR operation. +/// In the functions above, OpTy is either Operation * or a concrete payload IR +/// Op class that the transformation is applied to (NOT the class of the +/// transform IR op). The op is expected to have one operand and zero or one +/// results. +template +class TransformEachOpTrait + : public OpTrait::TraitBase { +public: + /// Calls `applyToOne` for every payload operation associated with the operand + /// of this transform IR op. If `applyToOne` returns ops, associates them with + /// the result of this transform op. + LogicalResult apply(TransformResults &transformResults, + TransformState &state); + + /// Checks that the op matches the expectations of this trait. + static LogicalResult verifyTrait(Operation *op); +}; + /// Side effect resource corresponding to the mapping between Transform IR /// values and Payload IR operations. An Allocate effect from this resource /// means creating a new mapping entry, it is always accompanied by a Write @@ -426,9 +451,150 @@ struct PayloadIRResource StringRef getName() override { return "transform.payload_ir"; } }; +/// Trait implementing the MemoryEffectOpInterface for single-operand +/// single-result operations that "consume" their operand and produce a new +/// result. +template +class FunctionalStyleTransformOpTrait + : public OpTrait::TraitBase { +public: + /// This op "consumes" the operand by reading and freeing it, "produces" the + /// result by allocating and writing it and reads/writes the payload IR in the + /// process. 
+ void getEffects(SmallVectorImpl &effects) { + effects.emplace_back(MemoryEffects::Read::get(), + this->getOperation()->getOperand(0), + TransformMappingResource::get()); + effects.emplace_back(MemoryEffects::Free::get(), + this->getOperation()->getOperand(0), + TransformMappingResource::get()); + effects.emplace_back(MemoryEffects::Allocate::get(), + this->getOperation()->getResult(0), + TransformMappingResource::get()); + effects.emplace_back(MemoryEffects::Write::get(), + this->getOperation()->getResult(0), + TransformMappingResource::get()); + effects.emplace_back(MemoryEffects::Read::get(), PayloadIRResource::get()); + effects.emplace_back(MemoryEffects::Write::get(), PayloadIRResource::get()); + } + + /// Checks that the op matches the expectations of this trait. + static LogicalResult verifyTrait(Operation *op) { + static_assert(OpTy::template hasTrait(), + "expected single-operand op"); + static_assert(OpTy::template hasTrait(), + "expected single-result op"); + if (!op->getName().getInterface()) { + return op->emitError() + << "FunctionalStyleTransformOpTrait should only be attached to ops " + "that implement MemoryEffectOpInterface"; + } + return success(); + } +}; + } // namespace transform } // namespace mlir #include "mlir/Dialect/Transform/IR/TransformInterfaces.h.inc" +namespace mlir { +namespace transform { +namespace detail { +/// Appends `result` to the vector assuming it corresponds to the success state +/// in `FailureOr`. If `result` is just a +/// `LogicalResult`, does nothing. 
+template +std::enable_if_t::value, LogicalResult> +appendTransformResultToVector(Ty result, + SmallVectorImpl &results) { + return result; +} +template +std::enable_if_t::value, LogicalResult> +appendTransformResultToVector(Ty result, + SmallVectorImpl &results) { + static_assert( + std::is_convertible::value, + "expected transform function to return operations"); + if (failed(result)) + return failure(); + + results.push_back(*result); + return success(); +} + +/// Applies a one-to-one transform to each of the given targets. Puts the +/// results of transforms, if any, in `results` in the same order. Fails if any +/// of the applications fails. Individual transforms must be callable with +/// one of the following signatures: +/// - FailureOr(OpTy) +/// - LogicalResult(OpTy) +/// where OpTy is either +/// - Operation *, in which case the transform is always applied; +/// - a concrete Op class, in which case a check is performed whether +/// `targets` contains operations of the same class and a failure is reported +/// if it does not. 
+template +LogicalResult applyTransformToEach(ArrayRef targets, + SmallVectorImpl &results, + FnTy transform) { + using OpTy = typename llvm::function_traits::template arg_t<0>; + static_assert(std::is_convertible::value, + "expected transform function to take an operation"); + using RetTy = typename llvm::function_traits::result_t; + static_assert(std::is_convertible::value, + "expected transform function to return LogicalResult or " + "FailureOr"); + for (Operation *target : targets) { + auto specificOp = dyn_cast(target); + if (!specificOp) + return failure(); + + auto result = transform(specificOp); + if (failed(appendTransformResultToVector(result, results))) + return failure(); + } + return success(); +} +} // namespace detail +} // namespace transform +} // namespace mlir + +template +mlir::LogicalResult mlir::transform::TransformEachOpTrait::apply( + TransformResults &transformResults, TransformState &state) { + using TransformOpType = typename llvm::function_traits< + decltype(&OpTy::applyToOne)>::template arg_t<0>; + ArrayRef targets = + state.getPayloadOps(this->getOperation()->getOperand(0)); + SmallVector results; + if (failed(detail::applyTransformToEach( + targets, results, [&](TransformOpType specificOp) { + return static_cast(this)->applyToOne(specificOp); + }))) + return failure(); + if (OpTy::template hasTrait()) { + transformResults.set( + this->getOperation()->getResult(0).template cast(), results); + } + return success(); +} + +template +mlir::LogicalResult +mlir::transform::TransformEachOpTrait::verifyTrait(Operation *op) { + static_assert(OpTy::template hasTrait(), + "expected single-operand op"); + static_assert(OpTy::template hasTrait() || + OpTy::template hasTrait(), + "expected zero- or single-result op"); + if (!op->getName().getInterface()) { + return op->emitError() << "TransformEachOpTrait should only be attached to " + "ops that implement TransformOpInterface"; + } + + return success(); +} + #endif // 
DIALECT_TRANSFORM_IR_TRANSFORMINTERFACES_H diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.td b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.td index 5b8d4202f0b4d1..fad845cc7f51d8 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.td @@ -49,4 +49,13 @@ def TransformOpInterface : OpInterface<"TransformOpInterface"> { ]; } +def FunctionalStyleTransformOpTrait + : NativeOpTrait<"FunctionalStyleTransformOpTrait"> { + let cppNamespace = "::mlir::transform"; +} + +def TransformEachOpTrait : NativeOpTrait<"TransformEachOpTrait"> { + let cppNamespace = "::mlir::transform"; +} + #endif // MLIR_DIALECT_TRANSFORM_IR_TRANSFORM_INTERFACES_TD diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 89c5815c62956d..689157d01a837f 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -49,6 +49,179 @@ class SimpleRewriter : public PatternRewriter { }; } // namespace +//===----------------------------------------------------------------------===// +// InterchangeOp +//===----------------------------------------------------------------------===// + +FailureOr transform::InterchangeOp::applyToOne(LinalgOp target) { + SmallVector interchangeVector = + extractUIntArray(getIteratorInterchange()); + // Exit early if no transformation is needed. 
+ if (interchangeVector.empty()) + return target; + + auto genericTarget = dyn_cast(target.getOperation()); + if (!genericTarget) { + InFlightDiagnostic diag = emitOpError() + << "applies to " << GenericOp::getOperationName() + << " ops"; + diag.attachNote(target.getLoc()) << "attempted to apply to this op"; + return diag; + } + + GenericOpInterchangePattern pattern(getContext(), interchangeVector); + SimpleRewriter rewriter(getContext()); + rewriter.setInsertionPoint(target); + FailureOr result = + pattern.returningMatchAndRewrite(genericTarget, rewriter); + if (failed(result)) + return failure(); + + return cast(result->getOperation()); +} + +LogicalResult transform::InterchangeOp::verify() { + SmallVector permutation = + extractUIntArray(getIteratorInterchange()); + auto sequence = llvm::to_vector(llvm::seq(0, permutation.size())); + if (!std::is_permutation(sequence.begin(), sequence.end(), + permutation.begin(), permutation.end())) { + return emitOpError() + << "expects iterator_interchange to be a permutation, found " + << getIteratorInterchange(); + } + return success(); +} + +//===---------------------------------------------------------------------===// +// PadOp +//===---------------------------------------------------------------------===// + +FailureOr transform::PadOp::applyToOne(LinalgOp target) { + // Convert the integer packing flags to booleans. + SmallVector packPaddings; + for (int64_t packPadding : extractI64Array(getPackPaddings())) + packPaddings.push_back(static_cast(packPadding)); + + // Convert the padding values to attributes. + SmallVector paddingValues; + for (auto const &it : + llvm::zip(getPaddingValues(), target->getOperandTypes())) { + Attribute attr = std::get<0>(it); + Type elementType = getElementTypeOrSelf(std::get<1>(it)); + // Try to parse string attributes to obtain an attribute of element type. 
+ if (auto stringAttr = attr.dyn_cast()) { + paddingValues.push_back( + parseAttribute(attr.cast(), elementType)); + if (!paddingValues.back()) { + InFlightDiagnostic diag = emitOpError() + << "expects a padding value that parses to " + << elementType << ", got " << std::get<0>(it); + diag.attachNote(target.getLoc()) << "when applied to this op"; + return diag; + } + continue; + } + // Otherwise, add the attribute directly. + if (attr.getType() != elementType) { + InFlightDiagnostic diag = emitOpError() + << "expects a padding value of type " + << elementType << ", got " << attr; + diag.attachNote(target.getLoc()) << "when applied to this op"; + return diag; + } + paddingValues.push_back(attr); + } + + // Extract the transpose vectors. + SmallVector> transposePaddings; + for (Attribute transposeVector : getTransposePaddings().cast()) + transposePaddings.push_back( + extractI64Array(transposeVector.cast())); + + LinalgPaddingOptions paddingOptions; + paddingOptions.setPaddingValues(paddingValues); + paddingOptions.setPaddingDimensions(extractI64Array(getPaddingDimensions())); + paddingOptions.setPackPaddings(packPaddings); + paddingOptions.setHoistPaddings(extractI64Array(getHoistPaddings())); + paddingOptions.setTransposePaddings(transposePaddings); + + LinalgPaddingPattern pattern(getContext(), paddingOptions); + SimpleRewriter rewriter(getContext()); + rewriter.setInsertionPoint(target); + FailureOr patternResult = + pattern.returningMatchAndRewrite(target, rewriter); + if (failed(patternResult)) { + InFlightDiagnostic diag = emitError() + << "failed to apply pattern to target op"; + diag.attachNote(target.getLoc()) << "target op"; + return diag; + } + return patternResult; +} + +LogicalResult transform::PadOp::verify() { + SmallVector packPaddings = extractI64Array(getPackPaddings()); + if (any_of(packPaddings, [](int64_t packPadding) { + return packPadding != 0 && packPadding != 1; + })) { + return emitOpError() + << "expects pack_paddings to contain booleans 
(0/1), found " + << getPackPaddings(); + } + + SmallVector paddingDimensions = + extractI64Array(getPaddingDimensions()); + if (any_of(paddingDimensions, + [](int64_t paddingDimension) { return paddingDimension < 0; })) { + return emitOpError() + << "expects padding_dimensions to contain positive integers, found " + << getPaddingDimensions(); + } + + SmallVector hoistPaddings = extractI64Array(getHoistPaddings()); + if (any_of(hoistPaddings, + [](int64_t hoistPadding) { return hoistPadding < 0; })) { + return emitOpError() + << "expects hoist_paddings to contain positive integers, found " + << getHoistPaddings(); + } + + ArrayAttr transposes = getTransposePaddings(); + for (Attribute attr : transposes) { + SmallVector transpose = extractFromI64ArrayAttr(attr); + auto sequence = llvm::to_vector(llvm::seq(0, transpose.size())); + if (!std::is_permutation(sequence.begin(), sequence.end(), + transpose.begin(), transpose.end())) { + return emitOpError() + << "expects transpose_paddings to be a permutation, found " + << attr; + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// ScalarizeOp +//===----------------------------------------------------------------------===// + +FailureOr transform::ScalarizeOp::applyToOne(LinalgOp target) { + LinalgTilingOptions tilingOptions; + tilingOptions.scalarizeDynamicDims(); + // Tiling with "scalarize_dyn_dims" actually sets the same lambda as the tile + // sizes and asserts that it is not already set. 
+ SmallVector emptyTileSizes; + LinalgTilingPattern pattern(getContext(), tilingOptions); + SimpleRewriter rewriter(getContext()); + rewriter.setInsertionPoint(target); + FailureOr result = + pattern.returningMatchAndRewrite(target, rewriter); + if (failed(result)) + return failure(); + + return result->op; +} + //===----------------------------------------------------------------------===// // TileOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir new file mode 100644 index 00000000000000..cb8badec200499 --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir @@ -0,0 +1,60 @@ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK: #[[$MAP:.*]] = affine_map<(d0, d1) -> (d1, d0)> + +// CHECK-LABEL: @interchange_generic +func.func @interchange_generic(%arg0: tensor, %arg1: tensor) -> tensor { + + // CHECK: linalg.generic + // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]] + %0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%arg0 : tensor) outs(%arg1 : tensor) { + ^bb0(%arg2: f32, %arg3: f32): + %1 = math.exp %arg2 : f32 + linalg.yield %1 : f32 + } -> tensor + return %0 : tensor +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @match_generic : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.generic"(%args : !pdl.range) -> (%results : !pdl.range) + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @match_generic in %arg1 + transform.structured.interchange %0 { iterator_interchange = [1, 0]} + } +} + +// ----- + +func.func @interchange_matmul(%arg0: tensor, %arg1: tensor, %arg2: tensor) 
-> tensor { + // expected-note @below {{attempted to apply to this op}} + %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor + return %0 : tensor +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @match_generic : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @match_generic in %arg1 + // expected-error @below {{applies to linalg.generic ops}} + transform.structured.interchange %0 { iterator_interchange = [1, 0]} + } +} diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir new file mode 100644 index 00000000000000..2a9025aa1f6afc --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir @@ -0,0 +1,133 @@ +// RUN: mlir-opt --test-transform-dialect-interpreter -split-input-file -verify-diagnostics %s | FileCheck %s + +#map = affine_map<()[s0] -> (-s0 + 12, 7)> + +// CHECK-LABEL: @static_sizes_output_divisible +func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[T0:.*]] = tensor.extract_slice % + // CHECK: %[[T1:.*]] = tensor.extract_slice % + // CHECK: %[[T2:.*]] = tensor.extract_slice % + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK-DAG: %[[CST:.*]] = arith.constant 0. 
+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index + + // CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold + // CHECK: tensor.yield %[[CST]] + // CHECK: %[[T4:.*]] = tensor.pad %[[T1]] nofold + + // CHECK: %[[T5:.*]] = linalg.matmul + // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>) + // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>) + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + %1 = transform.structured.pad %0 {padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0]} + } +} + +// ----- + +func.func @pad(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { + // expected-note @below {{when applied to this op}} + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match 
@pdl_target in %arg1 + // expected-error @below {{op expects a padding value of type 'f32', got 0 : i32}} + %1 = transform.structured.pad %0 {padding_values=[0: i32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0]} + } +} + +// ----- + +func.func @pad(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { + // expected-note @below {{when applied to this op}} + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + // expected-error @below {{expects a padding value that parses to 'f32', got "foo"}} + %1 = transform.structured.pad %0 {padding_values=["foo", 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0]} + } +} + +// ----- + +func.func @pad(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { + // expected-note @below {{target op}} + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + 
transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + // expected-error @below {{failed to apply pattern to target op}} + %1 = transform.structured.pad %0 {padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0]} + } +} diff --git a/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir new file mode 100644 index 00000000000000..ab25777adeefae --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-opt -test-transform-dialect-interpreter %s | FileCheck %s + +func.func @scalarize(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { + // The op is first tiled by 10 in the first dimension, which creates a + // dynamic size, and then scalarized, which brings the dimension to static 1. + // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}} : tensor<1x12 + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + %1, %loops = transform.structured.tile %0 {sizes = [10, 0, 0]} + %2 = transform.structured.scalarize %1 + } +} diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir new file mode 100644 index 00000000000000..a5310944357d3f --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir @@ -0,0 +1,46 @@ +// RUN: 
mlir-opt --test-transform-dialect-interpreter %s | FileCheck %s + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + sequence %arg0 { + ^bb0(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + %1, %loops:3 = transform.structured.tile %0 {sizes = [4, 4, 4]} + } + + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + rewrite %0 with "transform.dialect" + } +} + +// CHECK-LABEL: func @tile_linalg_matmul( +// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<128x128xf32> +// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor<128x128xf32> +// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor<128x128xf32> +// CHECK-SAME: -> tensor<128x128xf32> { +func.func @tile_linalg_matmul( + %arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128x128xf32>) + -> tensor<128x128xf32> { +// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<128x128xf32>) { +// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<128x128xf32>) { +// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<128x128xf32>) { +// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> +// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> +// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> +// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<4x4xf32>, tensor<4x4xf32>) +// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<4x4xf32> into tensor<128x128xf32> +// CHECK: scf.yield %[[TD]] : tensor<128x128xf32> +// CHECK: scf.yield %[[TD2]] : tensor<128x128xf32> +// CHECK: scf.yield %[[TD1]] : 
tensor<128x128xf32> + %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) + outs(%arg2: tensor<128x128xf32>) + -> tensor<128x128xf32> + +// CHECK: return %[[TD0]] : tensor<128x128xf32> + return %0 : tensor<128x128xf32> +} + diff --git a/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir new file mode 100644 index 00000000000000..9b80b2300f771b --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir @@ -0,0 +1,39 @@ +// RUN: mlir-opt %s --split-input-file --verify-diagnostics + +transform.sequence { +^bb0(%arg0: !pdl.operation): + // expected-error@below {{expects iterator_interchange to be a permutation, found [1, 1]}} + transform.structured.interchange %arg0 {iterator_interchange = [1, 1]} +} + +// ----- + +transform.sequence { +^bb0(%arg0: !pdl.operation): + // expected-error@below {{expects padding_dimensions to contain positive integers, found [1, -7]}} + transform.structured.pad %arg0 {padding_dimensions=[1, -7]} +} + +// ----- + +transform.sequence { +^bb0(%arg0: !pdl.operation): + // expected-error@below {{expects pack_paddings to contain booleans (0/1), found [1, 7]}} + transform.structured.pad %arg0 {pack_paddings=[1, 7]} +} + +// ----- + +transform.sequence { +^bb0(%arg0: !pdl.operation): + // expected-error@below {{expects hoist_paddings to contain positive integers, found [1, -7]}} + transform.structured.pad %arg0 {hoist_paddings=[1, -7]} +} + +// ----- + +transform.sequence { +^bb0(%arg0: !pdl.operation): + // expected-error@below {{expects transpose_paddings to be a permutation, found [1, 1]}} + transform.structured.pad %arg0 {transpose_paddings=[[1, 1]]} +} diff --git a/mlir/test/Dialect/Linalg/transform-ops.mlir b/mlir/test/Dialect/Linalg/transform-ops.mlir index a5310944357d3f..ae01f3d571d3b7 100644 --- a/mlir/test/Dialect/Linalg/transform-ops.mlir +++ b/mlir/test/Dialect/Linalg/transform-ops.mlir @@ -1,46 +1,31 @@ -// RUN: mlir-opt 
--test-transform-dialect-interpreter %s | FileCheck %s +// RUN: mlir-opt %s | mlir-opt | FileCheck %s -transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): - sequence %arg0 { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @pdl_target in %arg1 - %1, %loops:3 = transform.structured.tile %0 {sizes = [4, 4, 4]} - } - - pdl.pattern @pdl_target : benefit(1) { - %args = operands - %results = types - %0 = operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) - rewrite %0 with "transform.dialect" - } +transform.sequence { +^bb1(%arg0: !pdl.operation): + // CHECK: %{{.*}}, %{{.*}}:2 = transform.structured.tile + %0, %1:2 = transform.structured.tile %arg0 { sizes = [2, 0, 3] } } -// CHECK-LABEL: func @tile_linalg_matmul( -// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<128x128xf32> -// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor<128x128xf32> -// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor<128x128xf32> -// CHECK-SAME: -> tensor<128x128xf32> { -func.func @tile_linalg_matmul( - %arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128x128xf32>) - -> tensor<128x128xf32> { -// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<128x128xf32>) { -// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<128x128xf32>) { -// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<128x128xf32>) { -// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> -// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> -// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> -// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<4x4xf32>, tensor<4x4xf32>) -// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32> -// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into 
%[[TC2]][{{.*}}] : tensor<4x4xf32> into tensor<128x128xf32> -// CHECK: scf.yield %[[TD]] : tensor<128x128xf32> -// CHECK: scf.yield %[[TD2]] : tensor<128x128xf32> -// CHECK: scf.yield %[[TD1]] : tensor<128x128xf32> - %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) - -> tensor<128x128xf32> +//===----------------------------------------------------------------------===// +// Check that operations are registered correctly through the extension +// mechanism. Their syntax is generated and requires no additional testing since +// we test the generator. +//===----------------------------------------------------------------------===// + +transform.sequence { +^bb1(%arg0: !pdl.operation): + // CHECK: transform.structured.pad + %0 = transform.structured.pad %arg0 +} -// CHECK: return %[[TD0]] : tensor<128x128xf32> - return %0 : tensor<128x128xf32> +transform.sequence { +^bb1(%arg0: !pdl.operation): + // CHECK: transform.structured.interchange + %0 = transform.structured.interchange %arg0 } +transform.sequence { +^bb1(%arg0: !pdl.operation): + // CHECK: transform.structured.scalarize + %0 = transform.structured.scalarize %arg0 +} From 99b007806420f8fc5cc9db7f4b71ba511fedb686 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 30 May 2022 10:47:44 +0100 Subject: [PATCH 814/908] [AArch64] Tests for showing MachineCombiner COPY patterns. 
NFC --- .../CodeGen/AArch64/machine-combiner-copy.ll | 111 ++++++++++++++++++ .../AArch64/machine-combiner-fmul-dup.mir | 72 ++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/machine-combiner-copy.ll diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll new file mode 100644 index 00000000000000..27767435eec2ce --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -mattr=+fullfp16 -O3 | FileCheck %s + +define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef %B, ptr noalias nocapture noundef %C, i32 noundef %n) { +; CHECK-LABEL: fma_dup_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-NEXT: cbz w2, .LBB0_8 +; CHECK-NEXT: // %bb.1: // %for.body.preheader +; CHECK-NEXT: mov w8, w2 +; CHECK-NEXT: cmp w2, #15 +; CHECK-NEXT: b.hi .LBB0_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov x9, xzr +; CHECK-NEXT: b .LBB0_6 +; CHECK-NEXT: .LBB0_3: // %vector.ph +; CHECK-NEXT: and x9, x8, #0xfffffff0 +; CHECK-NEXT: add x10, x1, #16 +; CHECK-NEXT: add x11, x0, #16 +; CHECK-NEXT: mov x12, x9 +; CHECK-NEXT: dup v1.8h, v0.h[0] +; CHECK-NEXT: .LBB0_4: // %vector.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldp q2, q3, [x11, #-16] +; CHECK-NEXT: subs x12, x12, #16 +; CHECK-NEXT: add x11, x11, #32 +; CHECK-NEXT: ldp q4, q5, [x10, #-16] +; CHECK-NEXT: fmla v4.8h, v2.8h, v1.8h +; CHECK-NEXT: fmla v5.8h, v3.8h, v0.h[0] +; CHECK-NEXT: stp q4, q5, [x10, #-16] +; CHECK-NEXT: add x10, x10, #32 +; CHECK-NEXT: b.ne .LBB0_4 +; CHECK-NEXT: // %bb.5: // %middle.block +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: b.eq .LBB0_8 +; CHECK-NEXT: .LBB0_6: // %for.body.preheader1 +; CHECK-NEXT: lsl x10, x9, #1 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: 
add x9, x1, x10 +; CHECK-NEXT: add x10, x0, x10 +; CHECK-NEXT: .LBB0_7: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr h1, [x10], #2 +; CHECK-NEXT: ldr h2, [x9] +; CHECK-NEXT: subs x8, x8, #1 +; CHECK-NEXT: fmadd h1, h1, h0, h2 +; CHECK-NEXT: str h1, [x9], #2 +; CHECK-NEXT: b.ne .LBB0_7 +; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %cmp6.not = icmp eq i32 %n, 0 + br i1 %cmp6.not, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + %min.iters.check = icmp ult i32 %n, 16 + br i1 %min.iters.check, label %for.body.preheader14, label %vector.ph + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i64 %wide.trip.count, 4294967280 + %broadcast.splatinsert = insertelement <8 x half> poison, half %B, i64 0 + %broadcast.splat = shufflevector <8 x half> %broadcast.splatinsert, <8 x half> poison, <8 x i32> zeroinitializer + %broadcast.splatinsert10 = insertelement <8 x half> poison, half %B, i64 0 + %broadcast.splat11 = shufflevector <8 x half> %broadcast.splatinsert10, <8 x half> poison, <8 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds half, ptr %A, i64 %index + %wide.load = load <8 x half>, ptr %0, align 2 + %1 = getelementptr inbounds half, ptr %0, i64 8 + %wide.load9 = load <8 x half>, ptr %1, align 2 + %2 = fmul fast <8 x half> %wide.load, %broadcast.splat + %3 = fmul fast <8 x half> %wide.load9, %broadcast.splat11 + %4 = getelementptr inbounds half, ptr %C, i64 %index + %wide.load12 = load <8 x half>, ptr %4, align 2 + %5 = getelementptr inbounds half, ptr %4, i64 8 + %wide.load13 = load <8 x half>, ptr %5, align 2 + %6 = fadd fast <8 x half> %wide.load12, %2 + %7 = fadd fast <8 x half> %wide.load13, %3 + store <8 x half> %6, ptr %4, align 2 + store <8 x half> %7, ptr %5, 
align 2 + %index.next = add nuw i64 %index, 16 + %8 = icmp eq i64 %index.next, %n.vec + br i1 %8, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %n.vec, %wide.trip.count + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader14 + +for.body.preheader14: ; preds = %for.body.preheader, %middle.block + %indvars.iv.ph = phi i64 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void + +for.body: ; preds = %for.body.preheader14, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader14 ] + %arrayidx = getelementptr inbounds half, ptr %A, i64 %indvars.iv + %9 = load half, ptr %arrayidx, align 2 + %mul = fmul fast half %9, %B + %arrayidx2 = getelementptr inbounds half, ptr %C, i64 %indvars.iv + %10 = load half, ptr %arrayidx2, align 2 + %add = fadd fast half %10, %mul + store half %add, ptr %arrayidx2, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir index 4de93fca36f123..a2a28419a872f8 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir @@ -95,6 +95,10 @@ br label %for.cond } + define void @extracopy(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 { + unreachable + } + attributes #0 = { "target-cpu"="cortex-a57" } ... @@ -545,3 +549,71 @@ body: | B %bb.1 ... 
+--- +name: extracopy +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: fpr64 } + - { id: 1, class: fpr64 } + - { id: 2, class: fpr64 } + - { id: 3, class: fpr64 } + - { id: 4, class: gpr64common } + - { id: 5, class: fpr64 } + - { id: 6, class: fpr64 } + - { id: 7, class: fpr128 } + - { id: 8, class: fpr128 } + - { id: 9, class: fpr64 } + - { id: 10, class: fpr64 } + - { id: 11, class: fpr64 } +liveins: + - { reg: '$d0', virtual-reg: '%1' } + - { reg: '$d1', virtual-reg: '%2' } + - { reg: '$d2', virtual-reg: '%3' } + - { reg: '$x0', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: extracopy + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $d0, $d1, $d2, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]] + ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane killed [[INSERT_SUBREG]], 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr64 = COPY [[DUPv2i32lane]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: [[FMULv2f32_:%[0-9]+]]:fpr64 = FMULv2f32 [[COPY5]], [[COPY6]] + ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2f32_]], [[COPY4]] + ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64), align 16) + ; CHECK-NEXT: B %bb.1 + bb.0: + liveins: $d0, $d1, $d2, $x0 + + %4:gpr64common = COPY $x0 + %3:fpr64 = COPY $d2 + %2:fpr64 = COPY $d1 + %1:fpr64 = COPY $d0 + %8:fpr128 = IMPLICIT_DEF + %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub + %6:fpr64 = COPY %3 + %5:fpr64 = COPY %2 + %11:fpr64 
= DUPv2i32lane killed %7, 0 + %0:fpr64 = COPY %11 + + bb.1: + %9:fpr64 = FMULv2f32 %5, %0 + %10:fpr64 = FADDv2f32 killed %9, %6 + STRDui killed %10, %4, 0 :: (store 8, align 16) + B %bb.1 + +... From a5cf17f8ae752f1b62b40b907bfee4faa3600b21 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 30 May 2022 10:48:49 +0100 Subject: [PATCH 815/908] [OpenCL] Expose wg collective functions for CL3 SPIR targets Since the SPIR/SPIR-V targets enable all known features, we must ensure the Work-group Collective Functions feature macro is set for OpenCL 3.0. Fixes https://github.com/llvm/llvm-project/issues/55770 --- clang/lib/Headers/opencl-c-base.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index 20048d97bc7448..c433b4f7eb1af0 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h @@ -68,6 +68,7 @@ #if (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) // For the SPIR and SPIR-V target all features are supported. #if defined(__SPIR__) || defined(__SPIRV__) +#define __opencl_c_work_group_collective_functions 1 #define __opencl_c_atomic_order_seq_cst 1 #define __opencl_c_atomic_scope_device 1 #define __opencl_c_atomic_scope_all_devices 1 From d245974e1a7809f40d994548b46b4694f247fc99 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Mon, 30 May 2022 10:18:22 +0100 Subject: [PATCH 816/908] Test stackmap support for floating point types. It appears that float support is complete, or at least, the stackmap records emitted are not inconceivable (I must admit that I don't know about many of the architectures under test here). One curiosity, the SystemZ tests highlight an undocumented (or maybe incorrect) quirk of the stackmap format: in the case of a Register record, the Offset or SmallConstant field can encode a sub-register index! I've only ever seen this field zero for Register entries up until now. 
--- llvm/test/CodeGen/AArch64/arm64-stackmap.ll | 61 +++++++++++++++++++- llvm/test/CodeGen/AArch64/stackmap.ll | 61 +++++++++++++++++++- llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll | 64 ++++++++++++++++++++- llvm/test/CodeGen/SystemZ/stackmap.ll | 61 +++++++++++++++++++- llvm/test/CodeGen/X86/stackmap.ll | 61 +++++++++++++++++++- 5 files changed, 298 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-stackmap.ll b/llvm/test/CodeGen/AArch64/arm64-stackmap.ll index 59e2b1cb3473cf..bc4ea434f31e2b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-stackmap.ll +++ b/llvm/test/CodeGen/AArch64/arm64-stackmap.ll @@ -14,11 +14,11 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions -; CHECK-NEXT: .long 11 +; CHECK-NEXT: .long 12 ; Num LargeConstants ; CHECK-NEXT: .long 3 ; Num Callsites -; CHECK-NEXT: .long 11 +; CHECK-NEXT: .long 12 ; Functions and stack size ; CHECK-NEXT: .quad _constantargs @@ -54,6 +54,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NEXT: .quad _clobberLR ; CHECK-NEXT: .quad 112 ; CHECK-NEXT: .quad 1 +; CHECK-NEXT: .quad _floats +; CHECK-NEXT: .quad 32 +; CHECK-NEXT: .quad 1 ; Num LargeConstants ; CHECK-NEXT: .quad 4294967295 @@ -348,6 +351,60 @@ define void @clobberLR(i32 %a) { ret void } +; CHECK-LABEL: .long L{{.*}}-_floats +; CHECK-NEXT: .short 0 +; Num Locations +; CHECK-NEXT: .short 6 +; Loc 0: constant float stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 0: constant double stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 1: float value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; 
CHECK-NEXT: .long 0 +; Loc 2: double value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 3: float on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long -{{.*}} +; Loc 4: double on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long -{{.*}} +define void @floats(float %f, double %g) { + %ff = alloca float + %gg = alloca double + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25, + double 1.5, float %f, double %g, float* %ff, double* %gg) + ret void +} + declare void @llvm.experimental.stackmap(i64, i32, ...) declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) diff --git a/llvm/test/CodeGen/AArch64/stackmap.ll b/llvm/test/CodeGen/AArch64/stackmap.ll index 601e3f7dbb6bfa..6780229aff1937 100644 --- a/llvm/test/CodeGen/AArch64/stackmap.ll +++ b/llvm/test/CodeGen/AArch64/stackmap.ll @@ -9,11 +9,11 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .hword 0 ; Num Functions -; CHECK-NEXT: .word 14 +; CHECK-NEXT: .word 15 ; Num LargeConstants ; CHECK-NEXT: .word 4 ; Num Callsites -; CHECK-NEXT: .word 18 +; CHECK-NEXT: .word 19 ; Functions and stack size ; CHECK-NEXT: .xword constantargs @@ -58,6 +58,9 @@ ; CHECK-NEXT: .xword needsStackRealignment ; CHECK-NEXT: .xword -1 ; CHECK-NEXT: .xword 1 +; CHECK-NEXT: .xword floats +; CHECK-NEXT: .xword 32 +; CHECK-NEXT: .xword 1 ; Large Constants ; CHECK-NEXT: .xword 2147483648 @@ -502,6 +505,60 @@ define void @needsStackRealignment() { } declare void @escape_values(...) 
+; CHECK-LABEL: .word .L{{.*}}-floats +; CHECK-NEXT: .hword 0 +; Num Locations +; CHECK-NEXT: .hword 6 +; Loc 0: constant float stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 4 +; CHECK-NEXT: .hword {{.*}} +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word 0 +; Loc 0: constant double stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 8 +; CHECK-NEXT: .hword {{.*}} +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word 0 +; Loc 1: float value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 4 +; CHECK-NEXT: .hword {{.*}} +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word 0 +; Loc 2: double value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 8 +; CHECK-NEXT: .hword {{.*}} +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word 0 +; Loc 3: float on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 8 +; CHECK-NEXT: .hword {{.*}} +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word -{{.*}} +; Loc 4: double on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 8 +; CHECK-NEXT: .hword {{.*}} +; CHECK-NEXT: .hword 0 +; CHECK-NEXT: .word -{{.*}} +define void @floats(float %f, double %g) { + %ff = alloca float + %gg = alloca double + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25, + double 1.5, float %f, double %g, float* %ff, double* %gg) + ret void +} + declare void @llvm.experimental.stackmap(i64, i32, ...) declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll b/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll index 6bb01492a95a6c..9cb1b4e5624617 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll @@ -40,6 +40,9 @@ target triple = "powerpc64-unknown-linux-gnu" ; CHECK-LABEL: clobberLR: ; CHECK: {{^}}.L[[clobberLR_BEGIN:.*]]:{{$}} +; CHECK-LABEL: floats: +; CHECK: {{^}}.L[[floats_BEGIN:.*]]:{{$}} + ; CHECK-LABEL: .section .llvm_stackmaps ; CHECK-NEXT: __LLVM_StackMaps: @@ -48,11 +51,11 @@ target triple = "powerpc64-unknown-linux-gnu" ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions -; CHECK-NEXT: .long 11 +; CHECK-NEXT: .long 12 ; Num LargeConstants ; CHECK-NEXT: .long 3 ; Num Callsites -; CHECK-NEXT: .long 11 +; CHECK-NEXT: .long 12 ; Functions and stack size ; CHECK-NEXT: .quad constantargs @@ -88,6 +91,9 @@ target triple = "powerpc64-unknown-linux-gnu" ; CHECK-NEXT: .quad clobberLR ; CHECK-NEXT: .quad 208 ; CHECK-NEXT: .quad 1 +; CHECK-NEXT: .quad floats +; CHECK-NEXT: .quad 80 +; CHECK-NEXT: .quad 1 ; Num LargeConstants ; CHECK-NEXT: .quad 4294967295 @@ -382,6 +388,60 @@ define void @clobberLR(i32 %a) { ret void } +; CHECK: .long .L{{.*}}-.L[[floats_BEGIN]] +; CHECK-NEXT: .short 0 +; Num Locations +; CHECK-NEXT: .short 6 +; Loc 0: constant float stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 0: constant double stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 1: float value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 2: double value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: 
.short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 3: float on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long {{.*}} +; Loc 4: double on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long {{.*}} +define void @floats(float %f, double %g) { + %ff = alloca float + %gg = alloca double + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25, + double 1.5, float %f, double %g, float* %ff, double* %gg) + ret void +} + declare void @llvm.experimental.stackmap(i64, i32, ...) declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll index 45c09d088d589b..1fc3aa9c6cc27f 100644 --- a/llvm/test/CodeGen/SystemZ/stackmap.ll +++ b/llvm/test/CodeGen/SystemZ/stackmap.ll @@ -9,11 +9,11 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions -; CHECK-NEXT: .long 15 +; CHECK-NEXT: .long 16 ; Num LargeConstants ; CHECK-NEXT: .long 4 ; Num Callsites -; CHECK-NEXT: .long 19 +; CHECK-NEXT: .long 20 ; Functions and stack size ; CHECK-NEXT: .quad constantargs @@ -61,6 +61,9 @@ ; CHECK-NEXT: .quad needsStackRealignment ; CHECK-NEXT: .quad -1 ; CHECK-NEXT: .quad 1 +; CHECK-NEXT: .quad floats +; CHECK-NEXT: .quad 176 +; CHECK-NEXT: .quad 1 ; Large Constants ; CHECK-NEXT: .quad 2147483648 @@ -547,6 +550,60 @@ define void @needsStackRealignment() { } declare void @escape_values(...) 
+; CHECK-LABEL: .long .L{{.*}}-floats +; CHECK-NEXT: .short 0 +; Num Locations +; CHECK-NEXT: .short 6 +; Loc 0: constant float stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 32 +; Loc 0: constant double stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 1: float value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 32 +; Loc 2: double value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 3: float on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long {{.*}} +; Loc 4: double on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long {{.*}} +define void @floats(float %f, double %g) { + %ff = alloca float + %gg = alloca double + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25, + double 1.5, float %f, double %g, float* %ff, double* %gg) + ret void +} + declare void @llvm.experimental.stackmap(i64, i32, ...) declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) 
diff --git a/llvm/test/CodeGen/X86/stackmap.ll b/llvm/test/CodeGen/X86/stackmap.ll index 3db2865ff44679..2eceb2aacc8239 100644 --- a/llvm/test/CodeGen/X86/stackmap.ll +++ b/llvm/test/CodeGen/X86/stackmap.ll @@ -9,11 +9,11 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions -; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long 17 ; Num LargeConstants ; CHECK-NEXT: .long 4 ; Num Callsites -; CHECK-NEXT: .long 20 +; CHECK-NEXT: .long 21 ; Functions and stack size ; CHECK-NEXT: .quad _constantargs @@ -64,6 +64,9 @@ ; CHECK-NEXT: .quad _needsStackRealignment ; CHECK-NEXT: .quad -1 ; CHECK-NEXT: .quad 1 +; CHECK-NEXT: .quad _floats +; CHECK-NEXT: .quad 24 +; CHECK-NEXT: .quad 1 ; Large Constants ; CHECK-NEXT: .quad 2147483648 @@ -585,6 +588,60 @@ define void @needsStackRealignment() { } declare void @escape_values(...) +; CHECK-LABEL: .long L{{.*}}-_floats +; CHECK-NEXT: .short 0 +; Num Locations +; CHECK-NEXT: .short 6 +; Loc 0: constant float stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 16 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 0: constant double stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 16 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 1: float value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 16 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 2: double value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 16 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; Loc 3: float on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long -{{.*}} +; Loc 4: double on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; 
CHECK-NEXT: .short 0 +; CHECK-NEXT: .long -{{.*}} +define void @floats(float %f, double %g) { + %ff = alloca float + %gg = alloca double + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25, + double 1.5, float %f, double %g, float* %ff, double* %gg) + ret void +} + declare void @llvm.experimental.stackmap(i64, i32, ...) declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) From 10d2195305ac49605f2b7b6a25a4076c31923191 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 30 May 2022 11:51:28 +0200 Subject: [PATCH 817/908] Update the Windows packaging script Check in updates based on how the latest release was built [0] and add the bug fix from [1] which allows LLDB to start. Other changes which had accumulated in the local release script: - Don't build the clang format plugin (VS has the functionality built in now) - Disable tests that have been failing (I'll try to follow up and re-enable them) - Switch to Python 3.10 - Jump through more hoops to make LLDB pick the right Python. 0. https://discourse.llvm.org/t/14-0-4-final-has-been-tagged/62750/3 1. 
https://github.com/llvm/llvm-project/issues/54589 --- llvm/utils/release/build_llvm_package.bat | 64 ++++++++++------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/llvm/utils/release/build_llvm_package.bat b/llvm/utils/release/build_llvm_package.bat index 418f72efcb8243..5b29eabddde5c1 100755 --- a/llvm/utils/release/build_llvm_package.bat +++ b/llvm/utils/release/build_llvm_package.bat @@ -21,8 +21,8 @@ REM https://github.com/swig/swig/issues/769 REM You need to modify the paths below: set vsdevcmd=C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\Common7\Tools\VsDevCmd.bat -set python32_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python36-32 -set python64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python36 +set python32_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310-32 +set python64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310 for /f "usebackq" %%i in (`PowerShell ^(Get-Date^).ToString^('yyyyMMdd'^)`) do set datestamp=%%i @@ -33,7 +33,6 @@ set build_dir=llvm_package_%revision:~0,8% echo Revision: %revision% echo Package version: %package_version% -echo Clang format plugin version: %clang_format_vs_version% echo Build dir: %build_dir% echo. pause @@ -49,47 +48,49 @@ mv llvm-project-* llvm-project || exit /b REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226. 
set cmake_flags=^ -DCMAKE_BUILD_TYPE=Release ^ - -DLLVM_ENABLE_ASSERTIONS=ON ^ + -DLLVM_ENABLE_ASSERTIONS=OFF ^ -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^ -DLLVM_BUILD_LLVM_C_DYLIB=ON ^ -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^ - -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% ^ + -DPython3_FIND_REGISTRY=NEVER ^ -DPACKAGE_VERSION=%package_version% ^ -DLLDB_RELOCATABLE_PYTHON=1 ^ + -DLLDB_EMBED_PYTHON_HOME=OFF ^ -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe ^ -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: " ^ - -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;lld;compiler-rt;openmp;lldb" + -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;lld;compiler-rt;lldb;openmp" REM TODO: Run the "check-all" tests. +set OLDPATH=%PATH% + set "VSCMD_START_DIR=%CD%" call "%vsdevcmd%" -arch=x86 +set PATH=%python32_dir%;%PATH% set CC= set CXX= mkdir build32_stage0 cd build32_stage0 -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm-project\llvm || exit /b -ninja all || ninja all || ninja all || exit /b -ninja check || ninja check || ninja check || exit /b -ninja check-clang || ninja check-clang || ninja check-clang || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPython3_ROOT_DIR=%python32_dir% ..\llvm-project\llvm || exit /b +ninja || ninja || ninja || exit /b +REM ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b +REM ninja check-clang || ninja check-clang || ninja check-clang || exit /b ninja check-lld || ninja check-lld || ninja check-lld || exit /b ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b -ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b -ninja check-clangd || ninja check-clangd || ninja check-clangd || exit /b +REM ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b cd.. 
mkdir build32 cd build32 set CC=..\build32_stage0\bin\clang-cl set CXX=..\build32_stage0\bin\clang-cl -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm-project\llvm || exit /b -ninja all || ninja all || ninja all || exit /b -ninja check || ninja check || ninja check || exit /b -ninja check-clang || ninja check-clang || ninja check-clang || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPython3_ROOT_DIR=%python32_dir% ..\llvm-project\llvm || exit /b +ninja || ninja || ninja || exit /b +REM ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b +REM ninja check-clang || ninja check-clang || ninja check-clang || exit /b ninja check-lld || ninja check-lld || ninja check-lld || exit /b ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b -ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b -ninja check-clangd || ninja check-clangd || ninja check-clangd || exit /b +REM ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b ninja package || exit /b 7z x LLVM-%package_version%-win32.exe -orepack @@ -99,28 +100,17 @@ del repack\Uninstall.exe cd .. -REM The plug-in is built separately as it uses a statically linked clang-format.exe. -mkdir build_vsix -cd build_vsix -REM Having VSSDKINSTALL set makes devenv *not* find the SDK for some reason. -set VSSDKINSTALL= -set CC=..\build32_stage0\bin\clang-cl -set CXX=..\build32_stage0\bin\clang-cl -cmake -GNinja %cmake_flags% -DLLVM_USE_CRT_RELEASE=MT -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm-project\llvm || exit /b -ninja clang_format_vsix || exit /b -copy ..\llvm-project\llvm\tools\clang\tools\clang-format-vs\ClangFormat\bin\Release\ClangFormat.vsix ClangFormat-r%revision%.vsix -cd .. 
- - set "VSCMD_START_DIR=%CD%" +set PATH=%OLDPATH% call "%vsdevcmd%" -arch=amd64 +set PATH=%python64_dir%;%PATH% set CC= set CXX= mkdir build64_stage0 cd build64_stage0 -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPYTHON_EXECUTABLE=%python64_dir%\python.exe ..\llvm-project\llvm || exit /b -ninja all || ninja all || ninja all || exit /b -ninja check || ninja check || ninja check || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPython3_ROOT_DIR=%python64_dir% ..\llvm-project\llvm || exit /b +ninja || ninja || ninja || exit /b +ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b ninja check-clang || ninja check-clang || ninja check-clang || exit /b ninja check-lld || ninja check-lld || ninja check-lld || exit /b ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b @@ -132,9 +122,9 @@ mkdir build64 cd build64 set CC=..\build64_stage0\bin\clang-cl set CXX=..\build64_stage0\bin\clang-cl -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPYTHON_EXECUTABLE=%python64_dir%\python.exe ..\llvm-project\llvm || exit /b -ninja all || ninja all || ninja all || exit /b -ninja check || ninja check || ninja check || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPython3_ROOT_DIR=%python64_dir% ..\llvm-project\llvm || exit /b +ninja || ninja || ninja || exit /b +ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b ninja check-clang || ninja check-clang || ninja check-clang || exit /b ninja check-lld || ninja check-lld || ninja check-lld || exit /b ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b From 180d3f251d1ad5473705d3f00e6d426b5f8162e6 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 30 May 2022 17:16:42 +0700 Subject: [PATCH 818/908] [MemDep][NFCI] Remove redundant dyn_cast, replace with cast When `IsLoad` is `true`, we don't need to check if the instruction is actually a load with dyn_cast. 
Saves some petty amount of CT. --- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 81d2143233ddc2..c1ca9a9c569248 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -402,8 +402,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // forwarding, but any mayalias write can be assumed to be noalias. // Arguably, this logic should be pushed inside AliasAnalysis itself. if (isLoad && QueryInst) { - LoadInst *LI = dyn_cast(QueryInst); - if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load)) + LoadInst *LI = cast(QueryInst); + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) isInvariantLoad = true; } From 78a18d2b54e7e8e0e2c1d1cb33d015d7f69b8cc7 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Mon, 30 May 2022 12:19:48 +0200 Subject: [PATCH 819/908] [VP] vp intrinsics are not speculatable VP intrinsics show UB if the %evl parameter is out of bounds - they must not carry the speculatable attribute. The out-of-bounds UB disappears when the %evl parameter is expanded into the mask or expansion replaces the entire VP intrinsic with non-VP code. This patch - Removes the speculatable attribute on all VP intrinsics. - Generalizes the isSafeToSpeculativelyExecute function to let VP expansion know whether the VP intrinsic replacement will be speculatable. VP expansion may only discard %evl where this is the case. 
Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D125296 --- llvm/include/llvm/Analysis/ValueTracking.h | 22 ++ llvm/include/llvm/IR/Intrinsics.td | 380 +++++++++---------- llvm/lib/Analysis/ValueTracking.cpp | 35 +- llvm/lib/CodeGen/ExpandVectorPredication.cpp | 10 +- 4 files changed, 242 insertions(+), 205 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index b97d6285ea5ead..3b29bf1d53b47b 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -463,6 +463,28 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); + /// This returns the same result as isSafeToSpeculativelyExecute if Opcode is + /// the actual opcode of Inst. If the provided and actual opcode differ, the + /// function (virtually) overrides the opcode of Inst with the provided + /// Opcode. There are come constraints in this case: + /// * If Opcode has a fixed number of operands (eg, as binary operators do), + /// then Inst has to have at least as many leading operands. The function + /// will ignore all trailing operands beyond that number. + /// * If Opcode allows for an arbitrary number of operands (eg, as CallInsts + /// do), then all operands are considered. + /// * The virtual instruction has to satisfy all typing rules of the provided + /// Opcode. + /// * This function is pessimistic in the following sense: If one actually + /// materialized the virtual instruction, then isSafeToSpeculativelyExecute + /// may say that the materialized instruction is speculatable whereas this + /// function may have said that the instruction wouldn't be speculatable. + /// This behavior is a shortcoming in the current implementation and not + /// intentional. 
+ bool isSafeToSpeculativelyExecuteWithOpcode( + unsigned Opcode, const Operator *Inst, const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); + /// Returns true if the result or effects of the given instructions \p I /// depend values not reachable through the def use graph. /// * Memory dependence arises for example if the instruction reads from diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index cdbbe015892945..c40f0d4ca412ce 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1415,11 +1415,11 @@ def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty], [ IntrReadMem, IntrNoSync, IntrWillReturn, IntrArgMemOnly ]>; def int_vp_scatter: DefaultAttrsIntrinsic<[], - [ llvm_anyvector_ty, - LLVMVectorOfAnyPointersToElt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty], - [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers + [ llvm_anyvector_ty, + LLVMVectorOfAnyPointersToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers // Experimental strided memory accesses def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[], @@ -1437,8 +1437,9 @@ def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty llvm_i32_ty], [ NoCapture>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>; -// Speculatable Binary operators -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { +// Operators +let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Integer arithmetic def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1450,30 +1451,30 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1484,35 +1485,28 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; -} - -// Non-speculatable binary operators. 
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; -// Floating-point arithmetic. 
-let IntrProperties = - [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Floating-point arithmetic def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1524,177 +1518,169 @@ let IntrProperties = LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_fmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_frem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fneg : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - + [ LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; -// Casts. 
-def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - -// Shuffles. 
-def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; - -def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; - -// Comparisons. -let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { - def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], - [ llvm_anyvector_ty, - LLVMMatchType<0>, - llvm_metadata_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + // Casts + def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, 
llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + // Shuffles + def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + + // Comparisons + def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_icmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], - [ llvm_anyvector_ty, - LLVMMatchType<0>, - llvm_metadata_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; -// Reductions -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Reductions def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def 
int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ 
LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; } def int_get_active_lane_mask: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 473b3cb74be832..5adf44f145ba8c 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4599,13 +4599,38 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Operator *Inst = dyn_cast(V); if (!Inst) return false; + return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI); +} + +bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, + const Operator *Inst, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + if 
(Inst->getOpcode() != Opcode) { + // Check that the operands are actually compatible with the Opcode override. + auto hasEqualReturnAndLeadingOperandTypes = + [](const Operator *Inst, unsigned NumLeadingOperands) { + if (Inst->getNumOperands() < NumLeadingOperands) + return false; + const Type *ExpectedType = Inst->getType(); + for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp) + if (Inst->getOperand(ItOp)->getType() != ExpectedType) + return false; + return true; + }; + assert(!Instruction::isBinaryOp(Opcode) || + hasEqualReturnAndLeadingOperandTypes(Inst, 2)); + assert(!Instruction::isUnaryOp(Opcode) || + hasEqualReturnAndLeadingOperandTypes(Inst, 1)); + } for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast(Inst->getOperand(i))) if (C->canTrap()) return false; - switch (Inst->getOpcode()) { + switch (Opcode) { default: return true; case Instruction::UDiv: @@ -4636,7 +4661,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; } case Instruction::Load: { - const LoadInst *LI = cast(Inst); + const LoadInst *LI = dyn_cast(Inst); + if (!LI) + return false; if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); @@ -4645,7 +4672,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, TLI); } case Instruction::Call: { - auto *CI = cast(Inst); + auto *CI = dyn_cast(Inst); + if (!CI) + return false; const Function *Callee = CI->getCalledFunction(); // The called function could have undefined behavior or side-effects, even diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index aa52914f25b054..fbb89a7d68d952 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -118,10 +118,10 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) { if (isa(VPI)) return false; // Fallback to whether the intrinsic is speculatable. 
- // FIXME: Check whether the replacing non-VP code will be speculatable - // instead. VP intrinsics themselves are never speculatable because of - // UB if %evl is greater than the runtime vector length. - return isSafeToSpeculativelyExecute(cast(&VPI)); + Optional OpcOpt = VPI.getFunctionalOpcode(); + unsigned FunctionalOpc = OpcOpt.getValueOr((unsigned)Instruction::Call); + return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, + cast(&VPI)); } //// } Helpers @@ -481,7 +481,7 @@ struct TransformJob { }; void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { - // Speculatable instructions do not strictly need predication. + // Operations with speculatable lanes do not strictly need predication. if (maySpeculateLanes(VPI)) { // Converting a speculatable VP intrinsic means dropping %mask and %evl. // No need to expand %evl into the %mask only to ignore that code. From 7e5a730473a7b2909cc3a4b9198d8418c8146372 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 30 May 2022 17:28:33 +0700 Subject: [PATCH 820/908] [MemDep][NFC] Remove duplicating check in `if` and `else` branch Same check is done whether the condition is true or false. Just hoist it out of conditional. --- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index c1ca9a9c569248..586cac6c3f2066 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -502,10 +502,10 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // If we found a pointer, check if it could be the same as our pointer. AliasResult R = BatchAA.alias(LoadLoc, MemLoc); - if (isLoad) { - if (R == AliasResult::NoAlias) - continue; + if (R == AliasResult::NoAlias) + continue; + if (isLoad) { // Must aliased loads are defs of each other. 
if (R == AliasResult::MustAlias) return MemDepResult::getDef(Inst); @@ -522,10 +522,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( continue; } - // Stores don't depend on other no-aliased accesses. - if (R == AliasResult::NoAlias) - continue; - // Stores don't alias loads from read-only memory. if (BatchAA.pointsToConstantMemory(LoadLoc)) continue; From 820146abe9083dc3c522de03998cf296f97ffe52 Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Mon, 22 Mar 2021 15:28:31 +0000 Subject: [PATCH 821/908] [OpenMP] Pass chunk-size to MLIR while lowering from parse-tree Test that chunk size is passed to the static init function. Using three different variations: 1. Single constant. 2. Expression with constants. 3. Variable value. Reviewed By: peixin, shraiysh Differential Revision: https://reviews.llvm.org/D126383 --- flang/lib/Lower/OpenMP.cpp | 16 ++++- flang/test/Lower/OpenMP/omp-wsloop-chunks.f90 | 69 +++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenMP/omp-wsloop-chunks.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 535ffac82d9e5b..04cd39adbe8b0b 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -577,6 +577,21 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, &*std::next(doConstructEval->getNestedEvaluations().begin()); } while (collapseValue > 0); + for (const auto &clause : wsLoopOpClauseList.v) { + if (const auto &scheduleClause = + std::get_if(&clause.u)) { + if (const auto &chunkExpr = + std::get>( + scheduleClause->v.t)) { + if (const auto *expr = Fortran::semantics::GetExpr(*chunkExpr)) { + Fortran::lower::StatementContext stmtCtx; + scheduleChunkClauseOperand = + fir::getBase(converter.genExprValue(*expr, stmtCtx)); + } + } + } + } + // The types of lower bound, upper bound, and step are converted into the // type of the loop variable if necessary. 
mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); @@ -592,7 +607,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, // FIXME: Add support for following clauses: // 1. linear // 2. order - // 3. schedule (with chunk) auto wsLoopOp = firOpBuilder.create( currentLocation, lowerBound, upperBound, step, linearVars, linearStepVars, reductionVars, /*reductions=*/nullptr, diff --git a/flang/test/Lower/OpenMP/omp-wsloop-chunks.f90 b/flang/test/Lower/OpenMP/omp-wsloop-chunks.f90 new file mode 100644 index 00000000000000..879c15c41b984e --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-chunks.f90 @@ -0,0 +1,69 @@ +! This test checks that chunk size is passed correctly when lowering of +! OpenMP DO Directive(Worksharing) with chunk size + +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s + +program wsloop + integer :: i + integer :: chunk + +! CHECK-LABEL: func.func @_QQmain() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "chunk", uniq_name = "_QFEchunk"} + +!$OMP DO SCHEDULE(static, 4) + +do i=1, 9 + print*, i + +! CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[VAL_6:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_6]]) : (!fir.ref, i32) -> i1 +! CHECK: omp.yield +! CHECK: } + +end do +!$OMP END DO NOWAIT +!$OMP DO SCHEDULE(static, 2+2) + +do i=1, 9 + print*, i*2 + +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 +! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 +! 
CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[VAL_18:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[VAL_18]] : i32 +! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) : (!fir.ref, i32) -> i1 +! CHECK: omp.yield +! CHECK: } + +end do +!$OMP END DO NOWAIT +chunk = 6 +!$OMP DO SCHEDULE(static, chunk) + +do i=1, 9 + print*, i*3 +end do +!$OMP END DO NOWAIT +! CHECK: %[[VAL_28:.*]] = arith.constant 6 : i32 +! CHECK: fir.store %[[VAL_28]] to %[[VAL_0]] : !fir.ref +! CHECK: %[[VAL_29:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 +! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[VAL_33:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 +! CHECK: %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[VAL_33]] : i32 +! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) : (!fir.ref, i32) -> i1 +! CHECK: omp.yield +! CHECK: } +! CHECK: return +! CHECK: } + +end From bac4934c84f3ec82e771753f90f6a0e340808ba2 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 30 May 2022 13:55:54 +0200 Subject: [PATCH 822/908] Revert "build_llvm_package.bat: Produce zip files in addition to the installers" The zip files were too large to be practical, so they were never shipped. Reverting to reduce build time and complexity of the script. This reverts commit 4486aa03c5f431ba33a1d1ac9991da912e3decd9. 
--- llvm/utils/release/build_llvm_package.bat | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/llvm/utils/release/build_llvm_package.bat b/llvm/utils/release/build_llvm_package.bat index 5b29eabddde5c1..fd138e90500e5d 100755 --- a/llvm/utils/release/build_llvm_package.bat +++ b/llvm/utils/release/build_llvm_package.bat @@ -11,7 +11,7 @@ REM REM Visual Studio 2019, CMake, Ninja, GNUWin32, SWIG, Python 3, REM NSIS with the strlen_8192 patch, REM Visual Studio 2019 SDK and Nuget (for the clang-format plugin), -REM Perl (for the OpenMP run-time), 7Zip. +REM Perl (for the OpenMP run-time). REM REM REM For LLDB, SWIG version <= 3.0.8 needs to be used to work around @@ -92,14 +92,8 @@ ninja check-lld || ninja check-lld || ninja check-lld || exit /b ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b REM ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b ninja package || exit /b - -7z x LLVM-%package_version%-win32.exe -orepack -rmdir /s /q repack\$PLUGINSDIR -del repack\Uninstall.exe -7z a LLVM-%package_version%-win32.zip .\repack\* -mx9 cd .. - set "VSCMD_START_DIR=%CD%" set PATH=%OLDPATH% call "%vsdevcmd%" -arch=amd64 @@ -131,9 +125,4 @@ ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b ninja check-clangd || ninja check-clangd || ninja check-clangd || exit /b ninja package || exit /b - -7z x LLVM-%package_version%-win64.exe -orepack -rmdir /s /q repack\$PLUGINSDIR -del repack\Uninstall.exe -7z a LLVM-%package_version%-win64.zip .\repack\* -mx9 cd .. 
From 0f68c959d2faf55d5e186775a332994a9cbe69b9 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 24 May 2022 00:41:23 +0000 Subject: [PATCH 823/908] Apply clang-tidy fixes for modernize-use-override in SparseTensorUtils.cpp (NFC) --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 3a595fdb26e934..ba73af28068805 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -450,21 +450,21 @@ class SparseTensorStorage final : public SparseTensorStorageBase { const uint64_t *perm, const DimLevelType *sparsity, const SparseTensorStorageBase &tensor); - ~SparseTensorStorage() final override = default; + ~SparseTensorStorage() final = default; /// Partially specialize these getter methods based on template types. - void getPointers(std::vector

<P> **out, uint64_t d) final override { + void getPointers(std::vector<P> **out, uint64_t d) final { assert(d < getRank()); *out = &pointers[d]; } - void getIndices(std::vector<I> **out, uint64_t d) final override { + void getIndices(std::vector<I> **out, uint64_t d) final { assert(d < getRank()); *out = &indices[d]; } - void getValues(std::vector<V> **out) final override { *out = &values; } + void getValues(std::vector<V> **out) final { *out = &values; } /// Partially specialize lexicographical insertions based on template types. - void lexInsert(const uint64_t *cursor, V val) final override { + void lexInsert(const uint64_t *cursor, V val) final { // First, wrap up pending insertion path. uint64_t diff = 0; uint64_t top = 0; @@ -481,7 +481,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Note that this method resets the values/filled-switch array back /// to all-zero/false while only iterating over the nonzero elements. void expInsert(uint64_t *cursor, V *values, bool *filled, uint64_t *added, - uint64_t count) final override { + uint64_t count) final { if (count == 0) return; // Sort. @@ -507,7 +507,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } /// Finalizes lexicographic insertions. 
- void endInsert() final override { + void endInsert() final { if (values.empty()) finalizeSegment(0); else @@ -515,7 +515,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase { } void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t rank, - const uint64_t *perm) const final override { + const uint64_t *perm) const final { *out = new SparseTensorEnumerator(*this, rank, perm); } From eacfd0474427a14a1f104d8de7c5cb9711964962 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 24 May 2022 01:28:57 +0000 Subject: [PATCH 824/908] Apply clang-tidy fixes for llvm-else-after-return in OpPythonBindingGen.cpp (NFC) --- mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp index 83d2acce3ba2c9..44c60c1dc61929 100644 --- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp +++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp @@ -894,7 +894,7 @@ static void emitDefaultOpBuilder(const Operator &op, raw_ostream &os) { Argument a = op.getArg(builderArgIndex - numResultArgs); if (auto *nattr = a.dyn_cast()) return (nattr->attr.isOptional() || nattr->attr.hasDefaultValue()); - else if (auto *ntype = a.dyn_cast()) + if (auto *ntype = a.dyn_cast()) return ntype->isOptional(); else return false; From 2e2a8a2d9082250e4aad312c6008a526f2b007c7 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 30 May 2022 12:26:16 +0000 Subject: [PATCH 825/908] Revert "[VP] vp intrinsics are not speculatable" This reverts commit 78a18d2b54e7e8e0e2c1d1cb33d015d7f69b8cc7. 
Break MLIR bot: https://lab.llvm.org/buildbot/#/builders/61/builds/27127 --- llvm/include/llvm/Analysis/ValueTracking.h | 22 -- llvm/include/llvm/IR/Intrinsics.td | 380 ++++++++++--------- llvm/lib/Analysis/ValueTracking.cpp | 35 +- llvm/lib/CodeGen/ExpandVectorPredication.cpp | 10 +- 4 files changed, 205 insertions(+), 242 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 3b29bf1d53b47b..b97d6285ea5ead 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -463,28 +463,6 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); - /// This returns the same result as isSafeToSpeculativelyExecute if Opcode is - /// the actual opcode of Inst. If the provided and actual opcode differ, the - /// function (virtually) overrides the opcode of Inst with the provided - /// Opcode. There are come constraints in this case: - /// * If Opcode has a fixed number of operands (eg, as binary operators do), - /// then Inst has to have at least as many leading operands. The function - /// will ignore all trailing operands beyond that number. - /// * If Opcode allows for an arbitrary number of operands (eg, as CallInsts - /// do), then all operands are considered. - /// * The virtual instruction has to satisfy all typing rules of the provided - /// Opcode. - /// * This function is pessimistic in the following sense: If one actually - /// materialized the virtual instruction, then isSafeToSpeculativelyExecute - /// may say that the materialized instruction is speculatable whereas this - /// function may have said that the instruction wouldn't be speculatable. - /// This behavior is a shortcoming in the current implementation and not - /// intentional. 
- bool isSafeToSpeculativelyExecuteWithOpcode( - unsigned Opcode, const Operator *Inst, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr, - const TargetLibraryInfo *TLI = nullptr); - /// Returns true if the result or effects of the given instructions \p I /// depend values not reachable through the def use graph. /// * Memory dependence arises for example if the instruction reads from diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index c40f0d4ca412ce..cdbbe015892945 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1415,11 +1415,11 @@ def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty], [ IntrReadMem, IntrNoSync, IntrWillReturn, IntrArgMemOnly ]>; def int_vp_scatter: DefaultAttrsIntrinsic<[], - [ llvm_anyvector_ty, - LLVMVectorOfAnyPointersToElt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty], - [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers + [ llvm_anyvector_ty, + LLVMVectorOfAnyPointersToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers // Experimental strided memory accesses def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[], @@ -1437,9 +1437,8 @@ def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty llvm_i32_ty], [ NoCapture>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>; -// Operators -let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { - // Integer arithmetic +// Speculatable Binary operators +let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1451,30 +1450,30 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1485,28 +1484,35 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; +} + +// Non-speculatable binary operators. 
+let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +} - // Floating-point arithmetic +// Floating-point arithmetic. 
+let IntrProperties = + [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1518,169 +1524,177 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_fmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_frem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_fneg : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; - // Casts - def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_fptrunc : 
DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + def int_vp_fneg : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; - // Shuffles - def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; - def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; + def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +} - // Comparisons +// Casts. 
+def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + +// Shuffles. 
+def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + +def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + +// Comparisons. +let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], - [ llvm_anyvector_ty, - LLVMMatchType<0>, - llvm_metadata_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_icmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], - [ llvm_anyvector_ty, - LLVMMatchType<0>, - llvm_metadata_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +} - // Reductions +// Reductions +let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, 
llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umax : 
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [ LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; } def int_get_active_lane_mask: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5adf44f145ba8c..473b3cb74be832 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4599,38 +4599,13 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Operator *Inst = dyn_cast(V); if (!Inst) return false; - return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI); -} - -bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, - const Operator *Inst, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { - if (Inst->getOpcode() != Opcode) { - // Check that the operands are actually compatible with the Opcode override. 
- auto hasEqualReturnAndLeadingOperandTypes = - [](const Operator *Inst, unsigned NumLeadingOperands) { - if (Inst->getNumOperands() < NumLeadingOperands) - return false; - const Type *ExpectedType = Inst->getType(); - for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp) - if (Inst->getOperand(ItOp)->getType() != ExpectedType) - return false; - return true; - }; - assert(!Instruction::isBinaryOp(Opcode) || - hasEqualReturnAndLeadingOperandTypes(Inst, 2)); - assert(!Instruction::isUnaryOp(Opcode) || - hasEqualReturnAndLeadingOperandTypes(Inst, 1)); - } for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast(Inst->getOperand(i))) if (C->canTrap()) return false; - switch (Opcode) { + switch (Inst->getOpcode()) { default: return true; case Instruction::UDiv: @@ -4661,9 +4636,7 @@ bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, return false; } case Instruction::Load: { - const LoadInst *LI = dyn_cast(Inst); - if (!LI) - return false; + const LoadInst *LI = cast(Inst); if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); @@ -4672,9 +4645,7 @@ bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, TLI); } case Instruction::Call: { - auto *CI = dyn_cast(Inst); - if (!CI) - return false; + auto *CI = cast(Inst); const Function *Callee = CI->getCalledFunction(); // The called function could have undefined behavior or side-effects, even diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index fbb89a7d68d952..aa52914f25b054 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -118,10 +118,10 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) { if (isa(VPI)) return false; // Fallback to whether the intrinsic is speculatable. 
- Optional OpcOpt = VPI.getFunctionalOpcode(); - unsigned FunctionalOpc = OpcOpt.getValueOr((unsigned)Instruction::Call); - return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, - cast(&VPI)); + // FIXME: Check whether the replacing non-VP code will be speculatable + // instead. VP intrinsics themselves are never speculatable because of + // UB if %evl is greater than the runtime vector length. + return isSafeToSpeculativelyExecute(cast(&VPI)); } //// } Helpers @@ -481,7 +481,7 @@ struct TransformJob { }; void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { - // Operations with speculatable lanes do not strictly need predication. + // Speculatable instructions do not strictly need predication. if (maySpeculateLanes(VPI)) { // Converting a speculatable VP intrinsic means dropping %mask and %evl. // No need to expand %evl into the %mask only to ignore that code. From 18c1ee04de448a6a4babbaf8e5ba42866339b466 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Mon, 30 May 2022 14:38:59 +0200 Subject: [PATCH 826/908] Re-land "[VP] vp intrinsics are not speculatable" with test fix Update the llvmir-intrinsics.mlir test to account for the modified attribute sets. This reverts commit 2e2a8a2d9082250e4aad312c6008a526f2b007c7. 
--- llvm/include/llvm/Analysis/ValueTracking.h | 22 + llvm/include/llvm/IR/Intrinsics.td | 380 +++++++++--------- llvm/lib/Analysis/ValueTracking.cpp | 35 +- llvm/lib/CodeGen/ExpandVectorPredication.cpp | 10 +- .../test/Target/LLVMIR/llvmir-intrinsics.mlir | 80 ++-- 5 files changed, 282 insertions(+), 245 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index b97d6285ea5ead..3b29bf1d53b47b 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -463,6 +463,28 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); + /// This returns the same result as isSafeToSpeculativelyExecute if Opcode is + /// the actual opcode of Inst. If the provided and actual opcode differ, the + /// function (virtually) overrides the opcode of Inst with the provided + /// Opcode. There are come constraints in this case: + /// * If Opcode has a fixed number of operands (eg, as binary operators do), + /// then Inst has to have at least as many leading operands. The function + /// will ignore all trailing operands beyond that number. + /// * If Opcode allows for an arbitrary number of operands (eg, as CallInsts + /// do), then all operands are considered. + /// * The virtual instruction has to satisfy all typing rules of the provided + /// Opcode. + /// * This function is pessimistic in the following sense: If one actually + /// materialized the virtual instruction, then isSafeToSpeculativelyExecute + /// may say that the materialized instruction is speculatable whereas this + /// function may have said that the instruction wouldn't be speculatable. + /// This behavior is a shortcoming in the current implementation and not + /// intentional. 
+ bool isSafeToSpeculativelyExecuteWithOpcode( + unsigned Opcode, const Operator *Inst, const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); + /// Returns true if the result or effects of the given instructions \p I /// depend values not reachable through the def use graph. /// * Memory dependence arises for example if the instruction reads from diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index cdbbe015892945..c40f0d4ca412ce 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1415,11 +1415,11 @@ def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty], [ IntrReadMem, IntrNoSync, IntrWillReturn, IntrArgMemOnly ]>; def int_vp_scatter: DefaultAttrsIntrinsic<[], - [ llvm_anyvector_ty, - LLVMVectorOfAnyPointersToElt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty], - [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers + [ llvm_anyvector_ty, + LLVMVectorOfAnyPointersToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers // Experimental strided memory accesses def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[], @@ -1437,8 +1437,9 @@ def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty llvm_i32_ty], [ NoCapture>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>; -// Speculatable Binary operators -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { +// Operators +let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Integer arithmetic def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1450,30 +1451,30 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1484,35 +1485,28 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; -} - -// Non-speculatable binary operators. 
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; -// Floating-point arithmetic. 
-let IntrProperties = - [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Floating-point arithmetic def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1524,177 +1518,169 @@ let IntrProperties = LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; def int_vp_fmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_frem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fneg : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - + [ LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; -// Casts. 
-def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - -// Shuffles. 
-def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; - -def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_i32_ty]>; - -// Comparisons. -let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { - def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], - [ llvm_anyvector_ty, - LLVMMatchType<0>, - llvm_metadata_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + // Casts + def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, 
llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + // Shuffles + def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + + // Comparisons + def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_icmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], - [ llvm_anyvector_ty, - LLVMMatchType<0>, - llvm_metadata_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; -} + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; -// Reductions -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + // Reductions def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def 
int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ 
LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], - [LLVMVectorElementType<0>, - llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; + [ LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; } def int_get_active_lane_mask: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 473b3cb74be832..5adf44f145ba8c 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4599,13 +4599,38 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Operator *Inst = dyn_cast(V); if (!Inst) return false; + return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI); +} + +bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, + const Operator *Inst, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + if 
(Inst->getOpcode() != Opcode) { + // Check that the operands are actually compatible with the Opcode override. + auto hasEqualReturnAndLeadingOperandTypes = + [](const Operator *Inst, unsigned NumLeadingOperands) { + if (Inst->getNumOperands() < NumLeadingOperands) + return false; + const Type *ExpectedType = Inst->getType(); + for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp) + if (Inst->getOperand(ItOp)->getType() != ExpectedType) + return false; + return true; + }; + assert(!Instruction::isBinaryOp(Opcode) || + hasEqualReturnAndLeadingOperandTypes(Inst, 2)); + assert(!Instruction::isUnaryOp(Opcode) || + hasEqualReturnAndLeadingOperandTypes(Inst, 1)); + } for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast(Inst->getOperand(i))) if (C->canTrap()) return false; - switch (Inst->getOpcode()) { + switch (Opcode) { default: return true; case Instruction::UDiv: @@ -4636,7 +4661,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; } case Instruction::Load: { - const LoadInst *LI = cast(Inst); + const LoadInst *LI = dyn_cast(Inst); + if (!LI) + return false; if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); @@ -4645,7 +4672,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, TLI); } case Instruction::Call: { - auto *CI = cast(Inst); + auto *CI = dyn_cast(Inst); + if (!CI) + return false; const Function *Callee = CI->getCalledFunction(); // The called function could have undefined behavior or side-effects, even diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index aa52914f25b054..fbb89a7d68d952 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -118,10 +118,10 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) { if (isa(VPI)) return false; // Fallback to whether the intrinsic is speculatable. 
- // FIXME: Check whether the replacing non-VP code will be speculatable - // instead. VP intrinsics themselves are never speculatable because of - // UB if %evl is greater than the runtime vector length. - return isSafeToSpeculativelyExecute(cast(&VPI)); + Optional OpcOpt = VPI.getFunctionalOpcode(); + unsigned FunctionalOpc = OpcOpt.getValueOr((unsigned)Instruction::Call); + return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, + cast(&VPI)); } //// } Helpers @@ -481,7 +481,7 @@ struct TransformJob { }; void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { - // Speculatable instructions do not strictly need predication. + // Operations with speculatable lanes do not strictly need predication. if (maySpeculateLanes(VPI)) { // Converting a speculatable VP intrinsic means dropping %mask and %evl. // No need to expand %evl into the %mask only to ignore that code. diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index ea0b40b380ecb3..31f6d624bd816f 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -735,49 +735,49 @@ llvm.func @vector_predication_intrinsics(%A: vector<8xi32>, %B: vector<8xi32>, // CHECK-DAG: declare i1 @llvm.coro.end(i8*, i1) // CHECK-DAG: declare i8* @llvm.coro.free(token, i8* nocapture readonly) // CHECK-DAG: declare void @llvm.coro.resume(i8*) -// CHECK-DAG: declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 +// CHECK-DAG: declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 // CHECK-DAG: 
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 // CHECK-DAG: declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 // CHECK-DAG: declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 // CHECK-DAG: declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 -// CHECK-DAG: declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x float> @llvm.vp.fmul.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x float> @llvm.vp.frem.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.add.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.mul.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.and.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.or.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 
@llvm.vp.reduce.xor.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.smax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.smin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.umax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare i32 @llvm.vp.reduce.umin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0 -// CHECK-DAG: declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32) #0 -// CHECK-DAG: declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #12 -// CHECK-DAG: declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #12 +// CHECK-DAG: declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.fmul.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.frem.v8f32(<8 x float>, <8 
x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.add.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.mul.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.and.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.or.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.xor.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.smax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.smin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.umax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare i32 @llvm.vp.reduce.umin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #2 +// CHECK-DAG: declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #2 // CHECK-DAG: declare void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32(<8 x i32>, i32* nocapture, i32, <8 x i1>, i32) #4 // CHECK-DAG: declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32(i32* nocapture, i32, <8 x i1>, i32) #3 -// CHECK-DAG: declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32) #12 -// CHECK-DAG: declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, 
i32) #12 -// CHECK-DAG: declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) #12 -// CHECK-DAG: declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32) #12 -// CHECK-DAG: declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32) #12 -// CHECK-DAG: declare <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #12 -// CHECK-DAG: declare <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #12 -// CHECK-DAG: declare <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*>, <8 x i1>, i32) #12 -// CHECK-DAG: declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64>, <8 x i1>, i32) #12 +// CHECK-DAG: declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*>, <8 x i1>, i32) #2 +// CHECK-DAG: declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64>, <8 x i1>, i32) #2 From 3e6ba89055c80e6360b7605464520711b30084a6 Mon Sep 17 00:00:00 2001 From: zhongyunde Date: Mon, 30 May 2022 21:04:40 +0800 Subject: [PATCH 827/908] [InstCombine] Fold a mul with bool value into and Fixes https://github.com/llvm/llvm-project/issues/55599 X * Y --> X & Y, iff X, Y can be only {0, 1}. 
https://alive2.llvm.org/ce/z/_RsTKF Reviewed By: spatel, nikic Differential Revision: https://reviews.llvm.org/D126040 --- .../InstCombine/InstCombineMulDivRem.cpp | 10 ++++++-- .../Transforms/InstCombine/mul-masked-bits.ll | 23 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 897580a4148768..da31d6260c88ba 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -301,9 +301,15 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { } } - // i1 mul -> i1 and. + // Fold the following two scenarios: + // 1) i1 mul -> i1 and. + // 2) X * Y --> X & Y, iff X, Y can be only {0,1}. + // Note: We could use known bits to generalize this and related patterns with + // shifts/truncs Type *Ty = I.getType(); - if (Ty->isIntOrIntVectorTy(1)) + if (Ty->isIntOrIntVectorTy(1) || + (match(Op0, m_And(m_Value(), m_One())) && + match(Op1, m_And(m_Value(), m_One())))) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll index 553788561f759e..9b7caff982865c 100644 --- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll +++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll @@ -169,13 +169,31 @@ define i33 @squared_demanded_3_low_bits(i33 %x) { ; Scalar tests define i64 @scalar_mul_bit_x0_y0(i64 %x, i64 %y) { ; CHECK-LABEL: @scalar_mul_bit_x0_y0( +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 1 +; CHECK-NEXT: [[MUL:%.*]] = and i64 [[AND2]], [[X:%.*]] +; CHECK-NEXT: ret i64 [[MUL]] +; + %and1 = and i64 %x, 1 + %and2 = and i64 %y, 1 + %mul = mul i64 %and1, %and2 + ret i64 %mul +} + +declare void @use(i64) + +define i64 @scalar_mul_bit_x0_y0_uses(i64 %x, i64 %y) { +; CHECK-LABEL: @scalar_mul_bit_x0_y0_uses( ; 
CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1 +; CHECK-NEXT: call void @use(i64 [[AND1]]) ; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 1 -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[AND1]], [[AND2]] +; CHECK-NEXT: call void @use(i64 [[AND2]]) +; CHECK-NEXT: [[MUL:%.*]] = and i64 [[AND2]], [[X]] ; CHECK-NEXT: ret i64 [[MUL]] ; %and1 = and i64 %x, 1 + call void @use(i64 %and1) %and2 = and i64 %y, 1 + call void @use(i64 %and2) %mul = mul i64 %and1, %and2 ret i64 %mul } @@ -210,9 +228,8 @@ define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) { ; Vector tests define <2 x i64> @vector_mul_bit_x0_y0(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @vector_mul_bit_x0_y0( -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <2 x i64> [[AND1]], [[AND2]] +; CHECK-NEXT: [[MUL:%.*]] = and <2 x i64> [[AND2]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; %and1 = and <2 x i64> %x, From be3fc66f83b08a03ef3bc849ca3b37ff11e09199 Mon Sep 17 00:00:00 2001 From: Jake Egan Date: Mon, 30 May 2022 09:33:10 -0400 Subject: [PATCH 828/908] Revert "[clang][test] mark tests added in ee8524087c78 as unsupported on AIX" The tests pass now on a clean build. This reverts commit 1b34f1e996565bc5e4f2be14b89f881f8fe0f3b9. 
--- clang/test/Index/index-concept-kind.cpp | 2 +- clang/test/Index/index-concepts.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Index/index-concept-kind.cpp b/clang/test/Index/index-concept-kind.cpp index f33fc6bc5cc0d2..7aaf814f5f989a 100644 --- a/clang/test/Index/index-concept-kind.cpp +++ b/clang/test/Index/index-concept-kind.cpp @@ -1,5 +1,5 @@ // RUN: c-index-test -index-file %s -std=gnu++20 | FileCheck %s -// UNSUPPORTED: aix + template concept LargeType = sizeof(T) > 8; // CHECK: [indexDeclaration]: kind: concept | name: LargeType | USR: c:@CT@LargeType | lang: C | cursor: ConceptDecl=LargeType:[[@LINE-1]]:9 (Definition) | loc: [[@LINE-1]]:9 | semantic-container: [TU] | lexical-container: [TU] | isRedecl: 0 | isDef: 1 | isContainer: 0 | isImplicit: 0 diff --git a/clang/test/Index/index-concepts.cpp b/clang/test/Index/index-concepts.cpp index b8d41e841e6195..d29b9dcf79522b 100644 --- a/clang/test/Index/index-concepts.cpp +++ b/clang/test/Index/index-concepts.cpp @@ -1,5 +1,5 @@ // RUN: c-index-test -test-load-source all %s -std=gnu++20 -fno-delayed-template-parsing | FileCheck %s -// UNSUPPORTED: aix + template struct type_trait { const static bool value = false; From 1f1de06165bb39f66049cdc942151fc6ed193123 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 May 2022 15:33:23 +0200 Subject: [PATCH 829/908] [SimplifyCFG] Add test for invoke of nounwind non-willreturn function (NFC) Test both the case with and without willreturn attribute. 
--- llvm/test/Transforms/SimplifyCFG/invoke.ll | 43 ++++++++++++++++------ 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/SimplifyCFG/invoke.ll b/llvm/test/Transforms/SimplifyCFG/invoke.ll index e3bee6f8ac432d..ef0067f1893390 100644 --- a/llvm/test/Transforms/SimplifyCFG/invoke.ll +++ b/llvm/test/Transforms/SimplifyCFG/invoke.ll @@ -4,8 +4,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 declare i32 @__gxx_personality_v0(...) declare void @__cxa_call_unexpected(i8*) -declare void @purefn() nounwind readnone -declare i32 @read_only() nounwind readonly +declare void @purefn() nounwind readnone willreturn +declare i32 @readonly() nounwind readonly +declare i32 @readonly_willreturn() nounwind readonly willreturn declare i32 @nounwind_fn() nounwind declare i32 @fn() @@ -61,7 +62,7 @@ define i8* @f2_no_null_opt() nounwind uwtable ssp #0 personality i8* bitcast (i3 ; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: filter [0 x i8*] zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i8*, i32 } [[TMP0]], 0 -; CHECK-NEXT: tail call void @__cxa_call_unexpected(i8* [[TMP1]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: tail call void @__cxa_call_unexpected(i8* [[TMP1]]) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: unreachable ; entry: @@ -79,13 +80,13 @@ lpad: unreachable } -define i32 @f3() nounwind uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: @f3( +define i32 @invoke_readonly_may_not_return() nounwind personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @invoke_readonly_may_not_return( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret i32 3 ; entry: - %call = invoke i32 @read_only() + %call = invoke i32 @readonly() to label %invoke.cont unwind label %lpad invoke.cont: @@ -99,14 +100,34 @@ lpad: unreachable } -define i32 @f4() nounwind uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: 
@f4( +define i32 @invoke_readonly_willreturn() nounwind personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @invoke_readonly_willreturn( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @read_only() +; CHECK-NEXT: ret i32 3 +; +entry: + %call = invoke i32 @readonly_willreturn() + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 3 + +lpad: + %0 = landingpad { i8*, i32 } + filter [0 x i8*] zeroinitializer + %1 = extractvalue { i8*, i32 } %0, 0 + tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind + unreachable +} + +define i32 @invoke_readonly_willreturn_with_used_retval() nounwind personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @invoke_readonly_willreturn_with_used_retval( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @readonly_willreturn() ; CHECK-NEXT: ret i32 [[CALL]] ; entry: - %call = invoke i32 @read_only() + %call = invoke i32 @readonly_willreturn() to label %invoke.cont unwind label %lpad invoke.cont: @@ -136,7 +157,7 @@ define i32 @f5(i1 %cond, i8* %a, i8* %b) personality i8* bitcast (i32 (...)* @__ ; CHECK: lpad: ; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: filter [0 x i8*] zeroinitializer -; CHECK-NEXT: tail call void @__cxa_call_unexpected(i8* [[A:%.*]]) #[[ATTR6]] +; CHECK-NEXT: tail call void @__cxa_call_unexpected(i8* [[A:%.*]]) #[[ATTR7]] ; CHECK-NEXT: unreachable ; entry: From 2e101cca690645d63ae4de1eb7b0e11d322448fd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 May 2022 15:37:46 +0200 Subject: [PATCH 830/908] [Local] Don't remove invoke of non-willreturn function The code was only checking for memory side-effects, but not for divergence side-effects. Replace this with a generic check. 
--- llvm/lib/Transforms/Utils/Local.cpp | 2 +- llvm/test/Transforms/SimplifyCFG/invoke.ll | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 5e37f387c19b27..a0609750852bdd 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2366,7 +2366,7 @@ static bool markAliveBlocks(Function &F, Changed = true; } if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { - if (II->use_empty() && II->onlyReadsMemory()) { + if (II->use_empty() && !II->mayHaveSideEffects()) { // jump to the normal destination branch. BasicBlock *NormalDestBB = II->getNormalDest(); BasicBlock *UnwindDestBB = II->getUnwindDest(); diff --git a/llvm/test/Transforms/SimplifyCFG/invoke.ll b/llvm/test/Transforms/SimplifyCFG/invoke.ll index ef0067f1893390..fdbb9e6fbca6fc 100644 --- a/llvm/test/Transforms/SimplifyCFG/invoke.ll +++ b/llvm/test/Transforms/SimplifyCFG/invoke.ll @@ -83,6 +83,7 @@ lpad: define i32 @invoke_readonly_may_not_return() nounwind personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @invoke_readonly_may_not_return( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @readonly() ; CHECK-NEXT: ret i32 3 ; entry: From 590fd54ca1e7d82a3c5f519fe08decaafd9bcde8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 May 2022 16:40:18 +0200 Subject: [PATCH 831/908] [InstCombine] Add tests for inbounds handling in loop invariant GEP fold (NFC) --- .../InstCombine/gep-combine-loop-invariant.ll | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index 848ce8cb64b2ee..adfe20bb2ad772 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -254,3 +254,88 @@ for.body.i: ; preds = 
%for.cond.i %add11.i = add nsw i64 %idx, 1 br label %for.cond.i } + +declare void @use(i8*) + +define void @only_one_inbounds(i8* %ptr, i1 %c, i32 noundef %arg1, i32 noundef %arg2) { +; CHECK-LABEL: @only_one_inbounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARG2_EXT:%.*]] = zext i32 [[ARG2:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[ARG1_EXT:%.*]] = zext i32 [[ARG1:%.*]] to i64 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[ARG2_EXT]] +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, i8* [[PTR2]], i64 [[ARG1_EXT]] +; CHECK-NEXT: call void @use(i8* [[PTR3]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %arg2.ext = zext i32 %arg2 to i64 + br label %loop + +loop: + %arg1.ext = zext i32 %arg1 to i64 + %ptr2 = getelementptr inbounds i8, i8* %ptr, i64 %arg1.ext + %ptr3 = getelementptr i8, i8* %ptr2, i64 %arg2.ext + call void @use(i8* %ptr3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @both_inbounds_one_neg(i8* %ptr, i1 %c, i32 noundef %arg) { +; CHECK-LABEL: @both_inbounds_one_neg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG:%.*]] to i64 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 -1 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, i8* [[PTR2]], i64 [[ARG_EXT]] +; CHECK-NEXT: call void @use(i8* nonnull [[PTR3]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %arg.ext = zext i32 %arg to i64 + %ptr2 = getelementptr inbounds i8, i8* %ptr, i64 %arg.ext + %ptr3 = getelementptr inbounds i8, i8* %ptr2, i64 -1 + call void @use(i8* %ptr3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @both_inbounds_pos(i8* %ptr, i1 %c, i32 noundef %arg) { +; CHECK-LABEL: 
@both_inbounds_pos( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG:%.*]] to i64 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 1 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, i8* [[PTR2]], i64 [[ARG_EXT]] +; CHECK-NEXT: call void @use(i8* nonnull [[PTR3]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %arg.ext = zext i32 %arg to i64 + %ptr2 = getelementptr inbounds i8, i8* %ptr, i64 %arg.ext + %ptr3 = getelementptr inbounds i8, i8* %ptr2, i64 1 + call void @use(i8* %ptr3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} From 2d7bab666f5a71a70da0ed7925fda5ce634d21e5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 May 2022 16:46:53 +0200 Subject: [PATCH 832/908] [InstCombine] Always create new GEPs when swapping GEPs As the long explanatory comment attests, performing the modification in place is pretty tricky. Drop this unnecessary complexity and always create new instructions. This should be NFC-ish, but can probably cause difference due to worklist order. --- .../InstCombine/InstructionCombining.cpp | 46 ++++--------------- .../InstCombine/gep-combine-loop-invariant.ll | 22 ++++----- 2 files changed, 20 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index cd1e4f409d9e00..ad17f8ab5394a8 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1969,43 +1969,15 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { - // We have to be careful here. 
- // We have something like: - // %src = getelementptr , * %base, %idx - // %gep = getelementptr , * %src, %idx2 - // If we just swap idx & idx2 then we could inadvertantly - // change %src from a vector to a scalar, or vice versa. - // Cases: - // 1) %base a scalar & idx a scalar & idx2 a vector - // => Swapping idx & idx2 turns %src into a vector type. - // 2) %base a scalar & idx a vector & idx2 a scalar - // => Swapping idx & idx2 turns %src in a scalar type - // 3) %base, %idx, and %idx2 are scalars - // => %src & %gep are scalars - // => swapping idx & idx2 is safe - // 4) %base a vector - // => %src is a vector - // => swapping idx & idx2 is safe. - auto *SO0 = Src->getOperand(0); - auto *SO0Ty = SO0->getType(); - if (!isa(GEP.getType()) || // case 3 - isa(SO0Ty)) { // case 4 - Src->setOperand(1, GO1); - GEP.setOperand(1, SO1); - return &GEP; - } else { - // Case 1 or 2 - // -- have to recreate %src & %gep - // put NewSrc at same location as %src - Builder.SetInsertPoint(cast(Src)); - Value *NewSrc = - Builder.CreateGEP(GEP.getSourceElementType(), SO0, GO1, - Src->getName(), Src->isInBounds()); - GetElementPtrInst *NewGEP = GetElementPtrInst::Create( - GEP.getSourceElementType(), NewSrc, {SO1}); - NewGEP->setIsInBounds(GEP.isInBounds()); - return NewGEP; - } + // Put NewSrc at same location as %src. 
+ Builder.SetInsertPoint(cast(Src)); + Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(), + Src->getPointerOperand(), GO1, + Src->getName(), Src->isInBounds()); + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + GEP.getSourceElementType(), NewSrc, {SO1}); + NewGEP->setIsInBounds(GEP.isInBounds()); + return NewGEP; } } } diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index adfe20bb2ad772..5a98df1536ed30 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -20,9 +20,9 @@ define i32 @foo(i8* nocapture readnone %match, i32 %cur_match, i32 %best_len, i3 ; CHECK-NEXT: br label [[IF_THEN:%.*]] ; CHECK: do.body: ; CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[TMP4:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[WIN]], i64 [[IDX_EXT1]] -; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 -1 -; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR2]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, i8* [[WIN]], i64 [[IDX_EXT1]] +; CHECK-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR1]], i64 -1 +; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR22]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ADD_PTR3]] to i32* ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], [[SCAN_END]] @@ -164,8 +164,8 @@ define void @PR37005_3(<2 x i8*> %base, i8** %in) { ; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64> ; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], ; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], -; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, <2 x i8*> [[BASE:%.*]], i64 80 -; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, 
<2 x i8*> [[E5]], <2 x i64> [[SL1]] +; CHECK-NEXT: [[E51:%.*]] = getelementptr inbounds i8, <2 x i8*> [[BASE:%.*]], i64 80 +; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E51]], <2 x i64> [[SL1]] ; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]]) ; CHECK-NEXT: br label [[LOOP]] ; @@ -264,8 +264,8 @@ define void @only_one_inbounds(i8* %ptr, i1 %c, i32 noundef %arg1, i32 noundef % ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ARG1_EXT:%.*]] = zext i32 [[ARG1:%.*]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[ARG2_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, i8* [[PTR2]], i64 [[ARG1_EXT]] +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[ARG2_EXT]] +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, i8* [[PTR21]], i64 [[ARG1_EXT]] ; CHECK-NEXT: call void @use(i8* [[PTR3]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -292,8 +292,8 @@ define void @both_inbounds_one_neg(i8* %ptr, i1 %c, i32 noundef %arg) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG:%.*]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 -1 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, i8* [[PTR2]], i64 [[ARG_EXT]] +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 -1 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, i8* [[PTR21]], i64 [[ARG_EXT]] ; CHECK-NEXT: call void @use(i8* nonnull [[PTR3]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -319,8 +319,8 @@ define void @both_inbounds_pos(i8* %ptr, i1 %c, i32 noundef %arg) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG:%.*]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 1 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, i8* 
[[PTR2]], i64 [[ARG_EXT]] +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 1 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, i8* [[PTR21]], i64 [[ARG_EXT]] ; CHECK-NEXT: call void @use(i8* nonnull [[PTR3]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: From 314abe3922c1dc5050b3eefc09ff794ba4dd807c Mon Sep 17 00:00:00 2001 From: Arnamoy Bhattacharyya Date: Mon, 30 May 2022 10:10:54 -0400 Subject: [PATCH 833/908] [flang][OpenMP] Check for occurrence of multiple list items in nontemporal clause for simd directive This patch implements the following semantic check: A list-item cannot appear in more than one nontemporal clause. Reviewed By: kiranchandramohan, shraiysh Differential Revision: https://reviews.llvm.org/D110270 --- flang/lib/Semantics/check-omp-structure.cpp | 57 +++++++++++------ flang/lib/Semantics/check-omp-structure.h | 1 + flang/test/Semantics/omp-simd-nontemporal.f90 | 63 +++++++++++++++++++ 3 files changed, 103 insertions(+), 18 deletions(-) create mode 100644 flang/test/Semantics/omp-simd-nontemporal.f90 diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 83acaba367ffd2..38ca2e11ad36a9 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -170,6 +170,42 @@ bool OmpStructureChecker::IsCloselyNestedRegion(const OmpDirectiveSet &set) { return false; } +void OmpStructureChecker::CheckMultListItems() { + semantics::UnorderedSymbolSet listVars; + auto checkMultipleOcurrence = [&](const std::list &nameList, + const parser::CharBlock &item, + const std::string &clauseName) { + for (auto const &var : nameList) { + if (llvm::is_contained(listVars, *(var.symbol))) { + context_.Say(item, + "List item '%s' present at multiple %s clauses"_err_en_US, + var.ToString(), clauseName); + } + listVars.insert(*(var.symbol)); + } + }; + + // Aligned clause + auto 
alignedClauses{FindClauses(llvm::omp::Clause::OMPC_aligned)}; + for (auto itr = alignedClauses.first; itr != alignedClauses.second; ++itr) { + const auto &alignedClause{ + std::get(itr->second->u)}; + const auto &alignedNameList{ + std::get>(alignedClause.v.t)}; + checkMultipleOcurrence(alignedNameList, itr->second->source, "ALIGNED"); + } + + // Nontemporal clause + auto nonTemporalClauses{FindClauses(llvm::omp::Clause::OMPC_nontemporal)}; + for (auto itr = nonTemporalClauses.first; itr != nonTemporalClauses.second; + ++itr) { + const auto &nontempClause{ + std::get(itr->second->u)}; + const auto &nontempNameList{nontempClause.v}; + checkMultipleOcurrence(nontempNameList, itr->second->source, "NONTEMPORAL"); + } +} + bool OmpStructureChecker::HasInvalidWorksharingNesting( const parser::CharBlock &source, const OmpDirectiveSet &set) { // set contains all the invalid closely nested directives @@ -1660,24 +1696,9 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &) { } } } - // A list-item cannot appear in more than one aligned clause - semantics::UnorderedSymbolSet alignedVars; - auto clauseAll = FindClauses(llvm::omp::Clause::OMPC_aligned); - for (auto itr = clauseAll.first; itr != clauseAll.second; ++itr) { - const auto &alignedClause{ - std::get(itr->second->u)}; - const auto &alignedNameList{ - std::get>(alignedClause.v.t)}; - for (auto const &var : alignedNameList) { - if (alignedVars.count(*(var.symbol)) == 1) { - context_.Say(itr->second->source, - "List item '%s' present at multiple ALIGNED clauses"_err_en_US, - var.ToString()); - break; - } - alignedVars.insert(*(var.symbol)); - } - } + // Sema checks related to presence of multiple list items within the same + // clause + CheckMultListItems(); } // SIMD // 2.7.3 Single Construct Restriction diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 6352b80bfa6049..ccf3b06cf4d72d 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ 
b/flang/lib/Semantics/check-omp-structure.h @@ -191,6 +191,7 @@ class OmpStructureChecker } private: + void CheckMultListItems(); bool HasInvalidWorksharingNesting( const parser::CharBlock &, const OmpDirectiveSet &); bool IsCloselyNestedRegion(const OmpDirectiveSet &set); diff --git a/flang/test/Semantics/omp-simd-nontemporal.f90 b/flang/test/Semantics/omp-simd-nontemporal.f90 new file mode 100644 index 00000000000000..a6b7123651a66d --- /dev/null +++ b/flang/test/Semantics/omp-simd-nontemporal.f90 @@ -0,0 +1,63 @@ +! RUN: %python %S/test_errors.py %s %flang -fopenmp + +! OpenMP Version 4.5 +! 2.8.1 simd Construct +! Semantic error for correct test case + +program omp_simd + integer i, j, k + integer, allocatable :: a(:), b(:) + + allocate(a(10)) + allocate(b(10)) + + !ERROR: List item 'a' present at multiple NONTEMPORAL clauses + !$omp simd nontemporal(a, a) + do i = 1, 10 + a(i) = i + end do + !$omp end simd + + !ERROR: List item 'a' present at multiple NONTEMPORAL clauses + !ERROR: List item 'b' present at multiple NONTEMPORAL clauses + !$omp simd nontemporal(a,a) nontemporal(b) nontemporal(b) + do i = 1, 10 + a(i) = i + b(i) = i + end do + !$omp end simd + + !ERROR: List item 'a' present at multiple NONTEMPORAL clauses + !$omp simd nontemporal(a) nontemporal(a) + do i = 1, 10 + a(i) = i + end do + !$omp end simd + + !$omp simd nontemporal(a) nontemporal(b) + do i = 1, 10 + a(i) = i + b(i) = i + end do + !$omp end simd + + !ERROR: List item 'a' present at multiple NONTEMPORAL clauses + !$omp simd nontemporal(a) private(a) nontemporal(a) + do i = 1, 10 + a(i) = i + b(i) = i + end do + !$omp end simd + + !ERROR: List item 'a' present at multiple NONTEMPORAL clauses + !ERROR: List item 'b' present at multiple NONTEMPORAL clauses + !$omp simd nontemporal(a,a,b,b) + do i = 1, 10 + a(i) = i + b(i) = i + end do + !$omp end simd + + print *, a + +end program omp_simd From d2e3cb737417a2e5ffad34f666fa8510e88e8bc2 Mon Sep 17 00:00:00 2001 From: "Joel E. 
Denny" Date: Mon, 30 May 2022 11:01:10 -0400 Subject: [PATCH 834/908] [OpenMP][Clang] Fix atomic compare for signed vs. unsigned Without this patch, arguments to the `llvm::OpenMPIRBuilder::AtomicOpValue` initializer are reversed. Reviewed By: ABataev, tianshilei1992 Differential Revision: https://reviews.llvm.org/D126619 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 +- clang/test/OpenMP/atomic_compare_codegen.cpp | 480 +++++++++--------- .../offloading/atomic-compare-signedness.c | 42 ++ .../atomic/omp-atomic-compare-signedness.c | 40 ++ 4 files changed, 324 insertions(+), 242 deletions(-) create mode 100644 openmp/libomptarget/test/offloading/atomic-compare-signedness.c create mode 100644 openmp/runtime/test/atomic/omp-atomic-compare-signedness.c diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 61e3661e59be0d..f78443fd20bcd1 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6188,8 +6188,8 @@ static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF, llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{ XAddr.getPointer(), XAddr.getElementType(), - X->getType().isVolatileQualified(), - X->getType()->hasSignedIntegerRepresentation()}; + X->getType()->hasSignedIntegerRepresentation(), + X->getType().isVolatileQualified()}; CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare( CGF.Builder, XOpVal, EVal, DVal, AO, Op, IsXBinopExpr)); diff --git a/clang/test/OpenMP/atomic_compare_codegen.cpp b/clang/test/OpenMP/atomic_compare_codegen.cpp index cc36eae2cd50fd..825832399e3c49 100644 --- a/clang/test/OpenMP/atomic_compare_codegen.cpp +++ b/clang/test/OpenMP/atomic_compare_codegen.cpp @@ -1979,21 +1979,21 @@ void foo(void) { // CHECK-NEXT: [[ULLE:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[ULLD:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP0]] monotonic, align 1 +// CHECK-NEXT: [[TMP1:%.*]] = 
atomicrmw min i8* [[CX]], i8 [[TMP0]] monotonic, align 1 // CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP2]] monotonic, align 1 +// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP2]] monotonic, align 1 // CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP4]] monotonic, align 1 +// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP4]] monotonic, align 1 // CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP6]] monotonic, align 1 +// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP6]] monotonic, align 1 // CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP8]] monotonic, align 1 +// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP8]] monotonic, align 1 // CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP10]] monotonic, align 1 +// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP10]] monotonic, align 1 // CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP13:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP12]] monotonic, align 1 +// CHECK-NEXT: [[TMP13:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP12]] monotonic, align 1 // CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP15:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP14]] monotonic, align 1 +// CHECK-NEXT: [[TMP15:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP14]] monotonic, align 1 // CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[CE]], align 1 // CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[CD]], align 1 // CHECK-NEXT: [[TMP18:%.*]] = cmpxchg i8* [[CX]], i8 [[TMP16]], i8 [[TMP17]] monotonic monotonic, align 1 @@ -2035,28 +2035,28 @@ void foo(void) { // 
CHECK-NEXT: [[TMP54:%.*]] = load i8, i8* [[UCD]], align 1 // CHECK-NEXT: [[TMP55:%.*]] = cmpxchg i8* [[UCX]], i8 [[TMP53]], i8 [[TMP54]] monotonic monotonic, align 1 // CHECK-NEXT: [[TMP56:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP57:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP56]] acq_rel, align 1 +// CHECK-NEXT: [[TMP57:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP56]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK-NEXT: [[TMP58:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP59:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP58]] acq_rel, align 1 +// CHECK-NEXT: [[TMP59:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP58]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP60:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP61:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP60]] acq_rel, align 1 +// CHECK-NEXT: [[TMP61:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP60]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP62:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP63:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP62]] acq_rel, align 1 +// CHECK-NEXT: [[TMP63:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP62]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP64:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP65:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP64]] acq_rel, align 1 +// CHECK-NEXT: [[TMP65:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP64]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP66:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP67:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP66]] acq_rel, align 1 +// CHECK-NEXT: [[TMP67:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP66]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* 
@[[GLOB1]]) // CHECK-NEXT: [[TMP68:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP69:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP68]] acq_rel, align 1 +// CHECK-NEXT: [[TMP69:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP68]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP70:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP71:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP70]] acq_rel, align 1 +// CHECK-NEXT: [[TMP71:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP70]] acq_rel, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP72:%.*]] = load i8, i8* [[CE]], align 1 // CHECK-NEXT: [[TMP73:%.*]] = load i8, i8* [[CD]], align 1 @@ -2115,21 +2115,21 @@ void foo(void) { // CHECK-NEXT: [[TMP111:%.*]] = cmpxchg i8* [[UCX]], i8 [[TMP109]], i8 [[TMP110]] acq_rel acquire, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP112:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP113:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP112]] acquire, align 1 +// CHECK-NEXT: [[TMP113:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP112]] acquire, align 1 // CHECK-NEXT: [[TMP114:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP115:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP114]] acquire, align 1 +// CHECK-NEXT: [[TMP115:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP114]] acquire, align 1 // CHECK-NEXT: [[TMP116:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP117:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP116]] acquire, align 1 +// CHECK-NEXT: [[TMP117:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP116]] acquire, align 1 // CHECK-NEXT: [[TMP118:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP119:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP118]] acquire, align 1 +// CHECK-NEXT: [[TMP119:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP118]] acquire, align 1 // CHECK-NEXT: [[TMP120:%.*]] = load i8, i8* [[CE]], align 1 -// 
CHECK-NEXT: [[TMP121:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP120]] acquire, align 1 +// CHECK-NEXT: [[TMP121:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP120]] acquire, align 1 // CHECK-NEXT: [[TMP122:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP123:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP122]] acquire, align 1 +// CHECK-NEXT: [[TMP123:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP122]] acquire, align 1 // CHECK-NEXT: [[TMP124:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP125:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP124]] acquire, align 1 +// CHECK-NEXT: [[TMP125:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP124]] acquire, align 1 // CHECK-NEXT: [[TMP126:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP127:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP126]] acquire, align 1 +// CHECK-NEXT: [[TMP127:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP126]] acquire, align 1 // CHECK-NEXT: [[TMP128:%.*]] = load i8, i8* [[CE]], align 1 // CHECK-NEXT: [[TMP129:%.*]] = load i8, i8* [[CD]], align 1 // CHECK-NEXT: [[TMP130:%.*]] = cmpxchg i8* [[CX]], i8 [[TMP128]], i8 [[TMP129]] acquire acquire, align 1 @@ -2171,21 +2171,21 @@ void foo(void) { // CHECK-NEXT: [[TMP166:%.*]] = load i8, i8* [[UCD]], align 1 // CHECK-NEXT: [[TMP167:%.*]] = cmpxchg i8* [[UCX]], i8 [[TMP165]], i8 [[TMP166]] acquire acquire, align 1 // CHECK-NEXT: [[TMP168:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP169:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP168]] monotonic, align 1 +// CHECK-NEXT: [[TMP169:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP168]] monotonic, align 1 // CHECK-NEXT: [[TMP170:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP171:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP170]] monotonic, align 1 +// CHECK-NEXT: [[TMP171:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP170]] monotonic, align 1 // CHECK-NEXT: [[TMP172:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP173:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP172]] monotonic, align 1 +// 
CHECK-NEXT: [[TMP173:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP172]] monotonic, align 1 // CHECK-NEXT: [[TMP174:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP175:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP174]] monotonic, align 1 +// CHECK-NEXT: [[TMP175:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP174]] monotonic, align 1 // CHECK-NEXT: [[TMP176:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP177:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP176]] monotonic, align 1 +// CHECK-NEXT: [[TMP177:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP176]] monotonic, align 1 // CHECK-NEXT: [[TMP178:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP179:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP178]] monotonic, align 1 +// CHECK-NEXT: [[TMP179:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP178]] monotonic, align 1 // CHECK-NEXT: [[TMP180:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP181:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP180]] monotonic, align 1 +// CHECK-NEXT: [[TMP181:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP180]] monotonic, align 1 // CHECK-NEXT: [[TMP182:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP183:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP182]] monotonic, align 1 +// CHECK-NEXT: [[TMP183:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP182]] monotonic, align 1 // CHECK-NEXT: [[TMP184:%.*]] = load i8, i8* [[CE]], align 1 // CHECK-NEXT: [[TMP185:%.*]] = load i8, i8* [[CD]], align 1 // CHECK-NEXT: [[TMP186:%.*]] = cmpxchg i8* [[CX]], i8 [[TMP184]], i8 [[TMP185]] monotonic monotonic, align 1 @@ -2227,28 +2227,28 @@ void foo(void) { // CHECK-NEXT: [[TMP222:%.*]] = load i8, i8* [[UCD]], align 1 // CHECK-NEXT: [[TMP223:%.*]] = cmpxchg i8* [[UCX]], i8 [[TMP221]], i8 [[TMP222]] monotonic monotonic, align 1 // CHECK-NEXT: [[TMP224:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP225:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP224]] release, align 1 +// CHECK-NEXT: [[TMP225:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP224]] release, 
align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP226:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP227:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP226]] release, align 1 +// CHECK-NEXT: [[TMP227:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP226]] release, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP228:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP229:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP228]] release, align 1 +// CHECK-NEXT: [[TMP229:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP228]] release, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP230:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP231:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP230]] release, align 1 +// CHECK-NEXT: [[TMP231:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP230]] release, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP232:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP233:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP232]] release, align 1 +// CHECK-NEXT: [[TMP233:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP232]] release, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP234:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP235:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP234]] release, align 1 +// CHECK-NEXT: [[TMP235:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP234]] release, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP236:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP237:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP236]] release, align 1 +// CHECK-NEXT: [[TMP237:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP236]] release, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP238:%.*]] = load i8, i8* [[CE]], align 1 
-// CHECK-NEXT: [[TMP239:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP238]] release, align 1 +// CHECK-NEXT: [[TMP239:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP238]] release, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP240:%.*]] = load i8, i8* [[CE]], align 1 // CHECK-NEXT: [[TMP241:%.*]] = load i8, i8* [[CD]], align 1 @@ -2307,28 +2307,28 @@ void foo(void) { // CHECK-NEXT: [[TMP279:%.*]] = cmpxchg i8* [[UCX]], i8 [[TMP277]], i8 [[TMP278]] release monotonic, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP280:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP281:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP280]] seq_cst, align 1 +// CHECK-NEXT: [[TMP281:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP280]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP282:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP283:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP282]] seq_cst, align 1 +// CHECK-NEXT: [[TMP283:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP282]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP284:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP285:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP284]] seq_cst, align 1 +// CHECK-NEXT: [[TMP285:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP284]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP286:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP287:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP286]] seq_cst, align 1 +// CHECK-NEXT: [[TMP287:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP286]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP288:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP289:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP288]] seq_cst, align 1 +// CHECK-NEXT: 
[[TMP289:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP288]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP290:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP291:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP290]] seq_cst, align 1 +// CHECK-NEXT: [[TMP291:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP290]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP292:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP293:%.*]] = atomicrmw umax i8* [[CX]], i8 [[TMP292]] seq_cst, align 1 +// CHECK-NEXT: [[TMP293:%.*]] = atomicrmw max i8* [[CX]], i8 [[TMP292]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP294:%.*]] = load i8, i8* [[CE]], align 1 -// CHECK-NEXT: [[TMP295:%.*]] = atomicrmw umin i8* [[CX]], i8 [[TMP294]] seq_cst, align 1 +// CHECK-NEXT: [[TMP295:%.*]] = atomicrmw min i8* [[CX]], i8 [[TMP294]] seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP296:%.*]] = load i8, i8* [[CE]], align 1 // CHECK-NEXT: [[TMP297:%.*]] = load i8, i8* [[CD]], align 1 @@ -2387,21 +2387,21 @@ void foo(void) { // CHECK-NEXT: [[TMP335:%.*]] = cmpxchg i8* [[UCX]], i8 [[TMP333]], i8 [[TMP334]] seq_cst seq_cst, align 1 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP336:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP337:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP336]] monotonic, align 2 +// CHECK-NEXT: [[TMP337:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP336]] monotonic, align 2 // CHECK-NEXT: [[TMP338:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP339:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP338]] monotonic, align 2 +// CHECK-NEXT: [[TMP339:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP338]] monotonic, align 2 // CHECK-NEXT: [[TMP340:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: 
[[TMP341:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP340]] monotonic, align 2 +// CHECK-NEXT: [[TMP341:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP340]] monotonic, align 2 // CHECK-NEXT: [[TMP342:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP343:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP342]] monotonic, align 2 +// CHECK-NEXT: [[TMP343:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP342]] monotonic, align 2 // CHECK-NEXT: [[TMP344:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP345:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP344]] monotonic, align 2 +// CHECK-NEXT: [[TMP345:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP344]] monotonic, align 2 // CHECK-NEXT: [[TMP346:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP347:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP346]] monotonic, align 2 +// CHECK-NEXT: [[TMP347:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP346]] monotonic, align 2 // CHECK-NEXT: [[TMP348:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP349:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP348]] monotonic, align 2 +// CHECK-NEXT: [[TMP349:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP348]] monotonic, align 2 // CHECK-NEXT: [[TMP350:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP351:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP350]] monotonic, align 2 +// CHECK-NEXT: [[TMP351:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP350]] monotonic, align 2 // CHECK-NEXT: [[TMP352:%.*]] = load i16, i16* [[SE]], align 2 // CHECK-NEXT: [[TMP353:%.*]] = load i16, i16* [[SD]], align 2 // CHECK-NEXT: [[TMP354:%.*]] = cmpxchg i16* [[SX]], i16 [[TMP352]], i16 [[TMP353]] monotonic monotonic, align 2 @@ -2443,28 +2443,28 @@ void foo(void) { // CHECK-NEXT: [[TMP390:%.*]] = load i16, i16* [[USD]], align 2 // CHECK-NEXT: [[TMP391:%.*]] = cmpxchg i16* [[USX]], i16 [[TMP389]], i16 [[TMP390]] monotonic monotonic, align 2 // CHECK-NEXT: [[TMP392:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP393:%.*]] = atomicrmw 
umin i16* [[SX]], i16 [[TMP392]] acq_rel, align 2 +// CHECK-NEXT: [[TMP393:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP392]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP394:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP395:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP394]] acq_rel, align 2 +// CHECK-NEXT: [[TMP395:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP394]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP396:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP397:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP396]] acq_rel, align 2 +// CHECK-NEXT: [[TMP397:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP396]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP398:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP399:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP398]] acq_rel, align 2 +// CHECK-NEXT: [[TMP399:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP398]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP400:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP401:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP400]] acq_rel, align 2 +// CHECK-NEXT: [[TMP401:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP400]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP402:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP403:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP402]] acq_rel, align 2 +// CHECK-NEXT: [[TMP403:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP402]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP404:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP405:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP404]] acq_rel, align 2 +// CHECK-NEXT: [[TMP405:%.*]] = atomicrmw max i16* 
[[SX]], i16 [[TMP404]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP406:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP407:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP406]] acq_rel, align 2 +// CHECK-NEXT: [[TMP407:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP406]] acq_rel, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP408:%.*]] = load i16, i16* [[SE]], align 2 // CHECK-NEXT: [[TMP409:%.*]] = load i16, i16* [[SD]], align 2 @@ -2523,21 +2523,21 @@ void foo(void) { // CHECK-NEXT: [[TMP447:%.*]] = cmpxchg i16* [[USX]], i16 [[TMP445]], i16 [[TMP446]] acq_rel acquire, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP448:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP449:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP448]] acquire, align 2 +// CHECK-NEXT: [[TMP449:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP448]] acquire, align 2 // CHECK-NEXT: [[TMP450:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP451:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP450]] acquire, align 2 +// CHECK-NEXT: [[TMP451:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP450]] acquire, align 2 // CHECK-NEXT: [[TMP452:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP453:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP452]] acquire, align 2 +// CHECK-NEXT: [[TMP453:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP452]] acquire, align 2 // CHECK-NEXT: [[TMP454:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP455:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP454]] acquire, align 2 +// CHECK-NEXT: [[TMP455:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP454]] acquire, align 2 // CHECK-NEXT: [[TMP456:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP457:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP456]] acquire, align 2 +// CHECK-NEXT: [[TMP457:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP456]] 
acquire, align 2 // CHECK-NEXT: [[TMP458:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP459:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP458]] acquire, align 2 +// CHECK-NEXT: [[TMP459:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP458]] acquire, align 2 // CHECK-NEXT: [[TMP460:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP461:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP460]] acquire, align 2 +// CHECK-NEXT: [[TMP461:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP460]] acquire, align 2 // CHECK-NEXT: [[TMP462:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP463:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP462]] acquire, align 2 +// CHECK-NEXT: [[TMP463:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP462]] acquire, align 2 // CHECK-NEXT: [[TMP464:%.*]] = load i16, i16* [[SE]], align 2 // CHECK-NEXT: [[TMP465:%.*]] = load i16, i16* [[SD]], align 2 // CHECK-NEXT: [[TMP466:%.*]] = cmpxchg i16* [[SX]], i16 [[TMP464]], i16 [[TMP465]] acquire acquire, align 2 @@ -2579,21 +2579,21 @@ void foo(void) { // CHECK-NEXT: [[TMP502:%.*]] = load i16, i16* [[USD]], align 2 // CHECK-NEXT: [[TMP503:%.*]] = cmpxchg i16* [[USX]], i16 [[TMP501]], i16 [[TMP502]] acquire acquire, align 2 // CHECK-NEXT: [[TMP504:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP505:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP504]] monotonic, align 2 +// CHECK-NEXT: [[TMP505:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP504]] monotonic, align 2 // CHECK-NEXT: [[TMP506:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP507:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP506]] monotonic, align 2 +// CHECK-NEXT: [[TMP507:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP506]] monotonic, align 2 // CHECK-NEXT: [[TMP508:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP509:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP508]] monotonic, align 2 +// CHECK-NEXT: [[TMP509:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP508]] monotonic, align 2 // CHECK-NEXT: [[TMP510:%.*]] = 
load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP511:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP510]] monotonic, align 2 +// CHECK-NEXT: [[TMP511:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP510]] monotonic, align 2 // CHECK-NEXT: [[TMP512:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP513:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP512]] monotonic, align 2 +// CHECK-NEXT: [[TMP513:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP512]] monotonic, align 2 // CHECK-NEXT: [[TMP514:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP515:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP514]] monotonic, align 2 +// CHECK-NEXT: [[TMP515:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP514]] monotonic, align 2 // CHECK-NEXT: [[TMP516:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP517:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP516]] monotonic, align 2 +// CHECK-NEXT: [[TMP517:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP516]] monotonic, align 2 // CHECK-NEXT: [[TMP518:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP519:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP518]] monotonic, align 2 +// CHECK-NEXT: [[TMP519:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP518]] monotonic, align 2 // CHECK-NEXT: [[TMP520:%.*]] = load i16, i16* [[SE]], align 2 // CHECK-NEXT: [[TMP521:%.*]] = load i16, i16* [[SD]], align 2 // CHECK-NEXT: [[TMP522:%.*]] = cmpxchg i16* [[SX]], i16 [[TMP520]], i16 [[TMP521]] monotonic monotonic, align 2 @@ -2635,28 +2635,28 @@ void foo(void) { // CHECK-NEXT: [[TMP558:%.*]] = load i16, i16* [[USD]], align 2 // CHECK-NEXT: [[TMP559:%.*]] = cmpxchg i16* [[USX]], i16 [[TMP557]], i16 [[TMP558]] monotonic monotonic, align 2 // CHECK-NEXT: [[TMP560:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP561:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP560]] release, align 2 +// CHECK-NEXT: [[TMP561:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP560]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* 
@[[GLOB1]]) // CHECK-NEXT: [[TMP562:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP563:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP562]] release, align 2 +// CHECK-NEXT: [[TMP563:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP562]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP564:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP565:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP564]] release, align 2 +// CHECK-NEXT: [[TMP565:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP564]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP566:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP567:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP566]] release, align 2 +// CHECK-NEXT: [[TMP567:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP566]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP568:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP569:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP568]] release, align 2 +// CHECK-NEXT: [[TMP569:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP568]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP570:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP571:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP570]] release, align 2 +// CHECK-NEXT: [[TMP571:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP570]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP572:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP573:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP572]] release, align 2 +// CHECK-NEXT: [[TMP573:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP572]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP574:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: 
[[TMP575:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP574]] release, align 2 +// CHECK-NEXT: [[TMP575:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP574]] release, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP576:%.*]] = load i16, i16* [[SE]], align 2 // CHECK-NEXT: [[TMP577:%.*]] = load i16, i16* [[SD]], align 2 @@ -2715,28 +2715,28 @@ void foo(void) { // CHECK-NEXT: [[TMP615:%.*]] = cmpxchg i16* [[USX]], i16 [[TMP613]], i16 [[TMP614]] release monotonic, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP616:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP617:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP616]] seq_cst, align 2 +// CHECK-NEXT: [[TMP617:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP616]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP618:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP619:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP618]] seq_cst, align 2 +// CHECK-NEXT: [[TMP619:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP618]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP620:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP621:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP620]] seq_cst, align 2 +// CHECK-NEXT: [[TMP621:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP620]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP622:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP623:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP622]] seq_cst, align 2 +// CHECK-NEXT: [[TMP623:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP622]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP624:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP625:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP624]] seq_cst, 
align 2 +// CHECK-NEXT: [[TMP625:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP624]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP626:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP627:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP626]] seq_cst, align 2 +// CHECK-NEXT: [[TMP627:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP626]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP628:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP629:%.*]] = atomicrmw umax i16* [[SX]], i16 [[TMP628]] seq_cst, align 2 +// CHECK-NEXT: [[TMP629:%.*]] = atomicrmw max i16* [[SX]], i16 [[TMP628]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP630:%.*]] = load i16, i16* [[SE]], align 2 -// CHECK-NEXT: [[TMP631:%.*]] = atomicrmw umin i16* [[SX]], i16 [[TMP630]] seq_cst, align 2 +// CHECK-NEXT: [[TMP631:%.*]] = atomicrmw min i16* [[SX]], i16 [[TMP630]] seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP632:%.*]] = load i16, i16* [[SE]], align 2 // CHECK-NEXT: [[TMP633:%.*]] = load i16, i16* [[SD]], align 2 @@ -2795,21 +2795,21 @@ void foo(void) { // CHECK-NEXT: [[TMP671:%.*]] = cmpxchg i16* [[USX]], i16 [[TMP669]], i16 [[TMP670]] seq_cst seq_cst, align 2 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP672:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP673:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP672]] monotonic, align 4 +// CHECK-NEXT: [[TMP673:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP672]] monotonic, align 4 // CHECK-NEXT: [[TMP674:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP675:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP674]] monotonic, align 4 +// CHECK-NEXT: [[TMP675:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP674]] monotonic, align 4 // CHECK-NEXT: [[TMP676:%.*]] = 
load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP677:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP676]] monotonic, align 4 +// CHECK-NEXT: [[TMP677:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP676]] monotonic, align 4 // CHECK-NEXT: [[TMP678:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP679:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP678]] monotonic, align 4 +// CHECK-NEXT: [[TMP679:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP678]] monotonic, align 4 // CHECK-NEXT: [[TMP680:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP681:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP680]] monotonic, align 4 +// CHECK-NEXT: [[TMP681:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP680]] monotonic, align 4 // CHECK-NEXT: [[TMP682:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP683:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP682]] monotonic, align 4 +// CHECK-NEXT: [[TMP683:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP682]] monotonic, align 4 // CHECK-NEXT: [[TMP684:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP685:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP684]] monotonic, align 4 +// CHECK-NEXT: [[TMP685:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP684]] monotonic, align 4 // CHECK-NEXT: [[TMP686:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP687:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP686]] monotonic, align 4 +// CHECK-NEXT: [[TMP687:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP686]] monotonic, align 4 // CHECK-NEXT: [[TMP688:%.*]] = load i32, i32* [[IE]], align 4 // CHECK-NEXT: [[TMP689:%.*]] = load i32, i32* [[ID]], align 4 // CHECK-NEXT: [[TMP690:%.*]] = cmpxchg i32* [[IX]], i32 [[TMP688]], i32 [[TMP689]] monotonic monotonic, align 4 @@ -2851,28 +2851,28 @@ void foo(void) { // CHECK-NEXT: [[TMP726:%.*]] = load i32, i32* [[UID]], align 4 // CHECK-NEXT: [[TMP727:%.*]] = cmpxchg i32* [[UIX]], i32 [[TMP725]], i32 [[TMP726]] monotonic monotonic, align 4 // CHECK-NEXT: [[TMP728:%.*]] = load i32, i32* [[IE]], align 
4 -// CHECK-NEXT: [[TMP729:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP728]] acq_rel, align 4 +// CHECK-NEXT: [[TMP729:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP728]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP730:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP731:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP730]] acq_rel, align 4 +// CHECK-NEXT: [[TMP731:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP730]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP732:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP733:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP732]] acq_rel, align 4 +// CHECK-NEXT: [[TMP733:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP732]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP734:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP735:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP734]] acq_rel, align 4 +// CHECK-NEXT: [[TMP735:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP734]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP736:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP737:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP736]] acq_rel, align 4 +// CHECK-NEXT: [[TMP737:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP736]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP738:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP739:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP738]] acq_rel, align 4 +// CHECK-NEXT: [[TMP739:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP738]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP740:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP741:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP740]] acq_rel, align 4 +// 
CHECK-NEXT: [[TMP741:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP740]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP742:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP743:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP742]] acq_rel, align 4 +// CHECK-NEXT: [[TMP743:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP742]] acq_rel, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP744:%.*]] = load i32, i32* [[IE]], align 4 // CHECK-NEXT: [[TMP745:%.*]] = load i32, i32* [[ID]], align 4 @@ -2931,21 +2931,21 @@ void foo(void) { // CHECK-NEXT: [[TMP783:%.*]] = cmpxchg i32* [[UIX]], i32 [[TMP781]], i32 [[TMP782]] acq_rel acquire, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP784:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP785:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP784]] acquire, align 4 +// CHECK-NEXT: [[TMP785:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP784]] acquire, align 4 // CHECK-NEXT: [[TMP786:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP787:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP786]] acquire, align 4 +// CHECK-NEXT: [[TMP787:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP786]] acquire, align 4 // CHECK-NEXT: [[TMP788:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP789:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP788]] acquire, align 4 +// CHECK-NEXT: [[TMP789:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP788]] acquire, align 4 // CHECK-NEXT: [[TMP790:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP791:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP790]] acquire, align 4 +// CHECK-NEXT: [[TMP791:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP790]] acquire, align 4 // CHECK-NEXT: [[TMP792:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP793:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP792]] acquire, align 4 +// CHECK-NEXT: [[TMP793:%.*]] = 
atomicrmw min i32* [[IX]], i32 [[TMP792]] acquire, align 4 // CHECK-NEXT: [[TMP794:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP795:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP794]] acquire, align 4 +// CHECK-NEXT: [[TMP795:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP794]] acquire, align 4 // CHECK-NEXT: [[TMP796:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP797:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP796]] acquire, align 4 +// CHECK-NEXT: [[TMP797:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP796]] acquire, align 4 // CHECK-NEXT: [[TMP798:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP799:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP798]] acquire, align 4 +// CHECK-NEXT: [[TMP799:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP798]] acquire, align 4 // CHECK-NEXT: [[TMP800:%.*]] = load i32, i32* [[IE]], align 4 // CHECK-NEXT: [[TMP801:%.*]] = load i32, i32* [[ID]], align 4 // CHECK-NEXT: [[TMP802:%.*]] = cmpxchg i32* [[IX]], i32 [[TMP800]], i32 [[TMP801]] acquire acquire, align 4 @@ -2987,21 +2987,21 @@ void foo(void) { // CHECK-NEXT: [[TMP838:%.*]] = load i32, i32* [[UID]], align 4 // CHECK-NEXT: [[TMP839:%.*]] = cmpxchg i32* [[UIX]], i32 [[TMP837]], i32 [[TMP838]] acquire acquire, align 4 // CHECK-NEXT: [[TMP840:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP841:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP840]] monotonic, align 4 +// CHECK-NEXT: [[TMP841:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP840]] monotonic, align 4 // CHECK-NEXT: [[TMP842:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP843:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP842]] monotonic, align 4 +// CHECK-NEXT: [[TMP843:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP842]] monotonic, align 4 // CHECK-NEXT: [[TMP844:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP845:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP844]] monotonic, align 4 +// CHECK-NEXT: [[TMP845:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP844]] 
monotonic, align 4 // CHECK-NEXT: [[TMP846:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP847:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP846]] monotonic, align 4 +// CHECK-NEXT: [[TMP847:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP846]] monotonic, align 4 // CHECK-NEXT: [[TMP848:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP849:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP848]] monotonic, align 4 +// CHECK-NEXT: [[TMP849:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP848]] monotonic, align 4 // CHECK-NEXT: [[TMP850:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP851:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP850]] monotonic, align 4 +// CHECK-NEXT: [[TMP851:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP850]] monotonic, align 4 // CHECK-NEXT: [[TMP852:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP853:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP852]] monotonic, align 4 +// CHECK-NEXT: [[TMP853:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP852]] monotonic, align 4 // CHECK-NEXT: [[TMP854:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP855:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP854]] monotonic, align 4 +// CHECK-NEXT: [[TMP855:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP854]] monotonic, align 4 // CHECK-NEXT: [[TMP856:%.*]] = load i32, i32* [[IE]], align 4 // CHECK-NEXT: [[TMP857:%.*]] = load i32, i32* [[ID]], align 4 // CHECK-NEXT: [[TMP858:%.*]] = cmpxchg i32* [[IX]], i32 [[TMP856]], i32 [[TMP857]] monotonic monotonic, align 4 @@ -3043,28 +3043,28 @@ void foo(void) { // CHECK-NEXT: [[TMP894:%.*]] = load i32, i32* [[UID]], align 4 // CHECK-NEXT: [[TMP895:%.*]] = cmpxchg i32* [[UIX]], i32 [[TMP893]], i32 [[TMP894]] monotonic monotonic, align 4 // CHECK-NEXT: [[TMP896:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP897:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP896]] release, align 4 +// CHECK-NEXT: [[TMP897:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP896]] release, align 4 // CHECK-NEXT: 
call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP898:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP899:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP898]] release, align 4 +// CHECK-NEXT: [[TMP899:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP898]] release, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP900:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP901:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP900]] release, align 4 +// CHECK-NEXT: [[TMP901:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP900]] release, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP902:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP903:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP902]] release, align 4 +// CHECK-NEXT: [[TMP903:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP902]] release, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP904:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP905:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP904]] release, align 4 +// CHECK-NEXT: [[TMP905:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP904]] release, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP906:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP907:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP906]] release, align 4 +// CHECK-NEXT: [[TMP907:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP906]] release, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP908:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP909:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP908]] release, align 4 +// CHECK-NEXT: [[TMP909:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP908]] release, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP910:%.*]] = load i32, i32* 
[[IE]], align 4 -// CHECK-NEXT: [[TMP911:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP910]] release, align 4 +// CHECK-NEXT: [[TMP911:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP910]] release, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP912:%.*]] = load i32, i32* [[IE]], align 4 // CHECK-NEXT: [[TMP913:%.*]] = load i32, i32* [[ID]], align 4 @@ -3123,28 +3123,28 @@ void foo(void) { // CHECK-NEXT: [[TMP951:%.*]] = cmpxchg i32* [[UIX]], i32 [[TMP949]], i32 [[TMP950]] release monotonic, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP952:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP953:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP952]] seq_cst, align 4 +// CHECK-NEXT: [[TMP953:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP952]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP954:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP955:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP954]] seq_cst, align 4 +// CHECK-NEXT: [[TMP955:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP954]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP956:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP957:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP956]] seq_cst, align 4 +// CHECK-NEXT: [[TMP957:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP956]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP958:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP959:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP958]] seq_cst, align 4 +// CHECK-NEXT: [[TMP959:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP958]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP960:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP961:%.*]] = atomicrmw umin i32* 
[[IX]], i32 [[TMP960]] seq_cst, align 4 +// CHECK-NEXT: [[TMP961:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP960]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP962:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP963:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP962]] seq_cst, align 4 +// CHECK-NEXT: [[TMP963:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP962]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP964:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP965:%.*]] = atomicrmw umax i32* [[IX]], i32 [[TMP964]] seq_cst, align 4 +// CHECK-NEXT: [[TMP965:%.*]] = atomicrmw max i32* [[IX]], i32 [[TMP964]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP966:%.*]] = load i32, i32* [[IE]], align 4 -// CHECK-NEXT: [[TMP967:%.*]] = atomicrmw umin i32* [[IX]], i32 [[TMP966]] seq_cst, align 4 +// CHECK-NEXT: [[TMP967:%.*]] = atomicrmw min i32* [[IX]], i32 [[TMP966]] seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP968:%.*]] = load i32, i32* [[IE]], align 4 // CHECK-NEXT: [[TMP969:%.*]] = load i32, i32* [[ID]], align 4 @@ -3203,21 +3203,21 @@ void foo(void) { // CHECK-NEXT: [[TMP1007:%.*]] = cmpxchg i32* [[UIX]], i32 [[TMP1005]], i32 [[TMP1006]] seq_cst seq_cst, align 4 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1008:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1009:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1008]] monotonic, align 8 +// CHECK-NEXT: [[TMP1009:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1008]] monotonic, align 8 // CHECK-NEXT: [[TMP1010:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1011:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1010]] monotonic, align 8 +// CHECK-NEXT: [[TMP1011:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1010]] 
monotonic, align 8 // CHECK-NEXT: [[TMP1012:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1013:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1012]] monotonic, align 8 +// CHECK-NEXT: [[TMP1013:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1012]] monotonic, align 8 // CHECK-NEXT: [[TMP1014:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1015:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1014]] monotonic, align 8 +// CHECK-NEXT: [[TMP1015:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1014]] monotonic, align 8 // CHECK-NEXT: [[TMP1016:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1017:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1016]] monotonic, align 8 +// CHECK-NEXT: [[TMP1017:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1016]] monotonic, align 8 // CHECK-NEXT: [[TMP1018:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1019:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1018]] monotonic, align 8 +// CHECK-NEXT: [[TMP1019:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1018]] monotonic, align 8 // CHECK-NEXT: [[TMP1020:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1021:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1020]] monotonic, align 8 +// CHECK-NEXT: [[TMP1021:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1020]] monotonic, align 8 // CHECK-NEXT: [[TMP1022:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1023:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1022]] monotonic, align 8 +// CHECK-NEXT: [[TMP1023:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1022]] monotonic, align 8 // CHECK-NEXT: [[TMP1024:%.*]] = load i64, i64* [[LE]], align 8 // CHECK-NEXT: [[TMP1025:%.*]] = load i64, i64* [[LD]], align 8 // CHECK-NEXT: [[TMP1026:%.*]] = cmpxchg i64* [[LX]], i64 [[TMP1024]], i64 [[TMP1025]] monotonic monotonic, align 8 @@ -3259,28 +3259,28 @@ void foo(void) { // CHECK-NEXT: [[TMP1062:%.*]] = load i64, i64* [[ULD]], align 8 // CHECK-NEXT: [[TMP1063:%.*]] = cmpxchg i64* [[ULX]], i64 [[TMP1061]], i64 [[TMP1062]] 
monotonic monotonic, align 8 // CHECK-NEXT: [[TMP1064:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1065:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1064]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1065:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1064]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1066:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1067:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1066]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1067:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1066]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1068:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1069:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1068]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1069:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1068]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1070:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1071:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1070]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1071:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1070]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1072:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1073:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1072]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1073:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1072]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1074:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1075:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1074]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1075:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1074]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1076:%.*]] = load i64, 
i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1077:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1076]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1077:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1076]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1078:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1079:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1078]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1079:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1078]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1080:%.*]] = load i64, i64* [[LE]], align 8 // CHECK-NEXT: [[TMP1081:%.*]] = load i64, i64* [[LD]], align 8 @@ -3339,21 +3339,21 @@ void foo(void) { // CHECK-NEXT: [[TMP1119:%.*]] = cmpxchg i64* [[ULX]], i64 [[TMP1117]], i64 [[TMP1118]] acq_rel acquire, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1120:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1121:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1120]] acquire, align 8 +// CHECK-NEXT: [[TMP1121:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1120]] acquire, align 8 // CHECK-NEXT: [[TMP1122:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1123:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1122]] acquire, align 8 +// CHECK-NEXT: [[TMP1123:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1122]] acquire, align 8 // CHECK-NEXT: [[TMP1124:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1125:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1124]] acquire, align 8 +// CHECK-NEXT: [[TMP1125:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1124]] acquire, align 8 // CHECK-NEXT: [[TMP1126:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1127:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1126]] acquire, align 8 +// CHECK-NEXT: [[TMP1127:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1126]] acquire, align 8 // CHECK-NEXT: [[TMP1128:%.*]] = load 
i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1129:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1128]] acquire, align 8 +// CHECK-NEXT: [[TMP1129:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1128]] acquire, align 8 // CHECK-NEXT: [[TMP1130:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1131:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1130]] acquire, align 8 +// CHECK-NEXT: [[TMP1131:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1130]] acquire, align 8 // CHECK-NEXT: [[TMP1132:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1133:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1132]] acquire, align 8 +// CHECK-NEXT: [[TMP1133:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1132]] acquire, align 8 // CHECK-NEXT: [[TMP1134:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1135:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1134]] acquire, align 8 +// CHECK-NEXT: [[TMP1135:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1134]] acquire, align 8 // CHECK-NEXT: [[TMP1136:%.*]] = load i64, i64* [[LE]], align 8 // CHECK-NEXT: [[TMP1137:%.*]] = load i64, i64* [[LD]], align 8 // CHECK-NEXT: [[TMP1138:%.*]] = cmpxchg i64* [[LX]], i64 [[TMP1136]], i64 [[TMP1137]] acquire acquire, align 8 @@ -3395,21 +3395,21 @@ void foo(void) { // CHECK-NEXT: [[TMP1174:%.*]] = load i64, i64* [[ULD]], align 8 // CHECK-NEXT: [[TMP1175:%.*]] = cmpxchg i64* [[ULX]], i64 [[TMP1173]], i64 [[TMP1174]] acquire acquire, align 8 // CHECK-NEXT: [[TMP1176:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1177:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1176]] monotonic, align 8 +// CHECK-NEXT: [[TMP1177:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1176]] monotonic, align 8 // CHECK-NEXT: [[TMP1178:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1179:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1178]] monotonic, align 8 +// CHECK-NEXT: [[TMP1179:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1178]] monotonic, align 8 // CHECK-NEXT: [[TMP1180:%.*]] = load i64, i64* 
[[LE]], align 8 -// CHECK-NEXT: [[TMP1181:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1180]] monotonic, align 8 +// CHECK-NEXT: [[TMP1181:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1180]] monotonic, align 8 // CHECK-NEXT: [[TMP1182:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1183:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1182]] monotonic, align 8 +// CHECK-NEXT: [[TMP1183:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1182]] monotonic, align 8 // CHECK-NEXT: [[TMP1184:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1185:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1184]] monotonic, align 8 +// CHECK-NEXT: [[TMP1185:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1184]] monotonic, align 8 // CHECK-NEXT: [[TMP1186:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1187:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1186]] monotonic, align 8 +// CHECK-NEXT: [[TMP1187:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1186]] monotonic, align 8 // CHECK-NEXT: [[TMP1188:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1189:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1188]] monotonic, align 8 +// CHECK-NEXT: [[TMP1189:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1188]] monotonic, align 8 // CHECK-NEXT: [[TMP1190:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1191:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1190]] monotonic, align 8 +// CHECK-NEXT: [[TMP1191:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1190]] monotonic, align 8 // CHECK-NEXT: [[TMP1192:%.*]] = load i64, i64* [[LE]], align 8 // CHECK-NEXT: [[TMP1193:%.*]] = load i64, i64* [[LD]], align 8 // CHECK-NEXT: [[TMP1194:%.*]] = cmpxchg i64* [[LX]], i64 [[TMP1192]], i64 [[TMP1193]] monotonic monotonic, align 8 @@ -3451,28 +3451,28 @@ void foo(void) { // CHECK-NEXT: [[TMP1230:%.*]] = load i64, i64* [[ULD]], align 8 // CHECK-NEXT: [[TMP1231:%.*]] = cmpxchg i64* [[ULX]], i64 [[TMP1229]], i64 [[TMP1230]] monotonic monotonic, align 8 // CHECK-NEXT: [[TMP1232:%.*]] = load 
i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1233:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1232]] release, align 8 +// CHECK-NEXT: [[TMP1233:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1232]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1234:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1235:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1234]] release, align 8 +// CHECK-NEXT: [[TMP1235:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1234]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1236:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1237:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1236]] release, align 8 +// CHECK-NEXT: [[TMP1237:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1236]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1238:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1239:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1238]] release, align 8 +// CHECK-NEXT: [[TMP1239:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1238]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1240:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1241:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1240]] release, align 8 +// CHECK-NEXT: [[TMP1241:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1240]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1242:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1243:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1242]] release, align 8 +// CHECK-NEXT: [[TMP1243:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1242]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1244:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1245:%.*]] = atomicrmw 
umax i64* [[LX]], i64 [[TMP1244]] release, align 8 +// CHECK-NEXT: [[TMP1245:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1244]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1246:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1247:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1246]] release, align 8 +// CHECK-NEXT: [[TMP1247:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1246]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1248:%.*]] = load i64, i64* [[LE]], align 8 // CHECK-NEXT: [[TMP1249:%.*]] = load i64, i64* [[LD]], align 8 @@ -3531,28 +3531,28 @@ void foo(void) { // CHECK-NEXT: [[TMP1287:%.*]] = cmpxchg i64* [[ULX]], i64 [[TMP1285]], i64 [[TMP1286]] release monotonic, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1288:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1289:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1288]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1289:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1288]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1290:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1291:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1290]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1291:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1290]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1292:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1293:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1292]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1293:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1292]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1294:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1295:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1294]] seq_cst, 
align 8 +// CHECK-NEXT: [[TMP1295:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1294]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1296:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1297:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1296]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1297:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1296]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1298:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1299:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1298]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1299:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1298]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1300:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1301:%.*]] = atomicrmw umax i64* [[LX]], i64 [[TMP1300]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1301:%.*]] = atomicrmw max i64* [[LX]], i64 [[TMP1300]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1302:%.*]] = load i64, i64* [[LE]], align 8 -// CHECK-NEXT: [[TMP1303:%.*]] = atomicrmw umin i64* [[LX]], i64 [[TMP1302]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1303:%.*]] = atomicrmw min i64* [[LX]], i64 [[TMP1302]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1304:%.*]] = load i64, i64* [[LE]], align 8 // CHECK-NEXT: [[TMP1305:%.*]] = load i64, i64* [[LD]], align 8 @@ -3611,21 +3611,21 @@ void foo(void) { // CHECK-NEXT: [[TMP1343:%.*]] = cmpxchg i64* [[ULX]], i64 [[TMP1341]], i64 [[TMP1342]] seq_cst seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1344:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1345:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1344]] monotonic, align 8 +// CHECK-NEXT: [[TMP1345:%.*]] 
= atomicrmw min i64* [[LLX]], i64 [[TMP1344]] monotonic, align 8 // CHECK-NEXT: [[TMP1346:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1347:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1346]] monotonic, align 8 +// CHECK-NEXT: [[TMP1347:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1346]] monotonic, align 8 // CHECK-NEXT: [[TMP1348:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1349:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1348]] monotonic, align 8 +// CHECK-NEXT: [[TMP1349:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1348]] monotonic, align 8 // CHECK-NEXT: [[TMP1350:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1351:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1350]] monotonic, align 8 +// CHECK-NEXT: [[TMP1351:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1350]] monotonic, align 8 // CHECK-NEXT: [[TMP1352:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1353:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1352]] monotonic, align 8 +// CHECK-NEXT: [[TMP1353:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1352]] monotonic, align 8 // CHECK-NEXT: [[TMP1354:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1355:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1354]] monotonic, align 8 +// CHECK-NEXT: [[TMP1355:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1354]] monotonic, align 8 // CHECK-NEXT: [[TMP1356:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1357:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1356]] monotonic, align 8 +// CHECK-NEXT: [[TMP1357:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1356]] monotonic, align 8 // CHECK-NEXT: [[TMP1358:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1359:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1358]] monotonic, align 8 +// CHECK-NEXT: [[TMP1359:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1358]] monotonic, align 8 // CHECK-NEXT: [[TMP1360:%.*]] = load i64, i64* [[LLE]], align 8 // CHECK-NEXT: [[TMP1361:%.*]] = load i64, i64* 
[[LLD]], align 8 // CHECK-NEXT: [[TMP1362:%.*]] = cmpxchg i64* [[LLX]], i64 [[TMP1360]], i64 [[TMP1361]] monotonic monotonic, align 8 @@ -3667,28 +3667,28 @@ void foo(void) { // CHECK-NEXT: [[TMP1398:%.*]] = load i64, i64* [[ULLD]], align 8 // CHECK-NEXT: [[TMP1399:%.*]] = cmpxchg i64* [[ULLX]], i64 [[TMP1397]], i64 [[TMP1398]] monotonic monotonic, align 8 // CHECK-NEXT: [[TMP1400:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1401:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1400]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1401:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1400]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1402:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1403:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1402]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1403:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1402]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1404:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1405:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1404]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1405:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1404]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1406:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1407:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1406]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1407:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1406]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1408:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1409:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1408]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1409:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1408]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: 
[[TMP1410:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1411:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1410]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1411:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1410]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1412:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1413:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1412]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1413:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1412]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1414:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1415:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1414]] acq_rel, align 8 +// CHECK-NEXT: [[TMP1415:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1414]] acq_rel, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1416:%.*]] = load i64, i64* [[LLE]], align 8 // CHECK-NEXT: [[TMP1417:%.*]] = load i64, i64* [[LLD]], align 8 @@ -3747,21 +3747,21 @@ void foo(void) { // CHECK-NEXT: [[TMP1455:%.*]] = cmpxchg i64* [[ULLX]], i64 [[TMP1453]], i64 [[TMP1454]] acq_rel acquire, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1456:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1457:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1456]] acquire, align 8 +// CHECK-NEXT: [[TMP1457:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1456]] acquire, align 8 // CHECK-NEXT: [[TMP1458:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1459:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1458]] acquire, align 8 +// CHECK-NEXT: [[TMP1459:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1458]] acquire, align 8 // CHECK-NEXT: [[TMP1460:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1461:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1460]] acquire, align 8 +// CHECK-NEXT: 
[[TMP1461:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1460]] acquire, align 8 // CHECK-NEXT: [[TMP1462:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1463:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1462]] acquire, align 8 +// CHECK-NEXT: [[TMP1463:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1462]] acquire, align 8 // CHECK-NEXT: [[TMP1464:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1465:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1464]] acquire, align 8 +// CHECK-NEXT: [[TMP1465:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1464]] acquire, align 8 // CHECK-NEXT: [[TMP1466:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1467:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1466]] acquire, align 8 +// CHECK-NEXT: [[TMP1467:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1466]] acquire, align 8 // CHECK-NEXT: [[TMP1468:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1469:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1468]] acquire, align 8 +// CHECK-NEXT: [[TMP1469:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1468]] acquire, align 8 // CHECK-NEXT: [[TMP1470:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1471:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1470]] acquire, align 8 +// CHECK-NEXT: [[TMP1471:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1470]] acquire, align 8 // CHECK-NEXT: [[TMP1472:%.*]] = load i64, i64* [[LLE]], align 8 // CHECK-NEXT: [[TMP1473:%.*]] = load i64, i64* [[LLD]], align 8 // CHECK-NEXT: [[TMP1474:%.*]] = cmpxchg i64* [[LLX]], i64 [[TMP1472]], i64 [[TMP1473]] acquire acquire, align 8 @@ -3803,21 +3803,21 @@ void foo(void) { // CHECK-NEXT: [[TMP1510:%.*]] = load i64, i64* [[ULLD]], align 8 // CHECK-NEXT: [[TMP1511:%.*]] = cmpxchg i64* [[ULLX]], i64 [[TMP1509]], i64 [[TMP1510]] acquire acquire, align 8 // CHECK-NEXT: [[TMP1512:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1513:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1512]] monotonic, align 8 +// 
CHECK-NEXT: [[TMP1513:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1512]] monotonic, align 8 // CHECK-NEXT: [[TMP1514:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1515:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1514]] monotonic, align 8 +// CHECK-NEXT: [[TMP1515:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1514]] monotonic, align 8 // CHECK-NEXT: [[TMP1516:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1517:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1516]] monotonic, align 8 +// CHECK-NEXT: [[TMP1517:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1516]] monotonic, align 8 // CHECK-NEXT: [[TMP1518:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1519:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1518]] monotonic, align 8 +// CHECK-NEXT: [[TMP1519:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1518]] monotonic, align 8 // CHECK-NEXT: [[TMP1520:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1521:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1520]] monotonic, align 8 +// CHECK-NEXT: [[TMP1521:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1520]] monotonic, align 8 // CHECK-NEXT: [[TMP1522:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1523:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1522]] monotonic, align 8 +// CHECK-NEXT: [[TMP1523:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1522]] monotonic, align 8 // CHECK-NEXT: [[TMP1524:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1525:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1524]] monotonic, align 8 +// CHECK-NEXT: [[TMP1525:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1524]] monotonic, align 8 // CHECK-NEXT: [[TMP1526:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1527:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1526]] monotonic, align 8 +// CHECK-NEXT: [[TMP1527:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1526]] monotonic, align 8 // CHECK-NEXT: [[TMP1528:%.*]] = load i64, i64* [[LLE]], align 8 // CHECK-NEXT: 
[[TMP1529:%.*]] = load i64, i64* [[LLD]], align 8 // CHECK-NEXT: [[TMP1530:%.*]] = cmpxchg i64* [[LLX]], i64 [[TMP1528]], i64 [[TMP1529]] monotonic monotonic, align 8 @@ -3859,28 +3859,28 @@ void foo(void) { // CHECK-NEXT: [[TMP1566:%.*]] = load i64, i64* [[ULLD]], align 8 // CHECK-NEXT: [[TMP1567:%.*]] = cmpxchg i64* [[ULLX]], i64 [[TMP1565]], i64 [[TMP1566]] monotonic monotonic, align 8 // CHECK-NEXT: [[TMP1568:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1569:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1568]] release, align 8 +// CHECK-NEXT: [[TMP1569:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1568]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1570:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1571:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1570]] release, align 8 +// CHECK-NEXT: [[TMP1571:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1570]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1572:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1573:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1572]] release, align 8 +// CHECK-NEXT: [[TMP1573:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1572]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1574:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1575:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1574]] release, align 8 +// CHECK-NEXT: [[TMP1575:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1574]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1576:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1577:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1576]] release, align 8 +// CHECK-NEXT: [[TMP1577:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1576]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* 
@[[GLOB1]]) // CHECK-NEXT: [[TMP1578:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1579:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1578]] release, align 8 +// CHECK-NEXT: [[TMP1579:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1578]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1580:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1581:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1580]] release, align 8 +// CHECK-NEXT: [[TMP1581:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1580]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1582:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1583:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1582]] release, align 8 +// CHECK-NEXT: [[TMP1583:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1582]] release, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1584:%.*]] = load i64, i64* [[LLE]], align 8 // CHECK-NEXT: [[TMP1585:%.*]] = load i64, i64* [[LLD]], align 8 @@ -3939,28 +3939,28 @@ void foo(void) { // CHECK-NEXT: [[TMP1623:%.*]] = cmpxchg i64* [[ULLX]], i64 [[TMP1621]], i64 [[TMP1622]] release monotonic, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1624:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1625:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1624]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1625:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1624]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1626:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1627:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1626]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1627:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1626]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // 
CHECK-NEXT: [[TMP1628:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1629:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1628]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1629:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1628]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1630:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1631:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1630]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1631:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1630]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1632:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1633:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1632]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1633:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1632]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1634:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1635:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1634]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1635:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1634]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1636:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1637:%.*]] = atomicrmw umax i64* [[LLX]], i64 [[TMP1636]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1637:%.*]] = atomicrmw max i64* [[LLX]], i64 [[TMP1636]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1638:%.*]] = load i64, i64* [[LLE]], align 8 -// CHECK-NEXT: [[TMP1639:%.*]] = atomicrmw umin i64* [[LLX]], i64 [[TMP1638]] seq_cst, align 8 +// CHECK-NEXT: [[TMP1639:%.*]] = atomicrmw min i64* [[LLX]], i64 [[TMP1638]] seq_cst, align 8 // CHECK-NEXT: call void @__kmpc_flush(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP1640:%.*]] = load i64, i64* [[LLE]], 
align 8 // CHECK-NEXT: [[TMP1641:%.*]] = load i64, i64* [[LLD]], align 8 diff --git a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c new file mode 100644 index 00000000000000..c171a2e6124df1 --- /dev/null +++ b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c @@ -0,0 +1,42 @@ +// Check that omp atomic compare handles signedness of integer comparisons +// correctly. +// +// At one time, a bug sometimes reversed the signedness. + +// RUN: %libomptarget-compile-generic -fopenmp-version=51 +// RUN: %libomptarget-run-generic | %fcheck-generic + +// High parallelism increases our chances of detecting a lack of atomicity. +#define NUM_THREADS_TRY 256 + +#include +#include +#include + +int main() { + // CHECK: signed: num_threads=[[#NUM_THREADS:]]{{$}} + // CHECK-NEXT: signed: xs=[[#NUM_THREADS-1]]{{$}} + int xs = -1; + int numThreads; + #pragma omp target parallel for num_threads(NUM_THREADS_TRY) \ + map(tofrom:xs, numThreads) + for (int i = 0; i < omp_get_num_threads(); ++i) { + #pragma omp atomic compare + if (xs < i) { xs = i; } + if (i == 0) + numThreads = omp_get_num_threads(); + } + printf("signed: num_threads=%d\n", numThreads); + printf("signed: xs=%d\n", xs); + + // CHECK-NEXT: unsigned: xu=0x0{{$}} + unsigned xu = UINT_MAX; + #pragma omp target parallel for num_threads(NUM_THREADS_TRY) \ + map(tofrom:xu) + for (int i = 0; i < omp_get_num_threads(); ++i) { + #pragma omp atomic compare + if (xu > i) { xu = i; } + } + printf("unsigned: xu=0x%x\n", xu); + return 0; +} diff --git a/openmp/runtime/test/atomic/omp-atomic-compare-signedness.c b/openmp/runtime/test/atomic/omp-atomic-compare-signedness.c new file mode 100644 index 00000000000000..de0bfede28e5ae --- /dev/null +++ b/openmp/runtime/test/atomic/omp-atomic-compare-signedness.c @@ -0,0 +1,40 @@ +// Check that omp atomic compare handles signedness of integer comparisons +// correctly. 
+// +// At one time, a bug sometimes reversed the signedness. + +// RUN: %libomp-compile -fopenmp-version=51 +// RUN: %libomp-run | FileCheck %s + +// High parallelism increases our chances of detecting a lack of atomicity. +#define NUM_THREADS_TRY 256 + +#include +#include +#include + +int main() { + // CHECK: signed: num_threads=[[#NUM_THREADS:]]{{$}} + // CHECK-NEXT: signed: xs=[[#NUM_THREADS-1]]{{$}} + int xs = -1; + int numThreads; + #pragma omp parallel for num_threads(NUM_THREADS_TRY) + for (int i = 0; i < omp_get_num_threads(); ++i) { + #pragma omp atomic compare + if (xs < i) { xs = i; } + if (i == 0) + numThreads = omp_get_num_threads(); + } + printf("signed: num_threads=%d\n", numThreads); + printf("signed: xs=%d\n", xs); + + // CHECK-NEXT: unsigned: xu=0x0{{$}} + unsigned xu = UINT_MAX; + #pragma omp parallel for num_threads(NUM_THREADS_TRY) + for (int i = 0; i < omp_get_num_threads(); ++i) { + #pragma omp atomic compare + if (xu > i) { xu = i; } + } + printf("unsigned: xu=0x%x\n", xu); + return 0; +} From a770f534e6f647d1f6a25544117e7b3d8a0c3486 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 May 2022 16:52:11 +0200 Subject: [PATCH 835/908] [InstCombine] When swapping GEPs, only keep inbounds if both are If only one of the GEPs is inbounds, then after swapping, there is no guarantee that one of them will be inbounds as well (see e.g. https://alive2.llvm.org/ce/z/agaCnp). This is only a partial fix, because even if both are inbounds, the result is not necessarily inbounds (if the offsets have different signs). 
--- .../InstCombine/InstructionCombining.cpp | 5 +- .../InstCombine/gep-combine-loop-invariant.ll | 2 +- .../LoopVectorize/first-order-recurrence.ll | 873 +++++++++--------- 3 files changed, 441 insertions(+), 439 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index ad17f8ab5394a8..4e8f3af2b5beff 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1969,14 +1969,15 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { + bool IsInBounds = Src->isInBounds() && GEP.isInBounds(); // Put NewSrc at same location as %src. Builder.SetInsertPoint(cast(Src)); Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(), Src->getPointerOperand(), GO1, - Src->getName(), Src->isInBounds()); + Src->getName(), IsInBounds); GetElementPtrInst *NewGEP = GetElementPtrInst::Create( GEP.getSourceElementType(), NewSrc, {SO1}); - NewGEP->setIsInBounds(GEP.isInBounds()); + NewGEP->setIsInBounds(IsInBounds); return NewGEP; } } diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index 5a98df1536ed30..b17a9774c59ffc 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -264,7 +264,7 @@ define void @only_one_inbounds(i8* %ptr, i1 %c, i32 noundef %arg1, i32 noundef % ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ARG1_EXT:%.*]] = zext i32 [[ARG1:%.*]] to i64 -; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[ARG2_EXT]] +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i64 
[[ARG2_EXT]] ; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, i8* [[PTR21]], i64 [[ARG1_EXT]] ; CHECK-NEXT: call void @use(i8* [[PTR3]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 9288110b8ec8be..fd4b962a292d9a 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -39,18 +39,18 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* 
[[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 @@ -60,17 +60,17 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[SCALAR_BODY:%.*]] ; CHECK: scalar.body: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP15]] = load i32, i32* [[ARRAYIDX32]], align 4 +; CHECK-NEXT: [[TMP13]] = load i32, i32* [[ARRAYIDX32]], align 4 ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[ADD35:%.*]] = add i32 [[TMP15]], [[SCALAR_RECUR]] +; CHECK-NEXT: [[ADD35:%.*]] = add i32 
[[TMP13]], [[SCALAR_RECUR]] ; CHECK-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -97,47 +97,47 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] +; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 1 +; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] +; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 4 ; UNROLL-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 -; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 4 -; UNROLL-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4 -; UNROLL-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> 
[[WIDE_LOAD]], <4 x i32> -; UNROLL-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> -; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; UNROLL-NEXT: [[TMP15:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP12]] -; UNROLL-NEXT: [[TMP16:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[TMP13]] -; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 -; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 4 -; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4 +; UNROLL-NEXT: [[WIDE_LOAD3]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 +; UNROLL-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD3]], <4 x i32> +; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; UNROLL-NEXT: [[TMP13:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP10]] +; UNROLL-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD3]], [[TMP11]] +; UNROLL-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP15]], align 4 +; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 4 +; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i64 3 +; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 3 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL: scalar.body: -; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NEXT: [[TMP21]] = load i32, i32* [[ARRAYIDX32]], align 4 +; UNROLL-NEXT: [[TMP19]] = load i32, i32* [[ARRAYIDX32]], align 4 ; UNROLL-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; UNROLL-NEXT: [[ADD35:%.*]] = add i32 [[TMP21]], [[SCALAR_RECUR]] +; UNROLL-NEXT: [[ADD35:%.*]] = add i32 [[TMP19]], [[SCALAR_RECUR]] ; UNROLL-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], 
!llvm.loop [[LOOP7:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; UNROLL: for.exit: ; UNROLL-NEXT: ret void ; @@ -147,7 +147,7 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; UNROLL-NO-IC-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 ; UNROLL-NO-IC-NEXT: br label [[FOR_PREHEADER:%.*]] ; UNROLL-NO-IC: for.preheader: -; UNROLL-NO-IC-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0 +; UNROLL-NO-IC-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 0 ; UNROLL-NO-IC-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 @@ -166,55 +166,55 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP7]], 1 -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP8]], 1 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 
[[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP5]], 1 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP6]], 1 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP8]] -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP17]] -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[TMP18]] -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD3]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; 
UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD3]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP5]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP6]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP15]] +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add <4 x i32> [[WIDE_LOAD3]], [[TMP16]] +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP22]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP24]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP26]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i32 3 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> 
[[WIDE_LOAD7]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[TMP28]] = load i32, i32* [[ARRAYIDX32]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP26]] = load i32, i32* [[ARRAYIDX32]], align 4 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP28]], [[SCALAR_RECUR]] +; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP26]], [[SCALAR_RECUR]] ; UNROLL-NO-IC-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], 
label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; UNROLL-NO-IC: for.exit: ; UNROLL-NO-IC-NEXT: ret void ; @@ -224,7 +224,7 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; UNROLL-NO-VF-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 ; UNROLL-NO-VF-NEXT: br label [[FOR_PREHEADER:%.*]] ; UNROLL-NO-VF: for.preheader: -; UNROLL-NO-VF-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0 +; UNROLL-NO-VF-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 0 ; UNROLL-NO-VF-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 @@ -242,43 +242,43 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]] -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP12]] = load i32, i32* 
[[TMP10]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]] -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = add i32 [[TMP11]], [[VECTOR_RECUR]] -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = add i32 [[TMP12]], [[TMP11]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP15]], i32* [[TMP13]], align 4 -; UNROLL-NO-VF-NEXT: store i32 [[TMP16]], i32* [[TMP14]], align 4 +; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[INDUCTION3]], 1 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP10]] = load i32, i32* [[TMP8]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION3]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = add i32 [[TMP9]], [[VECTOR_RECUR]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = add i32 [[TMP10]], [[TMP9]] +; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], i32* [[TMP11]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP12]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp 
eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[TMP18]] = load i32, i32* [[ARRAYIDX32]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP16]] = load i32, i32* [[ARRAYIDX32]], align 4 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP18]], [[SCALAR_RECUR]] +; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP16]], [[SCALAR_RECUR]] ; UNROLL-NO-VF-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; 
UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; UNROLL-NO-VF: for.exit: ; UNROLL-NO-VF-NEXT: ret void ; @@ -308,21 +308,21 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 1 -; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]] -; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 -; SINK-AFTER-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4 -; SINK-AFTER-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP7]] -; SINK-AFTER-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP12]] -; SINK-AFTER-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0 -; SINK-AFTER-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 4 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 +; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] +; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0 +; SINK-AFTER-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 +; SINK-AFTER-NEXT: [[TMP10:%.*]] = shufflevector <4 x 
i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP5]] +; SINK-AFTER-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP10]] +; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 +; SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP14]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 @@ -333,17 +333,17 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; SINK-AFTER-NEXT: 
[[TMP18]] = load i32, i32* [[ARRAYIDX32]], align 4 +; SINK-AFTER-NEXT: [[TMP16]] = load i32, i32* [[ARRAYIDX32]], align 4 ; SINK-AFTER-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP18]], [[SCALAR_RECUR]] +; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP16]], [[SCALAR_RECUR]] ; SINK-AFTER-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; SINK-AFTER: for.exit: ; SINK-AFTER-NEXT: ret void ; @@ -411,7 +411,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; CHECK-NEXT: [[TMP8]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] @@ -440,7 +440,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: 
br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ; UNROLL-LABEL: @recurrence_2( ; UNROLL-NEXT: entry: @@ -479,7 +479,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NEXT: [[TMP14]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP12]]) ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP13]], <4 x i32> [[TMP14]]) ; UNROLL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP16]]) @@ -509,7 +509,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @recurrence_2( ; UNROLL-NO-IC-NEXT: entry: @@ -557,7 +557,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP22]] = select <4 x i1> [[TMP20]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP18]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; 
UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP21]], [[TMP22]] ; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP21]], <4 x i32> [[TMP22]] @@ -591,7 +591,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @recurrence_2( ; UNROLL-NO-VF-NEXT: entry: @@ -632,7 +632,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NO-VF-NEXT: [[TMP16]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]] ; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP15]], i32 [[TMP16]] @@ -663,7 +663,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] 
to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ; SINK-AFTER-LABEL: @recurrence_2( ; SINK-AFTER-NEXT: entry: @@ -699,7 +699,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[TMP12]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP10]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP12]]) ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] @@ -731,7 +731,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; entry: %cmp27 = icmp sgt i32 %n, 0 @@ -821,7 +821,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]] ; 
CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2 +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !6 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> ; CHECK-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double> @@ -829,10 +829,10 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; CHECK-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8, !alias.scope !9, !noalias !6 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3 @@ -855,7 +855,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; CHECK-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 
[[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -907,10 +907,10 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]] ; UNROLL-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2 +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !6 ; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i64 4 ; UNROLL-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2 +; UNROLL-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2, !alias.scope !6 ; UNROLL-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; UNROLL-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> ; UNROLL-NEXT: [[TMP15:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> @@ -923,13 +923,13 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NEXT: [[TMP22:%.*]] = fsub fast <4 x double> [[TMP16]], [[TMP20]] ; UNROLL-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]] ; UNROLL-NEXT: [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>* -; UNROLL-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[TMP24]], align 8 +; UNROLL-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[TMP24]], align 8, !alias.scope !9, !noalias !6 ; UNROLL-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 4 ; UNROLL-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>* -; UNROLL-NEXT: 
store <4 x double> [[TMP22]], <4 x double>* [[TMP26]], align 8 +; UNROLL-NEXT: store <4 x double> [[TMP22]], <4 x double>* [[TMP26]], align 8, !alias.scope !9, !noalias !6 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i64 3 @@ -952,7 +952,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL: for.end.loopexit: ; UNROLL-NEXT: br label [[FOR_END]] ; UNROLL: for.end: @@ -1011,10 +1011,10 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2, !alias.scope !6 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 4 ; UNROLL-NO-IC-NEXT: 
[[TMP14:%.*]] = bitcast i16* [[TMP13]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP14]], align 2 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP14]], align 2, !alias.scope !6 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> @@ -1029,13 +1029,13 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = bitcast double* [[TMP27]] to <4 x double>* -; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP23]], <4 x double>* [[TMP28]], align 8 +; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP23]], <4 x double>* [[TMP28]], align 8, !alias.scope !9, !noalias !6 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* -; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[TMP30]], align 8 +; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[TMP30]], align 8, !alias.scope !9, !noalias !6 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: 
[[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i32 3 @@ -1059,7 +1059,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-IC-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL-NO-IC: for.end.loopexit: ; UNROLL-NO-IC-NEXT: br label [[FOR_END]] ; UNROLL-NO-IC: for.end: @@ -1111,8 +1111,8 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-VF-NEXT: [[INDUCTION8:%.*]] = add i64 [[OFFSET_IDX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION]] ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION8]] -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP10]] = load i16, i16* [[TMP8]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 2, !alias.scope !5 +; UNROLL-NO-VF-NEXT: [[TMP10]] = load i16, i16* [[TMP8]], align 2, !alias.scope !5 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sitofp i16 [[TMP9]] to double ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sitofp i16 [[TMP10]] to double ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double @@ -1123,11 +1123,11 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = fsub fast double [[TMP12]], [[TMP16]] ; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION]] ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[B]], i64 
[[INDUCTION8]] -; UNROLL-NO-VF-NEXT: store double [[TMP17]], double* [[TMP19]], align 8 -; UNROLL-NO-VF-NEXT: store double [[TMP18]], double* [[TMP20]], align 8 +; UNROLL-NO-VF-NEXT: store double [[TMP17]], double* [[TMP19]], align 8, !alias.scope !8, !noalias !5 +; UNROLL-NO-VF-NEXT: store double [[TMP18]], double* [[TMP20]], align 8, !alias.scope !8, !noalias !5 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -1149,7 +1149,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-VF-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL-NO-VF: for.end.loopexit: ; UNROLL-NO-VF-NEXT: br label [[FOR_END]] ; UNROLL-NO-VF: for.end: @@ -1204,7 +1204,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]] ; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 ; SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 
x i16>, <4 x i16>* [[TMP10]], align 2 +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !6 ; SINK-AFTER-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> ; SINK-AFTER-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double> @@ -1213,10 +1213,10 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; SINK-AFTER-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]] ; SINK-AFTER-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 0 ; SINK-AFTER-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>* -; SINK-AFTER-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP18]], align 8 +; SINK-AFTER-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP18]], align 8, !alias.scope !9, !noalias !6 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 @@ -1240,7 +1240,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; SINK-AFTER-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; SINK-AFTER-NEXT: 
br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SINK-AFTER: for.end.loopexit: ; SINK-AFTER-NEXT: br label [[FOR_END]] ; SINK-AFTER: for.end: @@ -1641,7 +1641,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1652,7 +1652,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -1671,7 +1671,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL: 
middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1682,7 +1682,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 ; UNROLL-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -1747,7 +1747,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3 @@ -1764,7 +1764,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]] ; UNROLL-NO-IC-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop 
[[LOOP14:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -1794,7 +1794,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP8]] = load i32, i32* [[TMP6]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1809,7 +1809,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]] ; UNROLL-NO-VF-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -1853,7 +1853,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp 
eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 @@ -1870,7 +1870,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]] ; SINK-AFTER-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4 ; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -1898,13 +1898,13 @@ define void @constant_folded_previous_value() { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[SCALAR_BODY:%.*]] ; CHECK: scalar.body: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -1914,13 +1914,13 @@ define void @constant_folded_previous_value() { ; UNROLL: vector.ph: ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: -; UNROLL-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: br 
label [[SCALAR_BODY:%.*]] ; UNROLL: scalar.body: -; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -1935,7 +1935,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1949,7 +1949,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef -; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -1965,7 +1965,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-VF-NEXT: [[TMP1]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP14:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1979,7 +1979,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef -; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -1994,7 +1994,7 @@ define void @constant_folded_previous_value() { ; SINK-AFTER-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -2008,7 +2008,7 @@ define void @constant_folded_previous_value() { ; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1 ; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef -; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -2045,14 +2045,14 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; 
CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1]] = add i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP0]], [[X:%.*]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2068,7 +2068,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP1]] = add i32 [[TMP0]], 8 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4 ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP3]], [[X:%.*]] @@ -2076,7 +2076,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop 
[[LOOP18:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2102,7 +2102,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 @@ -2119,7 +2119,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; UNROLL-NO-IC-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 -; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2138,7 +2138,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-VF-NEXT: [[TMP1]] = add i32 [[INDUCTION1]], [[X]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP21:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96 ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -2153,7 +2153,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 -; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2174,7 +2174,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 @@ -2191,7 +2191,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; SINK-AFTER-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; SINK-AFTER-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 -; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label 
[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2243,20 +2243,20 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 25 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]] +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 25 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[NEXT_GEP26:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 25 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[NEXT_GEP37:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 25 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
double, double* [[NEXT_GEP4]], i64 [[TMP6]] +; CHECK-NEXT: [[NEXT_GEP48:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[NEXT_GEP5]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[NEXT_GEP26]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr double, double* [[NEXT_GEP37]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[NEXT_GEP48]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = load double, double* [[TMP7]], align 8 ; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[TMP8]], align 8 ; CHECK-NEXT: [[TMP13:%.*]] = load double, double* [[TMP9]], align 8 @@ -2272,7 +2272,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP23]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -2282,7 +2282,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; CHECK-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[A_1_LCSSA]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; ; UNROLL-LABEL: @PR33613( ; UNROLL-NEXT: entry: @@ -2297,36 +2297,36 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ 
zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 25 -; UNROLL-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[NEXT_GEP10:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]] ; UNROLL-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 25 -; UNROLL-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[NEXT_GEP211:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 25 -; UNROLL-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[NEXT_GEP312:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3 ; UNROLL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 25 -; UNROLL-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[NEXT_GEP413:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 4 ; UNROLL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 25 -; UNROLL-NEXT: [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[NEXT_GEP514:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; UNROLL-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 5 ; UNROLL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 25 -; UNROLL-NEXT: [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[NEXT_GEP615:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; UNROLL-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 6 ; UNROLL-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 25 -; UNROLL-NEXT: [[NEXT_GEP7:%.*]] = getelementptr 
double, double* [[B]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[NEXT_GEP716:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] ; UNROLL-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 7 ; UNROLL-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 25 -; UNROLL-NEXT: [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] -; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[TMP0]] -; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[TMP2]] -; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[TMP4]] -; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[TMP6]] -; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[TMP8]] -; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[TMP10]] -; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[TMP12]] -; UNROLL-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[TMP14]] +; UNROLL-NEXT: [[NEXT_GEP817:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]] +; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr double, double* [[NEXT_GEP10]], i64 [[TMP0]] +; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr double, double* [[NEXT_GEP211]], i64 [[TMP2]] +; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr double, double* [[NEXT_GEP312]], i64 [[TMP4]] +; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr double, double* [[NEXT_GEP413]], i64 [[TMP6]] +; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr double, double* [[NEXT_GEP514]], i64 [[TMP8]] +; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr double, double* [[NEXT_GEP615]], i64 [[TMP10]] +; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr double, double* [[NEXT_GEP716]], i64 [[TMP12]] +; UNROLL-NEXT: [[TMP22:%.*]] = getelementptr double, double* [[NEXT_GEP817]], i64 [[TMP14]] ; UNROLL-NEXT: [[TMP23:%.*]] = load double, double* 
[[TMP15]], align 8 ; UNROLL-NEXT: [[TMP24:%.*]] = load double, double* [[TMP16]], align 8 ; UNROLL-NEXT: [[TMP25:%.*]] = load double, double* [[TMP17]], align 8 @@ -2355,7 +2355,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NEXT: [[TMP48]] = add <4 x i32> [[VEC_PHI9]], [[TMP46]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP48]], [[TMP47]] ; UNROLL-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -2366,7 +2366,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[A_1_LCSSA]] ; UNROLL: for.body: -; UNROLL-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @PR33613( ; UNROLL-NO-IC-NEXT: entry: @@ -2441,7 +2441,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[TMP49]] = add <4 x i32> [[VEC_PHI9]], [[TMP47]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] ; UNROLL-NO-IC-NEXT: 
[[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -2472,7 +2472,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1 ; UNROLL-NO-IC-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240 -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @PR33613( ; UNROLL-NO-VF-NEXT: entry: @@ -2506,7 +2506,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: [[TMP15]] = add i32 [[VEC_PHI3]], [[TMP13]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]] ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240 @@ -2534,7 +2534,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1 ; UNROLL-NO-VF-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240 -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; ; SINK-AFTER-LABEL: @PR33613( ; SINK-AFTER-NEXT: entry: @@ -2579,7 +2579,7 @@ define i32 
@PR33613(double* %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240 @@ -2609,7 +2609,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1 ; SINK-AFTER-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240 -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; entry: %idxprom = sext i32 %d to i64 @@ -2670,17 +2670,17 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !21 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP11]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP11]], align 4, !alias.scope !24, !noalias !21 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3 @@ -2701,7 +2701,7 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -2731,10 +2731,10 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] ; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !21 ; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4 ; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* -; 
UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 +; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2, !alias.scope !21 ; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> ; UNROLL-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32> @@ -2745,13 +2745,13 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]] ; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4 +; UNROLL-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4, !alias.scope !24, !noalias !21 ; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i64 4 ; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP19]], align 4 +; UNROLL-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP19]], align 4, !alias.scope !24, !noalias !21 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3 @@ -2772,7 +2772,7 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NEXT: store i32 [[MUL]], i32* 
[[ARRAYIDX5]], align 4 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -2810,10 +2810,10 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2, !alias.scope !21 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !21 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> @@ -2826,13 +2826,13 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4, 
!alias.scope !24, !noalias !21 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4, !alias.scope !24, !noalias !21 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3 @@ -2854,7 +2854,7 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -2889,8 +2889,8 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1 ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* 
[[TMP4]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2, !alias.scope !20 +; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* [[TMP4]], align 2, !alias.scope !20 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP5]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP5]] to i32 @@ -2899,11 +2899,11 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]] ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4 -; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], i32* [[TMP14]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4, !alias.scope !23, !noalias !20 +; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], i32* [[TMP14]], align 4, !alias.scope !23, !noalias !20 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -2923,7 +2923,7 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label 
[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -2958,7 +2958,7 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] ; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 ; SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !21 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> @@ -2966,10 +2966,10 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] ; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 ; SINK-AFTER-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4 +; SINK-AFTER-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4, !alias.scope !24, !noalias !21 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 
x i16> [[WIDE_LOAD]], i32 3 @@ -2991,7 +2991,7 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -3088,7 +3088,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; CHECK-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -3110,7 +3110,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -3133,7 +3133,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[TMP1]] ; UNROLL-NEXT: [[DIFF_CHECK5:%.*]] = icmp 
ult i64 [[TMP3]], 32 ; UNROLL-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] - +; UNROLL-NEXT: br i1 [[CONFLICT_RDX6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 ; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 @@ -3194,7 +3194,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NEXT: store <4 x i32> [[TMP46]], <4 x i32>* [[TMP50]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -3216,7 +3216,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -3225,7 +3225,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A:%.*]] to i64 ; UNROLL-NO-IC-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 ; UNROLL-NO-IC-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; UNROLL-NO-IC-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 +; UNROLL-NO-IC-NEXT: 
[[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 0, i64 0 ; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] @@ -3247,90 +3247,90 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: 
[[TMP4:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 5 +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 6 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 7 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP19]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP21]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP10]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP11]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP12]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP13]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP22]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP23]], i32 1 
-; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i32 3 -; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i16, i16* [[TMP14]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = load i16, i16* [[TMP15]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = load i16, i16* [[TMP16]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i16, i16* [[TMP17]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> poison, i16 [[TMP30]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP37]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i32 3 -; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = shufflevector <4 x i16> [[TMP29]], <4 x i16> [[TMP37]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = sext <4 x i16> [[TMP29]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = sext <4 x i16> [[TMP37]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP42]], [[TMP40]] -; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]] -; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP49]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr 
inbounds i32, i32* [[TMP46]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP51]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP8]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP9]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP10]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP11]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP23]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP25]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load i16, i16* [[TMP14]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP15]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP17]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x i16> poison, i16 [[TMP26]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = insertelement <4 x i16> [[TMP30]], i16 [[TMP27]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP31]], i16 [[TMP28]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP29]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = load i16, i16* [[TMP18]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = load i16, i16* [[TMP19]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = load i16, i16* [[TMP20]], align 2 +; UNROLL-NO-IC-NEXT: 
[[TMP37:%.*]] = load i16, i16* [[TMP21]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> poison, i16 [[TMP34]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x i16> [[TMP38]], i16 [[TMP35]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP36]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP41]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP37]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP33]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i16> [[TMP33]], <4 x i16> [[TMP41]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = sext <4 x i16> [[TMP42]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = sext <4 x i16> [[TMP43]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = sext <4 x i16> [[TMP33]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = sext <4 x i16> [[TMP41]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = mul nsw <4 x i32> [[TMP46]], [[TMP44]] +; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = mul nsw <4 x i32> [[TMP47]], [[TMP45]] +; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP8]] +; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, i32* [[TMP50]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP53:%.*]] = bitcast i32* [[TMP52]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP48]], <4 x i32>* [[TMP53]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, i32* [[TMP50]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP55:%.*]] = bitcast i32* [[TMP54]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[TMP55]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP41:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP56:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP56]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP37]], i32 3 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP37]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP41]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP41]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP53:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP57:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; UNROLL-NO-IC-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP53]] = load i16, i16* [[CUR_INDEX]], align 2 -; 
UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP53]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP57]] = load i16, i16* [[CUR_INDEX]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP57]] to i32 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -3339,7 +3339,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NO-VF-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A:%.*]] to i64 ; UNROLL-NO-VF-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 ; UNROLL-NO-VF-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; UNROLL-NO-VF-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 +; UNROLL-NO-VF-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 0, i64 0 ; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 ; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] @@ -3360,52 +3360,52 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], 
[[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION17:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION17]] -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION]], i64 1 -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION17]], i64 1 -; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP0]], align 4 -; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP1]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP5]] = load i16, i16* [[TMP3]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[TMP4]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP4]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP5]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]] -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]] -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION17]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP10]], i32* [[TMP12]], align 4 -; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4 +; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION7]] +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* 
[[A]], i64 [[INDUCTION]], i64 1 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION7]], i64 1 +; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP4]], align 4 +; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP5]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP6]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP9]] = load i16, i16* [[TMP7]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP8]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sext i16 [[TMP8]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sext i16 [[TMP9]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP12]], [[TMP10]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = mul nsw i32 [[TMP13]], [[TMP11]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]] +; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP16]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP15]], i32* [[TMP17]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi 
i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; UNROLL-NO-VF-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP15]] = load i16, i16* [[CUR_INDEX]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP15]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP19]] = load i16, i16* [[CUR_INDEX]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP19]] to i32 ; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -3414,7 
+3414,7 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; SINK-AFTER-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A:%.*]] to i64 ; SINK-AFTER-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 ; SINK-AFTER-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; SINK-AFTER-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 +; SINK-AFTER-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 0, i64 0 ; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 ; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] @@ -3436,65 +3436,66 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP0]] -; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1 -; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1 -; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1 -; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 -; SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast 
i32* [[TMP9]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> , <4 x i32>* [[TMP10]], align 4 -; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP5]], align 2 -; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP6]], align 2 -; SINK-AFTER-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP7]], align 2 -; SINK-AFTER-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP8]], align 2 -; SINK-AFTER-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i32 0 -; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 1 -; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 2 -; SINK-AFTER-NEXT: [[TMP18]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 3 -; SINK-AFTER-NEXT: [[TMP19:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP18]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP21:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP22:%.*]] = mul nsw <4 x i32> [[TMP21]], [[TMP20]] -; SINK-AFTER-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; SINK-AFTER-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 -; SINK-AFTER-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP25]], align 4 +; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2 +; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]] +; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1 +; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr 
inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1 +; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1 +; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1 +; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> , <4 x i32>* [[TMP14]], align 4 +; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP9]], align 2 +; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP10]], align 2 +; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP11]], align 2 +; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP12]], align 2 +; SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> poison, i16 [[TMP15]], i32 0 +; SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP19]], i16 [[TMP16]], i32 1 +; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP17]], i32 2 +; SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP18]], i32 3 +; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP22]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP24:%.*]] = sext <4 x i16> [[TMP23]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP25:%.*]] = sext <4 x i16> [[TMP22]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP26:%.*]] = mul nsw <4 x i32> [[TMP25]], [[TMP24]] +; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] +; SINK-AFTER-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 0 +; SINK-AFTER-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> [[TMP26]], <4 x i32>* [[TMP29]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP18]], i32 3 -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP18]], i32 2 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP22]], i32 3 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP22]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP31:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; SINK-AFTER-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; SINK-AFTER-NEXT: [[TMP27]] = load i16, i16* [[CUR_INDEX]], align 2 -; 
SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP27]] to i32 +; SINK-AFTER-NEXT: [[TMP31]] = load i16, i16* [[CUR_INDEX]], align 2 +; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP31]] to i32 ; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; + entry: %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0 %.pre = load i16, i16* %pre.index @@ -3554,7 +3555,7 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !30 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], @@ -3562,10 +3563,10 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> 
[[TMP10]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4, !alias.scope !33, !noalias !30 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3 @@ -3587,7 +3588,7 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -3617,10 +3618,10 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] ; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !30 ; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4 ; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 +; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* 
[[TMP7]], align 2, !alias.scope !30 ; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> ; UNROLL-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32> @@ -3633,13 +3634,13 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP15]] ; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4 +; UNROLL-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4, !alias.scope !33, !noalias !30 ; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i64 4 ; UNROLL-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP21]], align 4 +; UNROLL-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP21]], align 4, !alias.scope !33, !noalias !30 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3 @@ -3661,7 +3662,7 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; 
UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -3699,10 +3700,10 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2, !alias.scope !30 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !30 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> @@ -3717,13 +3718,13 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP24]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP24]], align 4, !alias.scope !33, !noalias !30 ; UNROLL-NO-IC-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP26]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP26]], align 4, !alias.scope !33, !noalias !30 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3 @@ -3746,7 +3747,7 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -3781,8 +3782,8 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1 ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* [[TMP4]], align 
2 +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2, !alias.scope !29 +; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* [[TMP4]], align 2, !alias.scope !29 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP5]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP7]], 2 @@ -3793,11 +3794,11 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]] ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], i32* [[TMP15]], align 4 -; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP16]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], i32* [[TMP15]], align 4, !alias.scope !32, !noalias !29 +; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP16]], align 4, !alias.scope !32, !noalias !29 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -3818,7 +3819,7 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label 
[[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -3853,7 +3854,7 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] ; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 ; SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !30 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], @@ -3862,10 +3863,10 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] ; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 ; SINK-AFTER-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4 +; SINK-AFTER-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4, !alias.scope !33, !noalias !30 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: 
[[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 @@ -3888,7 +3889,7 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -4081,7 +4082,7 @@ define void @sink_dead_inst() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1]] = add i16 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = or i16 [[TMP0]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = zext i16 [[TMP3]] to i32 @@ -4096,7 +4097,7 @@ define void @sink_dead_inst() { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15 ; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; CHECK-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -4111,7 +4112,7 @@ define void @sink_dead_inst() { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP1]] = add i16 [[TMP0]], 8 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; UNROLL-NEXT: br i1 [[TMP2]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP3:%.*]] = or i16 [[TMP0]], 5 ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = zext i16 [[TMP3]] to i32 @@ -4126,7 +4127,7 @@ define void @sink_dead_inst() { ; UNROLL-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15 ; UNROLL-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 -; UNROLL-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] +; UNROLL-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -4154,7 +4155,7 @@ define void @sink_dead_inst() { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 40 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3 @@ -4176,7 +4177,7 @@ define void @sink_dead_inst() { ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 -; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -4201,7 +4202,7 @@ 
define void @sink_dead_inst() { ; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP2]], 5 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42 -; UNROLL-NO-VF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 42 ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -4219,7 +4220,7 @@ define void @sink_dead_inst() { ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 -; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -4241,7 +4242,7 @@ define void @sink_dead_inst() { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; SINK-AFTER-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 40 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3 @@ -4263,7 +4264,7 @@ define void @sink_dead_inst() { ; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; SINK-AFTER-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 
5 -; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -4346,7 +4347,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; CHECK-NEXT: [[TMP22]] = add <4 x i32> [[VEC_PHI]], [[TMP21]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP22]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) @@ -4357,7 +4358,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; CHECK-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAR]] ; CHECK: bb2: -; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] ; ; UNROLL-LABEL: @sink_into_replication_region( ; UNROLL-NEXT: bb: @@ -4459,7 +4460,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NEXT: [[TMP45]] = add <4 x i32> [[VEC_PHI1]], [[TMP43]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP46:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof 
[[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI1]] ; UNROLL-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP44]], <4 x i32> [[VEC_PHI]] @@ -4472,7 +4473,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[VAR]] ; UNROLL: bb2: -; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @sink_into_replication_region( ; UNROLL-NO-IC-NEXT: bb: @@ -4581,7 +4582,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -4604,7 +4605,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]] +; UNROLL-NO-IC-NEXT: 
br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region( ; UNROLL-NO-VF-NEXT: bb: @@ -4649,7 +4650,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI1]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF51:![0-9]+]], !llvm.loop [[LOOP52:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF38:![0-9]+]], !llvm.loop [[LOOP39:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP10]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]] @@ -4669,7 +4670,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF53:![0-9]+]], !llvm.loop [[LOOP54:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF40:![0-9]+]], !llvm.loop [[LOOP41:![0-9]+]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region( ; SINK-AFTER-NEXT: bb: @@ -4736,7 +4737,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]] +; SINK-AFTER-NEXT: br i1 
[[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 @@ -4758,7 +4759,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] ; bb: br label %bb2 @@ -4873,7 +4874,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP39:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP22]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP39]]) @@ -4884,7 +4885,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; CHECK-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP40]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAR]] ; CHECK: bb2: -; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41]], 
!llvm.loop [[LOOP44:![0-9]+]] ; ; UNROLL-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NEXT: bb: @@ -5056,7 +5057,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; UNROLL-NEXT: [[TMP77:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP77]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP77]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI3]] ; UNROLL-NEXT: [[TMP79:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP44]], <4 x i32> [[VEC_PHI]] @@ -5069,7 +5070,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP80]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[VAR]] ; UNROLL: bb2: -; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41]], !llvm.loop [[LOOP44:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NO-IC-NEXT: bb: @@ -5241,7 +5242,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; 
UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]] ; UNROLL-NO-IC-NEXT: [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -5269,7 +5270,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41]], !llvm.loop [[LOOP44:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NO-VF-NEXT: bb: @@ -5328,7 +5329,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI2]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF51]], !llvm.loop [[LOOP55:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF38]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]] @@ -5353,7 +5354,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF53]], !llvm.loop [[LOOP56:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF40]], !llvm.loop 
[[LOOP43:![0-9]+]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region_multiple( ; SINK-AFTER-NEXT: bb: @@ -5451,7 +5452,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 @@ -5478,7 +5479,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; SINK-AFTER-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41]], !llvm.loop [[LOOP44:![0-9]+]] ; bb: br label %bb2 @@ -5520,13 +5521,13 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: 
br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -5548,13 +5549,13 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP4]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: -; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -5590,7 +5591,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 @@ -5613,7 +5614,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; UNROLL-NO-IC-NEXT: 
[[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]] ; UNROLL-NO-IC-NEXT: store i32 0, i32* [[A_GEP]], align 4 -; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -5640,7 +5641,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-VF-NEXT: store i32 0, i32* [[TMP7]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NO-VF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP57:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16 ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -5661,7 +5662,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-VF-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; UNROLL-NO-VF-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]] ; UNROLL-NO-VF-NEXT: store i32 0, i32* [[A_GEP]], align 4 -; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP58:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP45:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -5687,7 +5688,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]] +; SINK-AFTER-NEXT: br i1 
[[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 @@ -5710,7 +5711,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]] ; SINK-AFTER-NEXT: store i32 0, i32* [[A_GEP]], align 4 -; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; From b2719349d2a640187b66baeb91115a00e706f4c6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 30 May 2022 17:06:41 +0200 Subject: [PATCH 836/908] Change build_llvm_package.bat to build_llvm_release.bat We don't build snapshot packages anymore, so repurpose this for doing release builds instead. --- ...lvm_package.bat => build_llvm_release.bat} | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) rename llvm/utils/release/{build_llvm_package.bat => build_llvm_release.bat} (89%) diff --git a/llvm/utils/release/build_llvm_package.bat b/llvm/utils/release/build_llvm_release.bat similarity index 89% rename from llvm/utils/release/build_llvm_package.bat rename to llvm/utils/release/build_llvm_release.bat index fd138e90500e5d..e44fc141f92c32 100755 --- a/llvm/utils/release/build_llvm_package.bat +++ b/llvm/utils/release/build_llvm_release.bat @@ -1,10 +1,20 @@ @echo off setlocal -REM Script for building the LLVM installer on Windows, -REM used for the the weekly snapshots at http://www.llvm.org/builds. 
-REM -REM Usage: build_llvm_package.bat +if "%1"=="" goto usage +goto begin + +:usage +echo Script for building the LLVM installer on Windows, +echo used for the releases at https://github.com/llvm/llvm-project/releases +echo. +echo Usage: build_llvm_release.bat ^ +echo. +echo Example: build_llvm_release.bat 14.0.4 +echo. +exit /b + +:begin REM Prerequisites: REM @@ -24,12 +34,9 @@ set vsdevcmd=C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\Co set python32_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310-32 set python64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310 -for /f "usebackq" %%i in (`PowerShell ^(Get-Date^).ToString^('yyyyMMdd'^)`) do set datestamp=%%i - -set revision=%1 -set package_version=15.0.0-%revision:~0,8% -set clang_format_vs_version=15.0.0.%datestamp% -set build_dir=llvm_package_%revision:~0,8% +set revision=llvmorg-%1 +set package_version=%1 +set build_dir=llvm_package_%package_version% echo Revision: %revision% echo Package version: %package_version% From cc6c15920358a93185eced1d9dcec201815debf0 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 30 May 2022 15:13:23 +0200 Subject: [PATCH 837/908] [mlir] add VectorizeOp to structured transform ops Vectorization is a key transformation to achieve high performance on most architectures. In the transform dialect, vectorization is implemented as a parameterizable transform op. It currently applies to a scope of payload IR delimited by some isolated-from-above op, mainly because several enabling transformations (such as affine simplification) are needed to perform vectorization and these transformation would apply to ops other than the "main" computational payload op. A separate "navigation" transform op that obtains the isolated-from-above ancestor of an op is introduced in the core transform dialect. 
Even though it is currently only useful for vectorization, isolated-from-above ops are a common anchor for transformations (usually implemented as passes) that is likely to be reused in the future. Depends On D126374 Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D126542 --- .../Linalg/TransformOps/LinalgTransformOps.td | 31 +++ .../mlir/Dialect/Transform/IR/TransformOps.td | 26 +++ .../Linalg/TransformOps/CMakeLists.txt | 1 + .../TransformOps/LinalgTransformOps.cpp | 36 ++++ .../lib/Dialect/Transform/IR/TransformOps.cpp | 33 ++++ .../Linalg/transform-op-vectorize.mlir | 182 ++++++++++++++++++ .../Dialect/Transform/test-interpreter.mlir | 31 +++ .../llvm-project-overlay/mlir/BUILD.bazel | 1 + 8 files changed, 341 insertions(+) create mode 100644 mlir/test/Dialect/Linalg/transform-op-vectorize.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 3557f3323779bc..387521a0e72452 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -117,4 +117,35 @@ def TileOp : Op { + let description = [{ + Indicates that the given `target` op and all the ops it contains should be + vectorized with the configuration specified by the attributes of this op. + This vectorization only handles structured ops that operate on shaped types + and does not vectorize loops or straight-line code. Internally, it applies a + set of rewrite patterns, some of which enable vectorization and some of + which clean up the results. Therefore, it can only be applied to an op with + the "isolated from above" property. If finer granularity is required, it can + be achieved by outlining the target part of the payload IR into, e.g., a + function, performing the transformation, and inlining it back.
This + transformation only fails if the entire pattern rewriting failed, i.e., it + does **not** fail when no ops were vectorized. + + Note that this transformation is invalidating the handles to any payload IR + operation that is contained inside the vectorization target. + }]; + + let arguments = (ins PDL_Operation:$target, + DefaultValuedAttr:$vectorize_padding); + let results = (outs PDL_Operation:$transformed); + + let assemblyFormat = "$target attr-dict"; + + let extraClassDeclaration = [{ + ::mlir::FailureOr applyToOne(::mlir::Operation *target); + }]; +} + #endif // LINALG_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 0f1db4549e04f8..9bfd2d3b6313ee 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -17,6 +17,32 @@ include "mlir/Dialect/Transform/IR/TransformDialect.td" include "mlir/Dialect/Transform/IR/TransformEffects.td" include "mlir/Dialect/Transform/IR/TransformInterfaces.td" +def GetClosestIsolatedParentOp : TransformDialectOp<"get_closest_isolated_parent", + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { + let summary = "Gets handles to the closest isolated-from-above parents"; + let description = [{ + The handles defined by this Transform op correspond to the closest isolated + from above ancestor of the Payload IR operations associated with its + operand. If any of the given Payload IR ops has no such parent (unlikely as + there usually is a top-level ModuleOp), the transformation is considered to + have failed. + + Ancestor ops follow the same order as the ops associated with the + operand, except for potential duplicates (multiple Payload IR ops associated + with the operand have the same parent) for which the ancestor will only be + listed once for the first time it occurs.
For example, given the list + "(childof(A), childof(B), childof(B), childof(A), childof(B))", the + resulting list will be just "(A, B)". Note that no other semantic ordering + is applied, e.g., "B" may itself be a parent of "A". This may have an impact + on the further transformation applied to the handle produced here. + }]; + + let arguments = (ins PDL_Operation:$target); + let results = (outs PDL_Operation:$parent); + let assemblyFormat = "$target attr-dict"; +} + def PDLMatchOp : TransformDialectOp<"pdl_match", [DeclareOpInterfaceMethods]> { let summary = "Finds ops that match the named PDL pattern"; diff --git a/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt index 4b1492652f10d3..599adc6bfc3a9a 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt @@ -15,4 +15,5 @@ add_mlir_dialect_library(MLIRLinalgTransformOps MLIRPDL MLIRSideEffectInterfaces MLIRTransformDialect + MLIRVector ) diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 689157d01a837f..f80ba4fc286f7e 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Transform/IR/TransformDialect.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Parser/Parser.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "llvm/Support/FormatVariadic.h" using namespace mlir; @@ -338,6 +339,40 @@ void TileOp::getEffects( effects.emplace_back(MemoryEffects::Write::get(), PayloadIRResource::get()); } +//===----------------------------------------------------------------------===// +// VectorizeOp +//===----------------------------------------------------------------------===// + +FailureOr VectorizeOp::applyToOne(Operation *target) { + if 
(!target->hasTrait()) { + InFlightDiagnostic diag = emitOpError() + << "applies only to isolated-from-above targets"; + diag.attachNote(target->getLoc()) << "non-isolated target"; + return diag; + } + + MLIRContext *ctx = getContext(); + RewritePatternSet patterns(ctx); + patterns.add(ctx); + + vector::populateVectorTransferPermutationMapLoweringPatterns(patterns); + vector::populateVectorReductionToContractPatterns(patterns); + patterns.add(ctx, + /*benefit=*/2); + vector::TransferReadOp::getCanonicalizationPatterns(patterns, ctx); + vector::TransferWriteOp::getCanonicalizationPatterns(patterns, ctx); + if (getVectorizePadding()) + linalg::populatePadOpVectorizationPatterns(patterns); + + if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) { + InFlightDiagnostic diag = emitError() << "failed to apply"; + diag.attachNote(target->getLoc()) << "target op"; + return diag; + } + return target; +} + //===----------------------------------------------------------------------===// // Transform op registration //===----------------------------------------------------------------------===// @@ -352,6 +387,7 @@ class LinalgTransformDialectExtension LinalgTransformDialectExtension() { declareDependentDialect(); declareDependentDialect(); + declareDependentDialect(); registerTransformOps< #define GET_OP_LIST #include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp.inc" diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index fa3ba7e21d7de3..91ad5ff4db04a4 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -115,6 +115,39 @@ LogicalResult PatternApplicatorExtension::findAllMatches( } } // namespace +//===----------------------------------------------------------------------===// +// GetClosestIsolatedParentOp +//===----------------------------------------------------------------------===// + +LogicalResult 
transform::GetClosestIsolatedParentOp::apply( + transform::TransformResults &results, transform::TransformState &state) { + SetVector parents; + for (Operation *target : state.getPayloadOps(getTarget())) { + Operation *parent = + target->getParentWithTrait(); + if (!parent) { + InFlightDiagnostic diag = + emitError() << "could not find an isolated-from-above parent op"; + diag.attachNote(target->getLoc()) << "target op"; + return diag; + } + parents.insert(parent); + } + results.set(getResult().cast(), parents.getArrayRef()); + return success(); +} + +void transform::GetClosestIsolatedParentOp::getEffects( + SmallVectorImpl &effects) { + effects.emplace_back(MemoryEffects::Read::get(), getTarget(), + TransformMappingResource::get()); + effects.emplace_back(MemoryEffects::Allocate::get(), getParent(), + TransformMappingResource::get()); + effects.emplace_back(MemoryEffects::Write::get(), getParent(), + TransformMappingResource::get()); + effects.emplace_back(MemoryEffects::Read::get(), PayloadIRResource::get()); +} + //===----------------------------------------------------------------------===// // PDLMatchOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir new file mode 100644 index 00000000000000..39ec5dfa4588b3 --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -0,0 +1,182 @@ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK-LABEL: @vectorize_matmul +// CHECK-SAME: %[[A:.*]]: tensor<24x12xf32> +// CHECK-SAME: %[[B:.*]]: tensor<12x25xf32> +// CHECK-SAME: %[[C:.*]]: tensor<24x25xf32> +func.func @vectorize_matmul(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { + // CHECK: %[[vA:.+]] = vector.transfer_read %[[A]] + // CHECK: %[[vB:.+]] = vector.transfer_read %[[B]] 
+ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] + // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]] + // CHECK: %[[vS:.+]] = arith.addf %[[vR]], %[[vC]] + // CHECK: vector.transfer_write %[[vS]], %[[C]] + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + %1 = get_closest_isolated_parent %0 + %2 = transform.structured.vectorize %1 + } +} + +// ----- + +#map0 = affine_map<()[s0] -> (-s0 + 12, 7)> +#map1 = affine_map<()[s0] -> (-s0 + 7)> + +// CHECK-LABEL: @vectorize_keep_pad +// CHECK-SAME: %[[C:[a-zA-Z0-9_]+]]: tensor<24x25xf32> +func.func @vectorize_keep_pad( + %arg0: tensor<24x12xf32>, %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, %arg3: index, %arg4: index, + %arg5: index) -> tensor<24x25xf32> { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.000000e+00 : f32 + %0 = affine.min #map0()[%arg5] + %1 = tensor.extract_slice %arg0[%arg3, %arg5] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + %2 = tensor.extract_slice %arg1[%arg5, %arg4] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + %3 = tensor.extract_slice %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + %4 = affine.apply #map1()[%0] + // CHECK: %[[pA:.*]] = tensor.pad + %5 = tensor.pad %1 nofold low[%c0, %c0] high[%c0, %4] { + ^bb0(%arg6: index, %arg7: index): + tensor.yield %cst : f32 + } : tensor<4x?xf32> to tensor<4x7xf32> + %6 = affine.apply #map1()[%0] + // CHECK: %[[pB:.*]] = 
tensor.pad + %7 = tensor.pad %2 nofold low[%c0, %c0] high[%6, %c0] { + ^bb0(%arg6: index, %arg7: index): + tensor.yield %cst : f32 + } : tensor to tensor<7x5xf32> + // CHECK: %[[vA:.+]] = vector.transfer_read %[[pA]] + // CHECK: %[[vB:.+]] = vector.transfer_read %[[pB]] + // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] + // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]] + // CHECK: %[[vS:.+]] = arith.addf %[[vR]], %[[vC]] + // CHECK: vector.transfer_write %[[vS]], %[[C]] + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + return %9 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + %1 = get_closest_isolated_parent %0 + %2 = transform.structured.vectorize %1 + } +} + +// ----- + +#map0 = affine_map<()[s0] -> (-s0 + 12, 7)> +#map1 = affine_map<()[s0] -> (-s0 + 7)> + +// CHECK-LABEL: @vectorize_pad +// CHECK-SAME: %[[A:.+]]: tensor<24x12xf32> +// CHECK-SAME: %[[B:.+]]: tensor<12x25xf32> +// CHECK-SAME: %[[C:.+]]: tensor<24x25xf32> +func.func @vectorize_pad( + %arg0: tensor<24x12xf32>, %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, %arg3: index, %arg4: index, + %arg5: index) -> tensor<24x25xf32> { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.000000e+00 : f32 + %0 = affine.min #map0()[%arg5] + // CHECK: %[[sA:.+]] = tensor.extract_slice %[[A]] + // CHECK: %[[sB:.+]] = tensor.extract_slice %[[B]] + %1 = tensor.extract_slice %arg0[%arg3, %arg5] [4, 
%0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + %2 = tensor.extract_slice %arg1[%arg5, %arg4] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + %3 = tensor.extract_slice %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + // CHECK: %[[vA:.+]] = vector.transfer_read %[[sA]] + %4 = affine.apply #map1()[%0] + %5 = tensor.pad %1 nofold low[%c0, %c0] high[%c0, %4] { + ^bb0(%arg6: index, %arg7: index): + tensor.yield %cst : f32 + } : tensor<4x?xf32> to tensor<4x7xf32> + %6 = affine.apply #map1()[%0] + // CHECK: %[[vB:.+]] = vector.transfer_read %[[sB]] + %7 = tensor.pad %2 nofold low[%c0, %c0] high[%6, %c0] { + ^bb0(%arg6: index, %arg7: index): + tensor.yield %cst : f32 + } : tensor to tensor<7x5xf32> + // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] + // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]] + // CHECK: %[[vS:.+]] = arith.addf %[[vR]], %[[vC]] + // CHECK: vector.transfer_write %[[vS]], %[[C]] + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + return %9 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + %1 = get_closest_isolated_parent %0 + %2 = transform.structured.vectorize %1 {vectorize_padding = true} + } +} + +// ----- + +func.func @vectorize(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { + // expected-note @below {{non-isolated target}} + %0 = linalg.matmul ins(%arg0, %arg1 
: tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @pdl_target : benefit(1) { + %args = operands + %results = types + %0 = pdl.operation "linalg.matmul"(%args : !pdl.range) -> (%results : !pdl.range) + // TODO: we don't want this, but it is the required terminator for pdl.pattern + rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %0 = pdl_match @pdl_target in %arg1 + // expected-error @below {{applies only to isolated-from-above targets}} + %2 = transform.structured.vectorize %0 + } +} diff --git a/mlir/test/Dialect/Transform/test-interpreter.mlir b/mlir/test/Dialect/Transform/test-interpreter.mlir index 2b2416480af127..530af50df2950c 100644 --- a/mlir/test/Dialect/Transform/test-interpreter.mlir +++ b/mlir/test/Dialect/Transform/test-interpreter.mlir @@ -97,3 +97,34 @@ transform.with_pdl_patterns { // expected-remark @below {{matched}} "test.some_op"() : () -> () +// ----- + +// expected-remark @below {{parent function}} +func.func @foo() { + %0 = arith.constant 0 : i32 + return +} + +// expected-remark @below {{parent function}} +func.func @bar() { + %0 = arith.constant 0 : i32 + %1 = arith.constant 1 : i32 + return +} + +transform.with_pdl_patterns { +^bb0(%arg0: !pdl.operation): + pdl.pattern @const : benefit(1) { + %r = pdl.types + %0 = pdl.operation "arith.constant" -> (%r : !pdl.range) + pdl.rewrite %0 with "transform.dialect" + } + + transform.sequence %arg0 { + ^bb1(%arg1: !pdl.operation): + %f = pdl_match @const in %arg1 + // CHECK: %{{.+}} = get_closest_isolated_parent %{{.+}} + %m = get_closest_isolated_parent %f + test_print_remark_at_operand %m, "parent function" + } +} diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 6dc36aeff09591..07e2c4701d324d 100644 --- 
a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -7381,6 +7381,7 @@ cc_library( ":Parser", ":SideEffectInterfaces", ":TransformDialect", + ":TransformUtils", "//llvm:Support", ], ) From 3f71765a71ca97433723b43c4f64931cace11ca1 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 30 May 2022 15:14:02 +0200 Subject: [PATCH 838/908] [mlir] provide Python bindings for the Transform dialect Python bindings for extensions of the Transform dialect are defined in separate Python source files that can be imported on-demand, i.e., that are not imported with the "main" transform dialect. This requires a minor addition to the ODS-based bindings generator. This approach is consistent with the current model for downstream projects that are expected to bundle MLIR Python bindings: such projects can include their custom extensions into the bundle similarly to how they include their dialects. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D126208 --- mlir/cmake/modules/AddMLIRPython.cmake | 55 ++++++ mlir/python/CMakeLists.txt | 19 ++ .../dialects/LinalgStructuredTransformOps.td | 21 +++ mlir/python/mlir/dialects/TransformOps.td | 15 ++ .../dialects/_structured_transform_ops_ext.py | 178 ++++++++++++++++++ .../mlir/dialects/_transform_ops_ext.py | 106 +++++++++++ .../mlir/dialects/transform/__init__.py | 5 + .../mlir/dialects/transform/structured.py | 5 + mlir/test/python/dialects/transform.py | 84 +++++++++ .../dialects/transform_structured_ext.py | 118 ++++++++++++ mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp | 18 +- .../mlir/python/BUILD.bazel | 68 +++++++ 12 files changed, 690 insertions(+), 2 deletions(-) create mode 100644 mlir/python/mlir/dialects/LinalgStructuredTransformOps.td create mode 100644 mlir/python/mlir/dialects/TransformOps.td create mode 100644 mlir/python/mlir/dialects/_structured_transform_ops_ext.py create mode 100644 mlir/python/mlir/dialects/_transform_ops_ext.py 
create mode 100644 mlir/python/mlir/dialects/transform/__init__.py create mode 100644 mlir/python/mlir/dialects/transform/structured.py create mode 100644 mlir/test/python/dialects/transform.py create mode 100644 mlir/test/python/dialects/transform_structured_ext.py diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake index 8750b10c963711..ff9edd17307b8d 100644 --- a/mlir/cmake/modules/AddMLIRPython.cmake +++ b/mlir/cmake/modules/AddMLIRPython.cmake @@ -355,6 +355,61 @@ function(declare_mlir_dialect_python_bindings) endif() endfunction() +# Function: declare_mlir_dialect_extension_python_bindings +# Helper to generate source groups for dialect extensions, including both +# static source files and a TD_FILE to generate wrappers. +# +# This will generate a source group named ${ADD_TO_PARENT}.${EXTENSION_NAME}. +# +# Arguments: +# ROOT_DIR: Same as for declare_mlir_python_sources(). +# ADD_TO_PARENT: Same as for declare_mlir_python_sources(). Unique names +# for the subordinate source groups are derived from this. +# TD_FILE: Tablegen file to generate source for (relative to ROOT_DIR). +# DIALECT_NAME: Python name of the dialect. +# EXTENSION_NAME: Python name of the dialect extension. +# SOURCES: Same as declare_mlir_python_sources(). +# SOURCES_GLOB: Same as declare_mlir_python_sources(). +# DEPENDS: Additional dependency targets. +function(declare_mlir_dialect_extension_python_bindings) + cmake_parse_arguments(ARG + "" + "ROOT_DIR;ADD_TO_PARENT;TD_FILE;DIALECT_NAME;EXTENSION_NAME" + "SOURCES;SOURCES_GLOB;DEPENDS" + ${ARGN}) + # Source files. 
+ set(_extension_target "${ARG_ADD_TO_PARENT}.${ARG_EXTENSION_NAME}") + declare_mlir_python_sources(${_extension_target} + ROOT_DIR "${ARG_ROOT_DIR}" + ADD_TO_PARENT "${ARG_ADD_TO_PARENT}" + SOURCES "${ARG_SOURCES}" + SOURCES_GLOB "${ARG_SOURCES_GLOB}" + ) + + # Tablegen + if(ARG_TD_FILE) + set(tblgen_target "${ARG_ADD_TO_PARENT}.${ARG_EXTENSION_NAME}.tablegen") + set(td_file "${ARG_ROOT_DIR}/${ARG_TD_FILE}") + get_filename_component(relative_td_directory "${ARG_TD_FILE}" DIRECTORY) + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${relative_td_directory}") + set(output_filename "${relative_td_directory}/_${ARG_EXTENSION_NAME}_ops_gen.py") + set(LLVM_TARGET_DEFINITIONS ${td_file}) + mlir_tablegen("${output_filename}" -gen-python-op-bindings + -bind-dialect=${ARG_DIALECT_NAME} + -dialect-extension=${ARG_EXTENSION_NAME}) + add_public_tablegen_target(${tblgen_target}) + if(ARG_DEPENDS) + add_dependencies(${tblgen_target} ${ARG_DEPENDS}) + endif() + + declare_mlir_python_sources("${_extension_target}.ops_gen" + ROOT_DIR "${CMAKE_CURRENT_BINARY_DIR}" + ADD_TO_PARENT "${_extension_target}" + SOURCES "${output_filename}" + ) + endif() +endfunction() + # Function: mlir_python_setup_extension_rpath # Sets RPATH properties on a target, assuming that it is being output to # an _mlir_libs directory with all other libraries. 
For static linkage, diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index d280bf105aef07..17048e8cb9e30d 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -116,6 +116,25 @@ declare_mlir_dialect_python_bindings( DIALECT_NAME linalg DEPENDS LinalgOdsGen) +declare_mlir_dialect_python_bindings( + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/TransformOps.td + SOURCES + dialects/_transform_ops_ext.py + dialects/transform/__init__.py + DIALECT_NAME transform) + +declare_mlir_dialect_extension_python_bindings( + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/LinalgStructuredTransformOps.td + SOURCES + dialects/_structured_transform_ops_ext.py + dialects/transform/structured.py + DIALECT_NAME transform + EXTENSION_NAME structured_transform) + declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" diff --git a/mlir/python/mlir/dialects/LinalgStructuredTransformOps.td b/mlir/python/mlir/dialects/LinalgStructuredTransformOps.td new file mode 100644 index 00000000000000..a9a53fe6db288e --- /dev/null +++ b/mlir/python/mlir/dialects/LinalgStructuredTransformOps.td @@ -0,0 +1,21 @@ +//===-- LinalgStructuredTransformOps.td --------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Entry point of the Python bindings generator for the structured transform ops +// provided by Linalg (and other dialects). 
+// +//===----------------------------------------------------------------------===// + + +#ifndef PYTHON_BINDINGS_LINALG_STRUCTURED_TRANSFORM_OPS +#define PYTHON_BINDINGS_LINALG_STRUCTURED_TRANSFORM_OPS + +include "mlir/Bindings/Python/Attributes.td" +include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td" + +#endif // PYTHON_BINDINGS_LINALG_STRUCTURED_TRANSFORM_OPS diff --git a/mlir/python/mlir/dialects/TransformOps.td b/mlir/python/mlir/dialects/TransformOps.td new file mode 100644 index 00000000000000..7f0d80ead4e635 --- /dev/null +++ b/mlir/python/mlir/dialects/TransformOps.td @@ -0,0 +1,15 @@ +//===-- TransformOps.td - Transform ops bind entry point ---*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef PYTHON_BINDINGS_TRANSFORM_OPS +#define PYTHON_BINDINGS_TRANSFORM_OPS + +include "mlir/Bindings/Python/Attributes.td" +include "mlir/Dialect/Transform/IR/TransformOps.td" + +#endif // PYTHON_BINDINGS_TRANSFORM_OPS diff --git a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py new file mode 100644 index 00000000000000..70e39be5289da7 --- /dev/null +++ b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py @@ -0,0 +1,178 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +try: + from ..ir import * + from ._ods_common import get_op_result_or_value as _get_op_result_or_value + from ..dialects import pdl +except ImportError as e: + raise RuntimeError("Error loading imports from extension module") from e + +from typing import List, Optional, Sequence, Union + +IntOrAttrList = Sequence[Union[IntegerAttr, int]] +OptionalIntList = Optional[Union[ArrayAttr, IntOrAttrList]] + + +def _get_array_attr( + values: Optional[Union[ArrayAttr, Sequence[Attribute]]]) -> ArrayAttr: + """Creates an array attribute from its operand.""" + if values is None: + return ArrayAttr.get([]) + if isinstance(values, ArrayAttr): + return values + + return ArrayAttr.get(values) + + +def _get_int_array_attr( + values: Optional[Union[ArrayAttr, Sequence[Union[IntegerAttr, int]]]] +) -> ArrayAttr: + """Creates an integer array attribute from its operand. + + If the operand is already an array attribute, forwards it. Otherwise treats + the operand as a list of attributes or integers, possibly interspersed, to + create a new array attribute containing integer attributes. Expects the + thread-local MLIR context to have been set by the context manager. + """ + if values is None: + return ArrayAttr.get([]) + if isinstance(values, ArrayAttr): + return values + + attributes = [] + for value in values: + if isinstance(value, IntegerAttr): + attributes.append(value) + else: + attributes.append(IntegerAttr.get(IntegerType.get_signless(64), value)) + return ArrayAttr.get(attributes) + + +def _get_int_int_array_attr( + values: Optional[Union[ArrayAttr, Sequence[Union[ArrayAttr, + IntOrAttrList]]]] +) -> ArrayAttr: + """Creates an array attribute containing array attributes of integers. + + If the operand is already an array attribute, forwards it. Otherwise treats + the operand as a list of attributes or integers, potentially interspersed, to + create a new array-of-array attribute.
Expects the thread-local MLIR context + to have been set by the context manager. + """ + if values is None: + return ArrayAttr.get([]) + if isinstance(values, ArrayAttr): + return values + + return ArrayAttr.get([_get_int_array_attr(value) for value in values]) + + +class InterchangeOp: + """Specialization for InterchangeOp class.""" + + def __init__(self, + target: Union[Operation, Value], + *, + iterator_interchange: OptionalIntList = None, + loc=None, + ip=None): + pdl_operation_type = pdl.OperationType.get() + interchange_attr = _get_int_array_attr(iterator_interchange) + super().__init__( + pdl_operation_type, + _get_op_result_or_value(target), + iterator_interchange=interchange_attr, + loc=loc, + ip=ip) + + +class PadOp: + """Specialization for PadOp class.""" + + def __init__(self, + target: Union[Operation, Value], + *, + padding_values: Optional[Union[ArrayAttr, + Sequence[Attribute]]] = None, + padding_dimensions: OptionalIntList = None, + pack_paddings: OptionalIntList = None, + hoist_paddings: OptionalIntList = None, + transpose_paddings: Optional[Union[ArrayAttr, Sequence[Union[ + ArrayAttr, IntOrAttrList]]]] = None, + loc=None, + ip=None): + pdl_operation_type = pdl.OperationType.get() + padding_values_attr = _get_array_attr(padding_values) + padding_dimensions_attr = _get_int_array_attr(padding_dimensions) + pack_paddings_attr = _get_int_array_attr(pack_paddings) + hoist_paddings_attr = _get_int_array_attr(hoist_paddings) + transpose_paddings_attr = _get_int_int_array_attr(transpose_paddings) + super().__init__( + pdl_operation_type, + _get_op_result_or_value(target), + padding_values=padding_values_attr, + padding_dimensions=padding_dimensions_attr, + pack_paddings=pack_paddings_attr, + hoist_paddings=hoist_paddings_attr, + transpose_paddings=transpose_paddings_attr, + loc=loc, + ip=ip) + + +class ScalarizeOp: + """Specialization for ScalarizeOp class.""" + + def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None): + 
pdl_operation_type = pdl.OperationType.get() + super().__init__( + pdl_operation_type, _get_op_result_or_value(target), loc=loc, ip=ip) + + +class TileOp: + """Specialization for TileOp class.""" + + def __init__(self, + target: Union[Operation, Value], + *, + sizes: OptionalIntList = None, + interchange: OptionalIntList = None, + loc=None, + ip=None): + pdl_operation_type = pdl.OperationType.get() + sizes_attr = _get_int_array_attr(sizes) + num_loops = sum( + v if v == 0 else 1 for v in self.__extract_values(sizes_attr)) + super().__init__( + pdl_operation_type, [pdl_operation_type] * num_loops, + _get_op_result_or_value(target), + sizes=sizes_attr, + interchange=_get_int_array_attr(interchange) if interchange else None, + loc=loc, + ip=ip) + + def __extract_values(self, attr: Optional[ArrayAttr]) -> List[int]: + if not attr: + return [] + return [IntegerAttr(element).value for element in attr] + + +class VectorizeOp: + """Specialization for VectorizeOp class.""" + + def __init__(self, + target: Union[Operation, Value], + *, + vectorize_padding: Union[bool, BoolAttr] = False, + loc=None, + ip=None): + pdl_operation_type = pdl.OperationType.get() + if isinstance(vectorize_padding, bool): + vectorize_padding = BoolAttr.get(vectorize_padding) + super().__init__( + pdl_operation_type, + _get_op_result_or_value(target), + vectorize_padding=vectorize_padding, + loc=loc, + ip=ip) diff --git a/mlir/python/mlir/dialects/_transform_ops_ext.py b/mlir/python/mlir/dialects/_transform_ops_ext.py new file mode 100644 index 00000000000000..138195dca73e80 --- /dev/null +++ b/mlir/python/mlir/dialects/_transform_ops_ext.py @@ -0,0 +1,106 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +try: + from ..ir import * + from ._ods_common import get_op_result_or_value as _get_op_result_or_value, get_op_results_or_values as _get_op_results_or_values + from ..dialects import pdl +except ImportError as e: + raise RuntimeError("Error loading imports from extension module") from e + +from typing import Optional, overload, Sequence, Union + + +def _get_symbol_ref_attr(value: Union[Attribute, str]): + if isinstance(value, Attribute): + return value + return FlatSymbolRefAttr.get(value) + + +class GetClosestIsolatedParentOp: + + def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None): + super().__init__( + pdl.OperationType.get(), + _get_op_result_or_value(target), + loc=loc, + ip=ip) + + +class PDLMatchOp: + + def __init__(self, + target: Union[Operation, Value], + pattern_name: Union[Attribute, str], + *, + loc=None, + ip=None): + super().__init__( + pdl.OperationType.get(), + _get_op_result_or_value(target), + _get_symbol_ref_attr(pattern_name), + loc=loc, + ip=ip) + + +class SequenceOp: + + @overload + def __init__(self, resultsOrRoot: Sequence[Type], + optionalRoot: Optional[Union[Operation, Value]]): + ... + + @overload + def __init__(self, resultsOrRoot: Optional[Union[Operation, Value]], + optionalRoot: NoneType): + ... 
+ + def __init__(self, resultsOrRoot=None, optionalRoot=None): + results = resultsOrRoot if isinstance(resultsOrRoot, Sequence) else [] + root = ( + resultsOrRoot + if not isinstance(resultsOrRoot, Sequence) else optionalRoot) + root = _get_op_result_or_value(root) if root else None + super().__init__(results_=results, root=root) + self.regions[0].blocks.append(pdl.OperationType.get()) + + @property + def body(self) -> Block: + return self.regions[0].blocks[0] + + @property + def bodyTarget(self) -> Value: + return self.body.arguments[0] + + +class WithPDLPatternsOp: + + def __init__(self, + target: Optional[Union[Operation, Value]] = None, + *, + loc=None, + ip=None): + super().__init__( + root=_get_op_result_or_value(target) if target else None, + loc=loc, + ip=ip) + self.regions[0].blocks.append(pdl.OperationType.get()) + + @property + def body(self) -> Block: + return self.regions[0].blocks[0] + + @property + def bodyTarget(self) -> Value: + return self.body.arguments[0] + + +class YieldOp: + + def __init__(self, + operands: Union[Operation, Sequence[Value]] = [], + *, + loc=None, + ip=None): + super().__init__(_get_op_results_or_values(operands), loc=loc, ip=ip) diff --git a/mlir/python/mlir/dialects/transform/__init__.py b/mlir/python/mlir/dialects/transform/__init__.py new file mode 100644 index 00000000000000..ab4fa5631433cd --- /dev/null +++ b/mlir/python/mlir/dialects/transform/__init__.py @@ -0,0 +1,5 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from .._transform_ops_gen import * diff --git a/mlir/python/mlir/dialects/transform/structured.py b/mlir/python/mlir/dialects/transform/structured.py new file mode 100644 index 00000000000000..b8ee48c42945af --- /dev/null +++ b/mlir/python/mlir/dialects/transform/structured.py @@ -0,0 +1,5 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from .._structured_transform_ops_gen import * diff --git a/mlir/test/python/dialects/transform.py b/mlir/test/python/dialects/transform.py new file mode 100644 index 00000000000000..472201731ba513 --- /dev/null +++ b/mlir/test/python/dialects/transform.py @@ -0,0 +1,84 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import transform +from mlir.dialects import pdl + + +def run(f): + with Context(), Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + print("\nTEST:", f.__name__) + f() + print(module) + return f + + +@run +def testSequenceOp(): + sequence = transform.SequenceOp([pdl.OperationType.get()]) + with InsertionPoint(sequence.body): + transform.YieldOp([sequence.bodyTarget]) + # CHECK-LABEL: TEST: testSequenceOp + # CHECK: = transform.sequence { + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !pdl.operation): + # CHECK: yield %[[ARG0]] : !pdl.operation + # CHECK: } : !pdl.operation + + +@run +def testNestedSequenceOp(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + nested = transform.SequenceOp(sequence.bodyTarget) + with InsertionPoint(nested.body): + doubly_nested = transform.SequenceOp([pdl.OperationType.get()], + nested.bodyTarget) + with InsertionPoint(doubly_nested.body): + transform.YieldOp([doubly_nested.bodyTarget]) + transform.YieldOp() + transform.YieldOp() + # CHECK-LABEL: TEST: testNestedSequenceOp + # CHECK: 
transform.sequence { + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !pdl.operation): + # CHECK: sequence %[[ARG0]] { + # CHECK: ^{{.*}}(%[[ARG1:.+]]: !pdl.operation): + # CHECK: = sequence %[[ARG1]] { + # CHECK: ^{{.*}}(%[[ARG2:.+]]: !pdl.operation): + # CHECK: yield %[[ARG2]] : !pdl.operation + # CHECK: } : !pdl.operation + # CHECK: } + # CHECK: } + + +@run +def testTransformPDLOps(): + withPdl = transform.WithPDLPatternsOp() + with InsertionPoint(withPdl.body): + sequence = transform.SequenceOp([pdl.OperationType.get()], + withPdl.bodyTarget) + with InsertionPoint(sequence.body): + match = transform.PDLMatchOp(sequence.bodyTarget, "pdl_matcher") + transform.YieldOp(match) + # CHECK-LABEL: TEST: testTransformPDLOps + # CHECK: transform.with_pdl_patterns { + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !pdl.operation): + # CHECK: = sequence %[[ARG0]] { + # CHECK: ^{{.*}}(%[[ARG1:.+]]: !pdl.operation): + # CHECK: %[[RES:.+]] = pdl_match @pdl_matcher in %[[ARG1]] + # CHECK: yield %[[RES]] : !pdl.operation + # CHECK: } : !pdl.operation + # CHECK: } + + +@run +def testGetClosestIsolatedParentOp(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + transform.GetClosestIsolatedParentOp(sequence.bodyTarget) + transform.YieldOp() + # CHECK-LABEL: TEST: testGetClosestIsolatedParentOp + # CHECK: transform.sequence + # CHECK: ^{{.*}}(%[[ARG1:.+]]: !pdl.operation): + # CHECK: = get_closest_isolated_parent %[[ARG1]] diff --git a/mlir/test/python/dialects/transform_structured_ext.py b/mlir/test/python/dialects/transform_structured_ext.py new file mode 100644 index 00000000000000..463dec10d7bd5c --- /dev/null +++ b/mlir/test/python/dialects/transform_structured_ext.py @@ -0,0 +1,118 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import transform +from mlir.dialects import pdl +from mlir.dialects.transform import structured + + +def run(f): + with Context(), Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + 
print("\nTEST:", f.__name__) + f() + print(module) + return f + + +@run +def testInterchange(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + structured.InterchangeOp( + sequence.bodyTarget, + iterator_interchange=[ + IntegerAttr.get(IntegerType.get_signless(64), 1), 0 + ]) + transform.YieldOp() + # CHECK-LABEL: TEST: testInterchange + # CHECK: transform.sequence + # CHECK: transform.structured.interchange + # CHECK: iterator_interchange = [1, 0] + + +@run +def testPad(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + structured.PadOp( + sequence.bodyTarget, + padding_values=[FloatAttr.get_f32(42.0)], + padding_dimensions=[1], + transpose_paddings=[[1, 0]]) + transform.YieldOp() + # CHECK-LABEL: TEST: testPad + # CHECK: transform.sequence + # CHECK: transform.structured.pad + # CHECK-DAG: padding_values = [4.200000e+01 : f32] + # CHECK-DAG: padding_dimensions = [1] + # CHECK-DAG: transpose_paddings = {{\[}}[1, 0]] + # CHECK-DAG: hoist_paddings = [] + # CHECK-DAG: pack_paddings = [] + + +@run +def testScalarize(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + structured.ScalarizeOp(sequence.bodyTarget) + transform.YieldOp() + # CHECK-LABEL: TEST: testScalarize + # CHECK: transform.structured.scalarize + + +@run +def testTileCompact(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + structured.TileOp(sequence.bodyTarget, sizes=[4, 8], interchange=[0, 1]) + transform.YieldOp() + # CHECK-LABEL: TEST: testTileCompact + # CHECK: transform.sequence + # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile + # CHECK-DAG: interchange = [0, 1] + # CHECK-DAG: sizes = [4, 8] + + +@run +def testTileAttributes(): + sequence = transform.SequenceOp() + attr = ArrayAttr.get( + [IntegerAttr.get(IntegerType.get_signless(64), x) for x in [4, 8]]) + ichange = ArrayAttr.get( + [IntegerAttr.get(IntegerType.get_signless(64), x) for x in [0, 1]]) + with 
InsertionPoint(sequence.body): + structured.TileOp(sequence.bodyTarget, sizes=attr, interchange=ichange) + transform.YieldOp() + # CHECK-LABEL: TEST: testTileAttributes + # CHECK: transform.sequence + # CHECK: structured.tile + # CHECK-DAG: interchange = [0, 1] + # CHECK-DAG: sizes = [4, 8] + + +@run +def testTileZero(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + structured.TileOp( + sequence.bodyTarget, sizes=[4, 0, 2, 0], interchange=[0, 1, 2, 3]) + transform.YieldOp() + # CHECK-LABEL: TEST: testTileZero + # CHECK: transform.sequence + # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile + # CHECK-DAG: interchange = [0, 1, 2, 3] + # CHECK-DAG: sizes = [4, 0, 2, 0] + + +@run +def testVectorize(): + sequence = transform.SequenceOp() + with InsertionPoint(sequence.body): + structured.VectorizeOp(sequence.bodyTarget, vectorize_padding=True) + transform.YieldOp() + # CHECK-LABEL: TEST: testVectorize + # CHECK: transform.sequence + # CHECK: = transform.structured.vectorize + # CHECK: vectorize_padding = true diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp index 44c60c1dc61929..d6b3aa60c97f42 100644 --- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp +++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp @@ -50,6 +50,10 @@ class _Dialect(_ods_ir.Dialect): )Py"; +constexpr const char *dialectExtensionTemplate = R"Py( +from ._{0}_ops_gen import _Dialect +)Py"; + /// Template for operation class: /// {0} is the Python class name; /// {1} is the operation name. @@ -270,6 +274,10 @@ static llvm::cl::opt llvm::cl::desc("The dialect to run the generator for"), llvm::cl::init(""), llvm::cl::cat(clOpPythonBindingCat)); +static llvm::cl::opt clDialectExtensionName( + "dialect-extension", llvm::cl::desc("The prefix of the dialect extension"), + llvm::cl::init(""), llvm::cl::cat(clOpPythonBindingCat)); + using AttributeClasses = DenseMap; /// Checks whether `str` is a Python keyword. 
@@ -1014,8 +1022,14 @@ static bool emitAllOps(const llvm::RecordKeeper &records, raw_ostream &os) { AttributeClasses attributeClasses; constructAttributeMapping(records, attributeClasses); - os << llvm::formatv(fileHeader, clDialectName.getValue()); - os << llvm::formatv(dialectClassTemplate, clDialectName.getValue()); + bool isExtension = !clDialectExtensionName.empty(); + os << llvm::formatv(fileHeader, isExtension + ? clDialectExtensionName.getValue() + : clDialectName.getValue()); + if (isExtension) + os << llvm::formatv(dialectExtensionTemplate, clDialectName.getValue()); + else + os << llvm::formatv(dialectClassTemplate, clDialectName.getValue()); for (const llvm::Record *rec : records.getAllDerivedDefinitions("Op")) { Operator op(rec); diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index c3cba2f044cf46..c94bc5dbd60ff5 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -825,6 +825,74 @@ filegroup( ], ) +##---------------------------------------------------------------------------## +# Transform dialect and extensions. 
+##---------------------------------------------------------------------------## + +td_library( + name = "TransformOpsPyTdFiles", + srcs = [ + "//mlir:include/mlir/Bindings/Python/Attributes.td", + ], + deps = [ + "//mlir:OpBaseTdFiles", + "//mlir:TransformDialectTdFiles", + ], +) + +gentbl_filegroup( + name = "TransformOpsPyGen", + tbl_outs = [ + ( + [ + "-gen-python-op-bindings", + "-bind-dialect=transform", + ], + "mlir/dialects/_transform_ops_gen.py", + ), + ], + tblgen = "//mlir:mlir-tblgen", + td_file = "mlir/dialects/TransformOps.td", + deps = [ + ":TransformOpsPyTdFiles", + ], +) + +gentbl_filegroup( + name = "StructuredTransformOpsPyGen", + tbl_outs = [ + ( + [ + "-gen-python-op-bindings", + "-bind-dialect=transform", + "-dialect-extension=structured_transform", + ], + "mlir/dialects/_structured_transform_ops_gen.py", + ), + ], + tblgen = "//mlir:mlir-tblgen", + td_file = "mlir/dialects/LinalgStructuredTransformOps.td", + deps = [ + ":TransformOpsPyTdFiles", + "//mlir:LinalgTransformOpsTdFiles", + ], +) + +filegroup( + name = "TransformOpsPyFiles", + srcs = [ + "mlir/dialects/_structured_transform_ops_ext.py", + "mlir/dialects/_transform_ops_ext.py", + ":StructuredTransformOpsPyGen", + ":TransformOpsPyGen", + ], +) + +filegroup( + name = "TransformOpsPackagePyFiles", + srcs = glob(["mlir/dialects/transform/*.py"]), +) + ##---------------------------------------------------------------------------## # Vector dialect. ##---------------------------------------------------------------------------## From 4278b7e16a5b13d529c92d682ac76b53f93a4961 Mon Sep 17 00:00:00 2001 From: Pierre Gousseau Date: Mon, 30 May 2022 17:00:10 +0100 Subject: [PATCH 839/908] [sanitizers] Fixes strndup API behaviour when intercepted by sanitizers Sanitizers ignore flag allocator_may_return_null=1 in strndup() calls. When OOM is emulated, this causes to the unexpected crash. 
Committed by pgousseau on behalf of "Kostyantyn Melnik, kmnls.kmnls@gmail.com" Reviewed by: pgousseau Differential Revision: https://reviews.llvm.org/D126452 --- .../sanitizer_common_interceptors.inc | 8 ++++--- .../TestCases/max_allocation_size.cpp | 23 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 99bd59abb9ee2a..3b316f5edea805 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -385,9 +385,11 @@ extern const short *_tolower_tab_; if (common_flags()->intercept_strndup) { \ COMMON_INTERCEPTOR_READ_STRING(ctx, s, Min(size, copy_length + 1)); \ } \ - COMMON_INTERCEPTOR_COPY_STRING(ctx, new_mem, s, copy_length); \ - internal_memcpy(new_mem, s, copy_length); \ - new_mem[copy_length] = '\0'; \ + if (new_mem) { \ + COMMON_INTERCEPTOR_COPY_STRING(ctx, new_mem, s, copy_length); \ + internal_memcpy(new_mem, s, copy_length); \ + new_mem[copy_length] = '\0'; \ + } \ return new_mem; #endif diff --git a/compiler-rt/test/sanitizer_common/TestCases/max_allocation_size.cpp b/compiler-rt/test/sanitizer_common/TestCases/max_allocation_size.cpp index 9cc799a3245d0e..fc9bc3a0448124 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/max_allocation_size.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/max_allocation_size.cpp @@ -35,6 +35,10 @@ // RUN: | FileCheck %s --check-prefix=CHECK-nnCRASH // RUN: %env_tool_opts=max_allocation_size_mb=2:allocator_may_return_null=1 \ // RUN: %run %t new-nothrow 2>&1 | FileCheck %s --check-prefix=CHECK-NULL +// RUN: %env_tool_opts=max_allocation_size_mb=2:allocator_may_return_null=0 \ +// RUN: not %run %t strndup 2>&1 | FileCheck %s --check-prefix=CHECK-sCRASH +// RUN: %env_tool_opts=max_allocation_size_mb=2:allocator_may_return_null=1 \ +// RUN: %run %t 
strndup 2>&1 | FileCheck %s --check-prefix=CHECK-NULL // win32 is disabled due to failing errno tests. // UNSUPPORTED: ubsan, windows-msvc @@ -47,6 +51,8 @@ #include #include +constexpr size_t MaxAllocationSize = size_t{2} << 20; + static void *allocate(const char *Action, size_t Size) { if (!strcmp(Action, "malloc")) return malloc(Size); @@ -65,12 +71,21 @@ static void *allocate(const char *Action, size_t Size) { return ::operator new(Size); if (!strcmp(Action, "new-nothrow")) return ::operator new(Size, std::nothrow); + if (!strcmp(Action, "strndup")) { + static char pstr[MaxAllocationSize + 1] = {'a'}; + for (size_t i = 0; i < MaxAllocationSize + 1; i++) + pstr[i] = 'a'; + if (Size == MaxAllocationSize) + pstr[MaxAllocationSize - 1] = '\0'; + return strndup(pstr, Size); + } assert(0); } static void deallocate(const char *Action, void *Ptr) { if (!strcmp(Action, "malloc") || !strcmp(Action, "calloc") || - !strcmp(Action, "realloc") || !strcmp(Action, "realloc-after-malloc")) + !strcmp(Action, "realloc") || !strcmp(Action, "realloc-after-malloc") || + !strcmp(Action, "strndup")) return free(Ptr); if (!strcmp(Action, "new")) return ::operator delete(Ptr); @@ -84,8 +99,6 @@ int main(int Argc, char **Argv) { const char *Action = Argv[1]; fprintf(stderr, "%s:\n", Action); - constexpr size_t MaxAllocationSize = size_t{2} << 20; - // Should succeed when max_allocation_size_mb is set. 
void *volatile P = allocate(Action, MaxAllocationSize); assert(P); @@ -120,8 +133,10 @@ int main(int Argc, char **Argv) { // CHECK-nCRASH-OOM: {{SUMMARY: .*Sanitizer: out-of-memory}} // CHECK-nnCRASH: new-nothrow: // CHECK-nnCRASH: {{SUMMARY: .*Sanitizer: allocation-size-too-big}} +// CHECK-sCRASH: strndup: +// CHECK-sCRASH: {{SUMMARY: .*Sanitizer: allocation-size-too-big}} -// CHECK-NULL: {{malloc|calloc|calloc-overflow|realloc|realloc-after-malloc|new-nothrow}} +// CHECK-NULL: {{malloc|calloc|calloc-overflow|realloc|realloc-after-malloc|new-nothrow|strndup}} // CHECK-NULL: errno: 12, P: 0 // // CHECK-NOTNULL-NOT: P: 0 From f8239eec8decd3459d4f638b98c4f582add31263 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 30 May 2022 11:06:19 -0400 Subject: [PATCH 840/908] [libc++] Reduce the verbosity when running the libc++ Lit configuration We print the same information as before, however we do it with less verbosity unless `--debug` is used. --- libcxx/utils/libcxx/test/newconfig.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/libcxx/utils/libcxx/test/newconfig.py b/libcxx/utils/libcxx/test/newconfig.py index 644438724f7ebc..2ac5cf239d63d9 100644 --- a/libcxx/utils/libcxx/test/newconfig.py +++ b/libcxx/utils/libcxx/test/newconfig.py @@ -13,6 +13,8 @@ def _getSubstitution(substitution, config): raise ValueError('Substitution {} is not in the config.'.format(substitution)) def configure(parameters, features, config, lit_config): + note = lambda s: lit_config.note("({}) {}".format(config.name, s)) + # Apply the actions supplied by parameters to the configuration first, since # parameters are things that we request explicitly and which might influence # what features are implicitly made available next. 
@@ -20,19 +22,24 @@ def configure(parameters, features, config, lit_config): actions = param.getActions(config, lit_config.params) for action in actions: action.applyTo(config) - lit_config.note("Applied '{}' as a result of parameter '{}'".format( - action.pretty(config, lit_config.params), - param.pretty(config, lit_config.params))) + if lit_config.debug: + note("Applied '{}' as a result of parameter '{}'".format( + action.pretty(config, lit_config.params), + param.pretty(config, lit_config.params))) # Then, apply the automatically-detected features. for feature in features: actions = feature.getActions(config) for action in actions: action.applyTo(config) - lit_config.note("Applied '{}' as a result of implicitly detected feature '{}'".format( - action.pretty(config, lit_config.params), - feature.pretty(config))) + if lit_config.debug: + note("Applied '{}' as a result of implicitly detected feature '{}'".format( + action.pretty(config, lit_config.params), + feature.pretty(config))) # Print the basic substitutions for sub in ('%{cxx}', '%{flags}', '%{compile_flags}', '%{link_flags}', '%{exec}'): - lit_config.note("Using {} substitution: '{}'".format(sub, _getSubstitution(sub, config))) + note("Using {} substitution: '{}'".format(sub, _getSubstitution(sub, config))) + + # Print all available features + note("All available features: {}".format(', '.join(config.available_features))) From e576280380d3f5221cfcc14e9fabeacc8506a43c Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Thu, 5 May 2022 15:31:22 -0700 Subject: [PATCH 841/908] [HLSL] Enable vector types for hlsl. Vector types in hlsl is using clang ext_vector_type. Declaration of vector types is in builtin header hlsl.h. hlsl.h will be included by default for hlsl shader. 
Reviewed By: Anastasia Differential Revision: https://reviews.llvm.org/D125052 --- clang/include/clang/Driver/Options.td | 4 +- clang/lib/Basic/LangOptions.cpp | 2 + clang/lib/Driver/ToolChains/Clang.cpp | 3 + clang/lib/Frontend/CompilerInvocation.cpp | 4 ++ clang/lib/Frontend/InitPreprocessor.cpp | 4 ++ clang/lib/Headers/CMakeLists.txt | 16 +++++ clang/lib/Headers/hlsl.h | 14 +++++ clang/lib/Headers/hlsl/hlsl_basic_types.h | 64 +++++++++++++++++++ clang/test/CodeGenHLSL/basic_types.hlsl | 76 +++++++++++++++++++++++ clang/test/Driver/hlsl_no_stdinc.hlsl | 12 ++++ 10 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 clang/lib/Headers/hlsl.h create mode 100644 clang/lib/Headers/hlsl/hlsl_basic_types.h create mode 100644 clang/test/CodeGenHLSL/basic_types.hlsl create mode 100644 clang/test/Driver/hlsl_no_stdinc.hlsl diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0343e48c1a9d11..44fd4d50ca0517 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6051,7 +6051,7 @@ def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">, // These options cannot be marshalled, because they are used to set up the LangOptions defaults. def finclude_default_header : Flag<["-"], "finclude-default-header">, - HelpText<"Include default header file for OpenCL">; + HelpText<"Include default header file for OpenCL and HLSL">; def fdeclare_opencl_builtins : Flag<["-"], "fdeclare-opencl-builtins">, HelpText<"Add OpenCL builtin function declarations (experimental)">; @@ -6780,6 +6780,8 @@ class DXCJoinedOrSeparate : Option<["/", "-"], name, def dxc_help : Option<["/", "-", "--"], "help", KIND_JOINED>, Group, Flags<[DXCOption, NoXarchOption]>, Alias, HelpText<"Display available options">; +def dxc_no_stdinc : DXCFlag<"hlsl-no-stdinc">, + HelpText<"HLSL only. 
Disables all standard includes containing non-native compiler types and functions.">; def Fo : DXCJoinedOrSeparate<"Fo">, Alias, HelpText<"Output object file">; def dxil_validator_version : Option<["/", "-"], "validator-version", KIND_SEPARATE>, diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 7791bff388be70..7549f3f2e23b4f 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -117,6 +117,8 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang, Opts.Digraphs = Std.hasDigraphs(); Opts.HLSL = Lang == Language::HLSL; + if (Opts.HLSL && Opts.IncludeDefaultHeader) + Includes.push_back("hlsl.h"); // Set OpenCL Version. Opts.OpenCL = Std.isOpenCL(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 5106c0e327b87f..a8706215bf6059 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3484,6 +3484,9 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs, for (const auto &Arg : ForwardedArguments) if (const auto *A = Args.getLastArg(Arg)) A->renderAsInput(Args, CmdArgs); + // Add the default headers if dxc_no_stdinc is not set. + if (!Args.hasArg(options::OPT_dxc_no_stdinc)) + CmdArgs.push_back("-finclude-default-header"); } static void RenderARCMigrateToolOptions(const Driver &D, const ArgList &Args, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 32b084dfedecc5..a51d4621ba6827 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4202,6 +4202,10 @@ static void GeneratePreprocessorArgs(PreprocessorOptions &Opts, ((LangOpts.DeclareOpenCLBuiltins && I == "opencl-c-base.h") || I == "opencl-c.h")) continue; + // Don't generate HLSL includes. They are implied by other flags that are + // generated elsewhere. 
+ if (LangOpts.HLSL && I == "hlsl.h") + continue; GenerateArg(Args, OPT_include, I, SA); } diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 3921f7949a0a99..4c5e7325d79600 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -377,6 +377,10 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__HLSL_VERSION", Twine((unsigned)LangOpts.getHLSLVersion())); + if (LangOpts.NativeHalfType) + Builder.defineMacro("__HLSL_ENABLE_16_BIT", + Twine((unsigned)LangOpts.getHLSLVersion())); + // Shader target information // "enums" for shader stages Builder.defineMacro("__SHADER_STAGE_VERTEX", diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 8bf7dd570384fe..fc321684f0c69c 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -62,6 +62,11 @@ set(hip_files __clang_hip_runtime_wrapper.h ) +set(hlsl_files + hlsl.h + hlsl/hlsl_basic_types.h + ) + set(mips_msa_files msa.h ) @@ -212,6 +217,7 @@ set(files ${cuda_files} ${hexagon_files} ${hip_files} + ${hlsl_files} ${mips_msa_files} ${opencl_files} ${ppc_files} @@ -405,6 +411,7 @@ add_header_target("webassembly-resource-headers" "${webassembly_files}") add_header_target("x86-resource-headers" "${x86_files}") # Other header groupings +add_header_target("hlsl-resource-headers" ${hlsl_files}) add_header_target("opencl-resource-headers" ${opencl_files}) add_header_target("openmp-resource-headers" ${openmp_wrapper_files}) add_header_target("windows-resource-headers" ${windows_only_files}) @@ -538,6 +545,12 @@ install( EXCLUDE_FROM_ALL COMPONENT x86-resource-headers) +install( + FILES ${hlsl_files} + DESTINATION ${header_install_dir} + EXCLUDE_FROM_ALL + COMPONENT hlsl-resource-headers) + install( FILES ${opencl_files} DESTINATION ${header_install_dir} @@ -614,6 +627,9 @@ if (NOT LLVM_ENABLE_IDE) DEPENDS webassembly-resource-headers 
COMPONENT webassembly-resource-headers) + add_llvm_install_targets(install-hlsl-resource-headers + DEPENDS hlsl-resource-headers + COMPONENT hlsl-resource-headers) add_llvm_install_targets(install-opencl-resource-headers DEPENDS opencl-resource-headers COMPONENT opencl-resource-headers) diff --git a/clang/lib/Headers/hlsl.h b/clang/lib/Headers/hlsl.h new file mode 100644 index 00000000000000..0db8a4ed1fe40f --- /dev/null +++ b/clang/lib/Headers/hlsl.h @@ -0,0 +1,14 @@ +//===----- hlsl.h - HLSL definitions --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _HLSL_H_ +#define _HLSL_H_ + +#include "hlsl/hlsl_basic_types.h" + +#endif //_HLSL_H_ diff --git a/clang/lib/Headers/hlsl/hlsl_basic_types.h b/clang/lib/Headers/hlsl/hlsl_basic_types.h new file mode 100644 index 00000000000000..2069990f5c06c4 --- /dev/null +++ b/clang/lib/Headers/hlsl/hlsl_basic_types.h @@ -0,0 +1,64 @@ +//===----- hlsl_basic_types.h - HLSL definitions for basic types ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _HLSL_HLSL_BASIC_TYPES_H_ +#define _HLSL_HLSL_BASIC_TYPES_H_ + +// built-in scalar data types: + +#ifdef __HLSL_ENABLE_16_BIT +// 16-bit integer. +typedef unsigned short uint16_t; +typedef short int16_t; +#endif + +// unsigned 32-bit integer. +typedef unsigned int uint; + +// 64-bit integer. 
+typedef unsigned long uint64_t; +typedef long int64_t; + +// built-in vector data types: + +#ifdef __HLSL_ENABLE_16_BIT +typedef int16_t int16_t2 __attribute__((ext_vector_type(2))); +typedef int16_t int16_t3 __attribute__((ext_vector_type(3))); +typedef int16_t int16_t4 __attribute__((ext_vector_type(4))); +typedef uint16_t uint16_t2 __attribute__((ext_vector_type(2))); +typedef uint16_t uint16_t3 __attribute__((ext_vector_type(3))); +typedef uint16_t uint16_t4 __attribute__((ext_vector_type(4))); +#endif + +typedef int int2 __attribute__((ext_vector_type(2))); +typedef int int3 __attribute__((ext_vector_type(3))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef uint uint2 __attribute__((ext_vector_type(2))); +typedef uint uint3 __attribute__((ext_vector_type(3))); +typedef uint uint4 __attribute__((ext_vector_type(4))); +typedef int64_t int64_t2 __attribute__((ext_vector_type(2))); +typedef int64_t int64_t3 __attribute__((ext_vector_type(3))); +typedef int64_t int64_t4 __attribute__((ext_vector_type(4))); +typedef uint64_t uint64_t2 __attribute__((ext_vector_type(2))); +typedef uint64_t uint64_t3 __attribute__((ext_vector_type(3))); +typedef uint64_t uint64_t4 __attribute__((ext_vector_type(4))); + +#ifdef __HLSL_ENABLE_16_BIT +typedef half half2 __attribute__((ext_vector_type(2))); +typedef half half3 __attribute__((ext_vector_type(3))); +typedef half half4 __attribute__((ext_vector_type(4))); +#endif + +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef double double2 __attribute__((ext_vector_type(2))); +typedef double double3 __attribute__((ext_vector_type(3))); +typedef double double4 __attribute__((ext_vector_type(4))); + +#endif //_HLSL_HLSL_BASIC_TYPES_H_ diff --git a/clang/test/CodeGenHLSL/basic_types.hlsl b/clang/test/CodeGenHLSL/basic_types.hlsl new file mode 100644 index 00000000000000..fcb0815c3af57b 
--- /dev/null +++ b/clang/test/CodeGenHLSL/basic_types.hlsl @@ -0,0 +1,76 @@ +// RUN: %clang_dxc -Tlib_6_7 -fcgl -Fo - %s | FileCheck %s + +// FIXME: check 16bit types once enable-16bit-types is ready. + +// CHECK:@uint_Val = global i32 0, align 4 +// CHECK:@uint64_t_Val = global i64 0, align 8 +// CHECK:@int64_t_Val = global i64 0, align 8 +// CHECK:@int2_Val = global <2 x i32> zeroinitializer, align 8 +// CHECK:@int3_Val = global <3 x i32> zeroinitializer, align 16 +// CHECK:@int4_Val = global <4 x i32> zeroinitializer, align 16 +// CHECK:@uint2_Val = global <2 x i32> zeroinitializer, align 8 +// CHECK:@uint3_Val = global <3 x i32> zeroinitializer, align 16 +// CHECK:@uint4_Val = global <4 x i32> zeroinitializer, align 16 +// CHECK:@int64_t2_Val = global <2 x i64> zeroinitializer, align 16 +// CHECK:@int64_t3_Val = global <3 x i64> zeroinitializer, align 32 +// CHECK:@int64_t4_Val = global <4 x i64> zeroinitializer, align 32 +// CHECK:@uint64_t2_Val = global <2 x i64> zeroinitializer, align 16 +// CHECK:@uint64_t3_Val = global <3 x i64> zeroinitializer, align 32 +// CHECK:@uint64_t4_Val = global <4 x i64> zeroinitializer, align 32 +// CHECK:@float2_Val = global <2 x float> zeroinitializer, align 8 +// CHECK:@float3_Val = global <3 x float> zeroinitializer, align 16 +// CHECK:@float4_Val = global <4 x float> zeroinitializer, align 16 +// CHECK:@double2_Val = global <2 x double> zeroinitializer, align 16 +// CHECK:@double3_Val = global <3 x double> zeroinitializer, align 32 +// CHECK:@double4_Val = global <4 x double> zeroinitializer, align 32 + +#define TYPE_DECL(T) T T##_Val + +#ifdef __HLSL_ENABLE_16_BIT +TYPE_DECL(uint16_t); +TYPE_DECL(int16_t); +#endif + +// unsigned 32-bit integer. +TYPE_DECL(uint); + +// 64-bit integer. 
+TYPE_DECL(uint64_t); +TYPE_DECL(int64_t); + +// built-in vector data types: + +#ifdef __HLSL_ENABLE_16_BIT +TYPE_DECL(int16_t2 ); +TYPE_DECL(int16_t3 ); +TYPE_DECL(int16_t4 ); +TYPE_DECL( uint16_t2 ); +TYPE_DECL( uint16_t3 ); +TYPE_DECL( uint16_t4 ); +#endif + +TYPE_DECL( int2 ); +TYPE_DECL( int3 ); +TYPE_DECL( int4 ); +TYPE_DECL( uint2 ); +TYPE_DECL( uint3 ); +TYPE_DECL( uint4 ); +TYPE_DECL( int64_t2 ); +TYPE_DECL( int64_t3 ); +TYPE_DECL( int64_t4 ); +TYPE_DECL( uint64_t2 ); +TYPE_DECL( uint64_t3 ); +TYPE_DECL( uint64_t4 ); + +#ifdef __HLSL_ENABLE_16_BIT +TYPE_DECL(half2 ); +TYPE_DECL(half3 ); +TYPE_DECL(half4 ); +#endif + +TYPE_DECL( float2 ); +TYPE_DECL( float3 ); +TYPE_DECL( float4 ); +TYPE_DECL( double2 ); +TYPE_DECL( double3 ); +TYPE_DECL( double4 ); diff --git a/clang/test/Driver/hlsl_no_stdinc.hlsl b/clang/test/Driver/hlsl_no_stdinc.hlsl new file mode 100644 index 00000000000000..ec6d0612a9ed7d --- /dev/null +++ b/clang/test/Driver/hlsl_no_stdinc.hlsl @@ -0,0 +1,12 @@ +// RUN: %clang_dxc -Tlib_6_7 -fcgl -Fo - %s -### 2>&1 | FileCheck %s --check-prefix=STDINC +// RUN: %clang_dxc -Tlib_6_7 -hlsl-no-stdinc -fcgl -Fo - %s -### 2>&1 | FileCheck %s --check-prefix=NOSTDINC + +// RUN: %clang -cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s -verify + +// Make sure hlsl-no-stdinc is translated into finclude-default-header. +// STDINC:"-finclude-default-header" +// NOSTDINC-NOT:"-finclude-default-header" + +// Make sure uint not work when finclude-default-header is off. +// expected-error@+1 {{unknown type name 'uint'}} +uint a; From 85322e82be5014fb0abddb3a36df928d16760fba Mon Sep 17 00:00:00 2001 From: Denis Antrushin Date: Sun, 10 Apr 2022 15:31:31 +0700 Subject: [PATCH 842/908] [TwoAddressInstructionPass] Special processing of STATEPOINT instruction. STATEPOINT is a special pseudo instruction which represent Moving GC semantic to LLVM. 
Every tied def/use VReg pair in STATEPOINT represent same physical register which can 'magically' change during call wrapped by statepoint. (By construction, tied use operand is not live across STATEPOINT). This means that when converting into two-address form, there is not need to insert COPY instruction before stateppoint, what TwoAddressInstruction pass does for 'regular' instructions. Reviewed By: MatzeB Differential Revision: https://reviews.llvm.org/D124631 --- .../lib/CodeGen/TwoAddressInstructionPass.cpp | 59 +++++++ .../test/CodeGen/X86/statepoint-invoke-ra.mir | 26 +-- .../CodeGen/X86/statepoint-vreg-twoaddr.mir | 64 +++++++ .../statepoint-vreg-unlimited-tied-opnds.ll | 156 +++++++++--------- 4 files changed, 212 insertions(+), 93 deletions(-) create mode 100644 llvm/test/CodeGen/X86/statepoint-vreg-twoaddr.mir diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index bebb35e1204bbd..0ba2abaf76fdc2 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -161,6 +161,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); void eliminateRegSequence(MachineBasicBlock::iterator&); + void processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands); public: static char ID; // Pass identification, replacement for typeid @@ -1627,6 +1628,56 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } +// For every tied operand pair this function transforms statepoint from +// RegA = STATEPOINT ... RegB(tied-def N) +// to +// RegB = STATEPOINT ... RegB(tied-def N) +// and replaces all uses of RegA with RegB. +// No extra COPY instruction is necessary because tied use is killed at +// STATEPOINT. 
+void TwoAddressInstructionPass::processStatepoint( + MachineInstr *MI, TiedOperandMap &TiedOperands) { + + for (auto &TO : TiedOperands) { + Register RegB = TO.first; + assert(TO.second.size() == 1 && "statepoints has single tied use"); + + unsigned SrcIdx = TO.second[0].first; + unsigned DstIdx = TO.second[0].second; + + MachineOperand &DstMO = MI->getOperand(DstIdx); + Register RegA = DstMO.getReg(); + + assert(RegB == MI->getOperand(SrcIdx).getReg()); + + if (RegA == RegB) + continue; + + MRI->replaceRegWith(RegA, RegB); + + if (LIS) { + VNInfo::Allocator &A = LIS->getVNInfoAllocator(); + LiveInterval &LI = LIS->getInterval(RegB); + for (auto &S : LIS->getInterval(RegA)) { + VNInfo *VNI = LI.getNextValue(S.start, A); + LiveRange::Segment NewSeg(S.start, S.end, VNI); + LI.addSegment(NewSeg); + } + LIS->removeInterval(RegA); + } + + if (LV) { + if (MI->getOperand(SrcIdx).isKill()) + LV->removeVirtualRegisterKilled(RegB, *MI); + LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB); + LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA); + SrcInfo.AliveBlocks |= DstInfo.AliveBlocks; + for (auto *KillMI : DstInfo.Kills) + LV->addVirtualRegisterKilled(RegB, *KillMI, false); + } + } +} + /// Reduce two-address instructions to two operands. bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; @@ -1720,6 +1771,14 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } } + if (mi->getOpcode() == TargetOpcode::STATEPOINT) { + processStatepoint(&*mi, TiedOperands); + TiedOperands.clear(); + LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + mi = nmi; + continue; + } + // Now iterate over the information collected above. 
for (auto &TO : TiedOperands) { processTiedPairs(&*mi, TO.second, Dist); diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir index 0ca9b79c8e65a5..e506ee4b668de6 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir @@ -9,14 +9,14 @@ # CHECK: %8:gr64 = COPY $rdx # CHECK: %7:gr32 = COPY $esi # CHECK: %6:gr64 = COPY $rdi -# CHECK: %30:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) +# CHECK: %13:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) # CHECK: undef %33.sub_32bit:gr64_nosp = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load (s32) from `i32 addrspace(1)* null`, addrspace 1) # CHECK: %35:gr32 = MOV32rm %8, 1, $noreg, 96, $noreg :: (load (s32) from %ir.tmp4, addrspace 1) # CHECK: %43:gr32 = MOV32rm %8, 1, $noreg, 160, $noreg :: (load (s32) from %ir.tmp6, addrspace 1) -# CHECK: %41:gr64 = MOV64rm undef %15:gr64, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) -# CHECK: %38:gr32 = MOV32rm %8, 1, $noreg, 352, $noreg :: (load (s32) from %ir.tmp10, addrspace 1) -# CHECK: %31:gr64 = MOV64rm %6, 1, $noreg, 96, $noreg :: (load (s64) from %ir.tmp13, addrspace 1) -# CHECK: %32:gr64 = MOV64rm %6, 1, $noreg, 104, $noreg :: (load (s64) from %ir.tmp16, addrspace 1) +# CHECK: %38:gr64 = MOV64rm undef %15:gr64, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) +# CHECK: %40:gr32 = MOV32rm %8, 1, $noreg, 352, $noreg :: (load (s32) from %ir.tmp10, addrspace 1) +# CHECK: %16:gr64 = MOV64rm %6, 1, $noreg, 96, $noreg :: (load (s64) from %ir.tmp13, addrspace 1) +# CHECK: %17:gr64 = MOV64rm %6, 1, $noreg, 104, $noreg :: (load (s64) from %ir.tmp16, addrspace 1) # CHECK: %45:gr32 = LEA64_32r %33, 1, $noreg, -1, $noreg # CHECK: MOV32mr %stack.1, 1, $noreg, 
0, $noreg, %7 :: (store (s32) into %stack.1) # CHECK: MOV32mr %stack.9, 1, $noreg, 0, $noreg, %45 :: (store (s32) into %stack.9) @@ -26,23 +26,23 @@ # CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, %35 :: (store (s32) into %stack.3) # CHECK: MOV32mr %stack.8, 1, $noreg, 0, $noreg, %43 :: (store (s32) into %stack.8) # CHECK: MOV32mr %stack.4, 1, $noreg, 0, $noreg, %43 :: (store (s32) into %stack.4) -# CHECK: MOV32mr %stack.7, 1, $noreg, 0, $noreg, %38 :: (store (s32) into %stack.7) -# CHECK: MOV32mr %stack.5, 1, $noreg, 0, $noreg, %38 :: (store (s32) into %stack.5) +# CHECK: MOV32mr %stack.7, 1, $noreg, 0, $noreg, %40 :: (store (s32) into %stack.7) +# CHECK: MOV32mr %stack.5, 1, $noreg, 0, $noreg, %40 :: (store (s32) into %stack.5) # CHECK: EH_LABEL # CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: $edi = MOV32r0 implicit-def dead $eflags # CHECK: dead $esi = MOV32r0 implicit-def dead $eflags, implicit-def $rsi # CHECK: $ecx = COPY %7 # CHECK: $r8d = MOV32r0 implicit-def dead $eflags -# CHECK: %40:gr64 = COPY %41 -# CHECK: %32:gr64, %31:gr64, %30:gr64, %40:gr64 = STATEPOINT 1, 16, 5, undef %23:gr64, $edi, $rsi, undef $edx, $ecx, $r8d, 2, 0, 2, 0, 2, 11, 1, 4, %stack.0, 0, %30, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, 1, 4, %stack.4, 0, 1, 4, %stack.2, 0, %40, 1, 4, %stack.5, 0, %31, %32, 2, 4, %32(tied-def 0), %31(tied-def 1), %30(tied-def 2), %40(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4), (volatile load store (s32) on %stack.5) +# CHECK: %37:gr64 = COPY %38 +# CHECK: %17:gr64, %16:gr64, %13:gr64, %37:gr64 = STATEPOINT 1, 16, 5, undef %23:gr64, $edi, $rsi, undef $edx, $ecx, 
$r8d, 2, 0, 2, 0, 2, 11, 1, 4, %stack.0, 0, %13, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, 1, 4, %stack.4, 0, 1, 4, %stack.2, 0, %37, 1, 4, %stack.5, 0, %16, %17, 2, 4, %17(tied-def 0), %16(tied-def 1), %13(tied-def 2), %37(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4), (volatile load store (s32) on %stack.5) # CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: EH_LABEL # CHECK: JMP_1 %bb.1 # CHECK: bb.1.bb21: # CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: $edi = MOV32ri 10 -# CHECK: dead %30:gr64, dead %31:gr64, dead %32:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 2, 2, 10, 1, 4, %stack.9, 0, %30, %7, %33.sub_32bit, 1, 4, %stack.6, 0, 1, 4, %stack.8, 0, %33.sub_32bit, 1, 4, %stack.7, 0, %31, %32, 2, 3, %30(tied-def 0), %31(tied-def 1), %32(tied-def 2), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_64, implicit-def $rsp, implicit-def $ssp :: (load (s32) from %stack.6), (load (s32) from %stack.7), (load (s32) from %stack.8), (load (s32) from %stack.9) +# CHECK: dead %13:gr64, dead %16:gr64, dead %17:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 2, 2, 10, 1, 4, %stack.9, 0, %13, %7, %33.sub_32bit, 1, 4, %stack.6, 0, 1, 4, %stack.8, 0, %33.sub_32bit, 1, 4, %stack.7, 0, %16, %17, 2, 3, %13(tied-def 0), %16(tied-def 1), %17(tied-def 2), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_64, implicit-def $rsp, implicit-def $ssp :: (load (s32) from %stack.6), (load (s32) from %stack.7), (load (s32) from %stack.8), (load (s32) from %stack.9) # CHECK: 
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: RET 0 # CHECK: bb.2.bb26 (landing-pad): @@ -53,11 +53,11 @@ # CHECK: %36:gr32 = MOV32rm %stack.6, 1, $noreg, 0, $noreg :: (load (s32) from %stack.6) # CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, %36 :: (store (s32) into %stack.2) # CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, %33.sub_32bit :: (store (s32) into %stack.3) -# CHECK: %39:gr32 = MOV32rm %stack.7, 1, $noreg, 0, $noreg :: (load (s32) from %stack.7) -# CHECK: MOV32mr %stack.4, 1, $noreg, 0, $noreg, %39 :: (store (s32) into %stack.4) +# CHECK: %41:gr32 = MOV32rm %stack.7, 1, $noreg, 0, $noreg :: (load (s32) from %stack.7) +# CHECK: MOV32mr %stack.4, 1, $noreg, 0, $noreg, %41 :: (store (s32) into %stack.4) # CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: $edi = MOV32ri -271 -# CHECK: dead %40:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 0, 2, 6, 1, 4, %stack.0, 0, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, %40, 1, 4, %stack.4, 0, 2, 1, %40(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4) +# CHECK: dead %37:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 0, 2, 6, 1, 4, %stack.0, 0, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, %37, 1, 4, %stack.4, 0, 2, 1, %37(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4) # CHECK: 
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp --- | diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-twoaddr.mir b/llvm/test/CodeGen/X86/statepoint-vreg-twoaddr.mir new file mode 100644 index 00000000000000..0246ff57bc2300 --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-vreg-twoaddr.mir @@ -0,0 +1,64 @@ +# RUN: llc -x mir -run-pass=twoaddressinstruction < %s | FileCheck %s + +# This test checks that TwoAddressInstruction pass does not create redundate COPY +# instruction for STATEPOINT tied operands. + +--- | + ; ModuleID = 'statepoint-vreg-twoaddr.ll' + source_filename = "statepoint-vreg-twoaddr.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-pc-linux-gnu" + + declare i1 @return_i1() + + declare void @consume(i32 addrspace(1)*) + + define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" { + entry: + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* %a) ] + %rel1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 0, i32 0) ; (%a, %a) + %res1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + call void @consume(i32 addrspace(1)* %rel1) + ret i1 %res1 + } + + ; Function Attrs: nounwind readnone + declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32 immarg, i32 immarg) #0 + + declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64 immarg, i32 immarg, i1 ()*, i32 immarg, i32 immarg, ...) + + ; Function Attrs: nounwind readnone + declare i1 @llvm.experimental.gc.result.i1(token) #0 + + attributes #0 = { nounwind readnone } + +... 
+--- +name: test_relocate +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr8 } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $rdi + + ; CHECK-LABEL: name: test_relocate + ; CHECK: [[VREG1:%[0-9]+]]:gr64 = COPY killed $rdi + ; CHECK-NOT: [[VREG2:%[0-9]+]]:gr64 = COPY [[VREG1]] + ; CHECK: [[VREG1]]:gr64 = STATEPOINT 0, 0, 0, target-flags(x86-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed [[VREG1]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al + ; CHECK: $rdi = COPY killed [[VREG1]] + ; CHECK: CALL64pcrel32 target-flags(x86-plt) @consume, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp + + %0:gr64 = COPY killed $rdi + %1:gr64 = STATEPOINT 0, 0, 0, target-flags(x86-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed %0(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al + %2:gr8 = COPY killed $al + $rdi = COPY killed %1 + CALL64pcrel32 target-flags(x86-plt) @consume, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp + $al = COPY killed %2 + RET 0, killed $al + +... 
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll index 4520e7a684ea5c..876a70fb50abf3 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll @@ -12,105 +12,101 @@ define i32 @test_spill( i32 addrspace(1)* %arg12, i32 addrspace(1)* %arg13, i32 addrspace(1)* %arg14, i32 addrspace(1)* %arg15, i32 addrspace(1)* %arg16, i32 addrspace(1)* %arg17 ) gc "statepoint-example" { ; CHECK-VREG-LABEL: test_spill -; CHECK-VREG: %30:gr64 = COPY $r9 -; CHECK-VREG: %31:gr64 = COPY $r8 -; CHECK-VREG: %32:gr64 = COPY $rcx -; CHECK-VREG: %33:gr64 = COPY $rdx -; CHECK-VREG: %34:gr64 = COPY $rsi -; CHECK-VREG: %35:gr64 = COPY $rdi -; CHECK-VREG: %29:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) -; CHECK-VREG: %28:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) -; CHECK-VREG: %27:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) -; CHECK-VREG: %26:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) -; CHECK-VREG: %25:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) -; CHECK-VREG: %24:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) -; CHECK-VREG: %23:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) -; CHECK-VREG: %22:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) -; CHECK-VREG: %21:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) -; CHECK-VREG: %20:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) -; CHECK-VREG: %19:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 
(s64) from %fixed-stack.1, align 16) -; CHECK-VREG: %18:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) -; CHECK-VREG: %18:gr64, %19:gr64, %20:gr64, %21:gr64, %22:gr64, %23:gr64, %24:gr64, %25:gr64, %26:gr64, %27:gr64, %28:gr64, %29:gr64, %30:gr64, %31:gr64, %32:gr64, %33:gr64, %34:gr64, %35:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, %18(tied-def 0), %19(tied-def 1), %20(tied-def 2), %21(tied-def 3), %22(tied-def 4), %23(tied-def 5), %24(tied-def 6), %25(tied-def 7), %26(tied-def 8), %27(tied-def 9), %28(tied-def 10), %29(tied-def 11), %30(tied-def 12), %31(tied-def 13), %32(tied-def 14), %33(tied-def 15), %34(tied-def 16), %35(tied-def 17), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp -; CHECK-VREG: %38:gr32 = MOV32rm %35, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %34, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %33, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %32, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %31, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %30, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %29, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %28, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %27, 1, 
$noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %26, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %25, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %24, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %23, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %22, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %21, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %20, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %19, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %18, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) +; CHECK-VREG: %5:gr64 = COPY $r9 +; CHECK-VREG: %4:gr64 = COPY $r8 +; CHECK-VREG: %3:gr64 = COPY $rcx +; CHECK-VREG: %2:gr64 = COPY $rdx +; CHECK-VREG: %1:gr64 = COPY $rsi +; CHECK-VREG: %0:gr64 = COPY $rdi +; CHECK-VREG: %6:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) +; CHECK-VREG: %7:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) +; CHECK-VREG: %8:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) +; CHECK-VREG: %9:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) +; CHECK-VREG: 
%10:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) +; CHECK-VREG: %11:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) +; CHECK-VREG: %12:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) +; CHECK-VREG: %13:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) +; CHECK-VREG: %14:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK-VREG: %15:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +; CHECK-VREG: %16:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) +; CHECK-VREG: %17:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) +; CHECK-VREG: %17:gr64, %16:gr64, %15:gr64, %14:gr64, %13:gr64, %12:gr64, %11:gr64, %10:gr64, %9:gr64, %8:gr64, %7:gr64, %6:gr64, %5:gr64, %4:gr64, %3:gr64, %2:gr64, %1:gr64, %0:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, %17(tied-def 0), %16(tied-def 1), %15(tied-def 2), %14(tied-def 3), %13(tied-def 4), %12(tied-def 5), %11(tied-def 6), %10(tied-def 7), %9(tied-def 8), %8(tied-def 9), %7(tied-def 10), %6(tied-def 11), %5(tied-def 12), %4(tied-def 13), %3(tied-def 14), %2(tied-def 15), %1(tied-def 16), %0(tied-def 17), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK-VREG: %38:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %1, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %2, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) +; CHECK-VREG: 
%38:gr32 = ADD32rm %38, %3, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %4, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %5, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %6, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %7, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %8, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %9, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %10, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %11, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %12, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %13, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %14, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %15, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %16, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %17, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load 
(s32) from %ir.gep17, addrspace 1) ; CHECK-VREG: $eax = COPY %38 -; CHECK-PREG: renamable $rbx = COPY $r9 +; CHECK-PREG: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $r9 :: (store (s64) into %stack.2) ; CHECK-PREG: MOV64mr %stack.6, 1, $noreg, 0, $noreg, $r8 :: (store (s64) into %stack.6) -; CHECK-PREG: renamable $r12 = COPY $rcx -; CHECK-PREG: renamable $r14 = COPY $rdx -; CHECK-PREG: renamable $r15 = COPY $rsi -; CHECK-PREG: renamable $r13 = COPY $rdi +; CHECK-PREG: MOV64mr %stack.9, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.9) +; CHECK-PREG: MOV64mr %stack.10, 1, $noreg, 0, $noreg, $rdx :: (store (s64) into %stack.10) +; CHECK-PREG: MOV64mr %stack.11, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.11) +; CHECK-PREG: renamable $rbp = COPY $rdi ; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) +; CHECK-PREG: MOV64mr %stack.8, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.8) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) ; CHECK-PREG: MOV64mr %stack.7, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.7) -; CHECK-PREG: renamable $rbp = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) ; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) -; CHECK-PREG: MOV64mr %stack.11, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.11) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) ; CHECK-PREG: MOV64mr %stack.5, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.5) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: 
(load (s64) from %fixed-stack.8) ; CHECK-PREG: MOV64mr %stack.4, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.4) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) ; CHECK-PREG: MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.3) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) -; CHECK-PREG: MOV64mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.2) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) ; CHECK-PREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.1) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) ; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) -; CHECK-PREG: MOV64mr %stack.8, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.8) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) -; CHECK-PREG: MOV64mr %stack.9, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.9) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) -; CHECK-PREG: MOV64mr %stack.10, 
1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.10) -; CHECK-PREG: renamable $rbp, renamable $rbx, renamable $r12, renamable $r14, renamable $r15, renamable $r13 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, 1, 8, %stack.10, 0, 1, 8, %stack.9, 0, 1, 8, %stack.8, 0, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.3, 0, 1, 8, %stack.4, 0, 1, 8, %stack.5, 0, 1, 8, %stack.11, 0, killed renamable $rbp(tied-def 0), 1, 8, %stack.7, 0, killed renamable $rbx(tied-def 1), 1, 8, %stack.6, 0, killed renamable $r12(tied-def 2), killed renamable $r14(tied-def 3), killed renamable $r15(tied-def 4), killed renamable $r13(tied-def 5), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on %stack.0), (load store (s64) on %stack.1), (load store (s64) on %stack.2), (load store (s64) on %stack.3), (load store (s64) on %stack.4), (load store (s64) on %stack.5), (load store (s64) on %stack.6), (load store (s64) on %stack.7), (load store (s64) on %stack.8), (load store (s64) on %stack.9), (load store (s64) on %stack.10), (load store (s64) on %stack.11) -; CHECK-PREG: renamable $eax = MOV32rm killed renamable $r13, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r15, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r14, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r12, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) +; CHECK-PREG: renamable $rbx = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load 
(s64) from %fixed-stack.4) +; CHECK-PREG: renamable $r13 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK-PREG: renamable $r12 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +; CHECK-PREG: renamable $r14 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) +; CHECK-PREG: renamable $r15 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) +; CHECK-PREG: renamable $r15, renamable $r14, renamable $r12, renamable $r13, renamable $rbx, renamable $rbp = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, killed renamable $r15(tied-def 0), killed renamable $r14(tied-def 1), killed renamable $r12(tied-def 2), killed renamable $r13(tied-def 3), killed renamable $rbx(tied-def 4), 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.3, 0, 1, 8, %stack.4, 0, 1, 8, %stack.5, 0, 1, 8, %stack.7, 0, 1, 8, %stack.8, 0, 1, 8, %stack.2, 0, 1, 8, %stack.6, 0, 1, 8, %stack.9, 0, 1, 8, %stack.10, 0, 1, 8, %stack.11, 0, killed renamable $rbp(tied-def 5), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on %stack.0), (load store (s64) on %stack.1), (load store (s64) on %stack.2), (load store (s64) on %stack.3), (load store (s64) on %stack.4), (load store (s64) on %stack.5), (load store (s64) on %stack.6), (load store (s64) on %stack.7), (load store (s64) on %stack.8), (load store (s64) on %stack.9), (load store (s64) on %stack.10), (load store (s64) on %stack.11) +; CHECK-PREG: renamable $eax = MOV32rm killed renamable $rbp, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) +; CHECK-PREG: renamable $rdi = MOV64rm %stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %stack.11) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, 
$noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) +; CHECK-PREG: renamable $rdi = MOV64rm %stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %stack.10) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) +; CHECK-PREG: renamable $rdi = MOV64rm %stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %stack.9) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) ; CHECK-PREG: renamable $rdi = MOV64rm %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) ; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rbx, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) -; CHECK-PREG: renamable $rdi = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) +; CHECK-PREG: renamable $rdi = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) +; CHECK-PREG: renamable $rdi = MOV64rm %stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %stack.8) ; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rbp, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) -; CHECK-PREG: renamable $rcx = MOV64rm %stack.11, 
1, $noreg, 0, $noreg :: (load (s64) from %stack.11) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rcx, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) +; CHECK-PREG: renamable $rdi = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) ; CHECK-PREG: renamable $rdi = MOV64rm %stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %stack.5) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) ; CHECK-PREG: renamable $rdi = MOV64rm %stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %stack.4) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) ; CHECK-PREG: renamable $rdi = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) -; CHECK-PREG: renamable $rdi = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) +; 
CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) ; CHECK-PREG: renamable $rdi = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) ; CHECK-PREG: renamable $rdi = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) -; CHECK-PREG: renamable $rsi = MOV64rm %stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %stack.8) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rsi, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %stack.9) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) -; CHECK-PREG: renamable $rcx = MOV64rm %stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %stack.10) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rcx, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdi, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, 
killed renamable $rbx, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r13, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r12, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r14, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r15, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* %arg00, i32 addrspace(1)* %arg01, i32 addrspace(1)* %arg02, i32 addrspace(1)* %arg03, i32 addrspace(1)* %arg04, i32 addrspace(1)* %arg05, i32 addrspace(1)* %arg06, i32 addrspace(1)* %arg07, i32 addrspace(1)* %arg08, i32 addrspace(1)* %arg09, i32 addrspace(1)* %arg10, i32 addrspace(1)* %arg11, i32 addrspace(1)* %arg12, i32 addrspace(1)* %arg13, i32 addrspace(1)* %arg14, i32 addrspace(1)* %arg15, i32 addrspace(1)* %arg16, i32 addrspace(1)* %arg17) ] From 1e01b1ec72031fcaceb4e77e1c5c8e34f1e862e8 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 30 May 2022 16:11:40 +0000 Subject: [PATCH 843/908] [gn build] Port e576280380d3 --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 03e0cfec3a2adc..690536cdfcbe60 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ 
b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -154,6 +154,8 @@ copy("Headers") { "hexagon_circ_brev_intrinsics.h", "hexagon_protos.h", "hexagon_types.h", + "hlsl.h", + "hlsl/hlsl_basic_types.h", "hresetintrin.h", "htmintrin.h", "htmxlintrin.h", From aff271930e8ac2ca520ba92cf6d736f94edfaaf6 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 30 May 2022 16:21:38 +0000 Subject: [PATCH 844/908] Fix warning for unused variable in the non-assert build (NFC) --- llvm/lib/Analysis/ValueTracking.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5adf44f145ba8c..04752c4e0f5fc9 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4607,6 +4607,7 @@ bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *CtxI, const DominatorTree *DT, const TargetLibraryInfo *TLI) { +#ifndef NDEBUG if (Inst->getOpcode() != Opcode) { // Check that the operands are actually compatible with the Opcode override. auto hasEqualReturnAndLeadingOperandTypes = @@ -4624,6 +4625,7 @@ bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, assert(!Instruction::isUnaryOp(Opcode) || hasEqualReturnAndLeadingOperandTypes(Inst, 1)); } +#endif for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast(Inst->getOperand(i))) From f3defc23488eb29c69d2a33c0c5b652c874fb0f3 Mon Sep 17 00:00:00 2001 From: Volodymyr Sapsai Date: Fri, 27 May 2022 14:05:12 -0700 Subject: [PATCH 845/908] [ODRHash][NFC] Add missing 'select' case for `ODRMismatchDecl`. No test changes because `err_module_odr_violation_mismatch_decl_unknown` is a catch-all when custom diagnostic is missing. And missing custom diagnostic we should fix by implementing it, not by improving the general case. But if we pass enum value not covered by 'select', clang can crash, so protect against that. 
Differential Revision: https://reviews.llvm.org/D126566 --- clang/include/clang/Basic/DiagnosticSerializationKinds.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/clang/include/clang/Basic/DiagnosticSerializationKinds.td index 3fcdb616bd21af..33eba6cf0c3f26 100644 --- a/clang/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/clang/include/clang/Basic/DiagnosticSerializationKinds.td @@ -375,12 +375,14 @@ def err_module_odr_violation_mismatch_decl_unknown : Error< "%q0 %select{with definition in module '%2'|defined here}1 has different " "definitions in different modules; first difference is this " "%select{||||static assert|field|method|type alias|typedef|data member|" - "friend declaration|unexpected decl}3">; + "friend declaration|function template|" + "unexpected decl}3">; def note_module_odr_violation_mismatch_decl_unknown : Note< "but in '%0' found " "%select{||||different static assert|different field|different method|" "different type alias|different typedef|different data member|" - "different friend declaration|another unexpected decl}1">; + "different friend declaration|different function template|" + "another unexpected decl}1">; def warn_duplicate_module_file_extension : Warning< "duplicate module file extension block name '%0'">, From b7d2b160c3ba85c42427d5db96e0af01a0a9d1b5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 30 May 2022 18:19:32 +0100 Subject: [PATCH 846/908] [VPlan] Add test for printing VPlan for outer loop vectorization. Test coverage for D123005. 
--- .../vplan-printing-outer-loop.ll | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll new file mode 100644 index 00000000000000..038b15700cf1c4 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -0,0 +1,72 @@ +; REQUIRES: asserts + +; RUN: opt -loop-vectorize -enable-vplan-native-path -debug -disable-output %s 2>&1 | FileCheck %s + +@arr2 = external global [8 x i64], align 16 +@arr = external global [8 x [8 x i64]], align 16 + +define void @foo(i64 %n) { +; CHECK: VPlan 'HCFGBuilder: Plain CFG +; CHECK-NEXT: { +; CHECK-NEXT: TopRegion: { +; CHECK-NEXT: entry: +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> +; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2> ir<0> ir<%outer.iv> +; CHECK-NEXT: EMIT store ir<%outer.iv> ir<%gep.1> +; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv> ir<%n> +; CHECK-NEXT: Successor(s): inner +; CHECK-EMPTY: +; CHECK-NEXT: inner: +; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next> +; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr> ir<0> ir<%inner.iv> ir<%outer.iv> +; CHECK-NEXT: EMIT store ir<%add> ir<%gep.2> +; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv> ir<1> +; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next> ir<8> +; CHECK-NEXT: Successor(s): outer.latch, inner +; CHECK-NEXT: CondBit: ir<%inner.ec> (inner) +; CHECK-EMPTY: +; CHECK-NEXT: outer.latch: +; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv> ir<1> +; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next> ir<8> +; CHECK-NEXT: Successor(s): exit, vector.body +; CHECK-NEXT: CondBit: ir<%outer.ec> (outer.latch) +; CHECK-EMPTY: +; CHECK-NEXT: exit: +; 
CHECK-NEXT: EMIT ret +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %gep.1 = getelementptr inbounds [8 x i64], [8 x i64]* @arr2, i64 0, i64 %outer.iv + store i64 %outer.iv, i64* %gep.1, align 4 + %add = add nsw i64 %outer.iv, %n + br label %inner + +inner: + %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %gep.2 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arr, i64 0, i64 %inner.iv, i64 %outer.iv + store i64 %add, i64* %gep.2, align 4 + %inner.iv.next = add nuw nsw i64 %inner.iv, 1 + %inner.ec = icmp eq i64 %inner.iv.next, 8 + br i1 %inner.ec, label %outer.latch, label %inner + +outer.latch: + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %outer.ec = icmp eq i64 %outer.iv.next, 8 + br i1 %outer.ec, label %exit, label %outer.header, !llvm.loop !1 + +exit: + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} From c4eb8035ed6647e58d4c5161f393e9220f7402cf Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 30 May 2022 14:10:21 -0400 Subject: [PATCH 847/908] Revert "[HLSL] Enable vector types for hlsl." This reverts commit e576280380d3f5221cfcc14e9fabeacc8506a43c. Breaks tests on mac/arm, see comment on https://reviews.llvm.org/D125052 Also revert follow-up "[gn build] Port e576280380d3" This reverts commit 1e01b1ec72031fcaceb4e77e1c5c8e34f1e862e8. 
--- clang/include/clang/Driver/Options.td | 4 +- clang/lib/Basic/LangOptions.cpp | 2 - clang/lib/Driver/ToolChains/Clang.cpp | 3 - clang/lib/Frontend/CompilerInvocation.cpp | 4 - clang/lib/Frontend/InitPreprocessor.cpp | 4 - clang/lib/Headers/CMakeLists.txt | 16 ---- clang/lib/Headers/hlsl.h | 14 ---- clang/lib/Headers/hlsl/hlsl_basic_types.h | 64 ---------------- clang/test/CodeGenHLSL/basic_types.hlsl | 76 ------------------- clang/test/Driver/hlsl_no_stdinc.hlsl | 12 --- .../gn/secondary/clang/lib/Headers/BUILD.gn | 2 - 11 files changed, 1 insertion(+), 200 deletions(-) delete mode 100644 clang/lib/Headers/hlsl.h delete mode 100644 clang/lib/Headers/hlsl/hlsl_basic_types.h delete mode 100644 clang/test/CodeGenHLSL/basic_types.hlsl delete mode 100644 clang/test/Driver/hlsl_no_stdinc.hlsl diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 44fd4d50ca0517..0343e48c1a9d11 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6051,7 +6051,7 @@ def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">, // These options cannot be marshalled, because they are used to set up the LangOptions defaults. def finclude_default_header : Flag<["-"], "finclude-default-header">, - HelpText<"Include default header file for OpenCL and HLSL">; + HelpText<"Include default header file for OpenCL">; def fdeclare_opencl_builtins : Flag<["-"], "fdeclare-opencl-builtins">, HelpText<"Add OpenCL builtin function declarations (experimental)">; @@ -6780,8 +6780,6 @@ class DXCJoinedOrSeparate : Option<["/", "-"], name, def dxc_help : Option<["/", "-", "--"], "help", KIND_JOINED>, Group, Flags<[DXCOption, NoXarchOption]>, Alias, HelpText<"Display available options">; -def dxc_no_stdinc : DXCFlag<"hlsl-no-stdinc">, - HelpText<"HLSL only. 
Disables all standard includes containing non-native compiler types and functions.">; def Fo : DXCJoinedOrSeparate<"Fo">, Alias, HelpText<"Output object file">; def dxil_validator_version : Option<["/", "-"], "validator-version", KIND_SEPARATE>, diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 7549f3f2e23b4f..7791bff388be70 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -117,8 +117,6 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang, Opts.Digraphs = Std.hasDigraphs(); Opts.HLSL = Lang == Language::HLSL; - if (Opts.HLSL && Opts.IncludeDefaultHeader) - Includes.push_back("hlsl.h"); // Set OpenCL Version. Opts.OpenCL = Std.isOpenCL(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a8706215bf6059..5106c0e327b87f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3484,9 +3484,6 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs, for (const auto &Arg : ForwardedArguments) if (const auto *A = Args.getLastArg(Arg)) A->renderAsInput(Args, CmdArgs); - // Add the default headers if dxc_no_stdinc is not set. - if (!Args.hasArg(options::OPT_dxc_no_stdinc)) - CmdArgs.push_back("-finclude-default-header"); } static void RenderARCMigrateToolOptions(const Driver &D, const ArgList &Args, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index a51d4621ba6827..32b084dfedecc5 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4202,10 +4202,6 @@ static void GeneratePreprocessorArgs(PreprocessorOptions &Opts, ((LangOpts.DeclareOpenCLBuiltins && I == "opencl-c-base.h") || I == "opencl-c.h")) continue; - // Don't generate HLSL includes. They are implied by other flags that are - // generated elsewhere. 
- if (LangOpts.HLSL && I == "hlsl.h") - continue; GenerateArg(Args, OPT_include, I, SA); } diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 4c5e7325d79600..3921f7949a0a99 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -377,10 +377,6 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__HLSL_VERSION", Twine((unsigned)LangOpts.getHLSLVersion())); - if (LangOpts.NativeHalfType) - Builder.defineMacro("__HLSL_ENABLE_16_BIT", - Twine((unsigned)LangOpts.getHLSLVersion())); - // Shader target information // "enums" for shader stages Builder.defineMacro("__SHADER_STAGE_VERTEX", diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index fc321684f0c69c..8bf7dd570384fe 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -62,11 +62,6 @@ set(hip_files __clang_hip_runtime_wrapper.h ) -set(hlsl_files - hlsl.h - hlsl/hlsl_basic_types.h - ) - set(mips_msa_files msa.h ) @@ -217,7 +212,6 @@ set(files ${cuda_files} ${hexagon_files} ${hip_files} - ${hlsl_files} ${mips_msa_files} ${opencl_files} ${ppc_files} @@ -411,7 +405,6 @@ add_header_target("webassembly-resource-headers" "${webassembly_files}") add_header_target("x86-resource-headers" "${x86_files}") # Other header groupings -add_header_target("hlsl-resource-headers" ${hlsl_files}) add_header_target("opencl-resource-headers" ${opencl_files}) add_header_target("openmp-resource-headers" ${openmp_wrapper_files}) add_header_target("windows-resource-headers" ${windows_only_files}) @@ -545,12 +538,6 @@ install( EXCLUDE_FROM_ALL COMPONENT x86-resource-headers) -install( - FILES ${hlsl_files} - DESTINATION ${header_install_dir} - EXCLUDE_FROM_ALL - COMPONENT hlsl-resource-headers) - install( FILES ${opencl_files} DESTINATION ${header_install_dir} @@ -627,9 +614,6 @@ if (NOT LLVM_ENABLE_IDE) DEPENDS webassembly-resource-headers 
COMPONENT webassembly-resource-headers) - add_llvm_install_targets(install-hlsl-resource-headers - DEPENDS hlsl-resource-headers - COMPONENT hlsl-resource-headers) add_llvm_install_targets(install-opencl-resource-headers DEPENDS opencl-resource-headers COMPONENT opencl-resource-headers) diff --git a/clang/lib/Headers/hlsl.h b/clang/lib/Headers/hlsl.h deleted file mode 100644 index 0db8a4ed1fe40f..00000000000000 --- a/clang/lib/Headers/hlsl.h +++ /dev/null @@ -1,14 +0,0 @@ -//===----- hlsl.h - HLSL definitions --------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _HLSL_H_ -#define _HLSL_H_ - -#include "hlsl/hlsl_basic_types.h" - -#endif //_HLSL_H_ diff --git a/clang/lib/Headers/hlsl/hlsl_basic_types.h b/clang/lib/Headers/hlsl/hlsl_basic_types.h deleted file mode 100644 index 2069990f5c06c4..00000000000000 --- a/clang/lib/Headers/hlsl/hlsl_basic_types.h +++ /dev/null @@ -1,64 +0,0 @@ -//===----- hlsl_basic_types.h - HLSL definitions for basic types ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _HLSL_HLSL_BASIC_TYPES_H_ -#define _HLSL_HLSL_BASIC_TYPES_H_ - -// built-in scalar data types: - -#ifdef __HLSL_ENABLE_16_BIT -// 16-bit integer. -typedef unsigned short uint16_t; -typedef short int16_t; -#endif - -// unsigned 32-bit integer. -typedef unsigned int uint; - -// 64-bit integer. 
-typedef unsigned long uint64_t; -typedef long int64_t; - -// built-in vector data types: - -#ifdef __HLSL_ENABLE_16_BIT -typedef int16_t int16_t2 __attribute__((ext_vector_type(2))); -typedef int16_t int16_t3 __attribute__((ext_vector_type(3))); -typedef int16_t int16_t4 __attribute__((ext_vector_type(4))); -typedef uint16_t uint16_t2 __attribute__((ext_vector_type(2))); -typedef uint16_t uint16_t3 __attribute__((ext_vector_type(3))); -typedef uint16_t uint16_t4 __attribute__((ext_vector_type(4))); -#endif - -typedef int int2 __attribute__((ext_vector_type(2))); -typedef int int3 __attribute__((ext_vector_type(3))); -typedef int int4 __attribute__((ext_vector_type(4))); -typedef uint uint2 __attribute__((ext_vector_type(2))); -typedef uint uint3 __attribute__((ext_vector_type(3))); -typedef uint uint4 __attribute__((ext_vector_type(4))); -typedef int64_t int64_t2 __attribute__((ext_vector_type(2))); -typedef int64_t int64_t3 __attribute__((ext_vector_type(3))); -typedef int64_t int64_t4 __attribute__((ext_vector_type(4))); -typedef uint64_t uint64_t2 __attribute__((ext_vector_type(2))); -typedef uint64_t uint64_t3 __attribute__((ext_vector_type(3))); -typedef uint64_t uint64_t4 __attribute__((ext_vector_type(4))); - -#ifdef __HLSL_ENABLE_16_BIT -typedef half half2 __attribute__((ext_vector_type(2))); -typedef half half3 __attribute__((ext_vector_type(3))); -typedef half half4 __attribute__((ext_vector_type(4))); -#endif - -typedef float float2 __attribute__((ext_vector_type(2))); -typedef float float3 __attribute__((ext_vector_type(3))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef double double2 __attribute__((ext_vector_type(2))); -typedef double double3 __attribute__((ext_vector_type(3))); -typedef double double4 __attribute__((ext_vector_type(4))); - -#endif //_HLSL_HLSL_BASIC_TYPES_H_ diff --git a/clang/test/CodeGenHLSL/basic_types.hlsl b/clang/test/CodeGenHLSL/basic_types.hlsl deleted file mode 100644 index 
fcb0815c3af57b..00000000000000 --- a/clang/test/CodeGenHLSL/basic_types.hlsl +++ /dev/null @@ -1,76 +0,0 @@ -// RUN: %clang_dxc -Tlib_6_7 -fcgl -Fo - %s | FileCheck %s - -// FIXME: check 16bit types once enable-16bit-types is ready. - -// CHECK:@uint_Val = global i32 0, align 4 -// CHECK:@uint64_t_Val = global i64 0, align 8 -// CHECK:@int64_t_Val = global i64 0, align 8 -// CHECK:@int2_Val = global <2 x i32> zeroinitializer, align 8 -// CHECK:@int3_Val = global <3 x i32> zeroinitializer, align 16 -// CHECK:@int4_Val = global <4 x i32> zeroinitializer, align 16 -// CHECK:@uint2_Val = global <2 x i32> zeroinitializer, align 8 -// CHECK:@uint3_Val = global <3 x i32> zeroinitializer, align 16 -// CHECK:@uint4_Val = global <4 x i32> zeroinitializer, align 16 -// CHECK:@int64_t2_Val = global <2 x i64> zeroinitializer, align 16 -// CHECK:@int64_t3_Val = global <3 x i64> zeroinitializer, align 32 -// CHECK:@int64_t4_Val = global <4 x i64> zeroinitializer, align 32 -// CHECK:@uint64_t2_Val = global <2 x i64> zeroinitializer, align 16 -// CHECK:@uint64_t3_Val = global <3 x i64> zeroinitializer, align 32 -// CHECK:@uint64_t4_Val = global <4 x i64> zeroinitializer, align 32 -// CHECK:@float2_Val = global <2 x float> zeroinitializer, align 8 -// CHECK:@float3_Val = global <3 x float> zeroinitializer, align 16 -// CHECK:@float4_Val = global <4 x float> zeroinitializer, align 16 -// CHECK:@double2_Val = global <2 x double> zeroinitializer, align 16 -// CHECK:@double3_Val = global <3 x double> zeroinitializer, align 32 -// CHECK:@double4_Val = global <4 x double> zeroinitializer, align 32 - -#define TYPE_DECL(T) T T##_Val - -#ifdef __HLSL_ENABLE_16_BIT -TYPE_DECL(uint16_t); -TYPE_DECL(int16_t); -#endif - -// unsigned 32-bit integer. -TYPE_DECL(uint); - -// 64-bit integer. 
-TYPE_DECL(uint64_t); -TYPE_DECL(int64_t); - -// built-in vector data types: - -#ifdef __HLSL_ENABLE_16_BIT -TYPE_DECL(int16_t2 ); -TYPE_DECL(int16_t3 ); -TYPE_DECL(int16_t4 ); -TYPE_DECL( uint16_t2 ); -TYPE_DECL( uint16_t3 ); -TYPE_DECL( uint16_t4 ); -#endif - -TYPE_DECL( int2 ); -TYPE_DECL( int3 ); -TYPE_DECL( int4 ); -TYPE_DECL( uint2 ); -TYPE_DECL( uint3 ); -TYPE_DECL( uint4 ); -TYPE_DECL( int64_t2 ); -TYPE_DECL( int64_t3 ); -TYPE_DECL( int64_t4 ); -TYPE_DECL( uint64_t2 ); -TYPE_DECL( uint64_t3 ); -TYPE_DECL( uint64_t4 ); - -#ifdef __HLSL_ENABLE_16_BIT -TYPE_DECL(half2 ); -TYPE_DECL(half3 ); -TYPE_DECL(half4 ); -#endif - -TYPE_DECL( float2 ); -TYPE_DECL( float3 ); -TYPE_DECL( float4 ); -TYPE_DECL( double2 ); -TYPE_DECL( double3 ); -TYPE_DECL( double4 ); diff --git a/clang/test/Driver/hlsl_no_stdinc.hlsl b/clang/test/Driver/hlsl_no_stdinc.hlsl deleted file mode 100644 index ec6d0612a9ed7d..00000000000000 --- a/clang/test/Driver/hlsl_no_stdinc.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %clang_dxc -Tlib_6_7 -fcgl -Fo - %s -### 2>&1 | FileCheck %s --check-prefix=STDINC -// RUN: %clang_dxc -Tlib_6_7 -hlsl-no-stdinc -fcgl -Fo - %s -### 2>&1 | FileCheck %s --check-prefix=NOSTDINC - -// RUN: %clang -cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s -verify - -// Make sure hlsl-no-stdinc is translated into finclude-default-header. -// STDINC:"-finclude-default-header" -// NOSTDINC-NOT:"-finclude-default-header" - -// Make sure uint not work when finclude-default-header is off. 
-// expected-error@+1 {{unknown type name 'uint'}} -uint a; diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 690536cdfcbe60..03e0cfec3a2adc 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -154,8 +154,6 @@ copy("Headers") { "hexagon_circ_brev_intrinsics.h", "hexagon_protos.h", "hexagon_types.h", - "hlsl.h", - "hlsl/hlsl_basic_types.h", "hresetintrin.h", "htmintrin.h", "htmxlintrin.h", From 80b3dcc045f8ea6e5e532d8891bbf1305bce89e8 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Mon, 30 May 2022 19:16:06 +0100 Subject: [PATCH 848/908] [Support] Make report_fatal_error respect its GenCrashDiag argument so it doesn't generate a backtrace There are a few places where we use report_fatal_error when the input is broken. Currently, this function always crashes LLVM with an abort signal, which then triggers the backtrace printing code. I think this is excessive, as wrong input shouldn't give a link to LLVM's github issue URL and tell users to file a bug report. We shouldn't print a stack trace either. This patch changes report_fatal_error so it uses exit() rather than abort() when its argument GenCrashDiag=false. 
Reviewed by: nikic, MaskRay, RKSimon Differential Revision: https://reviews.llvm.org/D126550 --- llvm/lib/Support/ErrorHandling.cpp | 5 ++++- llvm/lib/Target/Mips/MipsSubtarget.cpp | 2 +- llvm/lib/Transforms/IPO/BlockExtractor.cpp | 11 +++++++---- llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 2 +- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 +- llvm/test/CodeGen/ARM/codemodel.ll | 4 ++-- llvm/test/CodeGen/BPF/xadd.ll | 4 ++-- llvm/test/CodeGen/Lanai/codemodel.ll | 4 ++-- llvm/test/CodeGen/Mips/cpus.ll | 2 +- llvm/test/CodeGen/Mips/fp64a.ll | 6 +++--- llvm/test/CodeGen/Mips/fpxx.ll | 4 ++-- llvm/test/CodeGen/Mips/micromips64-unsupported.ll | 4 ++-- llvm/test/CodeGen/Mips/mips32r6/compatibility.ll | 2 +- llvm/test/CodeGen/Mips/mips64r6/compatibility.ll | 2 +- llvm/test/CodeGen/Mips/msa/3r-a.ll | 2 +- llvm/test/CodeGen/PowerPC/codemodel.ll | 4 ++-- llvm/test/CodeGen/SPARC/codemodel.ll | 4 ++-- llvm/test/CodeGen/SystemZ/codemodel.ll | 4 ++-- llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll | 4 ++-- llvm/test/CodeGen/X86/codemodel.ll | 2 +- llvm/test/MC/Mips/micromips64-unsupported.s | 8 ++++---- llvm/test/MC/Mips/micromips64r6-unsupported.s | 2 +- llvm/test/Other/optimization-remarks-inline.ll | 2 +- llvm/test/Transforms/BlockExtractor/invalid-block.ll | 2 +- .../Transforms/BlockExtractor/invalid-function.ll | 2 +- llvm/test/Transforms/BlockExtractor/invalid-line.ll | 2 +- llvm/test/Transforms/GCOVProfiling/version.ll | 2 +- .../Transforms/LoopUnroll/peel-loop-and-unroll.ll | 2 +- 28 files changed, 51 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp index 80c0e00439a580..3acae3f790a379 100644 --- a/llvm/lib/Support/ErrorHandling.cpp +++ b/llvm/lib/Support/ErrorHandling.cpp @@ -119,7 +119,10 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { // files registered with RemoveFileOnSignal. 
sys::RunInterruptHandlers(); - abort(); + if (GenCrashDiag) + abort(); + else + exit(126); } void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp index 5c6127550bac75..10530cdafeed45 100644 --- a/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -116,7 +116,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, if (isFP64bit() && !hasMips64() && hasMips32() && !hasMips32r2()) report_fatal_error( "FPU with 64-bit registers is not available on MIPS32 pre revision 2. " - "Use -mcpu=mips32r2 or greater."); + "Use -mcpu=mips32r2 or greater.", false); if (!isABI_O32() && !useOddSPReg()) report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false); diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 7c178f9a983454..9e27ae49a90126 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -135,7 +135,8 @@ void BlockExtractor::loadFile() { if (LineSplit.empty()) continue; if (LineSplit.size()!=2) - report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'"); + report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'", + /*GenCrashDiag=*/false); SmallVector BBNames; LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false); @@ -194,13 +195,15 @@ bool BlockExtractor::runOnModule(Module &M) { for (const auto &BInfo : BlocksByName) { Function *F = M.getFunction(BInfo.first); if (!F) - report_fatal_error("Invalid function name specified in the input file"); + report_fatal_error("Invalid function name specified in the input file", + /*GenCrashDiag=*/false); for (const auto &BBInfo : BInfo.second) { auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { return BB.getName().equals(BBInfo); }); if (Res == F->end()) - 
report_fatal_error("Invalid block name specified in the input file"); + report_fatal_error("Invalid block name specified in the input file", + /*GenCrashDiag=*/false); GroupsOfBlocks[NextGroupIdx].push_back(&*Res); } ++NextGroupIdx; @@ -212,7 +215,7 @@ bool BlockExtractor::runOnModule(Module &M) { for (BasicBlock *BB : BBs) { // Check if the module contains BB. if (BB->getParent()->getParent() != &M) - report_fatal_error("Invalid basic block"); + report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false); LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting " << BB->getParent()->getName() << ":" << BB->getName() << "\n"); diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 6de797c8107e56..ac4a1fd6bb7eaf 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -81,7 +81,7 @@ GCOVOptions GCOVOptions::getDefault() { if (DefaultGCOVVersion.size() != 4) { llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") + - DefaultGCOVVersion); + DefaultGCOVVersion, /*GenCrashDiag=*/false); } memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); return Options; diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 45fbf2a858529c..4bb1d3f0766d07 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -909,7 +909,7 @@ bool llvm::computeUnrollCount( if (PP.PeelCount) { if (UnrollCount.getNumOccurrences() > 0) { report_fatal_error("Cannot specify both explicit peel count and " - "explicit unroll count"); + "explicit unroll count", /*GenCrashDiag=*/false); } UP.Count = 1; UP.Runtime = false; diff --git a/llvm/test/CodeGen/ARM/codemodel.ll b/llvm/test/CodeGen/ARM/codemodel.ll index ee435986c61157..ec9982faba12bf 100644 --- a/llvm/test/CodeGen/ARM/codemodel.ll +++ b/llvm/test/CodeGen/ARM/codemodel.ll @@ -1,5 
+1,5 @@ -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=arm-none-eabi -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=arm-none-eabi -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL +; RUN: not llc -verify-machineinstrs -o - -mtriple=arm-none-eabi -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY +; RUN: not llc -verify-machineinstrs -o - -mtriple=arm-none-eabi -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL ; TINY: Target does not support the tiny CodeModel ; KERNEL: Target does not support the kernel CodeModel diff --git a/llvm/test/CodeGen/BPF/xadd.ll b/llvm/test/CodeGen/BPF/xadd.ll index db0fbd14194a10..4f16c061d9d812 100644 --- a/llvm/test/CodeGen/BPF/xadd.ll +++ b/llvm/test/CodeGen/BPF/xadd.ll @@ -1,5 +1,5 @@ -; RUN: not --crash llc -march=bpfel < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -march=bpfeb < %s 2>&1 | FileCheck %s +; RUN: not llc -march=bpfel < %s 2>&1 | FileCheck %s +; RUN: not llc -march=bpfeb < %s 2>&1 | FileCheck %s ; This file is generated with the source command and source ; $ clang -target bpf -O2 -g -S -emit-llvm t.c diff --git a/llvm/test/CodeGen/Lanai/codemodel.ll b/llvm/test/CodeGen/Lanai/codemodel.ll index acfbabacf33c4f..72d1d65daf52d2 100644 --- a/llvm/test/CodeGen/Lanai/codemodel.ll +++ b/llvm/test/CodeGen/Lanai/codemodel.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=lanai < %s | FileCheck %s ; RUN: llc -march=lanai < %s -code-model=small | FileCheck -check-prefix CHECK-SMALL %s -; RUN: not --crash llc -march=lanai < %s -code-model=tiny 2>&1 | FileCheck -check-prefix CHECK-TINY %s -; RUN: not --crash llc -march=lanai < %s -code-model=kernel 2>&1 | FileCheck -check-prefix CHECK-KERNEL %s +; RUN: not llc -march=lanai < %s -code-model=tiny 2>&1 | FileCheck -check-prefix CHECK-TINY %s +; RUN: not llc -march=lanai < %s -code-model=kernel 2>&1 | FileCheck -check-prefix CHECK-KERNEL %s ; CHECK-TINY: Target does 
not support the tiny CodeModel ; CHECK-KERNEL: Target does not support the kernel CodeModel diff --git a/llvm/test/CodeGen/Mips/cpus.ll b/llvm/test/CodeGen/Mips/cpus.ll index 995b0fd820104b..077f8fb48bae00 100644 --- a/llvm/test/CodeGen/Mips/cpus.ll +++ b/llvm/test/CodeGen/Mips/cpus.ll @@ -60,7 +60,7 @@ ; Check that we reject CPUs that are not implemented. -; RUN: not --crash llc < %s -o /dev/null -mtriple=mips64 -mcpu=mips5 2>&1 \ +; RUN: not llc < %s -o /dev/null -mtriple=mips64 -mcpu=mips5 2>&1 \ ; RUN: | FileCheck %s --check-prefix=ERROR ; ERROR: LLVM ERROR: Code generation for MIPS-{{.}} is not implemented diff --git a/llvm/test/CodeGen/Mips/fp64a.ll b/llvm/test/CodeGen/Mips/fp64a.ll index 2bf59052761db8..317afd7003bba4 100644 --- a/llvm/test/CodeGen/Mips/fp64a.ll +++ b/llvm/test/CodeGen/Mips/fp64a.ll @@ -7,16 +7,16 @@ ; We don't test MIPS32r1 since support for 64-bit coprocessors (such as a 64-bit ; FPU) on a 32-bit architecture was added in MIPS32r2. -; RUN: not --crash llc -march=mips -mcpu=mips32 -mattr=fp64 < %s 2>&1 | FileCheck %s -check-prefix=32R1-FP64 +; RUN: not llc -march=mips -mcpu=mips32 -mattr=fp64 < %s 2>&1 | FileCheck %s -check-prefix=32R1-FP64 ; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefixes=ALL,32R2-NO-FP64A-BE ; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefixes=ALL,32R2-FP64A ; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefixes=ALL,32R2-NO-FP64A-LE ; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefixes=ALL,32R2-FP64A ; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefixes=ALL,64-NO-FP64A -; RUN: not --crash llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A +; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A ; RUN: llc -march=mips64el 
-mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefixes=ALL,64-NO-FP64A -; RUN: not --crash llc -march=mips64el -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A +; RUN: not llc -march=mips64el -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A ; 32R1-FP64: LLVM ERROR: FPU with 64-bit registers is not available on MIPS32 pre revision 2. Use -mcpu=mips32r2 or greater. ; 64-FP64A: LLVM ERROR: -mattr=+nooddspreg requires the O32 ABI. diff --git a/llvm/test/CodeGen/Mips/fpxx.ll b/llvm/test/CodeGen/Mips/fpxx.ll index 075eff1e580f06..6fdb95efe8ecf4 100644 --- a/llvm/test/CodeGen/Mips/fpxx.ll +++ b/llvm/test/CodeGen/Mips/fpxx.ll @@ -5,10 +5,10 @@ ; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fpxx < %s | FileCheck %s -check-prefixes=ALL,32R2-FPXX ; RUN: llc -march=mips64 -mcpu=mips4 < %s | FileCheck %s -check-prefixes=ALL,4-NOFPXX -; RUN: not --crash llc -march=mips64 -mcpu=mips4 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=4-FPXX +; RUN: not llc -march=mips64 -mcpu=mips4 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=4-FPXX ; RUN: llc -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefixes=ALL,64-NOFPXX -; RUN: not --crash llc -march=mips64 -mcpu=mips64 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=64-FPXX +; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=64-FPXX ; RUN-TODO: llc -march=mips64 -mcpu=mips4 -target-abi o32 < %s | FileCheck %s -check-prefixes=ALL,4-O32-NOFPXX ; RUN-TODO: llc -march=mips64 -mcpu=mips4 -target-abi o32 -mattr=fpxx < %s | FileCheck %s -check-prefixes=ALL,4-O32-FPXX diff --git a/llvm/test/CodeGen/Mips/micromips64-unsupported.ll b/llvm/test/CodeGen/Mips/micromips64-unsupported.ll index 0f24167e315016..713722ea120068 100644 --- a/llvm/test/CodeGen/Mips/micromips64-unsupported.ll +++ b/llvm/test/CodeGen/Mips/micromips64-unsupported.ll @@ -1,5 +1,5 @@ -; RUN: not --crash llc -mtriple=mips64-unknown-linux 
-mcpu=mips64r6 -mattr=+micromips %s 2>&1 | FileCheck %s --check-prefix=MICROMIPS64R6 -; RUN: not --crash llc -mtriple=mips64-unknown-linux -mcpu=mips64 -mattr=+micromips %s 2>&1 | FileCheck %s --check-prefix=MICROMIPS64 +; RUN: not llc -mtriple=mips64-unknown-linux -mcpu=mips64r6 -mattr=+micromips %s 2>&1 | FileCheck %s --check-prefix=MICROMIPS64R6 +; RUN: not llc -mtriple=mips64-unknown-linux -mcpu=mips64 -mattr=+micromips %s 2>&1 | FileCheck %s --check-prefix=MICROMIPS64 ; Test that microMIPS64(R6) is not supported. diff --git a/llvm/test/CodeGen/Mips/mips32r6/compatibility.ll b/llvm/test/CodeGen/Mips/mips32r6/compatibility.ll index 1adb1bc944e12f..8eac8d4683d151 100644 --- a/llvm/test/CodeGen/Mips/mips32r6/compatibility.ll +++ b/llvm/test/CodeGen/Mips/mips32r6/compatibility.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck %s -; RUN: not --crash llc -march=mipsel -mcpu=mips32r6 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s +; RUN: not llc -march=mipsel -mcpu=mips32r6 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s ; CHECK: foo: ; DSP: MIPS32r6 is not compatible with the DSP ASE diff --git a/llvm/test/CodeGen/Mips/mips64r6/compatibility.ll b/llvm/test/CodeGen/Mips/mips64r6/compatibility.ll index 8b7607eccc752b..174f4ce1771ad5 100644 --- a/llvm/test/CodeGen/Mips/mips64r6/compatibility.ll +++ b/llvm/test/CodeGen/Mips/mips64r6/compatibility.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=mipsel -mcpu=mips64r6 -target-abi n64 < %s | FileCheck %s -; RUN: not --crash llc -march=mipsel -mcpu=mips64r6 -target-abi n64 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s +; RUN: not llc -march=mipsel -mcpu=mips64r6 -target-abi n64 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s ; CHECK: foo: ; DSP: MIPS64r6 is not compatible with the DSP ASE diff --git a/llvm/test/CodeGen/Mips/msa/3r-a.ll b/llvm/test/CodeGen/Mips/msa/3r-a.ll index 47d8eaa68beb4a..933c4ed6946d45 100644 --- a/llvm/test/CodeGen/Mips/msa/3r-a.ll +++ 
b/llvm/test/CodeGen/Mips/msa/3r-a.ll @@ -5,7 +5,7 @@ ; RUN: llc -march=mipsel -mattr=+msa,+fp64,+mips32r2 -relocation-model=pic < %s | FileCheck %s ; It should fail to compile without fp64. -; RUN: not --crash llc -march=mips -mattr=+msa < %s 2>&1 | \ +; RUN: not llc -march=mips -mattr=+msa < %s 2>&1 | \ ; RUN: FileCheck -check-prefix=FP32ERROR %s ; FP32ERROR: LLVM ERROR: MSA requires a 64-bit FPU register file (FR=1 mode). diff --git a/llvm/test/CodeGen/PowerPC/codemodel.ll b/llvm/test/CodeGen/PowerPC/codemodel.ll index cbceaf78b17633..ee3ceae6df234f 100644 --- a/llvm/test/CodeGen/PowerPC/codemodel.ll +++ b/llvm/test/CodeGen/PowerPC/codemodel.ll @@ -1,5 +1,5 @@ -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=powerpc-pc-linux -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=powerpc-pc-linux -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL +; RUN: not llc -verify-machineinstrs -o - -mtriple=powerpc-pc-linux -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY +; RUN: not llc -verify-machineinstrs -o - -mtriple=powerpc-pc-linux -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL ; TINY: Target does not support the tiny CodeModel ; KERNEL: Target does not support the kernel CodeModel diff --git a/llvm/test/CodeGen/SPARC/codemodel.ll b/llvm/test/CodeGen/SPARC/codemodel.ll index fae56b801c9cbc..68da48a0e95064 100644 --- a/llvm/test/CodeGen/SPARC/codemodel.ll +++ b/llvm/test/CodeGen/SPARC/codemodel.ll @@ -1,5 +1,5 @@ -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=sparc64-unknown-linux -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=sparc64-unknown-linux -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL +; RUN: not llc -verify-machineinstrs -o - -mtriple=sparc64-unknown-linux -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY +; RUN: not 
llc -verify-machineinstrs -o - -mtriple=sparc64-unknown-linux -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL ; TINY: Target does not support the tiny CodeModel ; KERNEL: Target does not support the kernel CodeModel diff --git a/llvm/test/CodeGen/SystemZ/codemodel.ll b/llvm/test/CodeGen/SystemZ/codemodel.ll index a96a28a5167c3f..4375366cfd7997 100644 --- a/llvm/test/CodeGen/SystemZ/codemodel.ll +++ b/llvm/test/CodeGen/SystemZ/codemodel.ll @@ -1,5 +1,5 @@ -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=s390x-linux-gnu -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY -; RUN: not --crash llc -verify-machineinstrs -o - -mtriple=s390x-linux-gnu -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL +; RUN: not llc -verify-machineinstrs -o - -mtriple=s390x-linux-gnu -code-model=tiny < %s 2>&1 | FileCheck %s --check-prefix=TINY +; RUN: not llc -verify-machineinstrs -o - -mtriple=s390x-linux-gnu -code-model=kernel < %s 2>&1 | FileCheck %s --check-prefix=KERNEL ; TINY: Target does not support the tiny CodeModel ; KERNEL: Target does not support the kernel CodeModel diff --git a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll index 214c2585800453..50f9df99e6b5a5 100644 --- a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll +++ b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll @@ -1,5 +1,5 @@ -; RUN: not --crash llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics 2>&1 | FileCheck %s --check-prefix=ERROR -; RUN: not --crash llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: not llc < 
%s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel 2>&1 | FileCheck %s --check-prefix=ERROR ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics --mtriple wasm32-unknown-emscripten | FileCheck %s --check-prefixes=CHECK,TLS ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics --mtriple wasm32-unknown-emscripten -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes=CHECK,NO-TLS diff --git a/llvm/test/CodeGen/X86/codemodel.ll b/llvm/test/CodeGen/X86/codemodel.ll index 1d9818cfeb9fc4..ea754ad1d6c502 100644 --- a/llvm/test/CodeGen/X86/codemodel.ll +++ b/llvm/test/CodeGen/X86/codemodel.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -code-model=small | FileCheck -check-prefix CHECK-SMALL %s ; RUN: llc < %s -code-model=kernel | FileCheck -check-prefix CHECK-KERNEL %s -; RUN: not --crash llc < %s -code-model=tiny 2>&1 | FileCheck -check-prefix CHECK-TINY %s +; RUN: not llc < %s -code-model=tiny 2>&1 | FileCheck -check-prefix CHECK-TINY %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/MC/Mips/micromips64-unsupported.s b/llvm/test/MC/Mips/micromips64-unsupported.s index 05c4bc9df043cf..bc38cfb41f74ae 100644 --- a/llvm/test/MC/Mips/micromips64-unsupported.s +++ b/llvm/test/MC/Mips/micromips64-unsupported.s @@ -1,8 +1,8 @@ -# RUN: not --crash llvm-mc %s -arch=mips64 -mcpu=mips64r6 -target-abi n64 2>&1 -mattr=+micromips | FileCheck %s --check-prefix=64R6 -# RUN: not --crash llvm-mc %s -arch=mips64 -mcpu=mips64r6 -target-abi n32 2>&1 
-mattr=+micromips | FileCheck %s --check-prefix=64R6 +# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64r6 -target-abi n64 2>&1 -mattr=+micromips | FileCheck %s --check-prefix=64R6 +# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64r6 -target-abi n32 2>&1 -mattr=+micromips | FileCheck %s --check-prefix=64R6 -# RUN: not --crash llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n64 2>&1 -mattr=+micromips | FileCheck %s --check-prefix=64 -# RUN: not --crash llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n32 2>&1 -mattr=+micromips | FileCheck %s --check-prefix=64 +# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n64 2>&1 -mattr=+micromips | FileCheck %s --check-prefix=64 +# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n32 2>&1 -mattr=+micromips | FileCheck %s --check-prefix=64 # 64R6: microMIPS64R6 is not supported # 64: microMIPS64 is not supported diff --git a/llvm/test/MC/Mips/micromips64r6-unsupported.s b/llvm/test/MC/Mips/micromips64r6-unsupported.s index d2afff72aa71c5..402e66724e46b8 100644 --- a/llvm/test/MC/Mips/micromips64r6-unsupported.s +++ b/llvm/test/MC/Mips/micromips64r6-unsupported.s @@ -1,4 +1,4 @@ -# RUN: not --crash llvm-mc -filetype=obj -triple=mips64-unknown-linux -mattr=+micromips \ +# RUN: not llvm-mc -filetype=obj -triple=mips64-unknown-linux -mattr=+micromips \ # RUN: -mcpu=mips64r6 %s 2>&1 | FileCheck %s -check-prefix=CHECK-OPTION # RUN: not llvm-mc -filetype=obj -triple=mips64-unknown-linux -mcpu=mips64r6 \ # RUN: %s 2>&1 | FileCheck %s -check-prefix=CHECK-MM-DIRECTIVE diff --git a/llvm/test/Other/optimization-remarks-inline.ll b/llvm/test/Other/optimization-remarks-inline.ll index 3e51ac113b15cc..1ba993126c9a30 100644 --- a/llvm/test/Other/optimization-remarks-inline.ll +++ b/llvm/test/Other/optimization-remarks-inline.ll @@ -10,7 +10,7 @@ ; RUN: opt < %s -inline -pass-remarks='inl' -pass-remarks='vector' -S 2>&1 | FileCheck --check-prefix=REMARKS %s ; RUN: opt < %s -inline -S 2>&1 | FileCheck --check-prefix=REMARKS %s -; 
RUN: not --crash opt < %s -pass-remarks='(' 2>&1 | FileCheck --check-prefix=BAD-REGEXP %s +; RUN: not opt < %s -pass-remarks='(' 2>&1 | FileCheck --check-prefix=BAD-REGEXP %s define i32 @foo(i32 %x, i32 %y) #0 { entry: diff --git a/llvm/test/Transforms/BlockExtractor/invalid-block.ll b/llvm/test/Transforms/BlockExtractor/invalid-block.ll index 4b284ddbcdbafb..f444764e991d2f 100644 --- a/llvm/test/Transforms/BlockExtractor/invalid-block.ll +++ b/llvm/test/Transforms/BlockExtractor/invalid-block.ll @@ -1,5 +1,5 @@ ; RUN: echo 'bar invalidbb' > %t -; RUN: not --crash opt -S -extract-blocks -extract-blocks-file=%t %s 2>&1 | FileCheck %s +; RUN: not opt -S -extract-blocks -extract-blocks-file=%t %s 2>&1 | FileCheck %s ; CHECK: Invalid block define void @bar() { diff --git a/llvm/test/Transforms/BlockExtractor/invalid-function.ll b/llvm/test/Transforms/BlockExtractor/invalid-function.ll index 9af46ef2dcf951..4044815893e58d 100644 --- a/llvm/test/Transforms/BlockExtractor/invalid-function.ll +++ b/llvm/test/Transforms/BlockExtractor/invalid-function.ll @@ -1,5 +1,5 @@ ; RUN: echo 'foo bb' > %t -; RUN: not --crash opt -S -extract-blocks -extract-blocks-file=%t %s 2>&1 | FileCheck %s +; RUN: not opt -S -extract-blocks -extract-blocks-file=%t %s 2>&1 | FileCheck %s ; CHECK: Invalid function define void @bar() { diff --git a/llvm/test/Transforms/BlockExtractor/invalid-line.ll b/llvm/test/Transforms/BlockExtractor/invalid-line.ll index f0a4231660d75c..7e409d35916f9d 100644 --- a/llvm/test/Transforms/BlockExtractor/invalid-line.ll +++ b/llvm/test/Transforms/BlockExtractor/invalid-line.ll @@ -1,5 +1,5 @@ ; RUN: echo 'foo' > %t -; RUN: not --crash opt -S -extract-blocks -extract-blocks-file=%t %s 2>&1 | FileCheck %s +; RUN: not opt -S -extract-blocks -extract-blocks-file=%t %s 2>&1 | FileCheck %s ; CHECK: Invalid line define void @bar() { diff --git a/llvm/test/Transforms/GCOVProfiling/version.ll b/llvm/test/Transforms/GCOVProfiling/version.ll index 1cf574684bc90e..bfac2557da0b1b 
100644 --- a/llvm/test/Transforms/GCOVProfiling/version.ll +++ b/llvm/test/Transforms/GCOVProfiling/version.ll @@ -7,7 +7,7 @@ ; RUN: opt -passes=insert-gcov-profiling -disable-output < %t/2 ; RUN: head -c8 %t/version.gcno | grep '^oncg.804' ; RUN: rm %t/version.gcno -; RUN: not --crash opt -passes=insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t/2 +; RUN: not opt -passes=insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t/2 ; RUN: opt -passes=insert-gcov-profiling -default-gcov-version='402*' -disable-output < %t/2 ; RUN: head -c8 %t/version.gcno | grep '^oncg.204' ; RUN: rm %t/version.gcno diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll index e855ee8888c585..0cc10bd7b78b64 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll @@ -1,4 +1,4 @@ -; RUN: not --crash opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S < %s 2>&1 | FileCheck %s +; RUN: not opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S < %s 2>&1 | FileCheck %s ; CHECK: LLVM ERROR: Cannot specify both explicit peel count and explicit unroll count From 8c55de9ee7f63a26e6edf7bcf4279f64602b3bf1 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Mon, 30 May 2022 19:44:06 +0100 Subject: [PATCH 849/908] fix tests after my commit 80b3dcc045f8ea6e5e532d8891bbf1305bce89e8 doesn't like exit code 126 I'm afraid --- llvm/lib/Support/ErrorHandling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp index 3acae3f790a379..b8b3b7424ac6b1 100644 --- a/llvm/lib/Support/ErrorHandling.cpp +++ b/llvm/lib/Support/ErrorHandling.cpp @@ -122,7 +122,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { if (GenCrashDiag) abort(); else - exit(126); + exit(1); } void 
llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, From 118d9ebd521317d6f593fd2b9cea66043336846b Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 30 May 2022 12:59:27 +0000 Subject: [PATCH 850/908] Apply clang-tidy fixes for llvm-else-after-return in OpenMPToLLVM.cpp (NFC) --- mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 3f0964dc1115b4..3c8416b75511fe 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -65,9 +65,8 @@ struct RegionLessOpConversion : public ConvertOpToLLVMPattern { // TODO: Support memref type in variable operands return rewriter.notifyMatchFailure(curOp, "memref is not supported yet"); - } else { - convertedOperands.emplace_back(adaptor.getOperands()[idx]); } + convertedOperands.emplace_back(adaptor.getOperands()[idx]); } rewriter.replaceOpWithNewOp(curOp, resTypes, convertedOperands, curOp->getAttrs()); From 940e290860894d274c986c88bea2dcc17f1e93c3 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 30 May 2022 13:18:19 +0000 Subject: [PATCH 851/908] Apply clang-tidy fixes for performance-unnecessary-value-param in OneShotModuleBufferize.cpp (NFC) --- .../Bufferization/Transforms/OneShotModuleBufferize.h | 6 +++--- .../Bufferization/Transforms/OneShotModuleBufferize.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h index 367eddeeb45c75..63a1e07d16a223 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h @@ -32,9 +32,9 @@ LogicalResult bufferizeModuleOp(ModuleOp moduleOp, /// function call 
analysis to determine which function arguments are /// inplaceable. Then analyzes and bufferizes FuncOps one-by-one with One-Shot /// Bufferize. -LogicalResult -runOneShotModuleBufferize(ModuleOp moduleOp, - bufferization::OneShotBufferizationOptions options); +LogicalResult runOneShotModuleBufferize( + ModuleOp moduleOp, + const bufferization::OneShotBufferizationOptions &options); } // namespace bufferization } // namespace mlir diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index e358979868403e..f00097064cc029 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -486,7 +486,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( } LogicalResult mlir::bufferization::runOneShotModuleBufferize( - ModuleOp moduleOp, OneShotBufferizationOptions options) { + ModuleOp moduleOp, const OneShotBufferizationOptions &options) { assert(options.bufferizeFunctionBoundaries && "expected that function boundary bufferization is activated"); OneShotAnalysisState analysisState(moduleOp, options); From a0044389590afa609def60584d5e500a28d2f206 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 30 May 2022 14:04:56 -0400 Subject: [PATCH 852/908] [InstCombine] add/move tests for shift-of-constant-by-same-shift-by-constant; NFC --- .../Transforms/InstCombine/shift-shift.ll | 162 +++++++++++++++++- llvm/test/Transforms/InstCombine/shift.ll | 23 --- 2 files changed, 156 insertions(+), 29 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll index 6ce89723ace8a8..f10b0381206fef 100644 --- a/llvm/test/Transforms/InstCombine/shift-shift.ll +++ b/llvm/test/Transforms/InstCombine/shift-shift.ll @@ -152,9 +152,9 @@ define i8 @shl_trunc_bigger_lshr(i32 %x) { define i8 @shl_trunc_smaller_lshr(i32 %x) { ; 
CHECK-LABEL: @shl_trunc_smaller_lshr( -; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i8 -; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = shl i8 [[X_TR]], 2 -; CHECK-NEXT: [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP1]], 2 +; CHECK-NEXT: [[LT:%.*]] = and i8 [[TMP2]], -32 ; CHECK-NEXT: ret i8 [[LT]] ; %rt = lshr i32 %x, 3 @@ -178,9 +178,9 @@ define i24 @shl_trunc_bigger_ashr(i32 %x) { define i24 @shl_trunc_smaller_ashr(i32 %x) { ; CHECK-LABEL: @shl_trunc_smaller_ashr( -; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i24 -; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = shl i24 [[X_TR]], 3 -; CHECK-NEXT: [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8192 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i24 +; CHECK-NEXT: [[TMP2:%.*]] = shl i24 [[TMP1]], 3 +; CHECK-NEXT: [[LT:%.*]] = and i24 [[TMP2]], -8192 ; CHECK-NEXT: ret i24 [[LT]] ; %rt = ashr i32 %x, 10 @@ -272,3 +272,153 @@ define i8 @shl_trunc_smaller_lshr_use2(i32 %x) { %lt = shl i8 %tr, 5 ret i8 %lt } + +define i32 @ashr_ashr_constants_use(i32 %x) { +; CHECK-LABEL: @ashr_ashr_constants_use( +; CHECK-NEXT: [[S:%.*]] = ashr i32 -33, [[X:%.*]] +; CHECK-NEXT: call void @use32(i32 [[S]]) +; CHECK-NEXT: [[R:%.*]] = ashr i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = ashr i32 -33, %x + call void @use32(i32 %s) + %r = ashr i32 %s, 3 + ret i32 %r +} + +define <3 x i8> @ashr_ashr_constants_vec(<3 x i8> %x) { +; CHECK-LABEL: @ashr_ashr_constants_vec( +; CHECK-NEXT: [[S:%.*]] = ashr <3 x i8> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = ashr <3 x i8> [[S]], +; CHECK-NEXT: ret <3 x i8> [[R]] +; + %s = ashr <3 x i8> , %x + %r = ashr <3 x i8> %s, + ret <3 x i8> %r +} + +define i32 @lshr_lshr_constants_use(i32 %x) { +; CHECK-LABEL: @lshr_lshr_constants_use( +; CHECK-NEXT: [[S:%.*]] = lshr i32 -33, [[X:%.*]] +; CHECK-NEXT: call void @use32(i32 [[S]]) +; CHECK-NEXT: [[R:%.*]] = lshr i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = lshr i32 -33, %x + call 
void @use32(i32 %s) + %r = lshr i32 %s, 3 + ret i32 %r +} + +define <3 x i8> @lshr_lshr_constants_vec(<3 x i8> %x) { +; CHECK-LABEL: @lshr_lshr_constants_vec( +; CHECK-NEXT: [[S:%.*]] = lshr <3 x i8> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr <3 x i8> [[S]], +; CHECK-NEXT: ret <3 x i8> [[R]] +; + %s = lshr <3 x i8> , %x + %r = lshr <3 x i8> %s, + ret <3 x i8> %r +} + +define i32 @shl_shl_constants_use(i32 %x) { +; CHECK-LABEL: @shl_shl_constants_use( +; CHECK-NEXT: [[S:%.*]] = shl i32 -2013265920, [[X:%.*]] +; CHECK-NEXT: call void @use32(i32 [[S]]) +; CHECK-NEXT: [[R:%.*]] = shl i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = shl i32 2281701376, %x ; 0x8800_0000 + call void @use32(i32 %s) + %r = shl i32 %s, 3 + ret i32 %r +} + +define <3 x i8> @shl_shl_constants_vec(<3 x i8> %x) { +; CHECK-LABEL: @shl_shl_constants_vec( +; CHECK-NEXT: [[R:%.*]] = shl <3 x i8> , [[X:%.*]] +; CHECK-NEXT: ret <3 x i8> [[R]] +; + %s = shl <3 x i8> , %x + %r = shl <3 x i8> %s, + ret <3 x i8> %r +} + +; PR9809 +define i32 @shl_shl_constants_div(i32 %a, i32 %b) { +; CHECK-LABEL: @shl_shl_constants_div( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], 2 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i32 [[A:%.*]], [[TMP1]] +; CHECK-NEXT: ret i32 [[DIV1]] +; + %shl1 = shl i32 1, %b + %shl2 = shl i32 %shl1, 2 + %div = udiv i32 %a, %shl2 + ret i32 %div +} + +define i32 @ashr_lshr_constants(i32 %x) { +; CHECK-LABEL: @ashr_lshr_constants( +; CHECK-NEXT: [[S:%.*]] = ashr i32 -33, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = ashr i32 -33, %x + %r = lshr i32 %s, 3 + ret i32 %r +} + +define i32 @ashr_shl_constants(i32 %x) { +; CHECK-LABEL: @ashr_shl_constants( +; CHECK-NEXT: [[S:%.*]] = ashr i32 -33, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nsw i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = ashr i32 -33, %x + %r = shl i32 %s, 3 + ret i32 %r +} + +define i32 @lshr_ashr_constants(i32 %x) { +; CHECK-LABEL: @lshr_ashr_constants( +; CHECK-NEXT: [[S:%.*]] = 
lshr i32 -33, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = ashr i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = lshr i32 -33, %x + %r = ashr i32 %s, 3 + ret i32 %r +} + +define i32 @lshr_shl_constants(i32 %x) { +; CHECK-LABEL: @lshr_shl_constants( +; CHECK-NEXT: [[S:%.*]] = lshr i32 -33, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = lshr i32 -33, %x + %r = shl i32 %s, 3 + ret i32 %r +} + +define i32 @shl_ashr_constants(i32 %x) { +; CHECK-LABEL: @shl_ashr_constants( +; CHECK-NEXT: [[S:%.*]] = shl i32 -33, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = ashr i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = shl i32 -33, %x + %r = ashr i32 %s, 3 + ret i32 %r +} + +define i32 @shl_lshr_constants(i32 %x) { +; CHECK-LABEL: @shl_lshr_constants( +; CHECK-NEXT: [[S:%.*]] = shl i32 -33, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr i32 [[S]], 3 +; CHECK-NEXT: ret i32 [[R]] +; + %s = shl i32 -33, %x + %r = lshr i32 %s, 3 + ret i32 %r +} diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 8d9b1ec9f95467..5855c684ca3978 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -669,29 +669,6 @@ entry: ret i8 %i55 } -; PR9809 -define i32 @test40(i32 %a, i32 %b) nounwind { -; CHECK-LABEL: @test40( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], 2 -; CHECK-NEXT: [[DIV1:%.*]] = lshr i32 [[A:%.*]], [[TMP1]] -; CHECK-NEXT: ret i32 [[DIV1]] -; - %shl1 = shl i32 1, %b - %shl2 = shl i32 %shl1, 2 - %div = udiv i32 %a, %shl2 - ret i32 %div -} - -define i32 @test41(i32 %a, i32 %b) nounwind { -; CHECK-LABEL: @test41( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 8, [[B:%.*]] -; CHECK-NEXT: ret i32 [[TMP1]] -; - %1 = shl i32 1, %b - %2 = shl i32 %1, 3 - ret i32 %2 -} - define i32 @test42(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: @test42( ; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 4096, [[B:%.*]] From c5d942a4fb21fba404ceb504ed087c297a1af7c3 Mon Sep 17 00:00:00 2001 
From: Sanjay Patel Date: Mon, 30 May 2022 15:15:27 -0400 Subject: [PATCH 853/908] [InstCombine] remove unnecessary one-use check from (C2 << X) << C1 fold The restriction goes back to: 16f18ed7b555bce51 ...but the fold only replaces a shift with a shift, so that's not necessary. Generalizing to other opcodes is planned as a follow-up. --- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 2 +- llvm/test/Transforms/InstCombine/shift-shift.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index e5b0e32aa85eae..8d5dbf83b7ea36 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1010,7 +1010,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { Constant *C2; Value *X; // (C2 << X) << C1 --> (C2 << C1) << X - if (match(Op0, m_OneUse(m_Shl(m_Constant(C2), m_Value(X))))) + if (match(Op0, m_Shl(m_Constant(C2), m_Value(X)))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X); // (X * C2) << C1 --> X * (C2 << C1) diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll index f10b0381206fef..d76f3c00261a0a 100644 --- a/llvm/test/Transforms/InstCombine/shift-shift.ll +++ b/llvm/test/Transforms/InstCombine/shift-shift.ll @@ -325,7 +325,7 @@ define i32 @shl_shl_constants_use(i32 %x) { ; CHECK-LABEL: @shl_shl_constants_use( ; CHECK-NEXT: [[S:%.*]] = shl i32 -2013265920, [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[S]]) -; CHECK-NEXT: [[R:%.*]] = shl i32 [[S]], 3 +; CHECK-NEXT: [[R:%.*]] = shl i32 1073741824, [[X]] ; CHECK-NEXT: ret i32 [[R]] ; %s = shl i32 2281701376, %x ; 0x8800_0000 From a0c3c60728ee5bc7d6ca40a5b6e446cde72c6700 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 30 May 2022 15:30:01 -0400 Subject: [PATCH 854/908] [InstCombine] fold shift-right-by-constant with 
shift-right-of-constant operand (C2 >> X) >> C1 --> (C2 >> C1) >> X The shift-left form of this transform has existed since: 16f18ed7b555bce5163 ...but it applies to matching shift right opcodes too: https://alive2.llvm.org/ce/z/c5eQms --- .../InstCombine/InstCombineShifts.cpp | 28 +++++++++++-------- llvm/test/Transforms/InstCombine/and.ll | 12 +++----- .../Transforms/InstCombine/icmp-and-shift.ll | 15 ++++------ .../Transforms/InstCombine/shift-shift.ll | 10 +++---- 4 files changed, 29 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 8d5dbf83b7ea36..b25c83a2a96268 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -702,10 +702,18 @@ static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift, } } -Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, +Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1, BinaryOperator &I) { + // (C2 << X) << C1 --> (C2 << C1) << X + // (C2 >> X) >> C1 --> (C2 >> C1) >> X + Constant *C2; + Value *X; + if (match(Op0, m_BinOp(I.getOpcode(), m_Constant(C2), m_Value(X)))) + return BinaryOperator::Create(I.getOpcode(), + ConstantExpr::get(I.getOpcode(), C2, C1), X); + const APInt *Op1C; - if (!match(Op1, m_APInt(Op1C))) + if (!match(C1, m_APInt(Op1C))) return nullptr; // See if we can propagate this shift into the input, this covers the trivial @@ -743,10 +751,10 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, if (match(Op0BO->getOperand(1), m_APInt(Op0C))) { if (canShiftBinOpWithConstantRHS(I, Op0BO)) { Constant *NewRHS = ConstantExpr::get( - I.getOpcode(), cast(Op0BO->getOperand(1)), Op1); + I.getOpcode(), cast(Op0BO->getOperand(1)), C1); Value *NewShift = - Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + Builder.CreateBinOp(I.getOpcode(), 
Op0BO->getOperand(0), C1); NewShift->takeName(Op0BO); return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS); @@ -772,9 +780,9 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, match(TBO->getOperand(1), m_APInt(C)) && canShiftBinOpWithConstantRHS(I, TBO)) { Constant *NewRHS = ConstantExpr::get( - I.getOpcode(), cast(TBO->getOperand(1)), Op1); + I.getOpcode(), cast(TBO->getOperand(1)), C1); - Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1); + Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, C1); Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS); return SelectInst::Create(Cond, NewOp, NewShift); } @@ -789,9 +797,9 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1, match(FBO->getOperand(1), m_APInt(C)) && canShiftBinOpWithConstantRHS(I, FBO)) { Constant *NewRHS = ConstantExpr::get( - I.getOpcode(), cast(FBO->getOperand(1)), Op1); + I.getOpcode(), cast(FBO->getOperand(1)), C1); - Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1); + Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, C1); Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS); return SelectInst::Create(Cond, NewShift, NewOp); } @@ -1009,10 +1017,6 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { if (match(Op1, m_Constant(C1))) { Constant *C2; Value *X; - // (C2 << X) << C1 --> (C2 << C1) << X - if (match(Op0, m_Shl(m_Constant(C2), m_Value(X)))) - return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X); - // (X * C2) << C1 --> X * (C2 << C1) if (match(Op0, m_Mul(m_Value(X), m_Constant(C2)))) return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1)); diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index c7a63d7fc5280c..2617ac1eb6defa 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -1699,8 +1699,7 @@ 
define i16 @shl_lshr_pow2_const_negative_overflow2(i16 %x) { define i16 @lshr_lshr_pow2_const(i16 %x) { ; CHECK-LABEL: @lshr_lshr_pow2_const( -; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 2048, [[X:%.*]] -; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 32, [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 ; CHECK-NEXT: ret i16 [[R]] ; @@ -1712,8 +1711,7 @@ define i16 @lshr_lshr_pow2_const(i16 %x) { define i16 @lshr_lshr_pow2_const_negative_oneuse(i16 %x) { ; CHECK-LABEL: @lshr_lshr_pow2_const_negative_oneuse( -; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 2048, [[X:%.*]] -; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 32, [[X:%.*]] ; CHECK-NEXT: call void @use16(i16 [[LSHR2]]) ; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 ; CHECK-NEXT: ret i16 [[R]] @@ -1727,8 +1725,7 @@ define i16 @lshr_lshr_pow2_const_negative_oneuse(i16 %x) { define i16 @lshr_lshr_pow2_const_negative_nopow2_1(i16 %x) { ; CHECK-LABEL: @lshr_lshr_pow2_const_negative_nopow2_1( -; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 2047, [[X:%.*]] -; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 31, [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 ; CHECK-NEXT: ret i16 [[R]] ; @@ -1740,8 +1737,7 @@ define i16 @lshr_lshr_pow2_const_negative_nopow2_1(i16 %x) { define i16 @lshr_lshr_pow2_const_negative_nopow2_2(i16 %x) { ; CHECK-LABEL: @lshr_lshr_pow2_const_negative_nopow2_2( -; CHECK-NEXT: [[LSHR1:%.*]] = lshr i16 8192, [[X:%.*]] -; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 [[LSHR1]], 6 +; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 128, [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 3 ; CHECK-NEXT: ret i16 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll index 6193692976a1ba..9c9f1a99e82ddc 100644 --- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll +++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll 
@@ -335,8 +335,7 @@ define i32 @icmp_ne_and1_lshr_pow2(i32 %0) { define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[LSHR]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] ; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], ; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] ; @@ -349,8 +348,7 @@ define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { define i32 @icmp_eq_and_pow2_lshr_pow2(i32 %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2( -; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8, [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[LSHR]], 2 +; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]] ; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -375,8 +373,7 @@ define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[LSHR]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] ; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], ; CHECK-NEXT: ret <2 x i32> [[TMP2]] @@ -390,8 +387,7 @@ define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { define i32 @icmp_ne_and_pow2_lshr_pow2(i32 %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2( -; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8, [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[LSHR]], 2 +; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]] ; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -416,8 +412,7 @@ define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> 
@icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[LSHR]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] ; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], ; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] ; diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll index d76f3c00261a0a..aaf10f28b42317 100644 --- a/llvm/test/Transforms/InstCombine/shift-shift.ll +++ b/llvm/test/Transforms/InstCombine/shift-shift.ll @@ -277,7 +277,7 @@ define i32 @ashr_ashr_constants_use(i32 %x) { ; CHECK-LABEL: @ashr_ashr_constants_use( ; CHECK-NEXT: [[S:%.*]] = ashr i32 -33, [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[S]]) -; CHECK-NEXT: [[R:%.*]] = ashr i32 [[S]], 3 +; CHECK-NEXT: [[R:%.*]] = ashr i32 -5, [[X]] ; CHECK-NEXT: ret i32 [[R]] ; %s = ashr i32 -33, %x @@ -288,8 +288,7 @@ define i32 @ashr_ashr_constants_use(i32 %x) { define <3 x i8> @ashr_ashr_constants_vec(<3 x i8> %x) { ; CHECK-LABEL: @ashr_ashr_constants_vec( -; CHECK-NEXT: [[S:%.*]] = ashr <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = ashr <3 x i8> [[S]], +; CHECK-NEXT: [[R:%.*]] = ashr <3 x i8> , [[X:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; %s = ashr <3 x i8> , %x @@ -301,7 +300,7 @@ define i32 @lshr_lshr_constants_use(i32 %x) { ; CHECK-LABEL: @lshr_lshr_constants_use( ; CHECK-NEXT: [[S:%.*]] = lshr i32 -33, [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[S]]) -; CHECK-NEXT: [[R:%.*]] = lshr i32 [[S]], 3 +; CHECK-NEXT: [[R:%.*]] = lshr i32 536870907, [[X]] ; CHECK-NEXT: ret i32 [[R]] ; %s = lshr i32 -33, %x @@ -312,8 +311,7 @@ define i32 @lshr_lshr_constants_use(i32 %x) { define <3 x i8> @lshr_lshr_constants_vec(<3 x i8> %x) { ; CHECK-LABEL: @lshr_lshr_constants_vec( -; CHECK-NEXT: [[S:%.*]] = lshr <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = lshr <3 x i8> [[S]], +; CHECK-NEXT: [[R:%.*]] = lshr <3 
x i8> , [[X:%.*]] ; CHECK-NEXT: ret <3 x i8> [[R]] ; %s = lshr <3 x i8> , %x From 93319128d0d4eda22ce8d531ba668cb8c7c8c8e8 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Mon, 30 May 2022 20:33:50 +0200 Subject: [PATCH 855/908] cmake: fix clang standalone build D126308 broke building clang standalone, as LLVM_UTILS_INSTALL_DIR is not exported. Signed-off-by: Matheus Izvekov Differential Revision: https://reviews.llvm.org/D126671 --- clang/utils/hmaptool/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/utils/hmaptool/CMakeLists.txt b/clang/utils/hmaptool/CMakeLists.txt index 375d7b04488297..01b6a920fb9449 100644 --- a/clang/utils/hmaptool/CMakeLists.txt +++ b/clang/utils/hmaptool/CMakeLists.txt @@ -2,7 +2,7 @@ add_custom_command(OUTPUT "${LLVM_TOOLS_BINARY_DIR}/hmaptool" COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/hmaptool" "${LLVM_TOOLS_BINARY_DIR}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/hmaptool") -install(PROGRAMS hmaptool DESTINATION "${LLVM_UTILS_INSTALL_DIR}" COMPONENT hmaptool) +install(PROGRAMS hmaptool DESTINATION "${LLVM_TOOLS_INSTALL_DIR}" COMPONENT hmaptool) add_custom_target(hmaptool ALL DEPENDS "${LLVM_TOOLS_BINARY_DIR}/hmaptool") set_target_properties(hmaptool PROPERTIES FOLDER "Utils") From d65fa2c43cfaa7a6a15e65f27b7e73dfb6f8f0d4 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 30 May 2022 12:14:59 -0700 Subject: [PATCH 856/908] [Hexagon] Widen vector types with non-power-of-2 element counts Such vector types cannot be split at the moment, because splitting expects an even number of elements in the source type. If a target marks such a type as "split", TargetLoweringBase::computeRegisterProperties will override it with widening to the next power of 2. This could lead to issues during instruction selection when conflicting information about how to handle this type is present. 
--- .../Target/Hexagon/HexagonISelLowering.cpp | 5 +++ llvm/test/CodeGen/Hexagon/isel-v3i16.ll | 40 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/isel-v3i16.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 11dd8e2c2b674a..9a3f65b5d6bff8 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2163,6 +2163,11 @@ HexagonTargetLowering::getPreferredVectorAction(MVT VT) const { // Always widen (remaining) vectors of i1. if (ElemTy == MVT::i1) return TargetLoweringBase::TypeWidenVector; + // Widen non-power-of-2 vectors. Such types cannot be split right now, + // and computeRegisterProperties will override "split" with "widen", + // which can cause other issues. + if (!isPowerOf2_32(VecLen)) + return TargetLoweringBase::TypeWidenVector; return TargetLoweringBase::TypeSplitVector; } diff --git a/llvm/test/CodeGen/Hexagon/isel-v3i16.ll b/llvm/test/CodeGen/Hexagon/isel-v3i16.ll new file mode 100644 index 00000000000000..ddca7f4e8014f8 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-v3i16.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check for a successful compilation. 
+; CHECK: callr + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +@g0 = external dllexport global i8* (i32, i32, i64, i32, i32)*, align 4 + +define hidden void @f0(i32 %a0, <3 x i16>* %a1) #0 { +b0: + %v0 = load i8* (i32, i32, i64, i32, i32)*, i8* (i32, i32, i64, i32, i32)** @g0, align 4 + %v1 = call i8* %v0(i32 1, i32 %a0, i64 314646, i32 0, i32 16) + br label %b1 + +b1: ; preds = %b2, %b0 + br label %b2 + +b2: ; preds = %b5, %b1 + %v2 = icmp slt i32 0, 229 + br i1 %v2, label %b3, label %b1 + +b3: ; preds = %b2 + br i1 undef, label %b4, label %b5 + +b4: ; preds = %b3 + %v3 = load <3 x i16>, <3 x i16>* %a1, align 2 + br label %b5 + +b5: ; preds = %b4, %b3 + %v4 = phi <3 x i16> [ %v3, %b4 ], [ zeroinitializer, %b3 ] + %v5 = bitcast i8* %v1 to i16* + %v6 = getelementptr inbounds i16, i16* %v5, i32 undef + %v7 = bitcast i16* %v6 to <3 x i16>* + store <3 x i16> %v4, <3 x i16>* %v7, align 2 + br label %b2 +} + +attributes #0 = { "target-cpu"="hexagonv68" } From fe7cd1d199f2d082d5ed096fe60db5529e431896 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 30 May 2022 16:26:33 -0400 Subject: [PATCH 857/908] [libc++][NFC] Improve comment about vector and string base class ABI flags --- libcxx/include/__config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 14e6850ebdc108..5b50e056cf8d96 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -110,9 +110,9 @@ // compatible. This switch removes these workarounds for platforms that don't care // about ABI compatibility. # define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT -// Remove basic_string common base +// Don't export the legacy __basic_string_common class and its methods from the built library. 
# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON -// Remove vector base class +// Don't export the legacy __vector_base_common class and its methods from the built library. # define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON // According to the Standard, `bitset::operator[] const` returns bool # define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL From 0dbaff2a49224b1d3089325d9d5fc9f56385c142 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Mon, 30 May 2022 16:26:17 -0400 Subject: [PATCH 858/908] [FileCheck] Use %ProtectFileCheckOutput in recently added test --- llvm/test/FileCheck/missspelled-directive.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/test/FileCheck/missspelled-directive.txt b/llvm/test/FileCheck/missspelled-directive.txt index bb6255823941e0..d8ef009edabb5a 100644 --- a/llvm/test/FileCheck/missspelled-directive.txt +++ b/llvm/test/FileCheck/missspelled-directive.txt @@ -1,10 +1,10 @@ -RUN: not FileCheck --check-prefix=P1 %s 2>&1 | FileCheck --check-prefix=CHECK1 %s -RUN: not FileCheck --check-prefix=P2 %s 2>&1 | FileCheck --check-prefix=CHECK2 %s -RUN: not FileCheck --check-prefix=P3 %s 2>&1 | FileCheck --check-prefix=CHECK3 %s -RUN: not FileCheck --check-prefix=P4 %s 2>&1 | FileCheck --check-prefix=CHECK4 %s -RUN: not FileCheck --check-prefix=P5 %s 2>&1 | FileCheck --check-prefix=CHECK5 %s -RUN: not FileCheck --check-prefix=P6 %s 2>&1 | FileCheck --check-prefix=CHECK6 %s -RUN: not FileCheck --check-prefix=P7 %s 2>&1 | FileCheck --check-prefix=CHECK7 %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefix=P1 %s 2>&1 | FileCheck --check-prefix=CHECK1 %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefix=P2 %s 2>&1 | FileCheck --check-prefix=CHECK2 %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefix=P3 %s 2>&1 | FileCheck --check-prefix=CHECK3 %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefix=P4 %s 2>&1 | FileCheck --check-prefix=CHECK4 %s +RUN: 
%ProtectFileCheckOutput not FileCheck --check-prefix=P5 %s 2>&1 | FileCheck --check-prefix=CHECK5 %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefix=P6 %s 2>&1 | FileCheck --check-prefix=CHECK6 %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefix=P7 %s 2>&1 | FileCheck --check-prefix=CHECK7 %s P1_LABEL: foo CHECK1: error: misspelled directive 'P1_LABEL:' From 167fb106d28ff026f53a3f3468bdcb914e094bd1 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 25 May 2022 21:35:01 +0000 Subject: [PATCH 859/908] compiler-rt/cpu_model: Ensure constructor priority is set and align with GCC GCC recently started setting constructor priority on init_have_lse_atomics [1] to avoid undefined initialization order with respect to other initializers, causing accidental use of ll/sc intrinsics on targets where this was not intended (which presents a minor performance problem as well as a compatibility problem for users wanting to use the rr debugger). I initially thought compiler-rt does not have the same issue as libgcc, since it looks like we're already setting init priority on the constructor. Unfortunately, it does not appear that the HAVE_INIT_PRIORITY check is ever performed anyway, so despite appearances the init priority was not actually applied. Fix that by applying the init priority unconditionally. It has been supported in clang ever since it was first introduced and in any case for more than 14 years in both gcc and clang. MSVC is already excluded from this code path and we're already using constructors with init priority elsewhere in compiler-rt without additional check (though mostly in the sanitizer runtime, which may have more narrow target support). Regardless, I believe that for our supported compilers, if they support the constructor attribute, they should also support init priorities.
While we're here, change the init priority from 101, which is the highest priority for end user applications, to instead use one of the priority levels reserved for implementations (1-100; lower integers are higher priority). GCC ended up using `90`, so this commit aligns the value in compiler-rt to the same value to ensure that there are no subtle initialization order differences between libgcc and compiler-rt. [1] https://github.com/gcc-mirror/gcc/commit/75c4e4909ae2667f56487434f99c2915b4570794 Differential Revision: https://reviews.llvm.org/D126424 --- compiler-rt/lib/builtins/cpu_model.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c index e52f2aa610a672..228be67acb8944 100644 --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model.c @@ -17,10 +17,17 @@ #define __has_attribute(attr) 0 #endif -#if defined(HAVE_INIT_PRIORITY) -#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101)) -#elif __has_attribute(__constructor__) -#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__)) +#if __has_attribute(constructor) +#if __GNUC__ >= 9 +// Ordinarily init priorities below 101 are disallowed as they are reserved for the +// implementation. However, we are the implementation, so silence the diagnostic, +// since it doesn't apply to us. +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif +// We're choosing init priority 90 to force our constructors to run before any +// constructors in the end user application (starting at priority 101). This value +// matches the libgcc choice for the same functions. +#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) #else // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that // this runs during initialization. 
From c825abd6b0198fb088d9752f556a70705bc99dfd Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Mon, 30 May 2022 20:54:48 +0200 Subject: [PATCH 860/908] [clang] NFC: introduce test for D126620 Signed-off-by: Matheus Izvekov Reviewed By: v.g.vassilev Differential Revision: https://reviews.llvm.org/D126674 --- .../test/CXX/temp/temp.decls/temp.class.spec/p6.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/clang/test/CXX/temp/temp.decls/temp.class.spec/p6.cpp b/clang/test/CXX/temp/temp.decls/temp.class.spec/p6.cpp index 38dde7367fea3d..7eddef1cb71f31 100644 --- a/clang/test/CXX/temp/temp.decls/temp.class.spec/p6.cpp +++ b/clang/test/CXX/temp/temp.decls/temp.class.spec/p6.cpp @@ -74,3 +74,16 @@ namespace rdar8651930 { int array0[Outer::Inner::value? 1 : -1]; int array1[Outer::Inner::value? -1 : 1]; } + +namespace print_dependent_TemplateSpecializationType { + +template struct Foo { + template struct Bar; + template struct Bar<0, T, Y> {}; + // expected-note-re@-1 {{previous declaration {{.*}} 'Bar<0UL, int, type-parameter-0-0>' is here}} + template struct Bar<0, U, Y> {}; + // expected-error@-1 {{partial specialization 'Bar<0, int, Y>' cannot be redeclared}} +}; +template struct Foo; // expected-note {{requested here}} + +} // namespace print_dependent_TemplateSpecializationType From 51002bdb5e92bf8dec286b24ff8952bda6beb3b5 Mon Sep 17 00:00:00 2001 From: eopXD Date: Mon, 30 May 2022 13:40:06 -0700 Subject: [PATCH 861/908] [RISCV] Precommit test case to show bug in RISCVISelDagToDag The optimization level should not be restored into O2. This is a pre-commit test case to show fix in D126641. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D126677 --- llvm/test/CodeGen/RISCV/isel-optnone.ll | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/isel-optnone.ll diff --git a/llvm/test/CodeGen/RISCV/isel-optnone.ll b/llvm/test/CodeGen/RISCV/isel-optnone.ll new file mode 100644 index 00000000000000..28db6d73e04bda --- /dev/null +++ b/llvm/test/CodeGen/RISCV/isel-optnone.ll @@ -0,0 +1,24 @@ +; REQUIRES: asserts +; RUN: llc < %s -O0 -mtriple=riscv64 -debug-only=isel 2>&1 | FileCheck %s + +define i32* @fooOptnone(i32* %p, i32* %q, i32** %z) #0 { +; CHECK: Changing optimization level for Function fooOptnone +; CHECL: Before: -O2 ; After: -O0 + +; CHECK: Restoring optimization level for Function fooOptnone +; CHECK: Before: -O0 ; After: -O2 +entry: + %r = load i32, i32* %p + %s = load i32, i32* %q + %y = load i32*, i32** %z + + %t0 = add i32 %r, %s + %t1 = add i32 %t0, 1 + %t2 = getelementptr i32, i32* %y, i32 1 + %t3 = getelementptr i32, i32* %t2, i32 %t1 + + ret i32* %t3 + +} + +attributes #0 = { nounwind optnone noinline } From 40e52d30332e2e7da7f08018bb91f87440c01e8d Mon Sep 17 00:00:00 2001 From: Joe Loser Date: Mon, 30 May 2022 11:59:51 -0600 Subject: [PATCH 862/908] [libc++][test] Enable some ADL robust algorithm tests Some algorithm ADL robustness tests are commented out, but work as is. Uncomment them. 
Differential Revision: https://reviews.llvm.org/D126670 --- .../robust_against_adl.compile.pass.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp b/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp index c06118a4ef61b5..144d39c47b84ee 100644 --- a/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp +++ b/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp @@ -60,7 +60,7 @@ TEST_CONSTEXPR_CXX20 bool all_the_algorithms() #endif (void)std::copy(first, last, first2); (void)std::copy_backward(first, last, last2); - // TODO FIXME (void)std::copy_n(first, count, first2); + (void)std::copy_n(first, count, first2); (void)std::count(first, last, value); (void)std::count_if(first, last, UnaryTrue()); (void)std::distance(first, last); @@ -75,8 +75,8 @@ TEST_CONSTEXPR_CXX20 bool all_the_algorithms() (void)std::fill(first, last, value); (void)std::fill_n(first, count, value); (void)std::find(first, last, value); - // TODO FIXME (void)std::find_end(first, last, first2, mid2); - // TODO FIXME (void)std::find_end(first, last, first2, mid2, std::equal_to()); + (void)std::find_end(first, last, first2, mid2); + (void)std::find_end(first, last, first2, mid2, std::equal_to()); (void)std::find_if(first, last, UnaryTrue()); (void)std::find_if_not(first, last, UnaryTrue()); (void)std::for_each(first, last, UnaryVoid()); @@ -110,8 +110,8 @@ TEST_CONSTEXPR_CXX20 bool all_the_algorithms() // TODO: lexicographical_compare_three_way (void)std::lower_bound(first, last, value); (void)std::lower_bound(first, last, value, std::less()); - // RELIES ON ADL SWAP (void)std::make_heap(first, last); - // RELIES ON ADL SWAP (void)std::make_heap(first, last, std::less()); + (void)std::make_heap(first, last); + (void)std::make_heap(first, last, std::less()); (void)std::max(value, value); (void)std::max(value, value, std::less()); #if TEST_STD_VER >= 11 @@ -155,17 
+155,17 @@ TEST_CONSTEXPR_CXX20 bool all_the_algorithms() // RELIES ON ADL SWAP (void)std::nth_element(first, mid, last, std::less()); // RELIES ON ADL SWAP (void)std::partial_sort(first, mid, last); // RELIES ON ADL SWAP (void)std::partial_sort(first, mid, last, std::less()); - // RELIES ON ADL SWAP (void)std::partial_sort_copy(first, last, first2, mid2); - // RELIES ON ADL SWAP (void)std::partial_sort_copy(first, last, first2, mid2, std::less()); + (void)std::partial_sort_copy(first, last, first2, mid2); + (void)std::partial_sort_copy(first, last, first2, mid2, std::less()); // RELIES ON ADL SWAP (void)std::partition(first, last, UnaryTrue()); (void)std::partition_copy(first, last, first2, last2, UnaryTrue()); (void)std::partition_point(first, last, UnaryTrue()); - // RELIES ON ADL SWAP (void)std::pop_heap(first, last); - // RELIES ON ADL SWAP (void)std::pop_heap(first, last, std::less()); + (void)std::pop_heap(first, last); + (void)std::pop_heap(first, last, std::less()); // RELIES ON ADL SWAP (void)std::prev_permutation(first, last); // RELIES ON ADL SWAP (void)std::prev_permutation(first, last, std::less()); - // RELIES ON ADL SWAP (void)std::push_heap(first, last); - // RELIES ON ADL SWAP (void)std::push_heap(first, last, std::less()); + (void)std::push_heap(first, last); + (void)std::push_heap(first, last, std::less()); (void)std::remove(first, last, value); (void)std::remove_copy(first, last, first2, value); (void)std::remove_copy_if(first, last, first2, UnaryTrue()); @@ -175,7 +175,7 @@ TEST_CONSTEXPR_CXX20 bool all_the_algorithms() (void)std::replace_copy_if(first, last, first2, UnaryTrue(), value); (void)std::replace_if(first, last, UnaryTrue(), value); // RELIES ON ADL SWAP (void)std::reverse(first, last); - // RELIES ON ADL SWAP (void)std::reverse_copy(first, last, first2); + (void)std::reverse_copy(first, last, first2); // RELIES ON ADL SWAP (void)std::rotate(first, mid, last); (void)std::rotate_copy(first, mid, last, first2); (void)std::search(first, 
last, first2, mid2); @@ -196,8 +196,8 @@ TEST_CONSTEXPR_CXX20 bool all_the_algorithms() #endif // RELIES ON ADL SWAP (void)std::sort(first, last); // RELIES ON ADL SWAP (void)std::sort(first, last, std::less()); - // RELIES ON ADL SWAP (void)std::sort_heap(first, last); - // RELIES ON ADL SWAP (void)std::sort_heap(first, last, std::less()); + (void)std::sort_heap(first, last); + (void)std::sort_heap(first, last, std::less()); // RELIES ON ADL SWAP (void)std::stable_partition(first, last, UnaryTrue()); // RELIES ON ADL SWAP (void)std::stable_sort(first, last); // RELIES ON ADL SWAP (void)std::stable_sort(first, last, std::less()); From 2cadf84fc817a6c89eca0556a078d7879091e2a5 Mon Sep 17 00:00:00 2001 From: eopXD Date: Mon, 30 May 2022 05:06:48 -0700 Subject: [PATCH 863/908] [RISCV] Pass OptLevel to `RISCVDAGToDAGISel` correctly Originally, `OptLevel` isn't passed into the `MachineFunctionPass`. This lets the default parameter of `SelectionDAGISel`, which is `CodeGenOpt::Default`, be passed in. OptLevelChanger captures the optimization level with the parameter, rather than the value within `TargetMachine`. This lets the optimization level be unintentionally overwritten if a value other than `CodeGenOpt::Default` is passed. This patch fixes this by passing the optimization level rather than using the default value.
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D126641 --- llvm/lib/Target/RISCV/RISCV.h | 3 ++- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 5 +++-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 5 +++-- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +- llvm/test/CodeGen/RISCV/O0-pipeline.ll | 7 ------- llvm/test/CodeGen/RISCV/isel-optnone.ll | 6 ++---- 6 files changed, 11 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 9a8b0623875469..917837a307adaa 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -35,7 +35,8 @@ bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &MCOp, const AsmPrinter &AP); -FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM); +FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM, + CodeGenOpt::Level OptLevel); FunctionPass *createRISCVMakeCompressibleOptPass(); void initializeRISCVMakeCompressibleOptPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 26f3614770c7a2..af91cd59d9c72a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2436,6 +2436,7 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. 
-FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { - return new RISCVDAGToDAGISel(TM); +FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new RISCVDAGToDAGISel(TM, OptLevel); } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 955dc4d609f281..8ddc5c32a3da22 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -24,8 +24,9 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { const RISCVSubtarget *Subtarget = nullptr; public: - explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine) - : SelectionDAGISel(TargetMachine) {} + explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TargetMachine, OptLevel) {} StringRef getPassName() const override { return "RISCV DAG->DAG Pattern Instruction Selection"; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index e5bfe782d11319..0dc6445926a90d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -182,7 +182,7 @@ bool RISCVPassConfig::addPreISel() { } bool RISCVPassConfig::addInstSelector() { - addPass(createRISCVISelDag(getRISCVTargetMachine())); + addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel())); return false; } diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index 94c775c809ecaf..bc77a8eea079bb 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -35,13 +35,6 @@ ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Post-Dominator Tree Construction -; 
CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: RISCV DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation diff --git a/llvm/test/CodeGen/RISCV/isel-optnone.ll b/llvm/test/CodeGen/RISCV/isel-optnone.ll index 28db6d73e04bda..53a4f2288267bf 100644 --- a/llvm/test/CodeGen/RISCV/isel-optnone.ll +++ b/llvm/test/CodeGen/RISCV/isel-optnone.ll @@ -2,11 +2,9 @@ ; RUN: llc < %s -O0 -mtriple=riscv64 -debug-only=isel 2>&1 | FileCheck %s define i32* @fooOptnone(i32* %p, i32* %q, i32** %z) #0 { -; CHECK: Changing optimization level for Function fooOptnone -; CHECL: Before: -O2 ; After: -O0 +; CHECK-NOT: Changing optimization level for Function fooOptnone +; CHECK-NOT: Restoring optimization level for Function fooOptnone -; CHECK: Restoring optimization level for Function fooOptnone -; CHECK: Before: -O0 ; After: -O2 entry: %r = load i32, i32* %p %s = load i32, i32* %q From 91b8d96fd12af6120e2988639455ba35d89a8d3e Mon Sep 17 00:00:00 2001 From: River Riddle Date: Fri, 29 Apr 2022 21:13:29 -0700 Subject: [PATCH 864/908] [mlir:PDLL] Add proper support for operation result type inference This allows for the results of operations to be inferred in certain contexts, and matches the support in PDL for result type inference. The main two initial circumstances are when used as a replacement of another operation, or when the operation being created implements InferTypeOpInterface. 
Differential Revision: https://reviews.llvm.org/D124782 --- mlir/docs/PDLL.md | 97 +++++++++++++-- mlir/include/mlir/Tools/PDLL/ODS/Context.h | 3 +- mlir/include/mlir/Tools/PDLL/ODS/Dialect.h | 3 +- mlir/include/mlir/Tools/PDLL/ODS/Operation.h | 15 ++- mlir/lib/Tools/PDLL/ODS/Context.cpp | 9 +- mlir/lib/Tools/PDLL/ODS/Dialect.cpp | 10 +- mlir/lib/Tools/PDLL/ODS/Operation.cpp | 3 +- mlir/lib/Tools/PDLL/Parser/Parser.cpp | 123 +++++++++++++++++-- mlir/test/mlir-pdll/Parser/expr-failure.pdll | 30 +++++ 9 files changed, 260 insertions(+), 33 deletions(-) diff --git a/mlir/docs/PDLL.md b/mlir/docs/PDLL.md index 11ab5831d2b6a7..984ba2ed26f14f 100644 --- a/mlir/docs/PDLL.md +++ b/mlir/docs/PDLL.md @@ -577,9 +577,10 @@ let root = op<>; #### Operands The operands section corresponds to the operands of the operation. This section -of an operation expression may be elided, in which case the operands are not -constrained in any way. When present, the operands of an operation expression -are interpreted in the following ways: +of an operation expression may be elided, which within a `match` section means +that the operands are not constrained in any way. If elided within a `rewrite` +section, the operation is treated as having no operands. When present, the +operands of an operation expression are interpreted in the following ways: 1) A single instance of type `ValueRange`: @@ -612,10 +613,11 @@ let root = op(call: Value, args: ValueRange); #### Results -The results section corresponds to the result types of the operation. This -section of an operation expression may be elided, in which case the result types -are not constrained in any way. When present, the result types of an operation -expression are interpreted in the following ways: +The results section corresponds to the result types of the operation. This section +of an operation expression may be elided, which within a `match` section means +that the result types are not constrained in any way. 
If elided within a `rewrite` +section, the results of the operation are [inferred](#inferred-results). When present, +the result types of an operation expression are interpreted in the following ways: 1) A single instance of type `TypeRange`: @@ -646,6 +648,87 @@ We can match the result types as so: let root = op -> (result: Type, otherResults: TypeRange); ``` +#### Inferred Results + +Within the `rewrite` section of a pattern, the result types of an +operation are inferred if they are elided or otherwise not +previously bound. The ["variable binding"](#variable-binding) section above +discusses the concept of "binding" in more detail. Below are various examples +that build upon this to help showcase how a result type may be "bound": + +* Binding to a [constant](#type-expression): + +```pdll +op -> (type<"i32">); +``` + +* Binding to types within the `match` section: + +```pdll +Pattern { + replace op -> (resultTypes: TypeRange) + with op -> (resultTypes); +} +``` + +* Binding to previously inferred types: + +```pdll +Pattern { + rewrite root: Op with { + // `resultTypes` here is *not* yet bound, and will be inferred when + // creating `dialect.op`. Any uses of `resultTypes` after this expression, + // will use the types inferred when creating this operation. + op -> (resultTypes: TypeRange); + + // `resultTypes` here is bound to the types inferred when creating `dialect.op`. + op -> (resultTypes); + }; +} +``` + +* Binding to a [`Native Rewrite`](#native-rewriters) method result: + +```pdll +Rewrite BuildTypes() -> TypeRange; + +Pattern { + rewrite root: Op with { + op -> (BuildTypes()); + }; +} +``` + +Below is the set of contexts in which result type inference is supported: + +##### Inferred Results of Replacement Operation + +Replacements have the invariant that the types of the replacement values must +match the result types of the input operation. 
This means that when replacing +one operation with another, the result types of the replacement operation may +be inferred from the result types of the operation being replaced. For example, +consider the following pattern: + +```pdll +Pattern => replace op with op; +``` + +This pattern could be written in a more explicit way as: + +```pdll +Pattern { + replace op -> (resultTypes: TypeRange) + with op -> (resultTypes); +} +``` + +##### Inferred Results with InferTypeOpInterface + +`InferTypeOpInterface` is an interface that enables an operation to infer its result +types from its input attributes, operands, regions, etc. When the result types of +an operation cannot be inferred from any other context, this interface is invoked +to infer the result types of the operation. + #### Attributes The attributes section of the operation expression corresponds to the attribute diff --git a/mlir/include/mlir/Tools/PDLL/ODS/Context.h b/mlir/include/mlir/Tools/PDLL/ODS/Context.h index ea3751eac35419..6baa90af44adf4 100644 --- a/mlir/include/mlir/Tools/PDLL/ODS/Context.h +++ b/mlir/include/mlir/Tools/PDLL/ODS/Context.h @@ -62,7 +62,8 @@ class Context { /// and a boolean indicating if the operation newly inserted (false if the /// operation already existed). std::pair - insertOperation(StringRef name, StringRef summary, StringRef desc, SMLoc loc); + insertOperation(StringRef name, StringRef summary, StringRef desc, + bool supportsResultTypeInferrence, SMLoc loc); /// Lookup an operation registered with the given name, or null if no /// operation with that name is registered. diff --git a/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h b/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h index f75d497867b8ce..de8181843e84c0 100644 --- a/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h +++ b/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h @@ -34,7 +34,8 @@ class Dialect { /// and a boolean indicating if the operation newly inserted (false if the /// operation already existed). 
std::pair - insertOperation(StringRef name, StringRef summary, StringRef desc, SMLoc loc); + insertOperation(StringRef name, StringRef summary, StringRef desc, + bool supportsResultTypeInferrence, SMLoc loc); /// Lookup an operation registered with the given name, or null if no /// operation with that name is registered. diff --git a/mlir/include/mlir/Tools/PDLL/ODS/Operation.h b/mlir/include/mlir/Tools/PDLL/ODS/Operation.h index c5b86e1733d0be..8ad36a6872b7c0 100644 --- a/mlir/include/mlir/Tools/PDLL/ODS/Operation.h +++ b/mlir/include/mlir/Tools/PDLL/ODS/Operation.h @@ -75,6 +75,12 @@ class OperandOrResult { /// Return the name of this value. StringRef getName() const { return name; } + /// Returns true if this value is variable length, i.e. if it is Variadic or + /// Optional. + bool isVariableLength() const { + return variableLengthKind != VariableLengthKind::Single; + } + /// Returns true if this value is variadic (Note this is false if the value is /// Optional). bool isVariadic() const { @@ -157,8 +163,12 @@ class Operation { /// Returns the results of this operation. ArrayRef getResults() const { return results; } + /// Return if the operation is known to support result type inferrence. + bool hasResultTypeInferrence() const { return supportsTypeInferrence; } + private: - Operation(StringRef name, StringRef summary, StringRef desc, SMLoc loc); + Operation(StringRef name, StringRef summary, StringRef desc, + bool supportsTypeInferrence, SMLoc loc); /// The name of the operation. std::string name; @@ -167,6 +177,9 @@ class Operation { std::string summary; std::string description; + /// Flag indicating if the operation is known to support type inferrence. + bool supportsTypeInferrence; + /// The source location of this operation. 
SMRange location; diff --git a/mlir/lib/Tools/PDLL/ODS/Context.cpp b/mlir/lib/Tools/PDLL/ODS/Context.cpp index 8186af9fe1b26f..00f7cb26b432d2 100644 --- a/mlir/lib/Tools/PDLL/ODS/Context.cpp +++ b/mlir/lib/Tools/PDLL/ODS/Context.cpp @@ -59,13 +59,12 @@ const Dialect *Context::lookupDialect(StringRef name) const { return it == dialects.end() ? nullptr : &*it->second; } -std::pair Context::insertOperation(StringRef name, - StringRef summary, - StringRef desc, - SMLoc loc) { +std::pair +Context::insertOperation(StringRef name, StringRef summary, StringRef desc, + bool supportsResultTypeInferrence, SMLoc loc) { std::pair dialectAndName = name.split('.'); return insertDialect(dialectAndName.first) - .insertOperation(name, summary, desc, loc); + .insertOperation(name, summary, desc, supportsResultTypeInferrence, loc); } const Operation *Context::lookupOperation(StringRef name) const { diff --git a/mlir/lib/Tools/PDLL/ODS/Dialect.cpp b/mlir/lib/Tools/PDLL/ODS/Dialect.cpp index ce9c23421c0e92..2e084c5d6cfd64 100644 --- a/mlir/lib/Tools/PDLL/ODS/Dialect.cpp +++ b/mlir/lib/Tools/PDLL/ODS/Dialect.cpp @@ -21,15 +21,15 @@ using namespace mlir::pdll::ods; Dialect::Dialect(StringRef name) : name(name.str()) {} Dialect::~Dialect() = default; -std::pair Dialect::insertOperation(StringRef name, - StringRef summary, - StringRef desc, - llvm::SMLoc loc) { +std::pair +Dialect::insertOperation(StringRef name, StringRef summary, StringRef desc, + bool supportsResultTypeInferrence, llvm::SMLoc loc) { std::unique_ptr &operation = operations[name]; if (operation) return std::make_pair(&*operation, /*wasInserted*/ false); - operation.reset(new Operation(name, summary, desc, loc)); + operation.reset( + new Operation(name, summary, desc, supportsResultTypeInferrence, loc)); return std::make_pair(&*operation, /*wasInserted*/ true); } diff --git a/mlir/lib/Tools/PDLL/ODS/Operation.cpp b/mlir/lib/Tools/PDLL/ODS/Operation.cpp index 121c6c8c4c886b..3b8a3a9e973333 100644 --- 
a/mlir/lib/Tools/PDLL/ODS/Operation.cpp +++ b/mlir/lib/Tools/PDLL/ODS/Operation.cpp @@ -18,8 +18,9 @@ using namespace mlir::pdll::ods; //===----------------------------------------------------------------------===// Operation::Operation(StringRef name, StringRef summary, StringRef desc, - llvm::SMLoc loc) + bool supportsTypeInferrence, llvm::SMLoc loc) : name(name.str()), summary(summary.str()), + supportsTypeInferrence(supportsTypeInferrence), location(loc, llvm::SMLoc::getFromPointer(loc.getPointer() + 1)) { llvm::raw_string_ostream descOS(description); raw_indented_ostream(descOS).printReindented(desc.rtrim(" \t")); diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp index 1672c11ca72948..49eaa41672f357 100644 --- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp +++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp @@ -76,6 +76,19 @@ class Parser { Rewrite, }; + /// The current specification context of an operations result type. This + /// indicates how the result types of an operation may be inferred. + enum class OpResultTypeContext { + /// The result types of the operation are not known to be inferred. + Explicit, + /// The result types of the operation are inferred from the root input of a + /// `replace` statement. + Replacement, + /// The result types of the operation are inferred by using the + /// `InferTypeOpInterface` interface provided by the operation. 
+ Interface, + }; + //===--------------------------------------------------------------------===// // Parsing //===--------------------------------------------------------------------===// @@ -280,7 +293,9 @@ class Parser { FailureOr parseMemberAccessExpr(ast::Expr *parentExpr); FailureOr parseOperationName(bool allowEmptyName = false); FailureOr parseWrappedOperationName(bool allowEmptyName); - FailureOr parseOperationExpr(); + FailureOr + parseOperationExpr(OpResultTypeContext inputResultTypeContext = + OpResultTypeContext::Explicit); FailureOr parseTupleExpr(); FailureOr parseTypeExpr(); FailureOr parseUnderscoreExpr(); @@ -378,6 +393,7 @@ class Parser { StringRef name, SMRange loc); FailureOr createOperationExpr(SMRange loc, const ast::OpNameDecl *name, + OpResultTypeContext resultTypeContext, MutableArrayRef operands, MutableArrayRef attributes, MutableArrayRef results); @@ -388,6 +404,8 @@ class Parser { LogicalResult validateOperationResults(SMRange loc, Optional name, const ods::Operation *odsOp, MutableArrayRef results); + void checkOperationResultTypeInferrence(SMRange loc, StringRef name, + const ods::Operation *odsOp); LogicalResult validateOperationOperandsOrResults( StringRef groupName, SMRange loc, Optional odsOpLoc, Optional name, MutableArrayRef values, @@ -795,11 +813,15 @@ void Parser::processTdIncludeRecords(llvm::RecordKeeper &tdRecords, for (llvm::Record *def : tdRecords.getAllDerivedDefinitions("Op")) { tblgen::Operator op(def); + // Check to see if this operation is known to support type inferrence. 
+ bool supportsResultTypeInferrence = + op.getTrait("::mlir::InferTypeOpInterface::Trait"); + bool inserted = false; ods::Operation *odsOp = nullptr; - std::tie(odsOp, inserted) = - odsContext.insertOperation(op.getOperationName(), op.getSummary(), - op.getDescription(), op.getLoc().front()); + std::tie(odsOp, inserted) = odsContext.insertOperation( + op.getOperationName(), op.getSummary(), op.getDescription(), + supportsResultTypeInferrence, op.getLoc().front()); // Ignore operations that have already been added. if (!inserted) @@ -1917,7 +1939,8 @@ Parser::parseWrappedOperationName(bool allowEmptyName) { return opNameDecl; } -FailureOr Parser::parseOperationExpr() { +FailureOr +Parser::parseOperationExpr(OpResultTypeContext inputResultTypeContext) { SMRange loc = curToken.getLoc(); consumeToken(Token::kw_op); @@ -1994,13 +2017,23 @@ FailureOr Parser::parseOperationExpr() { return failure(); } - // Check for the optional list of result types. + // Handle the result types of the operation. SmallVector resultTypes; + OpResultTypeContext resultTypeContext = inputResultTypeContext; + + // Check for an explicit list of result types. if (consumeIf(Token::arrow)) { if (failed(parseToken(Token::l_paren, "expected `(` before operation result type list"))) return failure(); + // If result types are provided, initially assume that the operation does + // not rely on type inferrence. We don't assert that it isn't, because we + // may be inferring the value of some type/type range variables, but given + // that these variables may be defined in calls we can't always discern when + // this is the case. + resultTypeContext = OpResultTypeContext::Explicit; + // Handle the case of an empty result list. if (!consumeIf(Token::r_paren)) { do { @@ -2027,10 +2060,14 @@ FailureOr Parser::parseOperationExpr() { // "unconstrained results". 
resultTypes.push_back(createImplicitRangeVar( ast::TypeRangeConstraintDecl::create(ctx, loc), typeRangeTy)); + } else if (resultTypeContext == OpResultTypeContext::Explicit) { + // If the result list isn't specified and we are in a rewrite, try to infer + // them at runtime instead. + resultTypeContext = OpResultTypeContext::Interface; } - return createOperationExpr(loc, *opNameDecl, operands, attributes, - resultTypes); + return createOperationExpr(loc, *opNameDecl, resultTypeContext, operands, + attributes, resultTypes); } FailureOr Parser::parseTupleExpr() { @@ -2294,7 +2331,13 @@ FailureOr Parser::parseReplaceStmt() { "expected `)` after replacement values"))) return failure(); } else { - FailureOr replExpr = parseExpr(); + // Handle replacement with an operation uniquely, as the replacement + // operation supports type inferrence from the root operation. + FailureOr replExpr; + if (curToken.is(Token::kw_op)) + replExpr = parseOperationExpr(OpResultTypeContext::Replacement); + else + replExpr = parseExpr(); if (failed(replExpr)) return failure(); replValues.emplace_back(*replExpr); @@ -2710,6 +2753,7 @@ FailureOr Parser::validateMemberAccess(ast::Expr *parentExpr, FailureOr Parser::createOperationExpr( SMRange loc, const ast::OpNameDecl *name, + OpResultTypeContext resultTypeContext, MutableArrayRef operands, MutableArrayRef attributes, MutableArrayRef results) { @@ -2731,9 +2775,22 @@ FailureOr Parser::createOperationExpr( } } - // Verify the result types. - if (failed(validateOperationResults(loc, opNameRef, odsOp, results))) - return failure(); + assert( + (resultTypeContext == OpResultTypeContext::Explicit || results.empty()) && + "unexpected inferrence when results were explicitly specified"); + + // If we aren't relying on type inferrence, or explicit results were provided, + // validate them. 
+ if (resultTypeContext == OpResultTypeContext::Explicit) { + if (failed(validateOperationResults(loc, opNameRef, odsOp, results))) + return failure(); + + // Validate the use of interface based type inferrence for this operation. + } else if (resultTypeContext == OpResultTypeContext::Interface) { + assert(opNameRef && + "expected valid operation name when inferring operation results"); + checkOperationResultTypeInferrence(loc, *opNameRef, odsOp); + } return ast::OperationExpr::create(ctx, loc, name, operands, results, attributes); @@ -2758,6 +2815,48 @@ Parser::validateOperationResults(SMRange loc, Optional name, results, odsOp ? odsOp->getResults() : llvm::None, typeTy, typeRangeTy); } +void Parser::checkOperationResultTypeInferrence(SMRange loc, StringRef opName, + const ods::Operation *odsOp) { + // If the operation might not have inferrence support, emit a warning to the + // user. We don't emit an error because the interface might be added to the + // operation at runtime. It's rare, but it could still happen. We emit a + // warning here instead. + + // Handle inferrence warnings for unknown operations. + if (!odsOp) { + ctx.getDiagEngine().emitWarning( + loc, llvm::formatv( + "operation result types are marked to be inferred, but " + "`{0}` is unknown. Ensure that `{0}` supports zero " + "results or implements `InferTypeOpInterface`. Include " + "the ODS definition of this operation to remove this warning.", + opName)); + return; + } + + // Handle inferrence warnings for known operations that expected at least one + // result, but don't have inference support. An elided results list can mean + // "zero-results", and we don't want to warn when that is the expected + // behavior. 
+ bool requiresInferrence = + llvm::any_of(odsOp->getResults(), [](const ods::OperandOrResult &result) { + return !result.isVariableLength(); + }); + if (requiresInferrence && !odsOp->hasResultTypeInferrence()) { + ast::InFlightDiagnostic diag = ctx.getDiagEngine().emitWarning( + loc, + llvm::formatv("operation result types are marked to be inferred, but " + "`{0}` does not provide an implementation of " + "`InferTypeOpInterface`. Ensure that `{0}` attaches " + "`InferTypeOpInterface` at runtime, or add support to " + "the ODS definition to remove this warning.", + opName)); + diag->attachNote(llvm::formatv("see the definition of `{0}` here", opName), + odsOp->getLoc()); + return; + } +} + LogicalResult Parser::validateOperationOperandsOrResults( StringRef groupName, SMRange loc, Optional odsOpLoc, Optional name, MutableArrayRef values, diff --git a/mlir/test/mlir-pdll/Parser/expr-failure.pdll b/mlir/test/mlir-pdll/Parser/expr-failure.pdll index 4a766c817d23fc..de8c6163895e34 100644 --- a/mlir/test/mlir-pdll/Parser/expr-failure.pdll +++ b/mlir/test/mlir-pdll/Parser/expr-failure.pdll @@ -296,6 +296,36 @@ Pattern { // ----- +Pattern { + // CHECK: warning: operation result types are marked to be inferred, but + // CHECK-SAME: `test.unknown_inferred_result_op` is unknown. + // CHECK-SAME: Ensure that `test.unknown_inferred_result_op` supports zero + // CHECK-SAME: results or implements `InferTypeOpInterface`. + // CHECK-SAME: Include the ODS definition of this operation to remove this + // CHECK-SAME: warning. + rewrite _: Op with { + op; + }; +} + +// ----- + +#include "include/ops.td" + +Pattern { + // CHECK: warning: operation result types are marked to be inferred, but + // CHECK-SAME: `test.multiple_single_result` does not provide an implementation + // CHECK-SAME: of `InferTypeOpInterface`. 
Ensure that `test.multiple_single_result` + // CHECK-SAME: attaches `InferTypeOpInterface` at runtime, or add support + // CHECK-SAME: to the ODS definition to remove this warning. + // CHECK: see the definition of `test.multiple_single_result` here + rewrite _: Op with { + op; + }; +} + +// ----- + //===----------------------------------------------------------------------===// // `type` Expr //===----------------------------------------------------------------------===// From 01652d889cf31708bc7ebbf18ef0a4d925112bc7 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Sat, 30 Apr 2022 00:29:49 -0700 Subject: [PATCH 865/908] [mlir:PDLL-LSP] Add a custom LSP command for viewing the output of PDLL This commit adds a new PDLL specific LSP command, pdll.viewOutput, that allows for viewing the intermediate outputs of a given PDLL file. The available intermediate forms currently mirror those in mlir-pdll, namely: AST, MLIR, CPP. This is extremely useful for a developer of PDLL, as it simplifies various testing, and is also quite useful for users as they can easily view what is actually being generated for their PDLL files. This new command is added to the vscode client, and is available in the right client context menu of PDLL files, or via the vscode command palette. 
Differential Revision: https://reviews.llvm.org/D124783 --- .../CompilationDatabase.cpp | 2 +- .../Tools/mlir-pdll-lsp-server/CMakeLists.txt | 2 + .../Tools/mlir-pdll-lsp-server/LSPServer.cpp | 21 ++++- .../Tools/mlir-pdll-lsp-server/PDLLServer.cpp | 69 ++++++++++++++++- .../Tools/mlir-pdll-lsp-server/PDLLServer.h | 7 ++ .../Tools/mlir-pdll-lsp-server/Protocol.cpp | 77 +++++++++++++++++++ .../lib/Tools/mlir-pdll-lsp-server/Protocol.h | 69 +++++++++++++++++ .../mlir-pdll-lsp-server/view-output.test | 43 +++++++++++ mlir/utils/vscode/package.json | 15 +++- .../vscode/src/PDLL/commands/viewOutput.ts | 66 ++++++++++++++++ mlir/utils/vscode/src/PDLL/pdll.ts | 12 +++ mlir/utils/vscode/src/command.ts | 25 ++++++ mlir/utils/vscode/src/extension.ts | 2 + mlir/utils/vscode/src/mlirContext.ts | 15 ++++ 14 files changed, 421 insertions(+), 4 deletions(-) create mode 100644 mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp create mode 100644 mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h create mode 100644 mlir/test/mlir-pdll-lsp-server/view-output.test create mode 100644 mlir/utils/vscode/src/PDLL/commands/viewOutput.ts create mode 100644 mlir/utils/vscode/src/PDLL/pdll.ts create mode 100644 mlir/utils/vscode/src/command.ts diff --git a/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp index 31a6a308252929..229e50a5de2354 100644 --- a/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp +++ b/mlir/lib/Tools/lsp-server-support/CompilationDatabase.cpp @@ -8,7 +8,7 @@ #include "CompilationDatabase.h" #include "../lsp-server-support/Logging.h" -#include "../lsp-server-support/Protocol.h" +#include "Protocol.h" #include "mlir/Support/FileUtilities.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt b/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt index f9ff5470eca0ea..bf25b7e0a64f3c 100644 --- 
a/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/CMakeLists.txt @@ -1,12 +1,14 @@ llvm_add_library(MLIRPdllLspServerLib LSPServer.cpp PDLLServer.cpp + Protocol.cpp MlirPdllLspServerMain.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Tools/mlir-pdll-lsp-server LINK_LIBS PUBLIC + MLIRPDLLCodeGen MLIRPDLLParser MLIRLspServerSupportLib ) diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp index 5f4967ffb8e3ad..ff955a7fc30ea8 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/LSPServer.cpp @@ -9,9 +9,9 @@ #include "LSPServer.h" #include "../lsp-server-support/Logging.h" -#include "../lsp-server-support/Protocol.h" #include "../lsp-server-support/Transport.h" #include "PDLLServer.h" +#include "Protocol.h" #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/StringMap.h" @@ -82,6 +82,12 @@ struct LSPServer { void onSignatureHelp(const TextDocumentPositionParams ¶ms, Callback reply); + //===--------------------------------------------------------------------===// + // PDLL View Output + + void onPDLLViewOutput(const PDLLViewOutputParams ¶ms, + Callback> reply); + //===--------------------------------------------------------------------===// // Fields //===--------------------------------------------------------------------===// @@ -248,6 +254,15 @@ void LSPServer::onSignatureHelp(const TextDocumentPositionParams ¶ms, reply(server.getSignatureHelp(params.textDocument.uri, params.position)); } +//===----------------------------------------------------------------------===// +// PDLL ViewOutput + +void LSPServer::onPDLLViewOutput( + const PDLLViewOutputParams ¶ms, + Callback> reply) { + reply(server.getPDLLViewOutput(params.uri, params.kind)); +} + //===----------------------------------------------------------------------===// // Entry Point 
//===----------------------------------------------------------------------===// @@ -296,6 +311,10 @@ LogicalResult mlir::lsp::runPdllLSPServer(PDLLServer &server, messageHandler.method("textDocument/signatureHelp", &lspServer, &LSPServer::onSignatureHelp); + // PDLL ViewOutput + messageHandler.method("pdll/viewOutput", &lspServer, + &LSPServer::onPDLLViewOutput); + // Diagnostics lspServer.publishDiagnostics = messageHandler.outgoingNotification( diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp index cbfd421c722241..215a0e67452419 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp @@ -10,11 +10,14 @@ #include "../lsp-server-support/CompilationDatabase.h" #include "../lsp-server-support/Logging.h" -#include "../lsp-server-support/Protocol.h" #include "../lsp-server-support/SourceMgrUtils.h" +#include "Protocol.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/Tools/PDLL/AST/Context.h" #include "mlir/Tools/PDLL/AST/Nodes.h" #include "mlir/Tools/PDLL/AST/Types.h" +#include "mlir/Tools/PDLL/CodeGen/CPPGen.h" +#include "mlir/Tools/PDLL/CodeGen/MLIRGen.h" #include "mlir/Tools/PDLL/ODS/Constraint.h" #include "mlir/Tools/PDLL/ODS/Context.h" #include "mlir/Tools/PDLL/ODS/Dialect.h" @@ -304,6 +307,12 @@ struct PDLDocument { lsp::SignatureHelp getSignatureHelp(const lsp::URIForFile &uri, const lsp::Position &helpPos); + //===--------------------------------------------------------------------===// + // PDLL ViewOutput + //===--------------------------------------------------------------------===// + + void getPDLLViewOutput(raw_ostream &os, lsp::PDLLViewOutputKind kind); + //===--------------------------------------------------------------------===// // Fields //===--------------------------------------------------------------------===// @@ -1085,6 +1094,39 @@ lsp::SignatureHelp PDLDocument::getSignatureHelp(const lsp::URIForFile &uri, 
return signatureHelp; } +//===----------------------------------------------------------------------===// +// PDLL ViewOutput +//===----------------------------------------------------------------------===// + +void PDLDocument::getPDLLViewOutput(raw_ostream &os, + lsp::PDLLViewOutputKind kind) { + if (failed(astModule)) + return; + if (kind == lsp::PDLLViewOutputKind::AST) { + (*astModule)->print(os); + return; + } + + // Generate the MLIR for the ast module. We also capture diagnostics here to + // show to the user, which may be useful if PDLL isn't capturing constraints + // expected by PDL. + MLIRContext mlirContext; + SourceMgrDiagnosticHandler diagHandler(sourceMgr, &mlirContext, os); + OwningOpRef pdlModule = + codegenPDLLToMLIR(&mlirContext, astContext, sourceMgr, **astModule); + if (!pdlModule) + return; + if (kind == lsp::PDLLViewOutputKind::MLIR) { + pdlModule->print(os, OpPrintingFlags().enableDebugInfo()); + return; + } + + // Otherwise, generate the output for C++. + assert(kind == lsp::PDLLViewOutputKind::CPP && + "unexpected PDLLViewOutputKind"); + codegenPDLLToCPP(**astModule, *pdlModule, os); +} + //===----------------------------------------------------------------------===// // PDLTextFileChunk //===----------------------------------------------------------------------===// @@ -1148,6 +1190,7 @@ class PDLTextFile { lsp::Position completePos); lsp::SignatureHelp getSignatureHelp(const lsp::URIForFile &uri, lsp::Position helpPos); + lsp::PDLLViewOutputResult getPDLLViewOutput(lsp::PDLLViewOutputKind kind); private: /// Find the PDL document that contains the given position, and update the @@ -1321,6 +1364,21 @@ lsp::SignatureHelp PDLTextFile::getSignatureHelp(const lsp::URIForFile &uri, return getChunkFor(helpPos).document.getSignatureHelp(uri, helpPos); } +lsp::PDLLViewOutputResult +PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) { + lsp::PDLLViewOutputResult result; + { + llvm::raw_string_ostream outputOS(result.output); + 
llvm::interleave( + llvm::make_pointee_range(chunks), + [&](PDLTextFileChunk &chunk) { + chunk.document.getPDLLViewOutput(outputOS, kind); + }, + [&] { outputOS << "\n// -----\n\n"; }); + } + return result; +} + PDLTextFileChunk &PDLTextFile::getChunkFor(lsp::Position &pos) { if (chunks.size() == 1) return *chunks.front(); @@ -1439,3 +1497,12 @@ lsp::SignatureHelp lsp::PDLLServer::getSignatureHelp(const URIForFile &uri, return fileIt->second->getSignatureHelp(uri, helpPos); return SignatureHelp(); } + +Optional +lsp::PDLLServer::getPDLLViewOutput(const URIForFile &uri, + PDLLViewOutputKind kind) { + auto fileIt = impl->files.find(uri.file()); + if (fileIt != impl->files.end()) + return fileIt->second->getPDLLViewOutput(kind); + return llvm::None; +} diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h index 12716f491e2d17..0fecc35b8f5a53 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.h @@ -18,6 +18,8 @@ namespace mlir { namespace lsp { struct Diagnostic; class CompilationDatabase; +struct PDLLViewOutputResult; +enum class PDLLViewOutputKind; struct CompletionList; struct DocumentLink; struct DocumentSymbol; @@ -88,6 +90,11 @@ class PDLLServer { SignatureHelp getSignatureHelp(const URIForFile &uri, const Position &helpPos); + /// Get the output of the given PDLL file, or None if there is no valid + /// output. 
+ Optional getPDLLViewOutput(const URIForFile &uri, + PDLLViewOutputKind kind); + private: struct Impl; std::unique_ptr impl; diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp new file mode 100644 index 00000000000000..8b767ee31d0741 --- /dev/null +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.cpp @@ -0,0 +1,77 @@ +//===--- Protocol.cpp - Language Server Protocol Implementation -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the serialization code for the PDLL specific LSP structs. +// +//===----------------------------------------------------------------------===// + +#include "Protocol.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::lsp; + +// Helper that doesn't treat `null` and absent fields as failures. +template +static bool mapOptOrNull(const llvm::json::Value ¶ms, + llvm::StringLiteral prop, T &out, + llvm::json::Path path) { + const llvm::json::Object *o = params.getAsObject(); + assert(o); + + // Field is missing or null. 
+ auto *v = o->get(prop); + if (!v || v->getAsNull().hasValue()) + return true; + return fromJSON(*v, out, path.field(prop)); +} + +//===----------------------------------------------------------------------===// +// PDLLViewOutputParams +//===----------------------------------------------------------------------===// + +bool mlir::lsp::fromJSON(const llvm::json::Value &value, + PDLLViewOutputKind &result, llvm::json::Path path) { + if (Optional str = value.getAsString()) { + if (*str == "ast") { + result = PDLLViewOutputKind::AST; + return true; + } + if (*str == "mlir") { + result = PDLLViewOutputKind::MLIR; + return true; + } + if (*str == "cpp") { + result = PDLLViewOutputKind::CPP; + return true; + } + } + return false; +} + +bool mlir::lsp::fromJSON(const llvm::json::Value &value, + PDLLViewOutputParams &result, llvm::json::Path path) { + llvm::json::ObjectMapper o(value, path); + return o && o.map("uri", result.uri) && o.map("kind", result.kind); +} + +//===----------------------------------------------------------------------===// +// PDLLViewOutputResult +//===----------------------------------------------------------------------===// + +llvm::json::Value mlir::lsp::toJSON(const PDLLViewOutputResult &value) { + return llvm::json::Object{{"output", value.output}}; +} diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h new file mode 100644 index 00000000000000..3de2ae0f56c915 --- /dev/null +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/Protocol.h @@ -0,0 +1,69 @@ +//===--- Protocol.h - Language Server Protocol Implementation ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains structs for LSP commands that are specific to the PDLL +// server. +// +// Each struct has a toJSON and fromJSON function, that converts between +// the struct and a JSON representation. (See JSON.h) +// +// Some structs also have operator<< serialization. This is for debugging and +// tests, and is not generally machine-readable. +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_MLIR_TOOLS_MLIRPDLLLSPSERVER_PROTOCOL_H_ +#define LIB_MLIR_TOOLS_MLIRPDLLLSPSERVER_PROTOCOL_H_ + +#include "../lsp-server-support/Protocol.h" + +namespace mlir { +namespace lsp { +//===----------------------------------------------------------------------===// +// PDLLViewOutputParams +//===----------------------------------------------------------------------===// + +/// The type of output to view from PDLL. +enum class PDLLViewOutputKind { + AST, + MLIR, + CPP, +}; + +/// Represents the parameters used when viewing the output of a PDLL file. +struct PDLLViewOutputParams { + /// The URI of the document to view the output of. + URIForFile uri; + + /// The kind of output to generate. + PDLLViewOutputKind kind; +}; + +/// Add support for JSON serialization. +bool fromJSON(const llvm::json::Value &value, PDLLViewOutputKind &result, + llvm::json::Path path); +bool fromJSON(const llvm::json::Value &value, PDLLViewOutputParams &result, + llvm::json::Path path); + +//===----------------------------------------------------------------------===// +// PDLLViewOutputResult +//===----------------------------------------------------------------------===// + +/// Represents the result of viewing the output of a PDLL file. +struct PDLLViewOutputResult { + /// The string representation of the output. + std::string output; +}; + +/// Add support for JSON serialization. 
+llvm::json::Value toJSON(const PDLLViewOutputResult &value); + +} // namespace lsp +} // namespace mlir + +#endif diff --git a/mlir/test/mlir-pdll-lsp-server/view-output.test b/mlir/test/mlir-pdll-lsp-server/view-output.test new file mode 100644 index 00000000000000..c45ac2ed810fed --- /dev/null +++ b/mlir/test/mlir-pdll-lsp-server/view-output.test @@ -0,0 +1,43 @@ +// RUN: mlir-pdll-lsp-server -lit-test < %s | FileCheck -strict-whitespace %s +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"pdll","capabilities":{},"trace":"off"}} +// ----- +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ + "uri":"test:///foo.pdll", + "languageId":"pdll", + "version":1, + "text":"Pattern TestPat => erase op;" +}}} +// ----- +{"jsonrpc":"2.0","id":1,"method":"pdll/viewOutput","params":{ + "uri":"test:///foo.pdll", + "kind":"ast" +}} +// CHECK: "id": 1 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": { +// CHECK-NEXT: "output": "-Module{{.*}}PatternDecl{{.*}}Name{{.*}}\n" +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":2,"method":"pdll/viewOutput","params":{ + "uri":"test:///foo.pdll", + "kind":"mlir" +}} +// CHECK: "id": 2 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": { +// CHECK-NEXT: "output": "module {\n pdl.pattern @TestPat {{.*}}\n" +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":3,"method":"pdll/viewOutput","params":{ + "uri":"test:///foo.pdll", + "kind":"cpp" +}} +// CHECK: "id": 3 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": { +// CHECK-NEXT: "output": "{{.*}}struct TestPat : ::mlir::PDLPatternModule{{.*}}\n" +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":3,"method":"shutdown"} +// ----- +{"jsonrpc":"2.0","method":"exit"} diff --git a/mlir/utils/vscode/package.json b/mlir/utils/vscode/package.json index cca2ba1ace033d..d0a0493ff54686 100644 --- a/mlir/utils/vscode/package.json +++ b/mlir/utils/vscode/package.json @@ -182,7 +182,20 @@ { "command": "mlir.restart", 
"title": "mlir: Restart language server" + }, + { + "command": "mlir.viewPDLLOutput", + "title": "mlir-pdll: View PDLL output" } - ] + ], + "menus": { + "editor/context": [ + { + "command": "mlir.viewPDLLOutput", + "group": "z_commands", + "when": "editorLangId == pdll" + } + ] + } } } diff --git a/mlir/utils/vscode/src/PDLL/commands/viewOutput.ts b/mlir/utils/vscode/src/PDLL/commands/viewOutput.ts new file mode 100644 index 00000000000000..4a666a0a2c07d3 --- /dev/null +++ b/mlir/utils/vscode/src/PDLL/commands/viewOutput.ts @@ -0,0 +1,66 @@ +import * as vscode from 'vscode' + +import {Command} from '../../command'; +import {MLIRContext} from '../../mlirContext'; + +/** + * The parameters to the pdll/viewOutput command. These parameters are: + * - `uri`: The URI of the file to view. + * - `kind`: The kind of the output to generate. + */ +type ViewOutputParams = Partial<{uri : string, kind : string}>; + +/** + * The output of the commands: + * - `output`: The output string of the command, e.g. a .mlir PDL string. + */ +type ViewOutputResult = Partial<{output : string}>; + +/** + * A command that displays the output of the current PDLL document. + */ +export class ViewPDLLCommand extends Command { + constructor(context: MLIRContext) { super('mlir.viewPDLLOutput', context); } + + async execute() { + const editor = vscode.window.activeTextEditor; + if (editor.document.languageId != 'pdll') + return; + + // Check to see if a language client is active for this document. + const workspaceFolder = + vscode.workspace.getWorkspaceFolder(editor.document.uri); + const pdllClient = this.context.getLanguageClient(workspaceFolder, "pdll"); + if (!pdllClient) { + return; + } + + // Ask the user for the desired output type. + const outputType = + await vscode.window.showQuickPick([ 'ast', 'mlir', 'cpp' ]); + if (!outputType) { + return; + } + + // If we have the language client, ask it to try compiling the document. 
+ let outputParams: ViewOutputParams = { + uri : editor.document.uri.toString(), + kind : outputType, + }; + const result: ViewOutputResult|undefined = + await pdllClient.sendRequest('pdll/viewOutput', outputParams); + if (!result || result.output.length === 0) { + return; + } + + // Display the output in a new editor. + let outputFileType = 'plaintext'; + if (outputType == 'mlir') { + outputFileType = 'mlir'; + } else if (outputType == 'cpp') { + outputFileType = 'cpp'; + } + await vscode.workspace.openTextDocument( + {language : outputFileType, content : result.output}); + } +} diff --git a/mlir/utils/vscode/src/PDLL/pdll.ts b/mlir/utils/vscode/src/PDLL/pdll.ts new file mode 100644 index 00000000000000..bc65837be2a847 --- /dev/null +++ b/mlir/utils/vscode/src/PDLL/pdll.ts @@ -0,0 +1,12 @@ +import * as vscode from 'vscode'; + +import {MLIRContext} from '../mlirContext'; +import {ViewPDLLCommand} from './commands/viewOutput'; + +/** + * Register the necessary context and commands for PDLL. + */ +export function registerPDLLCommands(context: vscode.ExtensionContext, + mlirContext: MLIRContext) { + context.subscriptions.push(new ViewPDLLCommand(mlirContext)); +} diff --git a/mlir/utils/vscode/src/command.ts b/mlir/utils/vscode/src/command.ts new file mode 100644 index 00000000000000..4623a5bed085ae --- /dev/null +++ b/mlir/utils/vscode/src/command.ts @@ -0,0 +1,25 @@ +import * as vscode from 'vscode'; +import {MLIRContext} from './mlirContext'; + +/** + * This class represents a base vscode command. It handles all of the necessary + * command registration and disposal boilerplate. 
+ */ +export abstract class Command extends vscode.Disposable { + private disposable: vscode.Disposable; + protected context: MLIRContext; + + constructor(command: string, context: MLIRContext) { + super(() => this.dispose()); + this.disposable = + vscode.commands.registerCommand(command, this.execute, this); + this.context = context; + } + + dispose() { this.disposable && this.disposable.dispose(); } + + /** + * The function executed when this command is invoked. + */ + abstract execute(...args: any[]): any; +} diff --git a/mlir/utils/vscode/src/extension.ts b/mlir/utils/vscode/src/extension.ts index 7d2d4a7383ebce..72c754d0c9642d 100644 --- a/mlir/utils/vscode/src/extension.ts +++ b/mlir/utils/vscode/src/extension.ts @@ -1,6 +1,7 @@ import * as vscode from 'vscode'; import {MLIRContext} from './mlirContext'; +import {registerPDLLCommands} from './PDLL/pdll'; /** * This method is called when the extension is activated. The extension is @@ -20,6 +21,7 @@ export function activate(context: vscode.ExtensionContext) { mlirContext.dispose(); await mlirContext.activate(outputChannel); })); + registerPDLLCommands(context, mlirContext); mlirContext.activate(outputChannel); } diff --git a/mlir/utils/vscode/src/mlirContext.ts b/mlir/utils/vscode/src/mlirContext.ts index c0c2b53edbaf30..859d61663186c5 100644 --- a/mlir/utils/vscode/src/mlirContext.ts +++ b/mlir/utils/vscode/src/mlirContext.ts @@ -356,6 +356,21 @@ export class MLIRContext implements vscode.Disposable { return this.resolvePath(serverPath, defaultPath, workspaceFolder); } + /** + * Return the language client for the given language and workspace folder, or + * null if no client is active. + */ + getLanguageClient(workspaceFolder: vscode.WorkspaceFolder, + languageName: string): vscodelc.LanguageClient { + let workspaceFolderStr = + workspaceFolder ? 
workspaceFolder.uri.toString() : ""; + let folderContext = this.workspaceFolders.get(workspaceFolderStr); + if (!folderContext) { + return null; + } + return folderContext.clients.get(languageName); + } + dispose() { this.subscriptions.forEach((d) => { d.dispose(); }); this.subscriptions = []; From 0429472efe6a894a6554d75fd77baeb9cdb32dd5 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Sat, 30 Apr 2022 00:32:24 -0700 Subject: [PATCH 866/908] [mlir:PDLL] Fix signature help for operation operands We were currently only completing on the first operand because the completion check was outside of the parse loop. Differential Revision: https://reviews.llvm.org/D124784 --- mlir/lib/Tools/PDLL/Parser/Parser.cpp | 12 +- .../mlir-pdll-lsp-server/include/included.td | 10 +- .../mlir-pdll-lsp-server/signature-help.test | 154 +++++++++++++++++- 3 files changed, 162 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp index 49eaa41672f357..e7ec5a047b9fc1 100644 --- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp +++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp @@ -1982,14 +1982,14 @@ Parser::parseOperationExpr(OpResultTypeContext inputResultTypeContext) { ast::ValueRangeConstraintDecl::create(ctx, loc), valueRangeTy)); } } else if (!consumeIf(Token::r_paren)) { - // Check for operand signature code completion. - if (curToken.is(Token::code_complete)) { - codeCompleteOperationOperandsSignature(opName, operands.size()); - return failure(); - } - // If the operand list was specified and non-empty, parse the operands. do { + // Check for operand signature code completion. 
+ if (curToken.is(Token::code_complete)) { + codeCompleteOperationOperandsSignature(opName, operands.size()); + return failure(); + } + FailureOr operand = parseExpr(); if (failed(operand)) return failure(); diff --git a/mlir/test/mlir-pdll-lsp-server/include/included.td b/mlir/test/mlir-pdll-lsp-server/include/included.td index 881abd0ba95934..21ab1b300f8694 100644 --- a/mlir/test/mlir-pdll-lsp-server/include/included.td +++ b/mlir/test/mlir-pdll-lsp-server/include/included.td @@ -1,4 +1,10 @@ include "mlir/IR/OpBase.td" -// This file is merely to test the processing of includes, it has -// no other purpose or contents. +def Test_Dialect : Dialect { + let name = "test"; +} + +def OpMulti : Op { + let arguments = (outs I64:$operand, I64:$operand2); + let results = (outs I64:$result, I64:$result2); +} diff --git a/mlir/test/mlir-pdll-lsp-server/signature-help.test b/mlir/test/mlir-pdll-lsp-server/signature-help.test index 30423ea4b6eb80..d7e88d40d370de 100644 --- a/mlir/test/mlir-pdll-lsp-server/signature-help.test +++ b/mlir/test/mlir-pdll-lsp-server/signature-help.test @@ -1,16 +1,16 @@ -// RUN: mlir-pdll-lsp-server -lit-test < %s | FileCheck -strict-whitespace %s +// RUN: mlir-pdll-lsp-server -pdll-extra-dir %S -pdll-extra-dir %S/../../include -lit-test < %s | FileCheck -strict-whitespace %s {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"pdll","capabilities":{},"trace":"off"}} // ----- {"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{ "uri":"test:///foo.pdll", "languageId":"pdll", "version":1, - "text":"Constraint ValueCst(value: Value);\nPattern {\nlet root = op() -> ();\nValueCst(root);\nerase root;\n}" + "text":"#include \"include/included.td\"\nConstraint ValueCst(value: Value, value2: Value);\nPattern {\nlet root = op(val: Value, val) -> (ty: Type, ty);\nValueCst(root.result, root.result2);\nerase root;\n}" }}} // ----- {"jsonrpc":"2.0","id":1,"method":"textDocument/signatureHelp","params":{ 
"textDocument":{"uri":"test:///foo.pdll"}, - "position":{"line":2,"character":23} + "position":{"line":3,"character":26} }} // CHECK: "id": 1 // CHECK-NEXT: "jsonrpc": "2.0", @@ -19,6 +19,26 @@ // CHECK-NEXT: "activeSignature": 0, // CHECK-NEXT: "signatures": [ // CHECK-NEXT: { +// CHECK-NEXT: "documentation": "`op` ODS operand specification", +// CHECK-NEXT: "label": "(operand: Value, operand2: Value)", +// CHECK-NEXT: "parameters": [ +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 1, +// CHECK-NEXT: 15 +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 17, +// CHECK-NEXT: 32 +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { // CHECK-NEXT: "documentation": "Generic operation operand specification", // CHECK-NEXT: "label": "(: ValueRange)", // CHECK-NEXT: "parameters": [ @@ -36,7 +56,40 @@ // ----- {"jsonrpc":"2.0","id":1,"method":"textDocument/signatureHelp","params":{ "textDocument":{"uri":"test:///foo.pdll"}, - "position":{"line":2,"character":29} + "position":{"line":3,"character":38} +}} +// CHECK: "id": 1 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": { +// CHECK-NEXT: "activeParameter": 1, +// CHECK-NEXT: "activeSignature": 0, +// CHECK-NEXT: "signatures": [ +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "`op` ODS operand specification", +// CHECK-NEXT: "label": "(operand: Value, operand2: Value)", +// CHECK-NEXT: "parameters": [ +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 1, +// CHECK-NEXT: 15 +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 17, +// CHECK-NEXT: 32 +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// 
CHECK-NEXT: ] +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":1,"method":"textDocument/signatureHelp","params":{ + "textDocument":{"uri":"test:///foo.pdll"}, + "position":{"line":3,"character":47} }} // CHECK: "id": 1 // CHECK-NEXT: "jsonrpc": "2.0", @@ -45,6 +98,26 @@ // CHECK-NEXT: "activeSignature": 0, // CHECK-NEXT: "signatures": [ // CHECK-NEXT: { +// CHECK-NEXT: "documentation": "`op` ODS result specification", +// CHECK-NEXT: "label": "(result: Type, result2: Type)", +// CHECK-NEXT: "parameters": [ +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 1, +// CHECK-NEXT: 13 +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 15, +// CHECK-NEXT: 28 +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { // CHECK-NEXT: "documentation": "Generic operation result specification", // CHECK-NEXT: "label": "(: TypeRange)", // CHECK-NEXT: "parameters": [ @@ -62,7 +135,40 @@ // ----- {"jsonrpc":"2.0","id":1,"method":"textDocument/signatureHelp","params":{ "textDocument":{"uri":"test:///foo.pdll"}, - "position":{"line":3,"character":9} + "position":{"line":3,"character":57} +}} +// CHECK: "id": 1 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": { +// CHECK-NEXT: "activeParameter": 1, +// CHECK-NEXT: "activeSignature": 0, +// CHECK-NEXT: "signatures": [ +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "`op` ODS result specification", +// CHECK-NEXT: "label": "(result: Type, result2: Type)", +// CHECK-NEXT: "parameters": [ +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 1, +// CHECK-NEXT: 13 +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "documentation": "64-bit signless integer", +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 15, +// CHECK-NEXT: 28 +// 
CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":1,"method":"textDocument/signatureHelp","params":{ + "textDocument":{"uri":"test:///foo.pdll"}, + "position":{"line":4,"character":9} }} // CHECK: "id": 1 // CHECK-NEXT: "jsonrpc": "2.0", @@ -71,13 +177,49 @@ // CHECK-NEXT: "activeSignature": 0, // CHECK-NEXT: "signatures": [ // CHECK-NEXT: { -// CHECK-NEXT: "label": "ValueCst(value: Value) -> Tuple<>", +// CHECK-NEXT: "label": "ValueCst(value: Value, value2: Value) -> Tuple<>", // CHECK-NEXT: "parameters": [ // CHECK-NEXT: { // CHECK-NEXT: "label": [ // CHECK-NEXT: 9, // CHECK-NEXT: 21 // CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 23, +// CHECK-NEXT: 36 +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// ----- +{"jsonrpc":"2.0","id":1,"method":"textDocument/signatureHelp","params":{ + "textDocument":{"uri":"test:///foo.pdll"}, + "position":{"line":4,"character":21} +}} +// CHECK: "id": 1 +// CHECK-NEXT: "jsonrpc": "2.0", +// CHECK-NEXT: "result": { +// CHECK-NEXT: "activeParameter": 1, +// CHECK-NEXT: "activeSignature": 0, +// CHECK-NEXT: "signatures": [ +// CHECK-NEXT: { +// CHECK-NEXT: "label": "ValueCst(value: Value, value2: Value) -> Tuple<>", +// CHECK-NEXT: "parameters": [ +// CHECK-NEXT: { +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 9, +// CHECK-NEXT: 21 +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "label": [ +// CHECK-NEXT: 23, +// CHECK-NEXT: 36 +// CHECK-NEXT: ] // CHECK-NEXT: } // CHECK-NEXT: ] // CHECK-NEXT: } From 1c2edb026ed67ddbb30ebe3e2d2f4f17a882a881 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Mon, 2 May 2022 19:24:53 -0700 Subject: [PATCH 867/908] [mlir:PDLL] Rework the C++ generation of native Constraint/Rewrite arguments and results The current translation uses the old "ugly"/"raw" form which used PDLValue for the 
arguments and results. This commit updates the C++ generation to use the recently added sugar that allows for directly using the desired types for the arguments and result of PDL functions. In addition, this commit also properly imports the C++ class for ODS operations, constraints, and interfaces. This allows for a much more convenient C++ API than previously granted with the raw/low-level types. Differential Revision: https://reviews.llvm.org/D124817 --- mlir/docs/PDLL.md | 178 +++++++++++++++--- mlir/include/mlir/IR/PatternMatch.h | 6 + mlir/include/mlir/Tools/PDLL/AST/Nodes.h | 53 ++++-- mlir/include/mlir/Tools/PDLL/AST/Types.h | 11 +- mlir/include/mlir/Tools/PDLL/ODS/Context.h | 3 +- mlir/include/mlir/Tools/PDLL/ODS/Dialect.h | 3 +- mlir/include/mlir/Tools/PDLL/ODS/Operation.h | 8 +- mlir/lib/Tools/PDLL/AST/Nodes.cpp | 41 ++-- mlir/lib/Tools/PDLL/AST/TypeDetail.h | 10 +- mlir/lib/Tools/PDLL/AST/Types.cpp | 13 +- mlir/lib/Tools/PDLL/CodeGen/CPPGen.cpp | 121 ++++++++---- mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp | 20 +- mlir/lib/Tools/PDLL/ODS/Context.cpp | 4 +- mlir/lib/Tools/PDLL/ODS/Dialect.cpp | 5 +- mlir/lib/Tools/PDLL/ODS/Operation.cpp | 4 +- mlir/lib/Tools/PDLL/Parser/Parser.cpp | 55 +++--- .../Tools/mlir-pdll-lsp-server/PDLLServer.cpp | 4 +- mlir/test/mlir-pdll/CodeGen/CPP/general.pdll | 43 ++--- 18 files changed, 415 insertions(+), 167 deletions(-) diff --git a/mlir/docs/PDLL.md b/mlir/docs/PDLL.md index 984ba2ed26f14f..2aadbb6035d0f3 100644 --- a/mlir/docs/PDLL.md +++ b/mlir/docs/PDLL.md @@ -1146,17 +1146,86 @@ Pattern { ``` The arguments of the constraint are accessible within the code block via the -same name. The type of these native variables are mapped directly to the -corresponding MLIR type of the [core constraint](#core-constraints) used. For -example, an `Op` corresponds to a variable of type `Operation *`. +same name. 
See the ["type translation"](#native-constraint-type-translations) below for +detailed information on how PDLL types are converted to native types. In addition to the +PDLL arguments, the code block may also access the current `PatternRewriter` using +`rewriter`. The result type of the native constraint function is implicitly defined +as a `::mlir::LogicalResult`. -The results of the constraint can be populated using the provided `results` -variable. This variable is a `PDLResultList`, and expects results to be -populated in the order that they are defined within the result list of the -constraint declaration. +Taking the constraints defined above as an example, these functions would roughly be +translated into: -In addition to the above, the code block may also access the current -`PatternRewriter` using `rewriter`. +```c++ +LogicalResult HasOneUse(PatternRewriter &rewriter, Value value) { + return success(value.hasOneUse()); +} +LogicalResult HasSameElementType(Value value1, Value value2) { + return success(value1.getType().cast().getElementType() == + value2.getType().cast().getElementType()); +} +``` + +TODO: Native constraints should also be allowed to return values in certain cases. + +###### Native Constraint Type Translations + +The types of argument and result variables are generally mapped to the corresponding +MLIR type of the [constraint](#constraints) used. Below is a detailed description +of how the mapped type of a variable is determined for the various different types of +constraints. + +* Attr, Op, Type, TypeRange, Value, ValueRange: + +These are all core constraints, and are mapped directly to the MLIR equivalent +(that their names suggest), namely: + + * `Attr` -> "::mlir::Attribute" + * `Op` -> "::mlir::Operation *" + * `Type` -> "::mlir::Type" + * `TypeRange` -> "::mlir::TypeRange" + * `Value` -> "::mlir::Value" + * `ValueRange` -> "::mlir::ValueRange" + +* Op + +A named operation constraint has a unique translation. 
If the ODS registration of the +referenced operation has been included, the qualified C++ is used. If the ODS information +is not available, this constraint maps to "::mlir::Operation *", similarly to the unnamed +variant. For example, given the following: + +```pdll +// `my_ops.td` provides the ODS definition of the `my_dialect` operations, such as +// `my_dialect.bar` used below. +#include "my_ops.td" + +Constraint Cst(op: Op) [{ + return success(op ... ); +}]; +``` + +The native type used for `op` may be of the form `my_dialect::BarOp`, as opposed to the +default `::mlir::Operation *`. Below is a sample translation of the above constraint: + +```c++ +LogicalResult Cst(my_dialect::BarOp op) { + return success(op ... ); +} +``` + +* Imported ODS Constraints + +Aside from the core constraints, certain constraints imported from ODS may use a unique +native type. How to enable this unique type depends on the ODS constraint construct that +was imported: + + * `Attr` constraints + - Imported `Attr` constraints utilize the `storageType` field for native type translation. + + * `Type` constraints + - Imported `Type` constraints utilize the `cppClassName` field for native type translation. + + * `AttrInterface`/`OpInterface`/`TypeInterface` constraints + - Imported interfaces utilize the `cppClassName` field for native type translation. #### Defining Constraints Inline @@ -1414,10 +1483,7 @@ be defined by specifying a string code block after the rewrite declaration: ```pdll Rewrite BuildOp(value: Value) -> (foo: Op, bar: Op) [{ - // We push back the results into the `results` variable in the order defined - // by the result list of the rewrite declaration. - results.push_back(rewriter.create(value)); - results.push_back(rewriter.create()); + return {rewriter.create(value), rewriter.create()}; }]; Pattern { @@ -1431,17 +1497,85 @@ Pattern { ``` The arguments of the rewrite are accessible within the code block via the -same name. 
The type of these native variables are mapped directly to the -corresponding MLIR type of the [core constraint](#core-constraints) used. For -example, an `Op` corresponds to a variable of type `Operation *`. +same name. See the ["type translation"](#native-rewrite-type-translations) below for +detailed information on how PDLL types are converted to native types. In addition to the +PDLL arguments, the code block may also access the current `PatternRewriter` using +`rewriter`. See the ["result translation"](#native-rewrite-result-translation) section +for detailed information on how the result type of the native function is determined. + +Taking the rewrite defined above as an example, this function would roughly be +translated into: + +```c++ +std::tuple BuildOp(Value value) { + return {rewriter.create(value), rewriter.create()}; +} +``` -The results of the rewrite can be populated using the provided `results` -variable. This variable is a `PDLResultList`, and expects results to be -populated in the order that they are defined within the result list of the -rewrite declaration. +###### Native Rewrite Type Translations -In addition to the above, the code block may also access the current -`PatternRewriter` using `rewriter`. +The types of argument and result variables are generally mapped to the corresponding +MLIR type of the [constraint](#constraints) used. The rules of native `Rewrite` type translation +are identical to those of native `Constraint`s, please view the corresponding +[native `Constraint` type translation](#native-constraint-type-translations) section for a +detailed description of how the mapped type of a variable is determined. + +###### Native Rewrite Result Translation + +The results of a native rewrite are directly translated to the results of the native function, +using the type translation rules [described above](#native-rewrite-type-translations). 
The section +below describes the various result translation scenarios: + +* Zero Result + +```pdll +Rewrite createOp() [{ + rewriter.create(); +}]; +``` + +In the case where a native `Rewrite` has no results, the native function returns `void`: + +```c++ +void createOp(PatternRewriter &rewriter) { + rewriter.create(); +} +``` + +* Single Result + +```pdll +Rewrite createOp() -> Op [{ + return rewriter.create(); +}]; +``` + +In the case where a native `Rewrite` has a single result, the native function returns the corresponding +native type for that single result: + +```c++ +my_dialect::FooOp createOp(PatternRewriter &rewriter) { + return rewriter.create(); +} +``` + +* Multi Result + +```pdll +Rewrite complexRewrite(value: Value) -> (Op, FunctionOpInterface) [{ + ... +}]; +``` + +In the case where a native `Rewrite` has multiple results, the native function returns a `std::tuple<...>` +containing the corresponding native types for each of the results: + +```c++ +std::tuple +complexRewrite(PatternRewriter &rewriter, Value value) { + ... 
+} +``` #### Defining Rewrites Inline diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 751ec2af2c9d2e..f3236dcff7fac8 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -943,9 +943,13 @@ struct ProcessDerivedPDLValue : public ProcessPDLValueBasedOn { " to be of type: " + llvm::getTypeName()); }); } + using ProcessPDLValueBasedOn::verifyAsArg; + static T processAsArg(BaseT baseValue) { return baseValue.template cast(); } + using ProcessPDLValueBasedOn::processAsArg; + static void processAsResult(PatternRewriter &, PDLResultList &results, T value) { results.push_back(value); @@ -967,6 +971,8 @@ template <> struct ProcessPDLValue : public ProcessPDLValueBasedOn { static StringRef processAsArg(StringAttr value) { return value.getValue(); } + using ProcessPDLValueBasedOn::processAsArg; + static void processAsResult(PatternRewriter &rewriter, PDLResultList &results, StringRef value) { results.push_back(rewriter.getStringAttr(value)); diff --git a/mlir/include/mlir/Tools/PDLL/AST/Nodes.h b/mlir/include/mlir/Tools/PDLL/AST/Nodes.h index 6635dc764e04c2..ab1a53f90b8fcd 100644 --- a/mlir/include/mlir/Tools/PDLL/AST/Nodes.h +++ b/mlir/include/mlir/Tools/PDLL/AST/Nodes.h @@ -506,6 +506,7 @@ class OperationExpr final NamedAttributeDecl *> { public: static OperationExpr *create(Context &ctx, SMRange loc, + const ods::Operation *odsOp, const OpNameDecl *nameDecl, ArrayRef operands, ArrayRef resultTypes, @@ -830,16 +831,15 @@ class ValueRangeConstraintDecl /// - This is a constraint which is defined using only PDLL constructs. class UserConstraintDecl final : public Node::NodeBase, - llvm::TrailingObjects { + llvm::TrailingObjects { public: /// Create a native constraint with the given optional code block. 
- static UserConstraintDecl *createNative(Context &ctx, const Name &name, - ArrayRef inputs, - ArrayRef results, - Optional codeBlock, - Type resultType) { - return createImpl(ctx, name, inputs, results, codeBlock, /*body=*/nullptr, - resultType); + static UserConstraintDecl * + createNative(Context &ctx, const Name &name, ArrayRef inputs, + ArrayRef results, Optional codeBlock, + Type resultType, ArrayRef nativeInputTypes = {}) { + return createImpl(ctx, name, inputs, nativeInputTypes, results, codeBlock, + /*body=*/nullptr, resultType); } /// Create a PDLL constraint with the given body. @@ -848,8 +848,8 @@ class UserConstraintDecl final ArrayRef results, const CompoundStmt *body, Type resultType) { - return createImpl(ctx, name, inputs, results, /*codeBlock=*/llvm::None, - body, resultType); + return createImpl(ctx, name, inputs, /*nativeInputTypes=*/llvm::None, + results, /*codeBlock=*/llvm::None, body, resultType); } /// Return the name of the constraint. @@ -863,6 +863,10 @@ class UserConstraintDecl final return const_cast(this)->getInputs(); } + /// Return the explicit native type to use for the given input. Returns None + /// if no explicit type was set. + Optional getNativeInputType(unsigned index) const; + /// Return the explicit results of the constraint declaration. May be empty, /// even if the constraint has results (e.g. in the case of inferred results). MutableArrayRef getResults() { @@ -891,10 +895,12 @@ class UserConstraintDecl final /// components. 
static UserConstraintDecl * createImpl(Context &ctx, const Name &name, ArrayRef inputs, + ArrayRef nativeInputTypes, ArrayRef results, Optional codeBlock, const CompoundStmt *body, Type resultType); - UserConstraintDecl(const Name &name, unsigned numInputs, unsigned numResults, + UserConstraintDecl(const Name &name, unsigned numInputs, + bool hasNativeInputTypes, unsigned numResults, Optional codeBlock, const CompoundStmt *body, Type resultType) : Base(name.getLoc(), &name), numInputs(numInputs), @@ -916,8 +922,14 @@ class UserConstraintDecl final /// The result type of the constraint. Type resultType; + /// Flag indicating if this constraint has explicit native input types. + bool hasNativeInputTypes; + /// Allow access to various internals. - friend llvm::TrailingObjects; + friend llvm::TrailingObjects; + size_t numTrailingObjects(OverloadToken) const { + return numInputs + numResults; + } }; //===----------------------------------------------------------------------===// @@ -1145,6 +1157,23 @@ class CallableDecl : public Decl { return cast(this)->getResultType(); } + /// Return the explicit results of the declaration. Note that these may be + /// empty, even if the callable has results (e.g. in the case of inferred + /// results). + ArrayRef getResults() const { + if (const auto *cst = dyn_cast(this)) + return cst->getResults(); + return cast(this)->getResults(); + } + + /// Return the optional code block of this callable, if this is a native + /// callable with a provided implementation. + Optional getCodeBlock() const { + if (const auto *cst = dyn_cast(this)) + return cst->getCodeBlock(); + return cast(this)->getCodeBlock(); + } + /// Support LLVM type casting facilities. 
static bool classof(const Node *decl) { return isa(decl); diff --git a/mlir/include/mlir/Tools/PDLL/AST/Types.h b/mlir/include/mlir/Tools/PDLL/AST/Types.h index 75a80b5bf92d47..a4c0888441e2ca 100644 --- a/mlir/include/mlir/Tools/PDLL/AST/Types.h +++ b/mlir/include/mlir/Tools/PDLL/AST/Types.h @@ -14,6 +14,10 @@ namespace mlir { namespace pdll { +namespace ods { +class Operation; +} // namespace ods + namespace ast { class Context; @@ -151,10 +155,15 @@ class OperationType : public Type::TypeBase { /// Return an instance of the Operation type with an optional operation name. /// If no name is provided, this type may refer to any operation. static OperationType get(Context &context, - Optional name = llvm::None); + Optional name = llvm::None, + const ods::Operation *odsOp = nullptr); /// Return the name of this operation type, or None if it doesn't have on. Optional getName() const; + + /// Return the ODS operation that this type refers to, or nullptr if the ODS + /// operation is unknown. + const ods::Operation *getODSOperation() const; }; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Tools/PDLL/ODS/Context.h b/mlir/include/mlir/Tools/PDLL/ODS/Context.h index 6baa90af44adf4..8a57bb791e6390 100644 --- a/mlir/include/mlir/Tools/PDLL/ODS/Context.h +++ b/mlir/include/mlir/Tools/PDLL/ODS/Context.h @@ -63,7 +63,8 @@ class Context { /// operation already existed). std::pair insertOperation(StringRef name, StringRef summary, StringRef desc, - bool supportsResultTypeInferrence, SMLoc loc); + StringRef nativeClassName, bool supportsResultTypeInferrence, + SMLoc loc); /// Lookup an operation registered with the given name, or null if no /// operation with that name is registered. 
diff --git a/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h b/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h index de8181843e84c0..c5c60977dd2f14 100644 --- a/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h +++ b/mlir/include/mlir/Tools/PDLL/ODS/Dialect.h @@ -35,7 +35,8 @@ class Dialect { /// operation already existed). std::pair insertOperation(StringRef name, StringRef summary, StringRef desc, - bool supportsResultTypeInferrence, SMLoc loc); + StringRef nativeClassName, bool supportsResultTypeInferrence, + SMLoc loc); /// Lookup an operation registered with the given name, or null if no /// operation with that name is registered. diff --git a/mlir/include/mlir/Tools/PDLL/ODS/Operation.h b/mlir/include/mlir/Tools/PDLL/ODS/Operation.h index 8ad36a6872b7c0..f4c5a518fbfa98 100644 --- a/mlir/include/mlir/Tools/PDLL/ODS/Operation.h +++ b/mlir/include/mlir/Tools/PDLL/ODS/Operation.h @@ -154,6 +154,9 @@ class Operation { /// Returns the description of the operation. StringRef getDescription() const { return description; } + /// Returns the native class name of the operation. + StringRef getNativeClassName() const { return nativeClassName; } + /// Returns the attributes of this operation. ArrayRef getAttributes() const { return attributes; } @@ -168,7 +171,7 @@ class Operation { private: Operation(StringRef name, StringRef summary, StringRef desc, - bool supportsTypeInferrence, SMLoc loc); + StringRef nativeClassName, bool supportsTypeInferrence, SMLoc loc); /// The name of the operation. std::string name; @@ -177,6 +180,9 @@ class Operation { std::string summary; std::string description; + /// The native class name of the operation, used when generating native code. + std::string nativeClassName; + /// Flag indicating if the operation is known to support type inferrence. 
bool supportsTypeInferrence; diff --git a/mlir/lib/Tools/PDLL/AST/Nodes.cpp b/mlir/lib/Tools/PDLL/AST/Nodes.cpp index 5f1af1dc7b2ebd..417483444615ca 100644 --- a/mlir/lib/Tools/PDLL/AST/Nodes.cpp +++ b/mlir/lib/Tools/PDLL/AST/Nodes.cpp @@ -298,17 +298,18 @@ MemberAccessExpr *MemberAccessExpr::create(Context &ctx, SMRange loc, // OperationExpr //===----------------------------------------------------------------------===// -OperationExpr *OperationExpr::create( - Context &ctx, SMRange loc, const OpNameDecl *name, - ArrayRef operands, ArrayRef resultTypes, - ArrayRef attributes) { +OperationExpr * +OperationExpr::create(Context &ctx, SMRange loc, const ods::Operation *odsOp, + const OpNameDecl *name, ArrayRef operands, + ArrayRef resultTypes, + ArrayRef attributes) { unsigned allocSize = OperationExpr::totalSizeToAlloc( operands.size() + resultTypes.size(), attributes.size()); void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(OperationExpr)); - Type resultType = OperationType::get(ctx, name->getName()); + Type resultType = OperationType::get(ctx, name->getName(), odsOp); OperationExpr *opExpr = new (rawData) OperationExpr(loc, resultType, name, operands.size(), resultTypes.size(), attributes.size(), name->getLoc()); @@ -426,23 +427,41 @@ ValueRangeConstraintDecl *ValueRangeConstraintDecl::create(Context &ctx, // UserConstraintDecl //===----------------------------------------------------------------------===// +Optional +UserConstraintDecl::getNativeInputType(unsigned index) const { + return hasNativeInputTypes ? 
getTrailingObjects()[index] + : Optional(); +} + UserConstraintDecl *UserConstraintDecl::createImpl( Context &ctx, const Name &name, ArrayRef inputs, - ArrayRef results, Optional codeBlock, - const CompoundStmt *body, Type resultType) { - unsigned allocSize = UserConstraintDecl::totalSizeToAlloc( - inputs.size() + results.size()); + ArrayRef nativeInputTypes, ArrayRef results, + Optional codeBlock, const CompoundStmt *body, Type resultType) { + bool hasNativeInputTypes = !nativeInputTypes.empty(); + assert(!hasNativeInputTypes || nativeInputTypes.size() == inputs.size()); + + unsigned allocSize = + UserConstraintDecl::totalSizeToAlloc( + inputs.size() + results.size(), + hasNativeInputTypes ? inputs.size() : 0); void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(UserConstraintDecl)); if (codeBlock) codeBlock = codeBlock->copy(ctx.getAllocator()); - UserConstraintDecl *decl = new (rawData) UserConstraintDecl( - name, inputs.size(), results.size(), codeBlock, body, resultType); + UserConstraintDecl *decl = new (rawData) + UserConstraintDecl(name, inputs.size(), hasNativeInputTypes, + results.size(), codeBlock, body, resultType); std::uninitialized_copy(inputs.begin(), inputs.end(), decl->getInputs().begin()); std::uninitialized_copy(results.begin(), results.end(), decl->getResults().begin()); + if (hasNativeInputTypes) { + StringRef *nativeInputTypesPtr = decl->getTrailingObjects(); + for (unsigned i = 0, e = inputs.size(); i < e; ++i) + nativeInputTypesPtr[i] = nativeInputTypes[i].copy(ctx.getAllocator()); + } + return decl; } diff --git a/mlir/lib/Tools/PDLL/AST/TypeDetail.h b/mlir/lib/Tools/PDLL/AST/TypeDetail.h index e6719fb9612162..4e2a686d704f17 100644 --- a/mlir/lib/Tools/PDLL/AST/TypeDetail.h +++ b/mlir/lib/Tools/PDLL/AST/TypeDetail.h @@ -75,13 +75,15 @@ struct ConstraintTypeStorage : public TypeStorageBase {}; //===----------------------------------------------------------------------===// struct OperationTypeStorage - : public TypeStorageBase { 
+ : public TypeStorageBase> { using Base::Base; static OperationTypeStorage * - construct(StorageUniquer::StorageAllocator &alloc, StringRef key) { - return new (alloc.allocate()) - OperationTypeStorage(alloc.copyInto(key)); + construct(StorageUniquer::StorageAllocator &alloc, + const std::pair &key) { + return new (alloc.allocate()) OperationTypeStorage( + std::make_pair(alloc.copyInto(key.first), key.second)); } }; diff --git a/mlir/lib/Tools/PDLL/AST/Types.cpp b/mlir/lib/Tools/PDLL/AST/Types.cpp index 4164cabac4dbc3..02b01e8854aae7 100644 --- a/mlir/lib/Tools/PDLL/AST/Types.cpp +++ b/mlir/lib/Tools/PDLL/AST/Types.cpp @@ -11,6 +11,7 @@ #include "mlir/Tools/PDLL/AST/Context.h" using namespace mlir; +using namespace mlir::pdll; using namespace mlir::pdll::ast; MLIR_DEFINE_EXPLICIT_TYPE_ID(mlir::pdll::ast::detail::AttributeTypeStorage) @@ -68,16 +69,22 @@ ConstraintType ConstraintType::get(Context &context) { // OperationType //===----------------------------------------------------------------------===// -OperationType OperationType::get(Context &context, Optional name) { +OperationType OperationType::get(Context &context, Optional name, + const ods::Operation *odsOp) { return context.getTypeUniquer().get( - /*initFn=*/function_ref(), name.getValueOr("")); + /*initFn=*/function_ref(), + std::make_pair(name.getValueOr(""), odsOp)); } Optional OperationType::getName() const { - StringRef name = getImplAs()->getValue(); + StringRef name = getImplAs()->getValue().first; return name.empty() ? 
Optional() : Optional(name); } +const ods::Operation *OperationType::getODSOperation() const { + return getImplAs()->getValue().second; +} + //===----------------------------------------------------------------------===// // RangeType //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/PDLL/CodeGen/CPPGen.cpp b/mlir/lib/Tools/PDLL/CodeGen/CPPGen.cpp index c3b5c957007e8d..bf73be9cbff6db 100644 --- a/mlir/lib/Tools/PDLL/CodeGen/CPPGen.cpp +++ b/mlir/lib/Tools/PDLL/CodeGen/CPPGen.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/PDL/IR/PDLOps.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/Tools/PDLL/AST/Nodes.h" +#include "mlir/Tools/PDLL/ODS/Operation.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" @@ -51,11 +52,16 @@ class CodeGen { void generate(const ast::UserConstraintDecl *decl, StringSet<> &nativeFunctions); void generate(const ast::UserRewriteDecl *decl, StringSet<> &nativeFunctions); - void generateConstraintOrRewrite(StringRef name, bool isConstraint, - ArrayRef inputs, - StringRef codeBlock, + void generateConstraintOrRewrite(const ast::CallableDecl *decl, + bool isConstraint, StringSet<> &nativeFunctions); + /// Return the native name for the type of the given type. + StringRef getNativeTypeName(ast::Type type); + + /// Return the native name for the type of the given variable decl. + StringRef getNativeTypeName(ast::VariableDecl *decl); + /// The stream to output to. 
raw_ostream &os; }; @@ -152,55 +158,88 @@ void CodeGen::generateConstraintAndRewrites(const ast::Module &astModule, void CodeGen::generate(const ast::UserConstraintDecl *decl, StringSet<> &nativeFunctions) { - return generateConstraintOrRewrite(decl->getName().getName(), - /*isConstraint=*/true, decl->getInputs(), - *decl->getCodeBlock(), nativeFunctions); + return generateConstraintOrRewrite(cast(decl), + /*isConstraint=*/true, nativeFunctions); } void CodeGen::generate(const ast::UserRewriteDecl *decl, StringSet<> &nativeFunctions) { - return generateConstraintOrRewrite(decl->getName().getName(), - /*isConstraint=*/false, decl->getInputs(), - *decl->getCodeBlock(), nativeFunctions); + return generateConstraintOrRewrite(cast(decl), + /*isConstraint=*/false, nativeFunctions); +} + +StringRef CodeGen::getNativeTypeName(ast::Type type) { + return llvm::TypeSwitch(type) + .Case([&](ast::AttributeType) { return "::mlir::Attribute"; }) + .Case([&](ast::OperationType opType) -> StringRef { + // Use the derived Op class when available. + if (const auto *odsOp = opType.getODSOperation()) + return odsOp->getNativeClassName(); + return "::mlir::Operation *"; + }) + .Case([&](ast::TypeType) { return "::mlir::Type"; }) + .Case([&](ast::ValueType) { return "::mlir::Value"; }) + .Case([&](ast::TypeRangeType) { return "::mlir::TypeRange"; }) + .Case([&](ast::ValueRangeType) { return "::mlir::ValueRange"; }); +} + +StringRef CodeGen::getNativeTypeName(ast::VariableDecl *decl) { + // Try to extract a type name from the variable's constraints. + for (ast::ConstraintRef &cst : decl->getConstraints()) { + if (auto *userCst = dyn_cast(cst.constraint)) { + if (Optional name = userCst->getNativeInputType(0)) + return *name; + return getNativeTypeName(userCst->getInputs()[0]); + } + } + + // Otherwise, use the type of the variable. 
+ return getNativeTypeName(decl->getType()); } -void CodeGen::generateConstraintOrRewrite(StringRef name, bool isConstraint, - ArrayRef inputs, - StringRef codeBlock, +void CodeGen::generateConstraintOrRewrite(const ast::CallableDecl *decl, + bool isConstraint, StringSet<> &nativeFunctions) { + StringRef name = decl->getName()->getName(); nativeFunctions.insert(name); - // TODO: Should there be something explicit for handling optionality? - auto getCppType = [&](ast::Type type) -> StringRef { - return llvm::TypeSwitch(type) - .Case([&](ast::AttributeType) { return "::mlir::Attribute"; }) - .Case([&](ast::OperationType) { - // TODO: Allow using the derived Op class when possible. - return "::mlir::Operation *"; - }) - .Case([&](ast::TypeType) { return "::mlir::Type"; }) - .Case([&](ast::ValueType) { return "::mlir::Value"; }) - .Case([&](ast::TypeRangeType) { return "::mlir::TypeRange"; }) - .Case([&](ast::ValueRangeType) { return "::mlir::ValueRange"; }); - }; - os << "static " << (isConstraint ? "::mlir::LogicalResult " : "void ") << name - << "PDLFn(::mlir::PatternRewriter &rewriter, " - << (isConstraint ? "" : "::mlir::PDLResultList &results, ") - << "::llvm::ArrayRef<::mlir::PDLValue> values) {\n"; - - const char *argumentInitStr = R"( - {0} {1} = {{}; - if (values[{2}]) - {1} = values[{2}].cast<{0}>(); - (void){1}; -)"; - for (const auto &it : llvm::enumerate(inputs)) { - const ast::VariableDecl *input = it.value(); - os << llvm::formatv(argumentInitStr, getCppType(input->getType()), - input->getName().getName(), it.index()); + os << "static "; + + // TODO: Work out a proper modeling for "optionality". + + // Emit the result type. + // If this is a constraint, we always return a LogicalResult. + // TODO: This will need to change if we allow Constraints to return values as + // well. + if (isConstraint) { + os << "::mlir::LogicalResult"; + } else { + // Otherwise, generate a type based on the results of the callable. 
+ // If the callable has explicit results, use those to build the result. + // Otherwise, use the type of the callable. + ArrayRef results = decl->getResults(); + if (results.empty()) { + os << "void"; + } else if (results.size() == 1) { + os << getNativeTypeName(results[0]); + } else { + os << "std::tuple<"; + llvm::interleaveComma(results, os, [&](ast::VariableDecl *result) { + os << getNativeTypeName(result); + }); + os << ">"; + } } - os << " " << codeBlock.trim() << "\n}\n"; + os << " " << name << "PDLFn(::mlir::PatternRewriter &rewriter"; + if (!decl->getInputs().empty()) { + os << ", "; + llvm::interleaveComma(decl->getInputs(), os, [&](ast::VariableDecl *input) { + os << getNativeTypeName(input) << " " << input->getName().getName(); + }); + } + os << ") {\n"; + os << " " << decl->getCodeBlock()->trim() << "\n}\n\n"; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp index 5678761e718055..1e4e14e3df4c9b 100644 --- a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp +++ b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp @@ -316,11 +316,12 @@ Value CodeGen::genNonInitializerVar(const ast::VariableDecl *varDecl, Value typeValue = TypeSwitch(constraint.constraint) .Case([&, this](auto *cst) -> Value { - if (auto *typeConstraintExpr = cst->getTypeExpr()) - return this->genSingleExpr(typeConstraintExpr); - return Value(); - }) + ast::ValueRangeConstraintDecl>( + [&, this](auto *cst) -> Value { + if (auto *typeConstraintExpr = cst->getTypeExpr()) + return this->genSingleExpr(typeConstraintExpr); + return Value(); + }) .Default(Value()); if (typeValue) return typeValue; @@ -442,16 +443,15 @@ Value CodeGen::genExprImpl(const ast::MemberAccessExpr *expr) { return builder.create(loc, mlirType, parentExprs[0]); } - assert(opType.getName() && "expected valid operation name"); - const ods::Operation *odsOp = odsContext.lookupOperation(*opType.getName()); - + const 
ods::Operation *odsOp = opType.getODSOperation(); if (!odsOp) { - assert(llvm::isDigit(name[0]) && "unregistered op only allows numeric indexing"); + assert(llvm::isDigit(name[0]) && + "unregistered op only allows numeric indexing"); unsigned resultIndex; name.getAsInteger(/*Radix=*/10, resultIndex); IntegerAttr index = builder.getI32IntegerAttr(resultIndex); return builder.create(loc, genType(expr->getType()), - parentExprs[0], index); + parentExprs[0], index); } // Find the result with the member name or by index. diff --git a/mlir/lib/Tools/PDLL/ODS/Context.cpp b/mlir/lib/Tools/PDLL/ODS/Context.cpp index 00f7cb26b432d2..a3933c9c73b204 100644 --- a/mlir/lib/Tools/PDLL/ODS/Context.cpp +++ b/mlir/lib/Tools/PDLL/ODS/Context.cpp @@ -61,10 +61,12 @@ const Dialect *Context::lookupDialect(StringRef name) const { std::pair Context::insertOperation(StringRef name, StringRef summary, StringRef desc, + StringRef nativeClassName, bool supportsResultTypeInferrence, SMLoc loc) { std::pair dialectAndName = name.split('.'); return insertDialect(dialectAndName.first) - .insertOperation(name, summary, desc, supportsResultTypeInferrence, loc); + .insertOperation(name, summary, desc, nativeClassName, + supportsResultTypeInferrence, loc); } const Operation *Context::lookupOperation(StringRef name) const { diff --git a/mlir/lib/Tools/PDLL/ODS/Dialect.cpp b/mlir/lib/Tools/PDLL/ODS/Dialect.cpp index 2e084c5d6cfd64..b4654a6ad5b2eb 100644 --- a/mlir/lib/Tools/PDLL/ODS/Dialect.cpp +++ b/mlir/lib/Tools/PDLL/ODS/Dialect.cpp @@ -23,13 +23,14 @@ Dialect::~Dialect() = default; std::pair Dialect::insertOperation(StringRef name, StringRef summary, StringRef desc, + StringRef nativeClassName, bool supportsResultTypeInferrence, llvm::SMLoc loc) { std::unique_ptr &operation = operations[name]; if (operation) return std::make_pair(&*operation, /*wasInserted*/ false); - operation.reset( - new Operation(name, summary, desc, supportsResultTypeInferrence, loc)); + operation.reset(new Operation(name, 
summary, desc, nativeClassName, + supportsResultTypeInferrence, loc)); return std::make_pair(&*operation, /*wasInserted*/ true); } diff --git a/mlir/lib/Tools/PDLL/ODS/Operation.cpp b/mlir/lib/Tools/PDLL/ODS/Operation.cpp index 3b8a3a9e973333..c991c33be7d229 100644 --- a/mlir/lib/Tools/PDLL/ODS/Operation.cpp +++ b/mlir/lib/Tools/PDLL/ODS/Operation.cpp @@ -18,8 +18,10 @@ using namespace mlir::pdll::ods; //===----------------------------------------------------------------------===// Operation::Operation(StringRef name, StringRef summary, StringRef desc, - bool supportsTypeInferrence, llvm::SMLoc loc) + StringRef nativeClassName, bool supportsTypeInferrence, + llvm::SMLoc loc) : name(name.str()), summary(summary.str()), + nativeClassName(nativeClassName.str()), supportsTypeInferrence(supportsTypeInferrence), location(loc, llvm::SMLoc::getFromPointer(loc.getPointer() + 1)) { llvm::raw_string_ostream descOS(description); diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp index e7ec5a047b9fc1..9cd3d8caf40bf5 100644 --- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp +++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp @@ -142,11 +142,13 @@ class Parser { template ast::Decl *createODSNativePDLLConstraintDecl(StringRef name, StringRef codeBlock, SMRange loc, - ast::Type type); + ast::Type type, + StringRef nativeType); template ast::Decl * createODSNativePDLLConstraintDecl(const tblgen::Constraint &constraint, - SMRange loc, ast::Type type); + SMRange loc, ast::Type type, + StringRef nativeType); //===--------------------------------------------------------------------===// // Decls @@ -610,8 +612,7 @@ LogicalResult Parser::convertExpressionTo( if (type == valueTy) { // If the operation is registered, we can verify if it can ever have a // single result. 
- Optional opName = exprOpType.getName(); - if (const ods::Operation *odsOp = lookupODSOperation(opName)) { + if (const ods::Operation *odsOp = exprOpType.getODSOperation()) { if (odsOp->getResults().empty()) { return emitConvertError()->attachNote( llvm::formatv("see the definition of `{0}`, which was defined " @@ -821,7 +822,8 @@ void Parser::processTdIncludeRecords(llvm::RecordKeeper &tdRecords, ods::Operation *odsOp = nullptr; std::tie(odsOp, inserted) = odsContext.insertOperation( op.getOperationName(), op.getSummary(), op.getDescription(), - supportsResultTypeInferrence, op.getLoc().front()); + op.getQualCppClassName(), supportsResultTypeInferrence, + op.getLoc().front()); // Ignore operations that have already been added. if (!inserted) @@ -846,19 +848,21 @@ void Parser::processTdIncludeRecords(llvm::RecordKeeper &tdRecords, /// Attr constraints. for (llvm::Record *def : tdRecords.getAllDerivedDefinitions("Attr")) { if (!def->isAnonymous() && !curDeclScope->lookup(def->getName())) { + tblgen::Attribute constraint(def); decls.push_back( createODSNativePDLLConstraintDecl( - tblgen::AttrConstraint(def), - convertLocToRange(def->getLoc().front()), attrTy)); + constraint, convertLocToRange(def->getLoc().front()), attrTy, + constraint.getStorageType())); } } /// Type constraints. for (llvm::Record *def : tdRecords.getAllDerivedDefinitions("Type")) { if (!def->isAnonymous() && !curDeclScope->lookup(def->getName())) { + tblgen::TypeConstraint constraint(def); decls.push_back( createODSNativePDLLConstraintDecl( - tblgen::TypeConstraint(def), - convertLocToRange(def->getLoc().front()), typeTy)); + constraint, convertLocToRange(def->getLoc().front()), typeTy, + constraint.getCPPClassName())); } } /// Interfaces. 
@@ -870,24 +874,26 @@ void Parser::processTdIncludeRecords(llvm::RecordKeeper &tdRecords, continue; SMRange loc = convertLocToRange(def->getLoc().front()); - StringRef className = def->getValueAsString("cppClassName"); - StringRef cppNamespace = def->getValueAsString("cppNamespace"); + std::string cppClassName = + llvm::formatv("{0}::{1}", def->getValueAsString("cppNamespace"), + def->getValueAsString("cppClassName")) + .str(); std::string codeBlock = - llvm::formatv("return ::mlir::success(llvm::isa<{0}::{1}>(self));", - cppNamespace, className) + llvm::formatv("return ::mlir::success(llvm::isa<{0}>(self));", + cppClassName) .str(); if (def->isSubClassOf("OpInterface")) { decls.push_back(createODSNativePDLLConstraintDecl( - name, codeBlock, loc, opTy)); + name, codeBlock, loc, opTy, cppClassName)); } else if (def->isSubClassOf("AttrInterface")) { decls.push_back( createODSNativePDLLConstraintDecl( - name, codeBlock, loc, attrTy)); + name, codeBlock, loc, attrTy, cppClassName)); } else if (def->isSubClassOf("TypeInterface")) { decls.push_back( createODSNativePDLLConstraintDecl( - name, codeBlock, loc, typeTy)); + name, codeBlock, loc, typeTy, cppClassName)); } } } @@ -895,7 +901,8 @@ void Parser::processTdIncludeRecords(llvm::RecordKeeper &tdRecords, template ast::Decl * Parser::createODSNativePDLLConstraintDecl(StringRef name, StringRef codeBlock, - SMRange loc, ast::Type type) { + SMRange loc, ast::Type type, + StringRef nativeType) { // Build the single input parameter. ast::DeclScope *argScope = pushDeclScope(); auto *paramVar = ast::VariableDecl::create( @@ -907,7 +914,7 @@ Parser::createODSNativePDLLConstraintDecl(StringRef name, StringRef codeBlock, // Build the native constraint. 
auto *constraintDecl = ast::UserConstraintDecl::createNative( ctx, ast::Name::create(ctx, name, loc), paramVar, - /*results=*/llvm::None, codeBlock, ast::TupleType::get(ctx)); + /*results=*/llvm::None, codeBlock, ast::TupleType::get(ctx), nativeType); curDeclScope->add(constraintDecl); return constraintDecl; } @@ -915,7 +922,8 @@ Parser::createODSNativePDLLConstraintDecl(StringRef name, StringRef codeBlock, template ast::Decl * Parser::createODSNativePDLLConstraintDecl(const tblgen::Constraint &constraint, - SMRange loc, ast::Type type) { + SMRange loc, ast::Type type, + StringRef nativeType) { // Format the condition template. tblgen::FmtContext fmtContext; fmtContext.withSelf("self"); @@ -924,7 +932,7 @@ Parser::createODSNativePDLLConstraintDecl(const tblgen::Constraint &constraint, &fmtContext); return createODSNativePDLLConstraintDecl( - constraint.getUniqueDefName(), codeBlock, loc, type); + constraint.getUniqueDefName(), codeBlock, loc, type, nativeType); } //===----------------------------------------------------------------------===// @@ -2534,7 +2542,8 @@ LogicalResult Parser::validateVariableConstraint(const ast::ConstraintRef &ref, constraintType = ast::AttributeType::get(ctx); } else if (const auto *cst = dyn_cast(ref.constraint)) { - constraintType = ast::OperationType::get(ctx, cst->getName()); + constraintType = ast::OperationType::get( + ctx, cst->getName(), lookupODSOperation(cst->getName())); } else if (isa(ref.constraint)) { constraintType = typeTy; } else if (isa(ref.constraint)) { @@ -2710,7 +2719,7 @@ FailureOr Parser::validateMemberAccess(ast::Expr *parentExpr, return valueRangeTy; // Verify member access based on the operation type. - if (const ods::Operation *odsOp = lookupODSOperation(opType.getName())) { + if (const ods::Operation *odsOp = opType.getODSOperation()) { auto results = odsOp->getResults(); // Handle indexed results. 
@@ -2792,7 +2801,7 @@ FailureOr Parser::createOperationExpr( checkOperationResultTypeInferrence(loc, *opNameRef, odsOp); } - return ast::OperationExpr::create(ctx, loc, name, operands, results, + return ast::OperationExpr::create(ctx, loc, odsOp, name, operands, results, attributes); } diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp index 215a0e67452419..cbce9f8ab0cd5a 100644 --- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp +++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp @@ -651,9 +651,7 @@ class LSPCodeCompleteContext : public CodeCompleteContext { } void codeCompleteOperationMemberAccess(ast::OperationType opType) final { - Optional opName = opType.getName(); - const ods::Operation *odsOp = - opName ? odsContext.lookupOperation(*opName) : nullptr; + const ods::Operation *odsOp = opType.getODSOperation(); if (!odsOp) return; diff --git a/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll b/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll index 802958a3872f22..4dae177f10993f 100644 --- a/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll +++ b/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll @@ -44,43 +44,22 @@ Pattern => erase op; // Check the generation of native constraints and rewrites. 
// CHECK: static ::mlir::LogicalResult TestCstPDLFn(::mlir::PatternRewriter &rewriter, -// CHECK-SAME: ::llvm::ArrayRef<::mlir::PDLValue> values) { -// CHECK: ::mlir::Attribute attr = {}; -// CHECK: if (values[0]) -// CHECK: attr = values[0].cast<::mlir::Attribute>(); -// CHECK: ::mlir::Operation * op = {}; -// CHECK: if (values[1]) -// CHECK: op = values[1].cast<::mlir::Operation *>(); -// CHECK: ::mlir::Type type = {}; -// CHECK: if (values[2]) -// CHECK: type = values[2].cast<::mlir::Type>(); -// CHECK: ::mlir::Value value = {}; -// CHECK: if (values[3]) -// CHECK: value = values[3].cast<::mlir::Value>(); -// CHECK: ::mlir::TypeRange typeRange = {}; -// CHECK: if (values[4]) -// CHECK: typeRange = values[4].cast<::mlir::TypeRange>(); -// CHECK: ::mlir::ValueRange valueRange = {}; -// CHECK: if (values[5]) -// CHECK: valueRange = values[5].cast<::mlir::ValueRange>(); - -// CHECK: return success(); +// CHECK-SAME: ::mlir::Attribute attr, ::mlir::Operation * op, ::mlir::Type type, +// CHECK-SAME: ::mlir::Value value, ::mlir::TypeRange typeRange, ::mlir::ValueRange valueRange) { +// CHECK-NEXT: return success(); // CHECK: } // CHECK-NOT: TestUnusedCst -// CHECK: static void TestRewritePDLFn(::mlir::PatternRewriter &rewriter, ::mlir::PDLResultList &results, -// CHECK-SAME: ::llvm::ArrayRef<::mlir::PDLValue> values) { -// CHECK: ::mlir::Attribute attr = {}; -// CHECK: ::mlir::Operation * op = {}; -// CHECK: ::mlir::Type type = {}; -// CHECK: ::mlir::Value value = {}; -// CHECK: ::mlir::TypeRange typeRange = {}; -// CHECK: ::mlir::ValueRange valueRange = {}; - +// CHECK: static void TestRewritePDLFn(::mlir::PatternRewriter &rewriter, +// CHECK-SAME: ::mlir::Attribute attr, ::mlir::Operation * op, ::mlir::Type type, +// CHECK-SAME: ::mlir::Value value, ::mlir::TypeRange typeRange, ::mlir::ValueRange valueRange) { // CHECK: foo; // CHECK: } +// CHECK: static ::mlir::Attribute TestRewriteSinglePDLFn(::mlir::PatternRewriter &rewriter) { +// CHECK: 
std::tuple<::mlir::Attribute, ::mlir::Type> TestRewriteTuplePDLFn(::mlir::PatternRewriter &rewriter) { + // CHECK-NOT: TestUnusedRewrite // CHECK: struct TestCstAndRewrite : ::mlir::PDLPatternModule { @@ -93,6 +72,8 @@ Constraint TestCst(attr: Attr, op: Op, type: Type, value: Value, typeRange: Type Constraint TestUnusedCst() [{ return success(); }]; Rewrite TestRewrite(attr: Attr, op: Op, type: Type, value: Value, typeRange: TypeRange, valueRange: ValueRange) [{ foo; }]; +Rewrite TestRewriteSingle() -> Attr [{}]; +Rewrite TestRewriteTuple() -> (Attr, Type) [{}]; Rewrite TestUnusedRewrite(op: Op) [{}]; Pattern TestCstAndRewrite { @@ -100,6 +81,8 @@ Pattern TestCstAndRewrite { TestCst(attr<"true">, root, type, operand, types, operands); rewrite root with { TestRewrite(attr<"true">, root, type, operand, types, operands); + TestRewriteSingle(); + TestRewriteTuple(); erase root; }; } From 5d5aba78dbbee75508f01bcaa69aedb2ab79065a Mon Sep 17 00:00:00 2001 From: Xiang1 Zhang Date: Mon, 30 May 2022 08:37:36 +0800 Subject: [PATCH 868/908] [X86][NFC] Refine X86 Domain Reassignment for compiling time Differential Revision: https://reviews.llvm.org/D126622 --- llvm/lib/Target/X86/X86DomainReassignment.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 70a7f8edf6e3d6..9d4338deca3523 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -15,6 +15,7 @@ #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" @@ -374,7 +375,7 @@ class X86DomainReassignment : public MachineFunctionPass { const X86InstrInfo *TII = nullptr; /// All edges that are included in some closure - DenseSet EnclosedEdges; + BitVector EnclosedEdges{8, false}; /// All instructions 
that are included in some closure. DenseMap EnclosedInstrs; @@ -429,10 +430,10 @@ char X86DomainReassignment::ID = 0; void X86DomainReassignment::visitRegister(Closure &C, Register Reg, RegDomain &Domain, SmallVectorImpl &Worklist) { - if (EnclosedEdges.count(Reg)) + if (!Reg.isVirtual()) return; - if (!Reg.isVirtual()) + if (EnclosedEdges.test(Register::virtReg2Index(Reg))) return; if (!MRI->hasOneDef(Reg)) @@ -550,7 +551,7 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { // Register already in this closure. if (!C.insertEdge(CurReg)) continue; - EnclosedEdges.insert(Reg); + EnclosedEdges.set(Register::virtReg2Index(Reg)); MachineInstr *DefMI = MRI->getVRegDef(CurReg); encloseInstr(C, DefMI); @@ -742,6 +743,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; EnclosedEdges.clear(); + EnclosedEdges.resize(MRI->getNumVirtRegs()); EnclosedInstrs.clear(); std::vector Closures; @@ -756,7 +758,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { continue; // Register already in closure. - if (EnclosedEdges.count(Reg)) + if (EnclosedEdges.test(Idx)) continue; // Calculate closure starting with Reg. From 563cc3fda9a2a35582d274e1d2d66687ecf2fc77 Mon Sep 17 00:00:00 2001 From: "Zi Xuan Wu (Zeson)" Date: Fri, 20 May 2022 11:59:17 +0800 Subject: [PATCH 869/908] [Clang][CSKY] Add support about CSKYABIInfo According to the CSKY ABIv2 document, https://github.com/c-sky/csky-doc/blob/master/C-SKY_V2_CPU_Applications_Binary_Interface_Standards_Manual.pdf construct the ABIInfo to handle argument passing and return of clang data type. It also includes how to emit and expand VAArg intrinsic. 
Differential Revision: https://reviews.llvm.org/D126451 --- clang/lib/CodeGen/TargetInfo.cpp | 167 ++++++++++ clang/test/CodeGen/CSKY/csky-abi.c | 347 +++++++++++++++++++++ clang/test/CodeGen/CSKY/csky-hard-abi.c | 394 +++++++++++++++++++++++ clang/test/CodeGen/CSKY/csky-soft-abi.c | 395 ++++++++++++++++++++++++ 4 files changed, 1303 insertions(+) create mode 100644 clang/test/CodeGen/CSKY/csky-abi.c create mode 100644 clang/test/CodeGen/CSKY/csky-hard-abi.c create mode 100644 clang/test/CodeGen/CSKY/csky-soft-abi.c diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index ecbb3505bb91cf..4b7b301594d778 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -11325,6 +11325,165 @@ class VETargetCodeGenInfo : public TargetCodeGenInfo { }; } // end anonymous namespace +//===----------------------------------------------------------------------===// +// CSKY ABI Implementation +//===----------------------------------------------------------------------===// +namespace { +class CSKYABIInfo : public DefaultABIInfo { + static const int NumArgGPRs = 4; + static const int NumArgFPRs = 4; + + static const unsigned XLen = 32; + unsigned FLen; + +public: + CSKYABIInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen) + : DefaultABIInfo(CGT), FLen(FLen) {} + + void computeInfo(CGFunctionInfo &FI) const override; + ABIArgInfo classifyArgumentType(QualType Ty, int &ArgGPRsLeft, + int &ArgFPRsLeft, + bool isReturnType = false) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +} // end anonymous namespace + +void CSKYABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; + + // We must track the number of GPRs used 
in order to conform to the CSKY + // ABI, as integer scalars passed in registers should have signext/zeroext + // when promoted. + int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int ArgFPRsLeft = FLen ? NumArgFPRs : 0; + + for (auto &ArgInfo : FI.arguments()) { + ArgInfo.info = classifyArgumentType(ArgInfo.type, ArgGPRsLeft, ArgFPRsLeft); + } +} + +Address CSKYABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); + + // Empty records are ignored for parameter passing purposes. + if (isEmptyRecord(getContext(), Ty, true)) { + Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), + getVAListElementType(CGF), SlotSize); + Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); + return Addr; + } + + auto TInfo = getContext().getTypeInfoInChars(Ty); + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, false, TInfo, SlotSize, + /*AllowHigherAlign=*/true); +} + +ABIArgInfo CSKYABIInfo::classifyArgumentType(QualType Ty, int &ArgGPRsLeft, + int &ArgFPRsLeft, + bool isReturnType) const { + assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (ArgGPRsLeft) + ArgGPRsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + if (!Ty->getAsUnionType()) + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + uint64_t Size = getContext().getTypeSize(Ty); + // Pass floating point values via FPRs if possible. 
+ if (Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size && + ArgFPRsLeft) { + ArgFPRsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the hard float ABI must be passed direct rather than + // using CoerceAndExpand. + if (Ty->isComplexType() && FLen && !isReturnType) { + QualType EltTy = Ty->castAs()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FLen) { + ArgFPRsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to XLen width, unless passed on the + // stack. + if (Size < XLen && Ty->isIntegralOrEnumerationType()) + return ABIArgInfo::getExtend(Ty); + + if (const auto *EIT = Ty->getAs()) { + if (EIT->getNumBits() < XLen) + return ABIArgInfo::getExtend(Ty); + } + + return ABIArgInfo::getDirect(); + } + + // For argument type, the first 4*XLen parts of aggregate will be passed + // in registers, and the rest will be passed in stack. + // So we can coerce to integers directly and let backend handle it correctly. + // For return type, aggregate which <= 2*XLen will be returned in registers. + // Otherwise, aggregate will be returned indirectly. + if (!isReturnType || (isReturnType && Size <= 2 * XLen)) { + if (Size <= XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), XLen)); + } else { + return ABIArgInfo::getDirect(llvm::ArrayType::get( + llvm::IntegerType::get(getVMContext(), XLen), (Size + 31) / XLen)); + } + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo CSKYABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + int ArgGPRsLeft = 2; + int ArgFPRsLeft = FLen ? 1 : 0; + + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. 
+ return classifyArgumentType(RetTy, ArgGPRsLeft, ArgFPRsLeft, true); +} + +namespace { +class CSKYTargetCodeGenInfo : public TargetCodeGenInfo { +public: + CSKYTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen) + : TargetCodeGenInfo(std::make_unique(CGT, FLen)) {} +}; +} // end anonymous namespace + //===----------------------------------------------------------------------===// // Driver code //===----------------------------------------------------------------------===// @@ -11545,6 +11704,14 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new SPIRVTargetCodeGenInfo(Types)); case llvm::Triple::ve: return SetCGInfo(new VETargetCodeGenInfo(Types)); + case llvm::Triple::csky: { + bool IsSoftFloat = !getTarget().hasFeature("hard-float-abi"); + bool hasFP64 = getTarget().hasFeature("fpuv2_df") || + getTarget().hasFeature("fpuv3_df"); + return SetCGInfo(new CSKYTargetCodeGenInfo(Types, IsSoftFloat ? 0 + : hasFP64 ? 64 + : 32)); + } } } diff --git a/clang/test/CodeGen/CSKY/csky-abi.c b/clang/test/CodeGen/CSKY/csky-abi.c new file mode 100644 index 00000000000000..b32d637d171549 --- /dev/null +++ b/clang/test/CodeGen/CSKY/csky-abi.c @@ -0,0 +1,347 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple csky -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -triple csky -target-feature +fpuv2_df -target-feature +fpuv2_sf \ +// RUN: -target-feature +hard-float -target-feature +hard-float-abi -emit-llvm %s -o - | FileCheck %s + +// This file contains test cases that will have the same output for the hard-float +// and soft-float ABIs. 
+ +#include +#include + +// CHECK-LABEL: define{{.*}} void @f_void() +void f_void(void) {} + +// Scalar arguments and return values smaller than the word size are extended +// according to the sign of their type, up to 32 bits + +// CHECK-LABEL: define{{.*}} zeroext i1 @f_scalar_0(i1 noundef zeroext %x) +_Bool f_scalar_0(_Bool x) { return x; } + +// CHECK-LABEL: define{{.*}} signext i8 @f_scalar_1(i8 noundef signext %x) +int8_t f_scalar_1(int8_t x) { return x; } + +// CHECK-LABEL: define{{.*}} zeroext i8 @f_scalar_2(i8 noundef zeroext %x) +uint8_t f_scalar_2(uint8_t x) { return x; } + +// CHECK-LABEL: define{{.*}} i32 @f_scalar_3(i32 noundef %x) +int32_t f_scalar_3(int32_t x) { return x; } + +// CHECK-LABEL: define{{.*}} i64 @f_scalar_4(i64 noundef %x) +int64_t f_scalar_4(int64_t x) { return x; } + +// CHECK-LABEL: define{{.*}} float @f_fp_scalar_1(float noundef %x) +float f_fp_scalar_1(float x) { return x; } + +// CHECK-LABEL: define{{.*}} double @f_fp_scalar_2(double noundef %x) +double f_fp_scalar_2(double x) { return x; } + +// CHECK-LABEL: define{{.*}} double @f_fp_scalar_3(double noundef %x) +long double f_fp_scalar_3(long double x) { return x; } + +// Empty structs or unions are ignored. + +struct empty_s {}; + +// CHECK-LABEL: define{{.*}} void @f_agg_empty_struct() +struct empty_s f_agg_empty_struct(struct empty_s x) { + return x; +} + +union empty_u {}; + +// CHECK-LABEL: define{{.*}} void @f_agg_empty_union() +union empty_u f_agg_empty_union(union empty_u x) { + return x; +} + +// Aggregates <= 4*xlen may be passed in registers, so will be coerced to +// integer arguments. The rules for return are <= 2*xlen. 
+ +struct tiny { + uint8_t a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_tiny(i32 %x.coerce) +void f_agg_tiny(struct tiny x) { + x.a += x.b; + x.c += x.d; +} + +// CHECK-LABEL: define{{.*}} i32 @f_agg_tiny_ret() +struct tiny f_agg_tiny_ret(void) { + return (struct tiny){1, 2, 3, 4}; +} + +struct small { + int32_t a, *b; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_small([2 x i32] %x.coerce) +void f_agg_small(struct small x) { + x.a += *x.b; + x.b = &x.a; +} + +// CHECK-LABEL: define{{.*}} [2 x i32] @f_agg_small_ret() +struct small f_agg_small_ret(void) { + return (struct small){1, 0}; +} + +struct small_aligned { + int64_t a; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_small_aligned(i64 %x.coerce) +void f_agg_small_aligned(struct small_aligned x) { + x.a += x.a; +} + +// CHECK-LABEL: define{{.*}} i64 @f_agg_small_aligned_ret(i64 %x.coerce) +struct small_aligned f_agg_small_aligned_ret(struct small_aligned x) { + return (struct small_aligned){10}; +} + +// For argument type, the first 4*XLen parts of aggregate will be passed +// in registers, and the rest will be passed in stack. +// So we can coerce to integers directly and let backend handle it correctly. +// For return type, aggregate which <= 2*XLen will be returned in registers. +// Otherwise, aggregate will be returned indirectly. 
+struct large { + int32_t a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_large([4 x i32] %x.coerce) +void f_agg_large(struct large x) { + x.a = x.b + x.c + x.d; +} + +// The address where the struct should be written to will be the first +// argument +// CHECK-LABEL: define{{.*}} void @f_agg_large_ret(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 noundef %i, i8 noundef signext %j) +struct large f_agg_large_ret(int32_t i, int8_t j) { + return (struct large){1, 2, 3, 4}; +} + +typedef unsigned char v16i8 __attribute__((vector_size(16))); + +// CHECK-LABEL: define{{.*}} void @f_vec_large_v16i8(<16 x i8> noundef %x) +void f_vec_large_v16i8(v16i8 x) { + x[0] = x[7]; +} + +// CHECK-LABEL: define{{.*}} <16 x i8> @f_vec_large_v16i8_ret() +v16i8 f_vec_large_v16i8_ret(void) { + return (v16i8){1, 2, 3, 4, 5, 6, 7, 8}; +} + +// CHECK-LABEL: define{{.*}} i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, i64 %c.coerce, [4 x i32] %d.coerce, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g, i8 noundef signext %h) +int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c, + struct large d, uint8_t e, int8_t f, uint8_t g, int8_t h) { + return g + h; +} + +// Ensure that scalars passed on the stack are still determined correctly in +// the presence of large return values that consume a register due to the need +// to pass a pointer. 
+ +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 noundef %a, i64 noundef %b, i64 noundef %c, double noundef %d, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g) +struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d, + uint8_t e, int8_t f, uint8_t g) { + return (struct large){a, e, f, g}; +} + +// CHECK-LABEL: define{{.*}} double @f_scalar_stack_4(i32 noundef %a, i64 noundef %b, i64 noundef %c, double noundef %d, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g) +long double f_scalar_stack_4(int32_t a, int64_t b, int64_t c, long double d, + uint8_t e, int8_t f, uint8_t g) { + return d; +} + +// Aggregates should be coerced to an integer array. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_5(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 noundef %e, i64 noundef %f, float noundef %g, double noundef %h, double noundef %i) +void f_scalar_stack_5(double a, int64_t b, double c, int64_t d, int e, + int64_t f, float g, double h, long double i) {} + +// CHECK-LABEL: define{{.*}} void @f_agg_stack(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, [4 x i32] %h.coerce) +void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e, + struct small f, struct small_aligned g, struct large h) {} + +// Ensure that ABI lowering happens as expected for vararg calls. For CSKY +// with the base integer calling convention there will be no observable +// differences in the lowered IR for a call with varargs vs without. + +int f_va_callee(int, ...); + +// CHECK-LABEL: define{{.*}} void @f_va_caller() +// CHECK: call i32 (i32, ...) 
@f_va_callee(i32 noundef 1, i32 noundef 2, i64 noundef 3, double noundef 4.000000e+00, double noundef 5.000000e+00, i32 {{%.*}}, [2 x i32] {{%.*}}, i64 {{%.*}}, [4 x i32] {{%.*}}) +void f_va_caller(void) { + f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct tiny){6, 7, 8, 9}, + (struct small){10, NULL}, (struct small_aligned){11}, + (struct large){12, 13, 14, 15}); +} + +// CHECK-LABEL: define{{.*}} i32 @f_va_1(i8* noundef %fmt, ...) {{.*}} { +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK: [[VA:%.*]] = alloca i8*, align 4 +// CHECK: [[V:%.*]] = alloca i32, align 4 +// CHECK: store i8* %fmt, i8** [[FMT_ADDR]], align 4 +// CHECK: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK: call void @llvm.va_start(i8* [[VA1]]) +// CHECK: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4 +// CHECK: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK: [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32* +// CHECK: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK: store i32 [[TMP1]], i32* [[V]], align 4 +// CHECK: [[VA2:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK: call void @llvm.va_end(i8* [[VA2]]) +// CHECK: [[TMP2:%.*]] = load i32, i32* [[V]], align 4 +// CHECK: ret i32 [[TMP2]] +// CHECK: } +int f_va_1(char *fmt, ...) 
{ + __builtin_va_list va; + + __builtin_va_start(va, fmt); + int v = __builtin_va_arg(va, int); + __builtin_va_end(va); + + return v; +} + +// CHECK-LABEL: @f_va_2( +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[V:%.*]] = alloca double, align 4 +// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4 +// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]]) +// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR]] to double* +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 +// CHECK-NEXT: store double [[TMP4]], double* [[V]], align 4 +// CHECK-NEXT: [[VA2:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_end(i8* [[VA2]]) +// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[V]], align 4 +// CHECK-NEXT: ret double [[TMP5]] +double f_va_2(char *fmt, ...) 
{ + __builtin_va_list va; + + __builtin_va_start(va, fmt); + double v = __builtin_va_arg(va, double); + __builtin_va_end(va); + + return v; +} + +// CHECK-LABEL: @f_va_3( +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[V:%.*]] = alloca double, align 4 +// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca double, align 4 +// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4 +// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]]) +// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR]] to double* +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 +// CHECK-NEXT: store double [[TMP4]], double* [[V]], align 4 +// CHECK-NEXT: [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 4 +// CHECK-NEXT: store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR2]] to i32* +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], i32* [[W]], align 4 +// CHECK-NEXT: [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ARGP_CUR4]] to double* +// CHECK-NEXT: [[TMP11:%.*]] = load double, double* [[TMP10]], align 4 +// CHECK-NEXT: store double [[TMP11]], double* [[X]], align 4 +// CHECK-NEXT: [[VA6:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_end(i8* [[VA6]]) +// CHECK-NEXT: [[TMP12:%.*]] = load double, double* 
[[V]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load double, double* [[X]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK-NEXT: ret double [[ADD]] +double f_va_3(char *fmt, ...) { + __builtin_va_list va; + + __builtin_va_start(va, fmt); + double v = __builtin_va_arg(va, double); + int w = __builtin_va_arg(va, int); + double x = __builtin_va_arg(va, double); + __builtin_va_end(va); + + return v + x; +} + +// CHECK-LABEL: define{{.*}} i32 @f_va_4(i8* noundef %fmt, ...) {{.*}} { +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LD:%.*]] = alloca double, align 4 +// CHECK-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4 +// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]]) +// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4 +// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], i32* [[V]], align 4 +// CHECK-NEXT: [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ARGP_CUR2]] to double* +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP2]], align 4 +// CHECK-NEXT: store double [[TMP4]], double* [[LD]], align 4 +// CHECK-NEXT: [[ARGP_CUR4:%.*]] = load 
i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4]], i32 4 +// CHECK-NEXT: store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR4]] to %struct.tiny* +// CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.tiny* [[TS]] to i8* +// CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.tiny* [[TMP5]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[TMP6]], i8* align 4 [[TMP7]], i32 4, i1 false) +// CHECK-NEXT: [[ARGP_CUR6:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT7:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR6]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT7]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[ARGP_CUR6]] to %struct.small* +// CHECK-NEXT: [[TMP9:%.*]] = bitcast %struct.small* [[SS]] to i8* +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct.small* [[TMP8]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK-NEXT: [[ARGP_CUR8:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR8]], i32 16 +// CHECK-NEXT: store i8* [[ARGP_NEXT9]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[ARGP_CUR8]] to %struct.large* +// CHECK-NEXT: [[TMP13:%.*]] = bitcast %struct.large* [[LS]] to i8* +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.large* [[TMP11]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 16, i1 false) +// CHECK-NEXT: [[VA10:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_end(i8* [[VA10]]) +int f_va_4(char *fmt, ...) 
{ + __builtin_va_list va; + + __builtin_va_start(va, fmt); + int v = __builtin_va_arg(va, int); + long double ld = __builtin_va_arg(va, long double); + struct tiny ts = __builtin_va_arg(va, struct tiny); + struct small ss = __builtin_va_arg(va, struct small); + struct large ls = __builtin_va_arg(va, struct large); + __builtin_va_end(va); + + int ret = (int)((long double)v + ld); + ret = ret + ts.a + ts.b + ts.c + ts.d; + ret = ret + ss.a + (int)ss.b; + ret = ret + ls.a + ls.b + ls.c + ls.d; + + return ret; +} diff --git a/clang/test/CodeGen/CSKY/csky-hard-abi.c b/clang/test/CodeGen/CSKY/csky-hard-abi.c new file mode 100644 index 00000000000000..d5ed00e4a07558 --- /dev/null +++ b/clang/test/CodeGen/CSKY/csky-hard-abi.c @@ -0,0 +1,394 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple csky -target-feature +fpuv2_sf -target-feature +fpuv2_df -target-feature +hard-float-abi -target-feature +hard-float -emit-llvm %s -o - | FileCheck %s + +#include + +// Verify that the tracking of used GPRs and FPRs works correctly by checking +// that small integers are sign/zero extended when passed in registers. + +// Doubles are passed in FPRs, so argument 'i' will be passed zero-extended +// because it will be passed in a GPR. + +// CHECK: define{{.*}} void @f_fpr_tracking(double noundef %a, double noundef %b, double noundef %c, double noundef %d, i8 noundef zeroext %i) +void f_fpr_tracking(double a, double b, double c, double d, uint8_t i) {} + +// A struct containing just one floating-point real is passed as though it +// were a standalone floating-point real. +struct double_s { + double f; +}; + +// CHECK: define{{.*}} void @f_double_s_arg(double %a.coerce) +void f_double_s_arg(struct double_s a) {} + +// CHECK: define{{.*}} double @f_ret_double_s() +struct double_s f_ret_double_s(void) { + return (struct double_s){1.0}; +} + +// A struct containing a double and any number of zero-width bitfields is +// passed as though it were a standalone floating-point real. 
+ +struct zbf_double_s { + int : 0; + double f; +}; +struct zbf_double_zbf_s { + int : 0; + double f; + int : 0; +}; + +// CHECK: define{{.*}} void @f_zbf_double_s_arg(double %a.coerce) +void f_zbf_double_s_arg(struct zbf_double_s a) {} + +// CHECK: define{{.*}} double @f_ret_zbf_double_s() +struct zbf_double_s f_ret_zbf_double_s(void) { + return (struct zbf_double_s){1.0}; +} + +// CHECK: define{{.*}} void @f_zbf_double_zbf_s_arg(double %a.coerce) +void f_zbf_double_zbf_s_arg(struct zbf_double_zbf_s a) {} + +// CHECK: define{{.*}} double @f_ret_zbf_double_zbf_s() +struct zbf_double_zbf_s f_ret_zbf_double_zbf_s(void) { + return (struct zbf_double_zbf_s){1.0}; +} + +// For argument type, the first 4*XLen parts of aggregate will be passed +// in registers, and the rest will be passed in stack. +// So we can coerce to integers directly and let backend handle it correctly. +// For return type, aggregate which <= 2*XLen will be returned in registers. +// Otherwise, aggregate will be returned indirectly. 
+ +struct double_double_s { + double f; + double g; +}; +struct double_float_s { + double f; + float g; +}; + +// CHECK: define{{.*}} void @f_double_double_s_arg([4 x i32] %a.coerce) +void f_double_double_s_arg(struct double_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_double_s(%struct.double_double_s* noalias sret(%struct.double_double_s) align 4 %agg.result) +struct double_double_s f_ret_double_double_s(void) { + return (struct double_double_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_double_float_s_arg([3 x i32] %a.coerce) +void f_double_float_s_arg(struct double_float_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_float_s(%struct.double_float_s* noalias sret(%struct.double_float_s) align 4 %agg.result) +struct double_float_s f_ret_double_float_s(void) { + return (struct double_float_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_double_double_s_arg_insufficient_fprs(float noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double noundef %f, double noundef %g, double noundef %i, [4 x i32] %h.coerce) +void f_double_double_s_arg_insufficient_fprs(float a, double b, double c, double d, + double e, double f, double g, double i, struct double_double_s h) {} + +struct double_int8_s { + double f; + int8_t i; +}; +struct double_uint8_s { + double f; + uint8_t i; +}; +struct double_int32_s { + double f; + int32_t i; +}; +struct double_int64_s { + double f; + int64_t i; +}; +struct double_int64bf_s { + double f; + int64_t i : 32; +}; +struct double_int8_zbf_s { + double f; + int8_t i; + int : 0; +}; + +// CHECK: define{{.*}} @f_double_int8_s_arg([3 x i32] %a.coerce) +void f_double_int8_s_arg(struct double_int8_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int8_s(%struct.double_int8_s* noalias sret(%struct.double_int8_s) align 4 %agg.result) +struct double_int8_s f_ret_double_int8_s(void) { + return (struct double_int8_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_uint8_s_arg([3 x i32] 
%a.coerce) +void f_double_uint8_s_arg(struct double_uint8_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_uint8_s(%struct.double_uint8_s* noalias sret(%struct.double_uint8_s) align 4 %agg.result) +struct double_uint8_s f_ret_double_uint8_s(void) { + return (struct double_uint8_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int32_s_arg([3 x i32] %a.coerce) +void f_double_int32_s_arg(struct double_int32_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int32_s(%struct.double_int32_s* noalias sret(%struct.double_int32_s) align 4 %agg.result) +struct double_int32_s f_ret_double_int32_s(void) { + return (struct double_int32_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int64_s_arg([4 x i32] %a.coerce) +void f_double_int64_s_arg(struct double_int64_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int64_s(%struct.double_int64_s* noalias sret(%struct.double_int64_s) align 4 %agg.result) +struct double_int64_s f_ret_double_int64_s(void) { + return (struct double_int64_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int64bf_s_arg([3 x i32] %a.coerce) +void f_double_int64bf_s_arg(struct double_int64bf_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int64bf_s(%struct.double_int64bf_s* noalias sret(%struct.double_int64bf_s) align 4 %agg.result) +struct double_int64bf_s f_ret_double_int64bf_s(void) { + return (struct double_int64bf_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int8_zbf_s([3 x i32] %a.coerce) +void f_double_int8_zbf_s(struct double_int8_zbf_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int8_zbf_s(%struct.double_int8_zbf_s* noalias sret(%struct.double_int8_zbf_s) align 4 %agg.result) +struct double_int8_zbf_s f_ret_double_int8_zbf_s(void) { + return (struct double_int8_zbf_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int8_s_arg_insufficient_gprs(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, i32 noundef %h, [3 x i32] %i.coerce) 
+void f_double_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e, + int f, int g, int h, struct double_int8_s i) {} + +// CHECK: define{{.*}} void @f_struct_double_int8_insufficient_fprs(float noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double noundef %f, double noundef %g, double noundef %h, [3 x i32] %i.coerce) +void f_struct_double_int8_insufficient_fprs(float a, double b, double c, double d, + double e, double f, double g, double h, struct double_int8_s i) {} + +// Complex floating-point values are special in passing argument, +// and it's not same as structs containing a single complex. +// Complex floating-point value should be passed in two consecutive fprs. +// But the return process is same as struct. + +// CHECK: define{{.*}} void @f_doublecomplex(double noundef %a.coerce0, double noundef %a.coerce1) +void f_doublecomplex(double __complex__ a) {} + +// CHECK: define{{.*}} void @f_ret_doublecomplex({ double, double }* noalias sret({ double, double }) align 4 %agg.result) +double __complex__ f_ret_doublecomplex(void) { + return 1.0; +} + +struct doublecomplex_s { + double __complex__ c; +}; + +// CHECK: define{{.*}} void @f_doublecomplex_s_arg([4 x i32] %a.coerce) +void f_doublecomplex_s_arg(struct doublecomplex_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublecomplex_s(%struct.doublecomplex_s* noalias sret(%struct.doublecomplex_s) align 4 %agg.result) +struct doublecomplex_s f_ret_doublecomplex_s(void) { + return (struct doublecomplex_s){1.0}; +} + +// Test single or two-element structs that need flattening. e.g. those +// containing nested structs, doubles in small arrays, zero-length structs etc. 
+ +struct doublearr1_s { + double a[1]; +}; + +// CHECK: define{{.*}} void @f_doublearr1_s_arg(double %a.coerce) +void f_doublearr1_s_arg(struct doublearr1_s a) {} + +// CHECK: define{{.*}} double @f_ret_doublearr1_s() +struct doublearr1_s f_ret_doublearr1_s(void) { + return (struct doublearr1_s){{1.0}}; +} + +struct doublearr2_s { + double a[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_s_arg([4 x i32] %a.coerce) +void f_doublearr2_s_arg(struct doublearr2_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_s(%struct.doublearr2_s* noalias sret(%struct.doublearr2_s) align 4 %agg.result) +struct doublearr2_s f_ret_doublearr2_s(void) { + return (struct doublearr2_s){{1.0, 2.0}}; +} + +struct doublearr2_tricky1_s { + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky1_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky1_s_arg(struct doublearr2_tricky1_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky1_s(%struct.doublearr2_tricky1_s* noalias sret(%struct.doublearr2_tricky1_s) align 4 %agg.result) +struct doublearr2_tricky1_s f_ret_doublearr2_tricky1_s(void) { + return (struct doublearr2_tricky1_s){{{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky2_s { + struct {}; + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky2_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky2_s_arg(struct doublearr2_tricky2_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky2_s(%struct.doublearr2_tricky2_s* noalias sret(%struct.doublearr2_tricky2_s) align 4 %agg.result) +struct doublearr2_tricky2_s f_ret_doublearr2_tricky2_s(void) { + return (struct doublearr2_tricky2_s){{}, {{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky3_s { + union {}; + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky3_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky3_s_arg(struct doublearr2_tricky3_s a) {} + +// CHECK: define{{.*}} void 
@f_ret_doublearr2_tricky3_s(%struct.doublearr2_tricky3_s* noalias sret(%struct.doublearr2_tricky3_s) align 4 %agg.result) +struct doublearr2_tricky3_s f_ret_doublearr2_tricky3_s(void) { + return (struct doublearr2_tricky3_s){{}, {{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky4_s { + union {}; + struct { + struct {}; + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky4_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky4_s_arg(struct doublearr2_tricky4_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky4_s(%struct.doublearr2_tricky4_s* noalias sret(%struct.doublearr2_tricky4_s) align 4 %agg.result) +struct doublearr2_tricky4_s f_ret_doublearr2_tricky4_s(void) { + return (struct doublearr2_tricky4_s){{}, {{{}, {1.0}}, {{}, {2.0}}}}; +} + +struct int_double_int_s { + int a; + double b; + int c; +}; + +// CHECK: define{{.*}} void @f_int_double_int_s_arg([4 x i32] %a.coerce) +void f_int_double_int_s_arg(struct int_double_int_s a) {} + +// CHECK: define{{.*}} void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret(%struct.int_double_int_s) align 4 %agg.result) +struct int_double_int_s f_ret_int_double_int_s(void) { + return (struct int_double_int_s){1, 2.0, 3}; +} + +struct int64_double_s { + int64_t a; + double b; +}; + +// CHECK: define{{.*}} void @f_int64_double_s_arg([4 x i32] %a.coerce) +void f_int64_double_s_arg(struct int64_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_int64_double_s(%struct.int64_double_s* noalias sret(%struct.int64_double_s) align 4 %agg.result) +struct int64_double_s f_ret_int64_double_s(void) { + return (struct int64_double_s){1, 2.0}; +} + +struct char_char_double_s { + char a; + char b; + double c; +}; + +// CHECK-LABEL: define{{.*}} void @f_char_char_double_s_arg([3 x i32] %a.coerce) +void f_char_char_double_s_arg(struct char_char_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_char_char_double_s(%struct.char_char_double_s* noalias sret(%struct.char_char_double_s) align 4 
%agg.result) +struct char_char_double_s f_ret_char_char_double_s(void) { + return (struct char_char_double_s){1, 2, 3.0}; +} + +// A union containing just one floating-point real can not be passed as though it +// were a standalone floating-point real. +union double_u { + double a; +}; + +// CHECK: define{{.*}} void @f_double_u_arg([2 x i32] %a.coerce) +void f_double_u_arg(union double_u a) {} + +// CHECK: define{{.*}} [2 x i32] @f_ret_double_u() +union double_u f_ret_double_u(void) { + return (union double_u){1.0}; +} + +// CHECK: define{{.*}} void @f_ret_double_int32_s_double_int32_s_just_sufficient_gprs(%struct.double_int32_s* noalias sret(%struct.double_int32_s) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +struct double_int32_s f_ret_double_int32_s_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { + return (struct double_int32_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_ret_double_double_s_double_int32_s_just_sufficient_gprs(%struct.double_double_s* noalias sret(%struct.double_double_s) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +struct double_double_s f_ret_double_double_s_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { + return (struct double_double_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_ret_doublecomplex_double_int32_s_just_sufficient_gprs({ double, double }* noalias sret({ double, double }) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +double __complex__ f_ret_doublecomplex_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { 
+ return 1.0; +} + +struct tiny { + uint8_t a, b, c, d; +}; + +struct small { + int32_t a, *b; +}; + +struct small_aligned { + int64_t a; +}; + +struct large { + int32_t a, b, c, d; +}; + +// Ensure that scalars passed on the stack are still determined correctly in +// the presence of large return values that consume a register due to the need +// to pass a pointer. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, float noundef %a, i64 noundef %b, double noundef %c, double noundef %d, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g) +struct large f_scalar_stack_2(float a, int64_t b, double c, long double d, + uint8_t e, int8_t f, uint8_t g) { + return (struct large){a, e, f, g}; +} + +// Aggregates and >=XLen scalars passed on the stack should be lowered just as +// they would be if passed via registers. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_3(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 noundef %e, i64 noundef %f, float noundef %g, double noundef %h, double noundef %i) +void f_scalar_stack_3(double a, int64_t b, double c, int64_t d, int e, + int64_t f, float g, double h, long double i) {} + +// CHECK-LABEL: define{{.*}} void @f_agg_stack(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, [4 x i32] %h.coerce) +void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e, + struct small f, struct small_aligned g, struct large h) {} diff --git a/clang/test/CodeGen/CSKY/csky-soft-abi.c b/clang/test/CodeGen/CSKY/csky-soft-abi.c new file mode 100644 index 00000000000000..b50feee32aa5a3 --- /dev/null +++ b/clang/test/CodeGen/CSKY/csky-soft-abi.c @@ -0,0 +1,395 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple csky -target-feature +fpuv2_sf -target-feature +fpuv2_df -target-feature +hard-float -emit-llvm %s -o - | FileCheck %s + +#include + +// Verify that 
the tracking of used GPRs and FPRs works correctly by checking +// that small integers are sign/zero extended when passed in registers. + +// Doubles are passed in FPRs, so argument 'i' will be passed zero-extended +// because it will be passed in a GPR. + +// CHECK: define{{.*}} void @f_fpr_tracking(double noundef %a, double noundef %b, double noundef %c, double noundef %d, i8 noundef zeroext %i) +void f_fpr_tracking(double a, double b, double c, double d, uint8_t i) {} + +// A struct containing just one floating-point real is passed as though it +// were a standalone floating-point real. +struct double_s { + double f; +}; + +// CHECK: define{{.*}} void @f_double_s_arg(double %a.coerce) +void f_double_s_arg(struct double_s a) {} + +// CHECK: define{{.*}} double @f_ret_double_s() +struct double_s f_ret_double_s(void) { + return (struct double_s){1.0}; +} + +// A struct containing a double and any number of zero-width bitfields is +// passed as though it were a standalone floating-point real. + +struct zbf_double_s { + int : 0; + double f; +}; +struct zbf_double_zbf_s { + int : 0; + double f; + int : 0; +}; + +// CHECK: define{{.*}} void @f_zbf_double_s_arg(double %a.coerce) +void f_zbf_double_s_arg(struct zbf_double_s a) {} + +// CHECK: define{{.*}} double @f_ret_zbf_double_s() +struct zbf_double_s f_ret_zbf_double_s(void) { + return (struct zbf_double_s){1.0}; +} + +// CHECK: define{{.*}} void @f_zbf_double_zbf_s_arg(double %a.coerce) +void f_zbf_double_zbf_s_arg(struct zbf_double_zbf_s a) {} + +// CHECK: define{{.*}} double @f_ret_zbf_double_zbf_s() +struct zbf_double_zbf_s f_ret_zbf_double_zbf_s(void) { + return (struct zbf_double_zbf_s){1.0}; +} + +// For argument type, the first 4*XLen parts of aggregate will be passed +// in registers, and the rest will be passed in stack. +// So we can coerce to integers directly and let backend handle it correctly. +// For return type, aggregate which <= 2*XLen will be returned in registers. 
+// Otherwise, aggregate will be returned indirectly. + +struct double_double_s { + double f; + double g; +}; +struct double_float_s { + double f; + float g; +}; + +// CHECK: define{{.*}} void @f_double_double_s_arg([4 x i32] %a.coerce) +void f_double_double_s_arg(struct double_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_double_s(%struct.double_double_s* noalias sret(%struct.double_double_s) align 4 %agg.result) +struct double_double_s f_ret_double_double_s(void) { + return (struct double_double_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_double_float_s_arg([3 x i32] %a.coerce) +void f_double_float_s_arg(struct double_float_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_float_s(%struct.double_float_s* noalias sret(%struct.double_float_s) align 4 %agg.result) +struct double_float_s f_ret_double_float_s(void) { + return (struct double_float_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_double_double_s_arg_insufficient_fprs(float noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double noundef %f, double noundef %g, double noundef %i, [4 x i32] %h.coerce) +void f_double_double_s_arg_insufficient_fprs(float a, double b, double c, double d, + double e, double f, double g, double i, struct double_double_s h) {} + +struct double_int8_s { + double f; + int8_t i; +}; +struct double_uint8_s { + double f; + uint8_t i; +}; +struct double_int32_s { + double f; + int32_t i; +}; +struct double_int64_s { + double f; + int64_t i; +}; +struct double_int64bf_s { + double f; + int64_t i : 32; +}; +struct double_int8_zbf_s { + double f; + int8_t i; + int : 0; +}; + +// CHECK: define{{.*}} @f_double_int8_s_arg([3 x i32] %a.coerce) +void f_double_int8_s_arg(struct double_int8_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int8_s(%struct.double_int8_s* noalias sret(%struct.double_int8_s) align 4 %agg.result) +struct double_int8_s f_ret_double_int8_s(void) { + return (struct double_int8_s){1.0, 2}; +} + +// 
CHECK: define{{.*}} void @f_double_uint8_s_arg([3 x i32] %a.coerce) +void f_double_uint8_s_arg(struct double_uint8_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_uint8_s(%struct.double_uint8_s* noalias sret(%struct.double_uint8_s) align 4 %agg.result) +struct double_uint8_s f_ret_double_uint8_s(void) { + return (struct double_uint8_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int32_s_arg([3 x i32] %a.coerce) +void f_double_int32_s_arg(struct double_int32_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int32_s(%struct.double_int32_s* noalias sret(%struct.double_int32_s) align 4 %agg.result) +struct double_int32_s f_ret_double_int32_s(void) { + return (struct double_int32_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int64_s_arg([4 x i32] %a.coerce) +void f_double_int64_s_arg(struct double_int64_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int64_s(%struct.double_int64_s* noalias sret(%struct.double_int64_s) align 4 %agg.result) +struct double_int64_s f_ret_double_int64_s(void) { + return (struct double_int64_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int64bf_s_arg([3 x i32] %a.coerce) +void f_double_int64bf_s_arg(struct double_int64bf_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int64bf_s(%struct.double_int64bf_s* noalias sret(%struct.double_int64bf_s) align 4 %agg.result) +struct double_int64bf_s f_ret_double_int64bf_s(void) { + return (struct double_int64bf_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int8_zbf_s([3 x i32] %a.coerce) +void f_double_int8_zbf_s(struct double_int8_zbf_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int8_zbf_s(%struct.double_int8_zbf_s* noalias sret(%struct.double_int8_zbf_s) align 4 %agg.result) +struct double_int8_zbf_s f_ret_double_int8_zbf_s(void) { + return (struct double_int8_zbf_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int8_s_arg_insufficient_gprs(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef 
%f, i32 noundef %g, i32 noundef %h, [3 x i32] %i.coerce) +void f_double_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e, + int f, int g, int h, struct double_int8_s i) {} + +// CHECK: define{{.*}} void @f_struct_double_int8_insufficient_fprs(float noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double noundef %f, double noundef %g, double noundef %h, [3 x i32] %i.coerce) +void f_struct_double_int8_insufficient_fprs(float a, double b, double c, double d, + double e, double f, double g, double h, struct double_int8_s i) {} + +// Complex floating-point values are special in passing argument, +// and it's not same as structs containing a single complex. +// Complex floating-point value should be passed in two consecutive fprs. +// But the return process is same as struct. + +// But now we test in soft-float, it's coerced and passing in gprs. +// CHECK: define{{.*}} void @f_doublecomplex([4 x i32] noundef %a.coerce) +void f_doublecomplex(double __complex__ a) {} + +// CHECK: define{{.*}} void @f_ret_doublecomplex({ double, double }* noalias sret({ double, double }) align 4 %agg.result) +double __complex__ f_ret_doublecomplex(void) { + return 1.0; +} + +struct doublecomplex_s { + double __complex__ c; +}; + +// CHECK: define{{.*}} void @f_doublecomplex_s_arg([4 x i32] %a.coerce) +void f_doublecomplex_s_arg(struct doublecomplex_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublecomplex_s(%struct.doublecomplex_s* noalias sret(%struct.doublecomplex_s) align 4 %agg.result) +struct doublecomplex_s f_ret_doublecomplex_s(void) { + return (struct doublecomplex_s){1.0}; +} + +// Test single or two-element structs that need flattening. e.g. those +// containing nested structs, doubles in small arrays, zero-length structs etc. 
+ +struct doublearr1_s { + double a[1]; +}; + +// CHECK: define{{.*}} void @f_doublearr1_s_arg(double %a.coerce) +void f_doublearr1_s_arg(struct doublearr1_s a) {} + +// CHECK: define{{.*}} double @f_ret_doublearr1_s() +struct doublearr1_s f_ret_doublearr1_s(void) { + return (struct doublearr1_s){{1.0}}; +} + +struct doublearr2_s { + double a[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_s_arg([4 x i32] %a.coerce) +void f_doublearr2_s_arg(struct doublearr2_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_s(%struct.doublearr2_s* noalias sret(%struct.doublearr2_s) align 4 %agg.result) +struct doublearr2_s f_ret_doublearr2_s(void) { + return (struct doublearr2_s){{1.0, 2.0}}; +} + +struct doublearr2_tricky1_s { + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky1_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky1_s_arg(struct doublearr2_tricky1_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky1_s(%struct.doublearr2_tricky1_s* noalias sret(%struct.doublearr2_tricky1_s) align 4 %agg.result) +struct doublearr2_tricky1_s f_ret_doublearr2_tricky1_s(void) { + return (struct doublearr2_tricky1_s){{{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky2_s { + struct {}; + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky2_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky2_s_arg(struct doublearr2_tricky2_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky2_s(%struct.doublearr2_tricky2_s* noalias sret(%struct.doublearr2_tricky2_s) align 4 %agg.result) +struct doublearr2_tricky2_s f_ret_doublearr2_tricky2_s(void) { + return (struct doublearr2_tricky2_s){{}, {{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky3_s { + union {}; + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky3_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky3_s_arg(struct doublearr2_tricky3_s a) {} + +// CHECK: define{{.*}} void 
@f_ret_doublearr2_tricky3_s(%struct.doublearr2_tricky3_s* noalias sret(%struct.doublearr2_tricky3_s) align 4 %agg.result) +struct doublearr2_tricky3_s f_ret_doublearr2_tricky3_s(void) { + return (struct doublearr2_tricky3_s){{}, {{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky4_s { + union {}; + struct { + struct {}; + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky4_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky4_s_arg(struct doublearr2_tricky4_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky4_s(%struct.doublearr2_tricky4_s* noalias sret(%struct.doublearr2_tricky4_s) align 4 %agg.result) +struct doublearr2_tricky4_s f_ret_doublearr2_tricky4_s(void) { + return (struct doublearr2_tricky4_s){{}, {{{}, {1.0}}, {{}, {2.0}}}}; +} + +struct int_double_int_s { + int a; + double b; + int c; +}; + +// CHECK: define{{.*}} void @f_int_double_int_s_arg([4 x i32] %a.coerce) +void f_int_double_int_s_arg(struct int_double_int_s a) {} + +// CHECK: define{{.*}} void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret(%struct.int_double_int_s) align 4 %agg.result) +struct int_double_int_s f_ret_int_double_int_s(void) { + return (struct int_double_int_s){1, 2.0, 3}; +} + +struct int64_double_s { + int64_t a; + double b; +}; + +// CHECK: define{{.*}} void @f_int64_double_s_arg([4 x i32] %a.coerce) +void f_int64_double_s_arg(struct int64_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_int64_double_s(%struct.int64_double_s* noalias sret(%struct.int64_double_s) align 4 %agg.result) +struct int64_double_s f_ret_int64_double_s(void) { + return (struct int64_double_s){1, 2.0}; +} + +struct char_char_double_s { + char a; + char b; + double c; +}; + +// CHECK-LABEL: define{{.*}} void @f_char_char_double_s_arg([3 x i32] %a.coerce) +void f_char_char_double_s_arg(struct char_char_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_char_char_double_s(%struct.char_char_double_s* noalias sret(%struct.char_char_double_s) align 4 
%agg.result) +struct char_char_double_s f_ret_char_char_double_s(void) { + return (struct char_char_double_s){1, 2, 3.0}; +} + +// A union containing just one floating-point real can not be passed as though it +// were a standalone floating-point real. +union double_u { + double a; +}; + +// CHECK: define{{.*}} void @f_double_u_arg([2 x i32] %a.coerce) +void f_double_u_arg(union double_u a) {} + +// CHECK: define{{.*}} [2 x i32] @f_ret_double_u() +union double_u f_ret_double_u(void) { + return (union double_u){1.0}; +} + +// CHECK: define{{.*}} void @f_ret_double_int32_s_double_int32_s_just_sufficient_gprs(%struct.double_int32_s* noalias sret(%struct.double_int32_s) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +struct double_int32_s f_ret_double_int32_s_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { + return (struct double_int32_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_ret_double_double_s_double_int32_s_just_sufficient_gprs(%struct.double_double_s* noalias sret(%struct.double_double_s) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +struct double_double_s f_ret_double_double_s_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { + return (struct double_double_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_ret_doublecomplex_double_int32_s_just_sufficient_gprs({ double, double }* noalias sret({ double, double }) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +double __complex__ f_ret_doublecomplex_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { 
+ return 1.0; +} + +struct tiny { + uint8_t a, b, c, d; +}; + +struct small { + int32_t a, *b; +}; + +struct small_aligned { + int64_t a; +}; + +struct large { + int32_t a, b, c, d; +}; + +// Ensure that scalars passed on the stack are still determined correctly in +// the presence of large return values that consume a register due to the need +// to pass a pointer. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, float noundef %a, i64 noundef %b, double noundef %c, double noundef %d, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g) +struct large f_scalar_stack_2(float a, int64_t b, double c, long double d, + uint8_t e, int8_t f, uint8_t g) { + return (struct large){a, e, f, g}; +} + +// Aggregates and >=XLen scalars passed on the stack should be lowered just as +// they would be if passed via registers. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_3(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 noundef %e, i64 noundef %f, float noundef %g, double noundef %h, double noundef %i) +void f_scalar_stack_3(double a, int64_t b, double c, int64_t d, int e, + int64_t f, float g, double h, long double i) {} + +// CHECK-LABEL: define{{.*}} void @f_agg_stack(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, [4 x i32] %h.coerce) +void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e, + struct small f, struct small_aligned g, struct large h) {} From 575e297fcb289f0a9b0ac4b01d1d0fa051f5cc29 Mon Sep 17 00:00:00 2001 From: Vassil Vassilev Date: Tue, 31 May 2022 06:24:24 +0000 Subject: [PATCH 870/908] Revert "[clang-repl] Recover the lookup tables of the primary context." This reverts commit 5ff27fe1ff03d5aeaf8567c97618170f0cef8f58. 
This patch caused failures in asan: https://lab.llvm.org/buildbot/#/builders/5/builds/24221 --- clang/lib/Interpreter/IncrementalParser.cpp | 2 +- clang/test/Interpreter/execute.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index 87687ea6906d7a..0f1ef3233a2a1b 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -188,7 +188,7 @@ IncrementalParser::ParseOrWrapTopLevelDecl() { S.TUScope->setEntity(PreviousTU); // Clean up the lookup table - if (StoredDeclsMap *Map = PreviousTU->getPrimaryContext()->getLookupPtr()) { + if (StoredDeclsMap *Map = PreviousTU->getLookupPtr()) { for (auto I = Map->begin(); I != Map->end(); ++I) { StoredDeclsList &List = I->second; DeclContextLookupResult R = List.getLookupResult(); diff --git a/clang/test/Interpreter/execute.cpp b/clang/test/Interpreter/execute.cpp index 5f3fe3e837d9a6..298046c068c37f 100644 --- a/clang/test/Interpreter/execute.cpp +++ b/clang/test/Interpreter/execute.cpp @@ -1,4 +1,3 @@ -// RUN: clang-repl "int x = 10;" "int y=7; err;" "int y = 10;" // RUN: clang-repl "int i = 10;" 'extern "C" int printf(const char*,...);' \ // RUN: 'auto r1 = printf("i = %d\n", i);' | FileCheck --check-prefix=CHECK-DRIVER %s // REQUIRES: host-supports-jit From 11fb1aa5a40885188b014b3ccd326cc92e4a3b9e Mon Sep 17 00:00:00 2001 From: Diana Picus Date: Wed, 25 May 2022 12:51:10 +0000 Subject: [PATCH 871/908] [flang] Upstream the lowering of the while loop Upstream the code for handling while loops from the fir-dev branch at https://github.com/flang-compiler/f18-llvm-project/tree/fir-dev/ Also add tests. The while loop is lowered to a header block that checks the loop condition and branches either to the exit block or to the body of the loop. The body of the loop will unconditionally branch back to the header. 
Differential Revision: https://reviews.llvm.org/D126636 Co-authored-by: Eric Schweitz Co-authored-by: V Donaldson --- flang/lib/Lower/Bridge.cpp | 22 ++++-- flang/test/Lower/mixed_loops.f90 | 118 +++++++++++++++++++++++++++++++ flang/test/Lower/while_loop.f90 | 91 ++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 5 deletions(-) create mode 100644 flang/test/Lower/mixed_loops.f90 create mode 100644 flang/test/Lower/while_loop.f90 diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 28dbc8b8b0b4a2..a33ddee64e1985 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -981,8 +981,16 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; IncrementLoopNestInfo incrementLoopNestInfo; - if (const auto *bounds = std::get_if( - &loopControl->u)) { + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + if ((whileCondition = std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genFIRConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { // Non-concurrent increment loop. IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( *bounds->name.thing.symbol, bounds->lower, bounds->upper, @@ -999,15 +1007,19 @@ class FirConverter : public Fortran::lower::AbstractConverter { // Increment loop begin code. (TODO: Infinite/while code was already // generated.) - genFIRIncrementLoopBegin(incrementLoopNestInfo); + if (!whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo); // Loop body code - NonLabelDoStmt and EndDoStmt code is generated here. // Their genFIR calls are nops except for block management in some cases. 
for (Fortran::lower::pft::Evaluation &e : eval.getNestedEvaluations()) genFIR(e, unstructuredContext); - // Loop end code. (TODO: infinite/while loop) - genFIRIncrementLoopEnd(incrementLoopNestInfo); + // Loop end code. (TODO: infinite loop) + if (whileCondition) + genFIRBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); } /// Generate FIR to begin a structured or unstructured increment loop nest. diff --git a/flang/test/Lower/mixed_loops.f90 b/flang/test/Lower/mixed_loops.f90 new file mode 100644 index 00000000000000..3135f8a86a2ce6 --- /dev/null +++ b/flang/test/Lower/mixed_loops.f90 @@ -0,0 +1,118 @@ +! RUN: bbc -emit-fir -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s + +! Test while loop inside do loop. +! CHECK-LABEL: while_inside_do_loop +subroutine while_inside_do_loop + ! CHECK-DAG: %[[T_REF:.*]] = fir.alloca i32 + ! CHECK-DAG: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFwhile_inside_do_loopEi"} + ! CHECK-DAG: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFwhile_inside_do_loopEj"} + integer :: i, j + + ! CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32 + ! CHECK-DAG: %[[C8:.*]] = arith.constant 8 : i32 + ! CHECK-DAG: %[[C13:.*]] = arith.constant 13 : i32 + ! CHECK: %[[DIFF:.*]] = arith.subi %[[C13]], %[[C8]] : i32 + ! CHECK: %[[RANGE:.*]] = arith.addi %[[DIFF]], %[[C1]] : i32 + ! CHECK: %[[HIGH:.*]] = arith.divsi %[[RANGE]], %[[C1]] : i32 + ! CHECK: fir.store %[[HIGH]] to %[[T_REF]] : !fir.ref + ! CHECK: fir.store %[[C8]] to %[[I_REF]] : !fir.ref + + ! CHECK: br ^[[HDR1:.*]] + ! CHECK: ^[[HDR1]]: // 2 preds: ^{{.*}}, ^[[EXIT2:.*]] + ! CHECK-DAG: %[[T:.*]] = fir.load %[[T_REF]] : !fir.ref + ! CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 + ! CHECK: %[[COND:.*]] = arith.cmpi sgt, %[[T]], %[[C0]] : i32 + ! CHECK: cond_br %[[COND]], ^[[BODY1:.*]], ^[[EXIT1:.*]] + do i=8,13 + ! CHECK: ^[[BODY1]]: // pred: ^[[HDR1]] + ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 + ! 
CHECK: fir.store %[[C3]] to %[[J_REF]] : !fir.ref + j=3 + + ! CHECK: br ^[[HDR2:.*]] + ! CHECK: ^[[HDR2]]: // 2 preds: ^[[BODY1]], ^[[BODY2:.*]] + ! CHECK-DAG: %[[J:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK-DAG: %[[I:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: %[[COND2:.*]] = arith.cmpi slt, %[[J]], %[[I]] : i32 + ! CHECK: cond_br %[[COND2]], ^[[BODY2]], ^[[EXIT2]] + do while (j .lt. i) + ! CHECK: ^[[BODY2]]: // pred: ^[[HDR2]] + ! CHECK-DAG: %[[J2:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 + ! CHECK: %[[INC2:.*]] = arith.muli %[[C2]], %[[J2]] : i32 + ! CHECK: fir.store %[[INC2]] to %[[J_REF]] : !fir.ref + j=j*2 + ! CHECK: br ^[[HDR2]] + end do + + ! CHECK: ^[[EXIT2]]: // pred: ^[[HDR2]] + ! CHECK-DAG: %[[T2:.*]] = fir.load %[[T_REF]] : !fir.ref + ! CHECK-DAG: %[[C1_AGAIN:.*]] = arith.constant 1 : i32 + ! CHECK: %[[TDEC:.*]] = arith.subi %[[T2]], %[[C1_AGAIN]] : i32 + ! CHECK: fir.store %[[TDEC]] to %[[T_REF]] + ! CHECK: %[[I3:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: %[[IINC:.*]] = arith.addi %[[I3]], %[[C1]] : i32 + ! CHECK: fir.store %[[IINC]] to %[[I_REF]] : !fir.ref + ! CHECK: br ^[[HDR1]] + end do + + ! CHECK: ^[[EXIT1]]: // pred: ^[[HDR1]] + ! CHECK: %[[IPRINT:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[IPRINT]]) : (!fir.ref, i32) -> i1 + ! CHECK: %[[JPRINT:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[JPRINT]]) : (!fir.ref, i32) -> i1 + print *, i, j +end subroutine + +! Test do loop inside while loop. +! CHECK-LABEL: do_inside_while_loop +subroutine do_inside_while_loop + ! CHECK-DAG: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdo_inside_while_loopEi"} + ! CHECK-DAG: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFdo_inside_while_loopEj"} + integer :: i, j + + ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 + ! 
CHECK: fir.store %[[C3]] to %[[J_REF]] : !fir.ref + j=3 + + ! CHECK: br ^[[HDR1:.*]] + ! CHECK: ^[[HDR1]]: // 2 preds: ^{{.*}}, ^[[BODY1:.*]] + ! CHECK-DAG: %[[J:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK-DAG: %[[UL:.*]] = arith.constant 21 : i32 + ! CHECK: %[[COND:.*]] = arith.cmpi slt, %[[J]], %[[UL]] : i32 + ! CHECK: cond_br %[[COND]], ^[[BODY1]], ^[[EXIT1:.*]] + do while (j .lt. 21) + ! CHECK: ^[[BODY1]]: // pred: ^[[HDR1]] + + ! CHECK-DAG: %[[C8_I32:.*]] = arith.constant 8 : i32 + ! CHECK-DAG: %[[C8:.*]] = fir.convert %[[C8_I32]] : (i32) -> index + ! CHECK-DAG: %[[C13_I32:.*]] = arith.constant 13 : i32 + ! CHECK-DAG: %[[C13:.*]] = fir.convert %[[C13_I32]] : (i32) -> index + ! CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index + ! CHECK: %[[RESULT:.*]] = fir.do_loop %[[IDX:.*]] = %[[C8]] to %[[C13]] step %[[C1]] -> index { + ! CHECK: %[[I32:.*]] = fir.convert %[[IDX]] : (index) -> i32 + ! CHECK: fir.store %[[I32]] to %[[I_REF]] : !fir.ref + ! CHECK-DAG: %[[J2:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 + ! CHECK: %[[JINC:.*]] = arith.muli %[[C2]], %[[J2]] : i32 + ! CHECK: fir.store %[[JINC]] to %[[J_REF]] : !fir.ref + ! CHECK: %[[IINC:.*]] = arith.addi %[[IDX]], %[[C1]] : index + ! CHECK: fir.result %[[IINC]] : index + do i=8,13 + j=j*2 + + ! CHECK: %[[IFINAL:.*]] = fir.convert %[[RESULT]] : (index) -> i32 + ! CHECK: fir.store %[[IFINAL]] to %[[I_REF]] : !fir.ref + end do + + ! CHECK: br ^[[HDR1]] + end do + + ! CHECK: ^[[EXIT1]]: // pred: ^[[HDR1]] + ! CHECK: %[[IPRINT:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[IPRINT]]) : (!fir.ref, i32) -> i1 + ! CHECK: %[[JPRINT:.*]] = fir.load %[[J_REF]] : !fir.ref + ! 
CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[JPRINT]]) : (!fir.ref, i32) -> i1 + print *, i, j +end subroutine diff --git a/flang/test/Lower/while_loop.f90 b/flang/test/Lower/while_loop.f90 new file mode 100644 index 00000000000000..8320b366ff004f --- /dev/null +++ b/flang/test/Lower/while_loop.f90 @@ -0,0 +1,91 @@ +! RUN: bbc -emit-fir -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s + +! Test a simple while loop. +! CHECK-LABEL: simple_loop +subroutine simple_loop + ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_loopEi"} + integer :: i + + ! CHECK: %[[C5:.*]] = arith.constant 5 : i32 + ! CHECK: fir.store %[[C5]] to %[[I_REF]] + i = 5 + + ! CHECK: br ^[[BB1:.*]] + ! CHECK: ^[[BB1]]: // 2 preds: ^{{.*}}, ^[[BB2:.*]] + ! CHECK-DAG: %[[I:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32 + ! CHECK: %[[COND:.*]] = arith.cmpi sgt, %[[I]], %[[C1]] : i32 + ! CHECK: cond_br %[[COND]], ^[[BB2]], ^[[BB3:.*]] + ! CHECK: ^[[BB2]]: // pred: ^[[BB1]] + ! CHECK-DAG: %[[I2:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 + ! CHECK: %[[INC:.*]] = arith.subi %[[I2]], %[[C2]] : i32 + ! CHECK: fir.store %[[INC]] to %[[I_REF]] : !fir.ref + ! CHECK: br ^[[BB1]] + do while (i .gt. 1) + i = i - 2 + end do + + ! CHECK: ^[[BB3]]: // pred: ^[[BB1]] + ! CHECK: %[[I3:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[I3]]) : (!fir.ref, i32) -> i1 + print *, i +end subroutine + +! Test 2 nested while loops. +! CHECK-LABEL: while_inside_while_loop +subroutine while_inside_while_loop + ! CHECK-DAG: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFwhile_inside_while_loopEi"} + ! CHECK-DAG: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFwhile_inside_while_loopEj"} + integer :: i, j + + ! CHECK: %[[C13:.*]] = arith.constant 13 : i32 + ! 
CHECK: fir.store %[[C13]] to %[[I_REF]] + i = 13 + + ! CHECK: br ^[[HDR1:.*]] + ! CHECK: ^[[HDR1]]: // 2 preds: ^{{.*}}, ^[[EXIT2:.*]] + ! CHECK-DAG: %[[I:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK-DAG: %[[C8:.*]] = arith.constant 8 : i32 + ! CHECK: %[[COND:.*]] = arith.cmpi sgt, %[[I]], %[[C8]] : i32 + ! CHECK: cond_br %[[COND]], ^[[BODY1:.*]], ^[[EXIT1:.*]] + do while (i .gt. 8) + ! CHECK: ^[[BODY1]]: // pred: ^[[HDR1]] + ! CHECK-DAG: %[[I2:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK-DAG: %[[C5:.*]] = arith.constant 5 : i32 + ! CHECK: %[[INC:.*]] = arith.subi %[[I2]], %[[C5]] : i32 + ! CHECK: fir.store %[[INC]] to %[[I_REF]] : !fir.ref + i = i - 5 + + ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 + ! CHECK: fir.store %[[C3]] to %[[J_REF]] + j = 3 + + ! CHECK: br ^[[HDR2:.*]] + ! CHECK: ^[[HDR2]]: // 2 preds: ^[[BODY1]], ^[[BODY2:.*]] + ! CHECK-DAG: %[[J:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK-DAG: %[[I3:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: %[[COND2:.*]] = arith.cmpi slt, %[[J]], %[[I3]] : i32 + ! CHECK: cond_br %[[COND2]], ^[[BODY2]], ^[[EXIT2]] + do while (j .lt. i) + ! CHECK: ^[[BODY2]]: // pred: ^[[HDR2]] + ! CHECK-DAG: %[[J2:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 + ! CHECK: %[[INC2:.*]] = arith.muli %[[C2]], %[[J2]] : i32 + ! CHECK: fir.store %[[INC2]] to %[[J_REF]] : !fir.ref + j = j * 2 + ! CHECK: br ^[[HDR2]] + end do + + ! CHECK: ^[[EXIT2]]: // pred: ^[[HDR2]] + ! CHECK: br ^[[HDR1]] + end do + + ! CHECK: ^[[EXIT1]]: // pred: ^[[HDR1]] + ! CHECK: %[[IPRINT:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[IPRINT]]) : (!fir.ref, i32) -> i1 + ! CHECK: %[[JPRINT:.*]] = fir.load %[[J_REF]] : !fir.ref + ! 
CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[JPRINT]]) : (!fir.ref, i32) -> i1 + print *, i, j +end subroutine From d384a4c530623c73048da040210d44fea1167321 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 31 May 2022 09:14:06 +0100 Subject: [PATCH 872/908] [X86] Adjust vector test costs to match SoG (Issue #54889) znver1/2 models were incorrectly modelling the latency/throughput/uops and znver1 ymm variants also require double pumping. Now matches what I can decipher from the AMD SoG, Agner and instlatx64 numbers vs the llvm-exegesis report provided by @fabian-r --- llvm/lib/Target/X86/X86ScheduleZnver1.td | 8 ++-- llvm/lib/Target/X86/X86ScheduleZnver2.td | 8 ++-- .../llvm-mca/X86/Znver1/resources-avx1.s | 38 +++++++++---------- .../llvm-mca/X86/Znver1/resources-sse41.s | 4 +- .../llvm-mca/X86/Znver2/resources-avx1.s | 24 ++++++------ .../llvm-mca/X86/Znver2/resources-sse41.s | 4 +- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index bfa5a1447f6a74..aada3e0bd90636 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -334,8 +334,8 @@ defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -413,8 +413,8 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 7a98b943191d45..c47d235eab9b29 
100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -331,8 +331,8 @@ defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -408,8 +408,8 @@ defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s index 905fbe09fae13b..c6315d540106a8 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s @@ -1604,10 +1604,10 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsubw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpsubw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vptest %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %ymm1 +# CHECK-NEXT: 1 2 1.00 vptest %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * vptest (%rax), %xmm1 +# CHECK-NEXT: 3 4 2.00 vptest %ymm0, %ymm1 +# CHECK-NEXT: 5 11 2.00 * vptest (%rax), %ymm1 # CHECK-NEXT: 1 1 0.25 vpunpckhbw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpunpckhdq %xmm0, %xmm1, %xmm2 @@ -1683,14 +1683,14 @@ vzeroupper # CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 
1 1.00 vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1 -# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1 +# CHECK-NEXT: 1 2 1.00 vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * vtestpd (%rax), %xmm1 +# CHECK-NEXT: 3 4 2.00 vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 5 11 2.00 * vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1 2 1.00 vtestps %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * vtestps (%rax), %xmm1 +# CHECK-NEXT: 3 4 2.00 vtestps %ymm0, %ymm1 +# CHECK-NEXT: 5 11 2.00 * vtestps (%rax), %ymm1 # CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1 # CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1 # CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1 @@ -1738,7 +1738,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 175.00 175.00 - - - - - 142.58 175.08 210.25 523.08 - +# CHECK-NEXT: 175.00 175.00 - - - - - 142.58 181.08 216.25 523.08 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2318,8 +2318,8 @@ vzeroupper # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vptest %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vptest (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vptest %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vptest (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vptest %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vptest (%rax), %ymm1 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpunpckhbw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 
0.25 0.25 - vpunpckhdq %xmm0, %xmm1, %xmm2 @@ -2397,12 +2397,12 @@ vzeroupper # CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vtestpd (%rax), %ymm1 # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vtestps %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vtestps (%rax), %ymm1 # CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomisd %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 1.00 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomiss %xmm0, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s index 6c0f87708798be..fb39f9471654f8 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s @@ -241,8 +241,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 11 1.00 * pmuldq (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1 +# CHECK-NEXT: 1 2 1.00 ptest %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * ptest (%rax), %xmm1 # CHECK-NEXT: 1 4 1.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 roundps $1, %xmm0, %xmm2 
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s index 755a754a2f0a62..4fd1cfa64f82bd 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s @@ -1604,10 +1604,10 @@ vzeroupper # CHECK-NEXT: 1 8 0.33 * vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsubw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.33 * vpsubw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vptest %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %ymm1 +# CHECK-NEXT: 1 3 1.00 vptest %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * vptest (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 vptest %ymm0, %ymm1 +# CHECK-NEXT: 2 10 1.00 * vptest (%rax), %ymm1 # CHECK-NEXT: 1 1 0.25 vpunpckhbw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.33 * vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpunpckhdq %xmm0, %xmm1, %xmm2 @@ -1683,14 +1683,14 @@ vzeroupper # CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1 -# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1 +# CHECK-NEXT: 1 3 1.00 vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * vtestpd (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 2 10 1.00 * vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1 3 1.00 vtestps %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * vtestps (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 vtestps %ymm0, %ymm1 +# CHECK-NEXT: 2 10 1.00 * vtestps (%rax), %ymm1 # CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1 # 
CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1 # CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s index fb26a436cf5c3f..d15850a3c1f500 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s @@ -241,8 +241,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 11 1.00 * pmuldq (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 ptest %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * ptest (%rax), %xmm1 # CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 10 1.00 * roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2 From d86108802490f66eaa8a9075da777609c1539abd Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov Date: Tue, 31 May 2022 10:15:12 +0200 Subject: [PATCH 873/908] Fix bazel build After 1c2edb026ed67ddbb30ebe3e2d2f4f17a882a881 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 07e2c4701d324d..b6af8b37502e0f 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -3216,8 +3216,10 @@ cc_library( ), includes = ["include"], deps = [ + ":IR", ":MlirLspServerSupportLib", ":PDLLAST", + ":PDLLCodeGen", ":PDLLODS", ":PDLLParser", ":Support", From 97715104c5f52c19d14b6be368f3cbb95073287f Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Tue, 31 May 2022 16:13:23 +0800 Subject: [PATCH 874/908] [BOLT][NFC] Don't over-specify the size of SmallVector This is the recommended way, should make merging profiles ever so slightly faster. 
--- bolt/tools/merge-fdata/merge-fdata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp index d0764646b0ae5c..b28ac91e1d559a 100644 --- a/bolt/tools/merge-fdata/merge-fdata.cpp +++ b/bolt/tools/merge-fdata/merge-fdata.cpp @@ -265,7 +265,7 @@ void mergeLegacyProfiles(const cl::list &Filenames) { "cannot mix profile collected in BOLT and non-BOLT deployments"); } - SmallVector Lines; + SmallVector Lines; SplitString(Buf, Lines, "\n"); for (StringRef Line : Lines) { size_t Pos = Line.rfind(" "); From 5cb14dc5a3a6bcd0f6ca1e56a36ab0d7c4b0b202 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 31 May 2022 09:28:00 +0100 Subject: [PATCH 875/908] [AArch64] Look through copy in MachineCombiner FMUL patterns. This is a small addition to D99662, which added machine combiner patterns for FMUL(DUP(..)). Due to the way these are generated from ISel, they may also be FMUL(COPY(DUP(..))), which this patch now ignores the no-op COPY in. 
Differential Revision: https://reviews.llvm.org/D126632 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +++++++ llvm/test/CodeGen/AArch64/machine-combiner-copy.ll | 11 +++++------ .../CodeGen/AArch64/machine-combiner-fmul-dup.mir | 6 +++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 699bfc2d421504..bb3f31cf87ff89 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5170,6 +5170,10 @@ static bool getFMULPatterns(MachineInstr &Root, MachineInstr *MI = nullptr; if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) MI = MRI.getUniqueVRegDef(MO.getReg()); + // Ignore No-op COPYs in FMUL(COPY(DUP(..))) + if (MI && MI->getOpcode() == TargetOpcode::COPY && + MI->getOperand(1).getReg().isVirtual()) + MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg()); if (MI && MI->getOpcode() == Opcode) { Patterns.push_back(Pattern); return true; @@ -5441,6 +5445,9 @@ genIndexedMultiply(MachineInstr &Root, MachineInstr *Dup = MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg()); + if (Dup->getOpcode() == TargetOpcode::COPY) + Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg()); + Register DupSrcReg = Dup->getOperand(1).getReg(); MRI.clearKillFlags(DupSrcReg); MRI.constrainRegClass(DupSrcReg, RC); diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll index 27767435eec2ce..5ede3c78fdcd71 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll @@ -18,16 +18,15 @@ define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef ; CHECK-NEXT: add x10, x1, #16 ; CHECK-NEXT: add x11, x0, #16 ; CHECK-NEXT: mov x12, x9 -; CHECK-NEXT: dup v1.8h, v0.h[0] ; CHECK-NEXT: .LBB0_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; 
CHECK-NEXT: ldp q2, q3, [x11, #-16] +; CHECK-NEXT: ldp q1, q2, [x11, #-16] ; CHECK-NEXT: subs x12, x12, #16 ; CHECK-NEXT: add x11, x11, #32 -; CHECK-NEXT: ldp q4, q5, [x10, #-16] -; CHECK-NEXT: fmla v4.8h, v2.8h, v1.8h -; CHECK-NEXT: fmla v5.8h, v3.8h, v0.h[0] -; CHECK-NEXT: stp q4, q5, [x10, #-16] +; CHECK-NEXT: ldp q3, q4, [x10, #-16] +; CHECK-NEXT: fmla v3.8h, v1.8h, v0.h[0] +; CHECK-NEXT: fmla v4.8h, v2.8h, v0.h[0] +; CHECK-NEXT: stp q3, q4, [x10, #-16] ; CHECK-NEXT: add x10, x10, #32 ; CHECK-NEXT: b.ne .LBB0_4 ; CHECK-NEXT: // %bb.5: // %middle.block diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir index a2a28419a872f8..108cadcefc474d 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir @@ -588,12 +588,12 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]] - ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane killed [[INSERT_SUBREG]], 0 + ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr64 = COPY [[DUPv2i32lane]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: [[FMULv2f32_:%[0-9]+]]:fpr64 = FMULv2f32 [[COPY5]], [[COPY6]] - ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2f32_]], [[COPY4]] + ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0 + ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]] ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64), align 16) ; CHECK-NEXT: B %bb.1 bb.0: From f199b2b00fdbc7234cb6362fe487259eb339dff9 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Tue, 31 May 2022 09:54:11 +0100 Subject: [PATCH 
876/908] [AMDGPU][NFC] Refine defining the offset field for GFX10+ SMEM instructions. Reviewed By: dp Differential Revision: https://reviews.llvm.org/D126662 --- llvm/lib/Target/AMDGPU/SMInstructions.td | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 1d74a6be1bdde6..348ffd1482941d 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -947,7 +947,8 @@ def : GCNPat < // GFX10. //===----------------------------------------------------------------------===// -class SMEM_Real_10Plus_common op, SM_Pseudo ps, string opName, int subtarget> : +class SMEM_Real_10Plus_common op, SM_Pseudo ps, string opName, + int subtarget, RegisterWithSubRegs sgpr_null> : SM_Real, SIMCInstr, Enc64 { let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); @@ -956,18 +957,17 @@ class SMEM_Real_10Plus_common op, SM_Pseudo ps, string opName, int subta // There are SMEM instructions that do not employ any of the offset // fields, in which case we need them to remain undefined. let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?)); + let Inst{63-57} = !if(ps.has_soffset, soffset{6-0}, + !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?)); } class SMEM_Real_gfx10 op, SM_Pseudo ps> - : SMEM_Real_10Plus_common { + : SMEM_Real_10Plus_common { let AssemblerPredicate = isGFX10Only; let DecoderNamespace = "GFX10"; let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?); let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); - // There are SMEM instructions that do not employ any of the offset - // fields, in which case we need them to remain undefined. 
- let Inst{63-57} = !if(ps.has_soffset, soffset{6-0}, - !if(ps.has_offset, !cast(SGPR_NULL_gfxpre11.HWEncoding), ?)); } multiclass SM_Real_Loads_gfx10 op, string ps, @@ -1163,15 +1163,12 @@ def SMInfoTable : GenericTable { //===----------------------------------------------------------------------===// class SMEM_Real_gfx11 op, SM_Pseudo ps, string opName = ps.Mnemonic> : - SMEM_Real_10Plus_common { + SMEM_Real_10Plus_common { let AssemblerPredicate = isGFX11Plus; let DecoderNamespace = "GFX11"; let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); - // There are SMEM instructions that do not employ any of the offset - // fields, in which case we need them to remain undefined. - let Inst{63-57} = !if(ps.has_soffset, soffset{6-0}, - !if(ps.has_offset, !cast(SGPR_NULL_gfx11plus.HWEncoding), ?)); } class SMEM_Real_Load_gfx11 op, string ps, string opName, dag offsets> : From 86caa0371859d450ed60c4b186d8ccbcc72fef5e Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Tue, 31 May 2022 11:03:33 +0200 Subject: [PATCH 877/908] Revert "Round up zero-sized symbols to 1 byte in `.debug_aranges`." This reverts commit 256a52d9aac8a9e98fbfd6a3d91090bf127cef7d (and also the follow-up commit 38eb4fe74b3843ab0d7fc1e that moved a test case to a different directory). As discussed in https://reviews.llvm.org/D126257 there is a suspicion that something was wrong with this commit as text section range was shortened to 1 byte rather than rounded up as shown in the llvm/test/DebugInfo/X86/dwarf-aranges.ll test case. 
--- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 +++----- .../CodeGen/X86/dwarf-aranges-zero-size.ll | 23 ------------------- llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll | 2 +- llvm/test/DebugInfo/X86/dwarf-aranges.ll | 2 +- 4 files changed, 5 insertions(+), 32 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/dwarf-aranges-zero-size.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 50032c887d839a..37ae84ad9bf17c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3042,17 +3042,13 @@ void DwarfDebug::emitDebugARanges() { for (const ArangeSpan &Span : List) { Asm->emitLabelReference(Span.Start, PtrSize); - // Calculate the size as being from the span start to its end. - // - // If the size is zero, then round it up to one byte. The DWARF - // specification requires that entries in this table have nonzero - // lengths. - uint64_t Size = SymSize[Span.Start]; - if (Size != 0 && Span.End) { + // Calculate the size as being from the span start to it's end. + if (Span.End) { Asm->emitLabelDifference(Span.End, Span.Start, PtrSize); } else { // For symbols without an end marker (e.g. common), we // write a single arange entry containing just that one symbol. + uint64_t Size = SymSize[Span.Start]; if (Size == 0) Size = 1; diff --git a/llvm/test/CodeGen/X86/dwarf-aranges-zero-size.ll b/llvm/test/CodeGen/X86/dwarf-aranges-zero-size.ll deleted file mode 100644 index 9a8be3c8f91d20..00000000000000 --- a/llvm/test/CodeGen/X86/dwarf-aranges-zero-size.ll +++ /dev/null @@ -1,23 +0,0 @@ -; Ensures that the AsmPrinter doesn't emit zero-sized symbols into `.debug_aranges`. 
-; -; RUN: llc --generate-arange-section < %s | FileCheck %s -; CHECK: .section .debug_aranges -; CHECK: .quad EXAMPLE -; CHECK-NEXT: .quad 1 -; CHECK: .section - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@EXAMPLE = constant <{ [0 x i8] }> zeroinitializer, align 1, !dbg !0 - -!llvm.module.flags = !{!3} -!llvm.dbg.cu = !{!4} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "EXAMPLE", linkageName: "EXAMPLE", scope: null, file: null, line: 161, type: !2, isLocal: false, isDefinition: true, align: 1) -!2 = !DIBasicType(name: "()", encoding: DW_ATE_unsigned) -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !5, producer: "rustc", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: null, globals: !6) -!5 = !DIFile(filename: "foo", directory: "") -!6 = !{!0} diff --git a/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll b/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll index 51d2c4a20589b9..98922f1b7d088f 100644 --- a/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll +++ b/llvm/test/DebugInfo/MSP430/dwarf-basics-v5.ll @@ -98,7 +98,7 @@ ; CHECK: .debug_aranges contents: ; CHECK-NEXT: Address Range Header: length = 0x{{.*}}, format = DWARF32, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x02, seg_size = 0x00 -; CHECK-NEXT: [0x0000, 0x0001) +; CHECK-NEXT: [0x0000, 0x0006) ; CHECK: .debug_addr contents: ; CHECK-NEXT: Address table header: length = 0x{{.*}}, format = DWARF32, version = 0x0005, addr_size = 0x02, seg_size = 0x00 diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges.ll b/llvm/test/DebugInfo/X86/dwarf-aranges.ll index c43e9eea8b710f..5358a30c6a06af 100644 --- a/llvm/test/DebugInfo/X86/dwarf-aranges.ll +++ b/llvm/test/DebugInfo/X86/dwarf-aranges.ll @@ -22,7 +22,7 @@ ; - it should have made one span covering all functions in this CU. 
; CHECK-NEXT: .quad .Lfunc_begin0 -; CHECK-NEXT: .quad 1 +; CHECK-NEXT: .quad .Lsec_end2-.Lfunc_begin0 ; -- finish -- ; CHECK-NEXT: # ARange terminator From 5a2e640eb794fffeba6afa259058115d62e7b32e Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 31 May 2022 10:12:30 +0100 Subject: [PATCH 878/908] [RISCV][NFC] Adjust some comments in RISCVInsertVSETVLI Capitalize the first letter of comments like the others, and a few other tweaks. --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 5a0b242dc889a2..f3a22852c7422b 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1040,7 +1040,7 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { } // If we weren't able to prove a vsetvli was directly unneeded, it might still -// be/ unneeded if the AVL is a phi node where all incoming values are VL +// be unneeded if the AVL is a phi node where all incoming values are VL // outputs from the last VSETVLI in their respective basic blocks. bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB) const { @@ -1286,8 +1286,8 @@ void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { if (!Info.hasAVLImm()) // VLMAX is always the same value. - // TODO: Could extend to other registers by looking at the associated - // vreg def placement. + // TODO: Could extend to other registers by looking at the associated vreg + // def placement. return RISCV::X0 == Info.getAVLReg(); unsigned AVL = Info.getAVLImm(); @@ -1330,12 +1330,12 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { } } - // unreachable, single pred, or full redundancy. Note that FRE - // is handled by phase 3. 
+ // Unreachable, single pred, or full redundancy. Note that FRE is handled by + // phase 3. if (!UnavailablePred || !AvailableInfo.isValid()) return; - // critical edge - TODO: consider splitting? + // Critical edge - TODO: consider splitting? if (UnavailablePred->succ_size() != 1) return; From b501503ca0bc5562e5ef4eb736ab718fd29c7bc3 Mon Sep 17 00:00:00 2001 From: kiranchandramohan Date: Tue, 31 May 2022 09:25:00 +0000 Subject: [PATCH 879/908] [Flang][OpenMP] Fix for unstructured regions in OpenMP constructs - 2 The following changes are made for OpenMP operations with unstructured region, 1. For combined constructs the outer operation is considered a structured region and the inner one as the unstructured. 2. Added a condition to ensure that we create new blocks only once for nested unstructured OpenMP constructs. Tests are added for checking the structure of the CFG. Note: This is part of upstreaming from the fir-dev branch of https://github.com/flang-compiler/f18-llvm-project. Code originally reviewed at https://github.com/flang-compiler/f18-llvm-project/pull/1394. 
Reviewed By: vdonaldson, shraiysh, peixin Differential Revision: https://reviews.llvm.org/D126375 --- flang/lib/Lower/OpenMP.cpp | 15 +- flang/test/Lower/OpenMP/omp-unstructured.f90 | 200 +++++++++++++++++++ 2 files changed, 209 insertions(+), 6 deletions(-) diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 04cd39adbe8b0b..85a54ebc412ca5 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -156,7 +156,7 @@ void createEmptyRegionBlocks( "expected terminator op"); } } - if (eval.hasNestedEvaluations()) + if (!eval.isDirective() && eval.hasNestedEvaluations()) createEmptyRegionBlocks(firOpBuilder, eval.getNestedEvaluations()); } } @@ -179,7 +179,7 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList *clauses = nullptr, const SmallVector &args = {}, bool outerCombined = false) { - auto &firOpBuilder = converter.getFirOpBuilder(); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // If an argument for the region is provided then create the block with that // argument. Also update the symbol's address with the mlir argument value. // e.g. For loops the argument is the induction variable. And all further @@ -205,13 +205,15 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, } else { firOpBuilder.createBlock(&op.getRegion()); } - auto &block = op.getRegion().back(); - firOpBuilder.setInsertionPointToStart(&block); + mlir::Block &block = op.getRegion().back(); + firOpBuilder.setInsertionPointToEnd(&block); - if (eval.lowerAsUnstructured()) + // If it is an unstructured region and is not the outer region of a combined + // construct, create empty blocks for all evaluations. + if (eval.lowerAsUnstructured() && !outerCombined) createEmptyRegionBlocks(firOpBuilder, eval.getNestedEvaluations()); - // Ensure the block is well-formed by inserting terminators. + // Insert the terminator. 
if constexpr (std::is_same_v) { mlir::ValueRange results; firOpBuilder.create(loc, results); @@ -221,6 +223,7 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, // Reset the insertion point to the start of the first block. firOpBuilder.setInsertionPointToStart(&block); + // Handle privatization. Do not privatize if this is the outer operation. if (clauses && !outerCombined) privatizeVars(converter, *clauses); diff --git a/flang/test/Lower/OpenMP/omp-unstructured.f90 b/flang/test/Lower/OpenMP/omp-unstructured.f90 index 68f944b91abea7..7272d0206f58f2 100644 --- a/flang/test/Lower/OpenMP/omp-unstructured.f90 +++ b/flang/test/Lower/OpenMP/omp-unstructured.f90 @@ -105,9 +105,209 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs !$omp end parallel end +! CHECK-LABEL: func @_QPss4{{.*}} { +! CHECK: omp.parallel { +! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: cond_br %{{.*}}, ^bb1, ^bb2 +! CHECK: ^bb1: +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[ARG]]) +! CHECK: br ^bb2 +! CHECK: ^bb2: +! CHECK-NEXT: omp.yield +! CHECK-NEXT: } +! CHECK: omp.terminator +! CHECK-NEXT:} +subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs + !$omp parallel + do i = 1, 3 + !$omp do + do j = 1, 3 + if (j .eq. n) cycle + print*, 'ss4', j + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss5() { +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]] +! CHECK: ^[[BB3]]: +! CHECK: br ^[[BB1]] +! CHECK: ^[[BB4]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +subroutine ss5() ! EXIT inside OpenMP wsloop (inside parallel) + integer :: x + !$omp parallel private(x) + !$omp do + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. 
n) exit + x = k + x = x + k + enddo + x = j - 222 + enddo + !$omp end do + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss6() { +! CHECK: omp.parallel { +! CHECK: br ^[[BB1_OUTER:.*]] +! CHECK: ^[[BB1_OUTER]]: +! CHECK: cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]] +! CHECK: ^[[BB2_OUTER]]: +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]] +! CHECK: ^[[BB3]]: +! CHECK: br ^[[BB1]] +! CHECK: ^[[BB4]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: br ^[[BB1_OUTER]] +! CHECK: ^[[BB3_OUTER]]: +! CHECK: omp.terminator +! CHECK: } +subroutine ss6() ! EXIT inside OpenMP wsloop in a do loop (inside parallel) + integer :: x + !$omp parallel private(x) + do i = 1, 3 + !$omp do + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + x = j - 222 + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss7() { +! CHECK: br ^[[BB1_OUTER:.*]] +! CHECK: ^[[BB1_OUTER]]: +! CHECK: cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]] +! CHECK-NEXT: ^[[BB2_OUTER:.*]]: +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK-NEXT: ^[[BB1]]: +! CHECK: cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]] +! CHECK-NEXT: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]] +! CHECK-NEXT: ^[[BB3]]: +! CHECK: br ^bb1 +! CHECK-NEXT: ^[[BB4]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: br ^[[BB1_OUTER]] +! CHECK-NEXT: ^[[BB3_OUTER]]: +! CHECK-NEXT: return +subroutine ss7() ! EXIT inside OpenMP parallel do (inside do loop) + integer :: x + do i = 1, 3 + !$omp parallel do private(x) + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + enddo + !$omp end parallel do + enddo +end + +! CHECK-LABEL: func @_QPss8() { +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! 
CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]] +! CHECK: ^[[BB3]]: +! CHECK: br ^[[BB1]] +! CHECK: ^[[BB4]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +subroutine ss8() ! EXIT inside OpenMP parallel do + integer :: x + !$omp parallel do private(x) + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + enddo + !$omp end parallel do +end + +! CHECK-LABEL: func @_QPss9() { +! CHECK: omp.parallel { +! CHECK-NEXT: omp.parallel { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]] +! CHECK-NEXT: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]] +! CHECK-NEXT: ^[[BB3]]: +! CHECK: br ^[[BB1]] +! CHECK-NEXT: ^[[BB4]]: +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK: } +subroutine ss9() ! EXIT inside OpenMP parallel (inside parallel) + integer :: x + !$omp parallel + !$omp parallel private(x) + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + end do + !$omp end parallel + !$omp end parallel +end + ! CHECK-LABEL: func @_QQmain program p call ss1(2) call ss2(2) call ss3(2) + call ss4(2) + call ss5() + call ss6() + call ss7() + call ss8() + call ss9() end From 872d69e5d4e251bd70c6cf5dbf1b3ea34f976aa2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 31 May 2022 11:51:19 +0200 Subject: [PATCH 880/908] [InstCombine] Fix inbounds preservation when merging GEPs (PR55722) Even if the total offset is inbounds, we might represent it by first performing a large negative offset and then a small positive one. With inbounds semantics as currently specified, each offset must be inbounds individually, not just the overall offset of the GEP. Fix this by checking that the sign of all offsets is the same. Fixes https://github.com/llvm/llvm-project/issues/55722.
--- .../Transforms/InstCombine/InstructionCombining.cpp | 11 +++++++++-- .../InstCombine/gep-merge-constant-indices.ll | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 4e8f3af2b5beff..07f44036cce5aa 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2035,13 +2035,20 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) return nullptr; + bool IsInBounds = isMergedGEPInBounds(*Src, *cast(&GEP)); SmallVector Indices; append_range(Indices, drop_end(Src->indices(), Src->getNumIndices() - NumVarIndices)); - for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) + for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) { Indices.push_back(ConstantInt::get(GEP.getContext(), Idx)); + // Even if the total offset is inbounds, we may end up representing it + // by first performing a larger negative offset, and then a smaller + // positive one. The large negative offset might go out of bounds. Only + // preserve inbounds if all signs are the same. + IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative(); + } - return isMergedGEPInBounds(*Src, *cast(&GEP)) + return IsInBounds ? 
GetElementPtrInst::CreateInBounds(Src->getSourceElementType(), Src->getOperand(0), Indices, GEP.getName()) diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll index af79faf265febd..b82c366ca9e94c 100644 --- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll @@ -106,7 +106,7 @@ define ptr @struct1(ptr %p) { ; result = &((struct.A*) p - 1).member1 define ptr @struct2(ptr %p) { ; CHECK-LABEL: @struct2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[P:%.*]], i64 -1, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[P:%.*]], i64 -1, i32 1 ; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds %struct.A, ptr %p, i64 0, i32 1 From 42c17073fcbafd397b66ab923700279180846af4 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Mon, 30 May 2022 09:38:33 +0000 Subject: [PATCH 881/908] [mlir] Support import llvm intrinsics. This patch adds support for converting an LLVM intrinsic call to the corresponding op. It still leaves some intrinsics to be handled specially.
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126639 --- .../mlir/Dialect/LLVMIR/CMakeLists.txt | 1 + .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 1 + mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 31 + mlir/test/Target/LLVMIR/Import/intrinsic.ll | 728 ++++++++++++++++++ .../tools/mlir-tblgen/LLVMIRConversionGen.cpp | 19 + 5 files changed, 780 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/Import/intrinsic.ll diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt index 4ff345b73ca7a3..349585108328de 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt @@ -37,6 +37,7 @@ add_public_tablegen_target(MLIRLLVMConversionsIncGen) set(LLVM_TARGET_DEFINITIONS LLVMIntrinsicOps.td) mlir_tablegen(LLVMIntrinsicConversions.inc -gen-llvmir-conversions) +mlir_tablegen(LLVMIntrinsicToLLVMIROpPairs.inc -gen-llvmintrinsic-to-llvmirop-pairs) add_public_tablegen_target(MLIRLLVMIntrinsicConversionsIncGen) add_mlir_dialect(NVVMOps nvvm) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 2fe1130c6bb734..f984493da7824f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -286,6 +286,7 @@ class LLVM_IntrOpBasegetModule(); llvm::Function *fn = llvm::Intrinsic::getDeclaration( diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index eced5896a167ec..53e0a3138489cd 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Support/Error.h" @@ -654,6 +655,15 @@ static StringRef 
lookupOperationNameFromOpcode(unsigned opcode) { return opcMap.lookup(opcode); } +/// Return the MLIR OperationName for the given LLVM intrinsic ID. +static StringRef lookupOperationNameFromIntrinsicID(unsigned id) { + // Maps from LLVM intrinsic ID to MLIR OperationName. + static const DenseMap intrMap = { +#include "mlir/Dialect/LLVMIR/LLVMIntrinsicToLLVMIROpPairs.inc" + }; + return intrMap.lookup(id); +} + static ICmpPredicate getICmpPredicate(llvm::CmpInst::Predicate p) { switch (p) { default: @@ -952,6 +962,20 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) { } Operation *op; if (llvm::Function *callee = ci->getCalledFunction()) { + // For all intrinsics, try to generate the corresponding op. + if (callee->isIntrinsic()) { + auto id = callee->getIntrinsicID(); + StringRef opName = lookupOperationNameFromIntrinsicID(id); + if (!opName.empty()) { + OperationState state(loc, opName); + state.addOperands(ops); + state.addTypes(tys); + Operation *op = b.create(state); + if (!inst->getType()->isVoidTy()) + instMap[inst] = op->getResult(0); + return success(); + } + } op = b.create( loc, tys, SymbolRefAttr::get(b.getContext(), callee->getName()), ops); } else { @@ -1139,6 +1163,13 @@ LogicalResult Importer::processFunction(llvm::Function *f) { if (!functionType) return failure(); + if (f->isIntrinsic()) { + StringRef opName = lookupOperationNameFromIntrinsicID(f->getIntrinsicID()); + // Skip the intrinsic declaration if we could find a corresponding op.
+ if (!opName.empty()) + return success(); + } + bool dsoLocal = f->hasLocalLinkage(); CConv cconv = convertCConvFromLLVM(f->getCallingConv()); diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll new file mode 100644 index 00000000000000..82b3bc6baa8dda --- /dev/null +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -0,0 +1,728 @@ +; RUN: mlir-translate -import-llvm %s | FileCheck %s + +define void @intrinsics() { + ret void +} + +; CHECK-LABEL: llvm.func @fmuladd_test +define void @fmuladd_test(float %0, float %1, <8 x float> %2, i8* %3) { + ; CHECK: "llvm.intr.fmuladd"(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 + %5 = call float @llvm.fmuladd.f32(float %0, float %1, float %0) + ; CHECK: "llvm.intr.fmuladd"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %6 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) + ; CHECK: "llvm.intr.fma"(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 + %7 = call float @llvm.fma.f32(float %0, float %1, float %0) + ; CHECK: "llvm.intr.fma"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %8 = call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) + ; CHECK: "llvm.intr.prefetch"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i32, i32, i32) -> () + call void @llvm.prefetch.p0i8(i8* %3, i32 0, i32 3, i32 1) + ret void +} + +; CHECK-LABEL: llvm.func @exp_test +define void @exp_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.exp"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.exp.f32(float %0) + ; CHECK: "llvm.intr.exp"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.exp.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @exp2_test +define void @exp2_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.exp2"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.exp2.f32(float %0) 
+ ; CHECK: "llvm.intr.exp2"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.exp2.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @log_test +define void @log_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.log"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.log.f32(float %0) + ; CHECK: "llvm.intr.log"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.log.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @log10_test +define void @log10_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.log10"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.log10.f32(float %0) + ; CHECK: "llvm.intr.log10"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.log10.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @log2_test +define void @log2_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.log2"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.log2.f32(float %0) + ; CHECK: "llvm.intr.log2"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.log2.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @fabs_test +define void @fabs_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.fabs"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.fabs.f32(float %0) + ; CHECK: "llvm.intr.fabs"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @sqrt_test +define void @sqrt_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.sqrt.f32(float %0) + ; CHECK: "llvm.intr.sqrt"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @ceil_test +define void @ceil_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.ceil"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.ceil.f32(float %0) + ; CHECK: 
"llvm.intr.ceil"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @floor_test +define void @floor_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.floor"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.floor.f32(float %0) + ; CHECK: "llvm.intr.floor"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @cos_test +define void @cos_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 + %3 = call float @llvm.cos.f32(float %0) + ; CHECK: "llvm.intr.cos"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + %4 = call <8 x float> @llvm.cos.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @copysign_test +define void @copysign_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.copysign"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + %5 = call float @llvm.copysign.f32(float %0, float %1) + ; CHECK: "llvm.intr.copysign"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %6 = call <8 x float> @llvm.copysign.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} +; CHECK-LABEL: llvm.func @pow_test +define void @pow_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.pow"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + %5 = call float @llvm.pow.f32(float %0, float %1) + ; CHECK: "llvm.intr.pow"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %6 = call <8 x float> @llvm.pow.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} +; CHECK-LABEL: llvm.func @bitreverse_test +define void @bitreverse_test(i32 %0, <8 x i32> %1) { + ; CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (i32) -> i32 + %3 = call i32 @llvm.bitreverse.i32(i32 %0) + ; CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> + %4 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %1) + ret void +} +; 
CHECK-LABEL: llvm.func @ctlz_test +define void @ctlz_test(i32 %0, <8 x i32> %1) { + ; CHECK-DAG: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval2]]) : (i32, i1) -> i32 + %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) + ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + %4 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %1, i1 false) + ret void +} +; CHECK-LABEL: llvm.func @cttz_test +define void @cttz_test(i32 %0, <8 x i32> %1) { + ; CHECK-DAG: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval2]]) : (i32, i1) -> i32 + %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false) + ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + %4 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %1, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @ctpop_test +define void @ctpop_test(i32 %0, <8 x i32> %1) { + ; CHECK: "llvm.intr.ctpop"(%{{.*}}) : (i32) -> i32 + %3 = call i32 @llvm.ctpop.i32(i32 %0) + ; CHECK: "llvm.intr.ctpop"(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> + %4 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %1) + ret void +} + +; CHECK-LABEL: llvm.func @maximum_test +define void @maximum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.maximum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + %5 = call float @llvm.maximum.f32(float %0, float %1) + ; CHECK: "llvm.intr.maximum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %6 = call <8 x float> @llvm.maximum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @minimum_test +define void @minimum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.minimum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + %5 = call float 
@llvm.minimum.f32(float %0, float %1) + ; CHECK: "llvm.intr.minimum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %6 = call <8 x float> @llvm.minimum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @maxnum_test +define void @maxnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + %5 = call float @llvm.maxnum.f32(float %0, float %1) + ; CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %6 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @minnum_test +define void @minnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + %5 = call float @llvm.minnum.f32(float %0, float %1) + ; CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %6 = call <8 x float> @llvm.minnum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @smax_test +define void @smax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + %5 = call i32 @llvm.smax.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %6 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @smin_test +define void @smin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + %5 = call i32 @llvm.smin.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %6 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @umax_test +define void @umax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; 
CHECK: "llvm.intr.umax"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + %5 = call i32 @llvm.umax.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.umax"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %6 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @umin_test +define void @umin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.umin"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + %5 = call i32 @llvm.umin.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.umin"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %6 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} +; CHECK-LABEL: llvm.func @vector_reductions +define void @vector_reductions(float %0, <8 x float> %1, <8 x i32> %2) { + ; CHECK: "llvm.intr.vector.reduce.add"(%{{.*}}) : (vector<8xi32>) -> i32 + %4 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %2) + ; CHECK: "llvm.intr.vector.reduce.and"(%{{.*}}) : (vector<8xi32>) -> i32 + %5 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %2) + ; CHECK: "llvm.intr.vector.reduce.fmax"(%{{.*}}) : (vector<8xf32>) -> f32 + %6 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %1) + ; CHECK: "llvm.intr.vector.reduce.fmin"(%{{.*}}) : (vector<8xf32>) -> f32 + %7 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %1) + ; CHECK: "llvm.intr.vector.reduce.mul"(%{{.*}}) : (vector<8xi32>) -> i32 + %8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %2) + ; CHECK: "llvm.intr.vector.reduce.or"(%{{.*}}) : (vector<8xi32>) -> i32 + %9 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %2) + ; CHECK: "llvm.intr.vector.reduce.smax"(%{{.*}}) : (vector<8xi32>) -> i32 + %10 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %2) + ; CHECK: "llvm.intr.vector.reduce.smin"(%{{.*}}) : (vector<8xi32>) -> i32 + %11 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %2) + ; CHECK: "llvm.intr.vector.reduce.umax"(%{{.*}}) : (vector<8xi32>) -> i32 + %12 = call i32 
@llvm.vector.reduce.umax.v8i32(<8 x i32> %2) + ; CHECK: "llvm.intr.vector.reduce.umin"(%{{.*}}) : (vector<8xi32>) -> i32 + %13 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %2) + ; TODO: vector reduce fadd and fmul should be handled specially. + %14 = call float @llvm.vector.reduce.fadd.v8f32(float %0, <8 x float> %1) + %15 = call float @llvm.vector.reduce.fmul.v8f32(float %0, <8 x float> %1) + %16 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %0, <8 x float> %1) + %17 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float %0, <8 x float> %1) + ; CHECK: "llvm.intr.vector.reduce.xor"(%{{.*}}) : (vector<8xi32>) -> i32 + %18 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %2) + ret void +} + +; TODO: matrix intrinsic should be handled specially. +define void @matrix_intrinsics(<64 x float> %0, <48 x float> %1, float* %2, i64 %3) { + %5 = call <12 x float> @llvm.matrix.multiply.v12f32.v64f32.v48f32(<64 x float> %0, <48 x float> %1, i32 4, i32 16, i32 3) + %6 = call <48 x float> @llvm.matrix.transpose.v48f32(<48 x float> %1, i32 3, i32 16) + %7 = call <48 x float> @llvm.matrix.column.major.load.v48f32.i64(float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) + call void @llvm.matrix.column.major.store.v48f32.i64(<48 x float> %7, float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) + ret void +} + +; CHECK-LABEL: llvm.func @get_active_lane_mask +define <7 x i1> @get_active_lane_mask(i64 %0, i64 %1) { + ; CHECK: llvm.intr.get.active.lane.mask %{{.*}}, %{{.*}} : i64, i64 to vector<7xi1> + %3 = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64 %0, i64 %1) + ret <7 x i1> %3 +} + +; TODO: masked load store intrinsics should be handled specially. 
+define void @masked_load_store_intrinsics(<7 x float>* %0, <7 x i1> %1) { + %3 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %0, i32 1, <7 x i1> %1, <7 x float> undef) + %4 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %0, i32 1, <7 x i1> %1, <7 x float> %3) + call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> %4, <7 x float>* %0, i32 1, <7 x i1> %1) + ret void +} + +; TODO: masked gather scatter intrinsics should be handled specially. +define void @masked_gather_scatter_intrinsics(<7 x float*> %0, <7 x i1> %1) { + %3 = call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %0, i32 1, <7 x i1> %1, <7 x float> undef) + %4 = call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %0, i32 1, <7 x i1> %1, <7 x float> %3) + call void @llvm.masked.scatter.v7f32.v7p0f32(<7 x float> %4, <7 x float*> %0, i32 1, <7 x i1> %1) + ret void +} + +; CHECK-LABEL: llvm.func @masked_expand_compress_intrinsics +define void @masked_expand_compress_intrinsics(float* %0, <7 x i1> %1, <7 x float> %2) { + ; CHECK: %[[val1:.+]] = "llvm.intr.masked.expandload"(%{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<7xi1>, vector<7xf32>) -> vector<7xf32> + %4 = call <7 x float> @llvm.masked.expandload.v7f32(float* %0, <7 x i1> %1, <7 x float> %2) + ; CHECK: "llvm.intr.masked.compressstore"(%[[val1]], %{{.*}}, %{{.*}}) : (vector<7xf32>, !llvm.ptr, vector<7xi1>) -> () + call void @llvm.masked.compressstore.v7f32(<7 x float> %4, float* %0, <7 x i1> %1) + ret void +} + +; CHECK-LABEL: llvm.func @memcpy_test +define void @memcpy_test(i32 %0, i8* %1, i8* %2) { + ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: %[[constant:.+]] = llvm.mlir.constant(10 : i64) : i64 + ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %[[falseval2]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 %0, i1 false) + ; CHECK: 
"llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %[[constant]], %[[falseval1]]) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %1, i8* %2, i64 10, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @memmove_test +define void @memmove_test(i32 %0, i8* %1, i8* %2) { + ; CHECK: %[[falseval:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.memmove"(%{{.*}}, %{{.*}}, %{{.*}}, %[[falseval]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + call void @llvm.memmove.p0i8.p0i8.i32(i8* %1, i8* %2, i32 %0, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @memset_test +define void @memset_test(i32 %0, i8* %1, i8 %2) { + ; CHECK: %[[falseval:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.memset"(%{{.*}}, %{{.*}}, %{{.*}}, %[[falseval]]) : (!llvm.ptr, i8, i32, i1) -> () + call void @llvm.memset.p0i8.i32(i8* %1, i8 %2, i32 %0, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @sadd_with_overflow_test +define void @sadd_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.sadd.with.overflow"(%{{.*}}, %{{.*}}) : (i32, i32) -> !llvm.struct<(i32, i1)> + %5 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.sadd.with.overflow"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> + %6 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @uadd_with_overflow_test +define void @uadd_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.uadd.with.overflow"(%{{.*}}, %{{.*}}) : (i32, i32) -> !llvm.struct<(i32, i1)> + %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.uadd.with.overflow"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> + %6 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> %2, <8 x i32> 
%3) + ret void +} + +; CHECK-LABEL: llvm.func @ssub_with_overflow_test +define void @ssub_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.ssub.with.overflow"(%{{.*}}, %{{.*}}) : (i32, i32) -> !llvm.struct<(i32, i1)> + %5 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.ssub.with.overflow"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> + %6 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @usub_with_overflow_test +define void @usub_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.usub.with.overflow"(%{{.*}}, %{{.*}}) : (i32, i32) -> !llvm.struct<(i32, i1)> + %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.usub.with.overflow"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> + %6 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @smul_with_overflow_test +define void @smul_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.smul.with.overflow"(%{{.*}}, %{{.*}}) : (i32, i32) -> !llvm.struct<(i32, i1)> + %5 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %0, i32 %1) + ; CHECK: "llvm.intr.smul.with.overflow"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> + %6 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @umul_with_overflow_test +define void @umul_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.umul.with.overflow"(%{{.*}}, %{{.*}}) : (i32, i32) -> !llvm.struct<(i32, i1)> + %5 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %0, i32 %1) + ; CHECK: 
"llvm.intr.umul.with.overflow"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> + %6 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; TODO : support token type. +; define void @coro_id(i32 %0, i8* %1) { +; %3 = call token @llvm.coro.id(i32 %0, i8* %1, i8* %1, i8* null) +; ret void +; } + +; TODO : support token type. +; define void @coro_begin(i32 %0, i8* %1) { +; %3 = call token @llvm.coro.id(i32 %0, i8* %1, i8* %1, i8* null) +; %4 = call i8* @llvm.coro.begin(token %3, i8* %1) +; ret void +; } + +; CHECK-LABEL: llvm.func @coro_size() +define void @coro_size() { + ; CHECK: llvm.intr.coro.size : i64 + %1 = call i64 @llvm.coro.size.i64() + ; CHECK: llvm.intr.coro.size : i32 + %2 = call i32 @llvm.coro.size.i32() + ret void +} +; CHECK-LABEL: llvm.func @coro_align() +define void @coro_align() { + ; CHECK: llvm.intr.coro.align : i64 + %1 = call i64 @llvm.coro.align.i64() + ; CHECK: llvm.intr.coro.align : i32 + %2 = call i32 @llvm.coro.align.i32() + ret void +} + +; TODO : support token type. +; define void @coro_save(i8* %0) { +; %2 = call token @llvm.coro.save(i8* %0) +; ret void +; } + +; TODO : support token type. +; define void @coro_suspend(i32 %0, i1 %1, i8* %2) { +; %4 = call token @llvm.coro.id(i32 %0, i8* %2, i8* %2, i8* null) +; %5 = call i8 @llvm.coro.suspend(token %4, i1 %1) +; ret void +; } + +; CHECK-LABEL: llvm.func @coro_end +define void @coro_end(i8* %0, i1 %1) { + ; CHECK: llvm.intr.coro.end + call i1 @llvm.coro.end(i8* %0, i1 %1) + ret void +} + +; TODO : support token type. 
+; define void @coro_free(i32 %0, i8* %1) { +; %3 = call token @llvm.coro.id(i32 %0, i8* %1, i8* %1, i8* null) +; %4 = call i8* @llvm.coro.free(token %3, i8* %1) +; ret void +; } + +; CHECK-LABEL: llvm.func @coro_resume +define void @coro_resume(i8* %0) { + ; CHECK: llvm.intr.coro.resume %{{.*}} + call void @llvm.coro.resume(i8* %0) + ret void +} + +; CHECK-LABEL: llvm.func @eh_typeid_for +define void @eh_typeid_for(i8* %0) { + ; CHECK: llvm.intr.eh.typeid.for %{{.*}} : i32 + %2 = call i32 @llvm.eh.typeid.for(i8* %0) + ret void +} + +; CHECK-LABEL: llvm.func @stack_save() { +define void @stack_save() { + ; CHECK: llvm.intr.stacksave : !llvm.ptr + %1 = call i8* @llvm.stacksave() + ret void +} + +; CHECK-LABEL: llvm.func @stack_restore +define void @stack_restore(i8* %0) { + ; CHECK: llvm.intr.stackrestore %{{.*}} + call void @llvm.stackrestore(i8* %0) + ret void +} + +; CHECK-LABEL: llvm.func @vector_predication_intrinsics +define void @vector_predication_intrinsics(<8 x i32> %0, <8 x i32> %1, <8 x float> %2, <8 x float> %3, <8 x i64> %4, <8 x double> %5, <8 x i32*> %6, i32 %7, float %8, i32* %9, float* %10, <8 x i1> %11, i32 %12) { + ; CHECK: "llvm.intr.vp.add"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %14 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.sub"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %15 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.mul"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %16 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.sdiv"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %17 = call <8 x i32> 
@llvm.vp.sdiv.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.udiv"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %18 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.srem"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %19 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.urem"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %20 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.ashr"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %21 = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.lshr"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %22 = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.shl"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %23 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.or"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %24 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.and"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %25 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.xor"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, 
vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + %26 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fadd"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + %27 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fsub"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + %28 = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fmul"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + %29 = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fdiv"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + %30 = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.frem"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + %31 = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fneg"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + %32 = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %2, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fma"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + %33 = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %2, <8 x float> %3, <8 x float> %3, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.add"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %34 = call i32 @llvm.vp.reduce.add.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 
%12) + ; CHECK: "llvm.intr.vp.reduce.mul"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %35 = call i32 @llvm.vp.reduce.mul.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.and"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %36 = call i32 @llvm.vp.reduce.and.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.or"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %37 = call i32 @llvm.vp.reduce.or.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.xor"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %38 = call i32 @llvm.vp.reduce.xor.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.smax"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %39 = call i32 @llvm.vp.reduce.smax.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.smin"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %40 = call i32 @llvm.vp.reduce.smin.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.umax"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %41 = call i32 @llvm.vp.reduce.umax.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.umin"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + %42 = call i32 @llvm.vp.reduce.umin.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.fadd"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (f32, vector<8xf32>, vector<8xi1>, i32) -> f32 + %43 = call float @llvm.vp.reduce.fadd.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.fmul"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (f32, 
vector<8xf32>, vector<8xi1>, i32) -> f32 + %44 = call float @llvm.vp.reduce.fmul.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.fmax"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (f32, vector<8xf32>, vector<8xi1>, i32) -> f32 + %45 = call float @llvm.vp.reduce.fmax.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.reduce.fmin"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (f32, vector<8xf32>, vector<8xi1>, i32) -> f32 + %46 = call float @llvm.vp.reduce.fmin.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.select"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi1>, vector<8xi32>, vector<8xi32>, i32) -> vector<8xi32> + %47 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %11, <8 x i32> %0, <8 x i32> %1, i32 %12) + ; CHECK: "llvm.intr.vp.merge"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi1>, vector<8xi32>, vector<8xi32>, i32) -> vector<8xi32> + %48 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %11, <8 x i32> %0, <8 x i32> %1, i32 %12) + ; CHECK: "llvm.intr.vp.store"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, !llvm.ptr, vector<8xi1>, i32) -> () + call void @llvm.vp.store.v8i32.p0i32(<8 x i32> %0, i32* %9, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.load"(%{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<8xi1>, i32) -> vector<8xi32> + %49 = call <8 x i32> @llvm.vp.load.v8i32.p0i32(i32* %9, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.experimental.vp.strided.store"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, !llvm.ptr, i32, vector<8xi1>, i32) -> () + call void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32(<8 x i32> %0, i32* %9, i32 %7, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.experimental.vp.strided.load"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i32, vector<8xi1>, i32) -> vector<8xi32> + %50 = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32(i32* %9, i32 %7, <8 x i1> %11, i32 %12) + ; CHECK: 
"llvm.intr.vp.trunc"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi64>, vector<8xi1>, i32) -> vector<8xi32> + %51 = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %4, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.zext"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi1>, i32) -> vector<8xi64> + %52 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.sext"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi1>, i32) -> vector<8xi64> + %53 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> %0, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fptrunc"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf64>, vector<8xi1>, i32) -> vector<8xf32> + %54 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %5, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fpext"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xi1>, i32) -> vector<8xf64> + %55 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %2, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fptoui"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf64>, vector<8xi1>, i32) -> vector<8xi64> + %56 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double> %5, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.fptosi"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf64>, vector<8xi1>, i32) -> vector<8xi64> + %57 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double> %5, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.ptrtoint"(%{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.vec<8 x ptr>, vector<8xi1>, i32) -> vector<8xi64> + %58 = call <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*> %6, <8 x i1> %11, i32 %12) + ; CHECK: "llvm.intr.vp.inttoptr"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi64>, vector<8xi1>, i32) -> !llvm.vec<8 x ptr> + %59 = call <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64> %4, <8 x i1> %11, i32 %12) + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) +declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare 
float @llvm.fma.f32(float, float, float) +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare void @llvm.prefetch.p0i8(i8* nocapture readonly, i32 immarg, i32 immarg, i32) +declare float @llvm.exp.f32(float) +declare <8 x float> @llvm.exp.v8f32(<8 x float>) +declare float @llvm.exp2.f32(float) +declare <8 x float> @llvm.exp2.v8f32(<8 x float>) +declare float @llvm.log.f32(float) +declare <8 x float> @llvm.log.v8f32(<8 x float>) +declare float @llvm.log10.f32(float) +declare <8 x float> @llvm.log10.v8f32(<8 x float>) +declare float @llvm.log2.f32(float) +declare <8 x float> @llvm.log2.v8f32(<8 x float>) +declare float @llvm.fabs.f32(float) +declare <8 x float> @llvm.fabs.v8f32(<8 x float>) +declare float @llvm.sqrt.f32(float) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare float @llvm.ceil.f32(float) +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) +declare float @llvm.floor.f32(float) +declare <8 x float> @llvm.floor.v8f32(<8 x float>) +declare float @llvm.cos.f32(float) +declare <8 x float> @llvm.cos.v8f32(<8 x float>) +declare float @llvm.copysign.f32(float, float) +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) +declare float @llvm.pow.f32(float, float) +declare <8 x float> @llvm.pow.v8f32(<8 x float>, <8 x float>) +declare i32 @llvm.bitreverse.i32(i32) +declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>) +declare i32 @llvm.ctlz.i32(i32, i1 immarg) +declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1 immarg) +declare i32 @llvm.cttz.i32(i32, i1 immarg) +declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1 immarg) +declare i32 @llvm.ctpop.i32(i32) +declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) +declare float @llvm.maximum.f32(float, float) +declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>) +declare float @llvm.minimum.f32(float, float) +declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>) +declare float @llvm.maxnum.f32(float, float) +declare <8 x float> @llvm.maxnum.v8f32(<8 
x float>, <8 x float>) +declare float @llvm.minnum.f32(float, float) +declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) +declare i32 @llvm.smax.i32(i32, i32) +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.smin.i32(i32, i32) +declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.umax.i32(i32, i32) +declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.umin.i32(i32, i32) +declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) +declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) +declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) +declare <12 x float> @llvm.matrix.multiply.v12f32.v64f32.v48f32(<64 x float>, <48 x float>, i32 immarg, i32 immarg, i32 immarg) +declare <48 x float> @llvm.matrix.transpose.v48f32(<48 x float>, i32 immarg, i32 immarg) +declare <48 x float> @llvm.matrix.column.major.load.v48f32.i64(float* nocapture, i64, i1 immarg, i32 immarg, i32 immarg) +declare void @llvm.matrix.column.major.store.v48f32.i64(<48 x float>, float* nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg) +declare <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64, i64) +declare <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>*, i32 immarg, <7 x i1>, <7 x float>) +declare void @llvm.masked.store.v7f32.p0v7f32(<7 x float>, <7 x float>*, i32 immarg, <7 x i1>) 
+declare <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*>, i32 immarg, <7 x i1>, <7 x float>) +declare void @llvm.masked.scatter.v7f32.v7p0f32(<7 x float>, <7 x float*>, i32 immarg, <7 x i1>) +declare <7 x float> @llvm.masked.expandload.v7f32(float*, <7 x i1>, <7 x float>) +declare void @llvm.masked.compressstore.v7f32(<7 x float>, float*, <7 x i1>) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg) +declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg) +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32>, <8 x i32>) +; declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +; declare i8* @llvm.coro.begin(token, i8* writeonly) +declare i64 @llvm.coro.size.i64() +declare i32 @llvm.coro.size.i32() +declare i64 @llvm.coro.align.i64() +declare i32 @llvm.coro.align.i32() +; declare token @llvm.coro.save(i8*) +; 
declare i8 @llvm.coro.suspend(token, i1) +declare i1 @llvm.coro.end(i8*, i1) +; declare i8* @llvm.coro.free(token, i8* nocapture readonly) +declare void @llvm.coro.resume(i8*) +declare i32 @llvm.eh.typeid.for(i8*) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) +declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fmul.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.frem.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.add.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.mul.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 
@llvm.vp.reduce.and.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.or.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.xor.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.smax.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.smin.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.umax.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.umin.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) +declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) +declare void @llvm.vp.store.v8i32.p0i32(<8 x i32>, i32* nocapture, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.load.v8i32.p0i32(i32* nocapture, <8 x i1>, i32) +declare void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32(<8 x i32>, i32* nocapture, i32, <8 x i1>, i32) +declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32(i32* nocapture, i32, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*>, <8 x i1>, i32) +declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64>, <8 x i1>, i32) 
diff --git a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp index 50eb2e7978af5a..9dc27fe0108f8a 100644 --- a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp @@ -377,6 +377,20 @@ static bool emitEnumConversionDefs(const RecordKeeper &recordKeeper, return false; } +static void emitIntrOpPair(const Record &record, raw_ostream &os) { + auto op = tblgen::Operator(record); + os << "{llvm::Intrinsic::" << record.getValueAsString("llvmEnumName") << ", " + << op.getQualCppClassName() << "::getOperationName()},\n"; +} + +static bool emitIntrOpPairs(const RecordKeeper &recordKeeper, raw_ostream &os) { + for (const auto *def : + recordKeeper.getAllDerivedDefinitions("LLVM_IntrOpBase")) + emitIntrOpPair(*def, os); + + return false; +} + static mlir::GenRegistration genLLVMIRConversions("gen-llvmir-conversions", "Generate LLVM IR conversions", emitBuilders); @@ -390,3 +404,8 @@ static mlir::GenRegistration genEnumFromLLVMConversion("gen-enum-from-llvmir-conversions", "Generate conversions of EnumAttrs from LLVM IR", emitEnumConversionDefs); + +static mlir::GenRegistration + genLLVMIntrinsicToOpPairs("gen-llvmintrinsic-to-llvmirop-pairs", + "Generate LLVM intrinsic to LLVMIR op pairs", + emitIntrOpPairs); From 62c46093f18f744be80758bfc6c9110e65b4434c Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Tue, 31 May 2022 14:29:06 +0300 Subject: [PATCH 882/908] [AMDGPU][DOC][NFC] Add GFX90C and GFX940 assembler syntax description --- llvm/docs/AMDGPU/AMDGPUAsmGFX900.rst | 6 +- llvm/docs/AMDGPU/AMDGPUAsmGFX940.rst | 2118 ++++++++++++++++++++ llvm/docs/AMDGPU/gfx940_dst_4f3f9a.rst | 13 + llvm/docs/AMDGPU/gfx940_dst_95761f.rst | 13 + llvm/docs/AMDGPU/gfx940_fx_operand.rst | 16 + llvm/docs/AMDGPU/gfx940_hwreg.rst | 82 + llvm/docs/AMDGPU/gfx940_imask.rst | 65 + llvm/docs/AMDGPU/gfx940_imm16_73139a.rst | 13 + llvm/docs/AMDGPU/gfx940_imm16_a04fb3.rst | 13 + 
llvm/docs/AMDGPU/gfx940_label.rst | 36 + llvm/docs/AMDGPU/gfx940_m_254bcb.rst | 13 + llvm/docs/AMDGPU/gfx940_m_f5d306.rst | 13 + llvm/docs/AMDGPU/gfx940_msg.rst | 100 + llvm/docs/AMDGPU/gfx940_opt_0d447d.rst | 13 + llvm/docs/AMDGPU/gfx940_opt_7c211e.rst | 13 + llvm/docs/AMDGPU/gfx940_probe.rst | 24 + llvm/docs/AMDGPU/gfx940_saddr_22dbc1.rst | 19 + llvm/docs/AMDGPU/gfx940_saddr_a37373.rst | 19 + llvm/docs/AMDGPU/gfx940_sbase_010ce0.rst | 17 + llvm/docs/AMDGPU/gfx940_sbase_044055.rst | 17 + llvm/docs/AMDGPU/gfx940_sbase_0cd545.rst | 17 + llvm/docs/AMDGPU/gfx940_sdata_595c25.rst | 17 + llvm/docs/AMDGPU/gfx940_sdata_7cbd60.rst | 17 + llvm/docs/AMDGPU/gfx940_sdata_aefe00.rst | 21 + llvm/docs/AMDGPU/gfx940_sdata_c6aec1.rst | 21 + llvm/docs/AMDGPU/gfx940_sdata_e9f591.rst | 17 + llvm/docs/AMDGPU/gfx940_sdata_eb6f2a.rst | 21 + llvm/docs/AMDGPU/gfx940_sdst_06b266.rst | 17 + llvm/docs/AMDGPU/gfx940_sdst_0804b1.rst | 17 + llvm/docs/AMDGPU/gfx940_sdst_362c37.rst | 17 + llvm/docs/AMDGPU/gfx940_sdst_3bc700.rst | 17 + llvm/docs/AMDGPU/gfx940_sdst_59204c.rst | 17 + llvm/docs/AMDGPU/gfx940_sdst_718cc4.rst | 17 + llvm/docs/AMDGPU/gfx940_sdst_94342d.rst | 17 + llvm/docs/AMDGPU/gfx940_sdst_a319e6.rst | 17 + llvm/docs/AMDGPU/gfx940_simm32_6f0844.rst | 14 + llvm/docs/AMDGPU/gfx940_simm32_a3e80c.rst | 13 + llvm/docs/AMDGPU/gfx940_simm32_be0c1c.rst | 14 + llvm/docs/AMDGPU/gfx940_soffset_4318ca.rst | 17 + llvm/docs/AMDGPU/gfx940_soffset_7b8c50.rst | 20 + llvm/docs/AMDGPU/gfx940_soffset_f33c5c.rst | 17 + llvm/docs/AMDGPU/gfx940_src_4de5c6.rst | 17 + llvm/docs/AMDGPU/gfx940_src_56ed80.rst | 17 + llvm/docs/AMDGPU/gfx940_src_64ea89.rst | 17 + llvm/docs/AMDGPU/gfx940_src_6cfc4e.rst | 17 + llvm/docs/AMDGPU/gfx940_src_a578ba.rst | 17 + llvm/docs/AMDGPU/gfx940_src_af08be.rst | 17 + llvm/docs/AMDGPU/gfx940_src_d578c4.rst | 17 + llvm/docs/AMDGPU/gfx940_src_d95796.rst | 17 + llvm/docs/AMDGPU/gfx940_src_e1561c.rst | 17 + llvm/docs/AMDGPU/gfx940_src_e5cc81.rst | 17 + 
llvm/docs/AMDGPU/gfx940_src_f73668.rst | 17 + llvm/docs/AMDGPU/gfx940_srsrc.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_4db4a9.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_57838b.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_595c25.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_65f041.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_aee59c.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_c31902.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_c5d631.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_c8a322.rst | 17 + llvm/docs/AMDGPU/gfx940_ssrc_e9f591.rst | 17 + llvm/docs/AMDGPU/gfx940_type_deviation.rst | 13 + llvm/docs/AMDGPU/gfx940_vaddr_0212e3.rst | 20 + llvm/docs/AMDGPU/gfx940_vaddr_6ab80d.rst | 19 + llvm/docs/AMDGPU/gfx940_vaddr_9f7133.rst | 17 + llvm/docs/AMDGPU/gfx940_vaddr_b73dc0.rst | 22 + llvm/docs/AMDGPU/gfx940_vaddr_f20ee4.rst | 17 + llvm/docs/AMDGPU/gfx940_vcc.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata0_9ad749.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata0_be4895.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata1_9ad749.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata1_be4895.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata_24882b.rst | 21 + llvm/docs/AMDGPU/gfx940_vdata_5eef12.rst | 21 + llvm/docs/AMDGPU/gfx940_vdata_848ff7.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata_9ad749.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata_be4895.rst | 17 + llvm/docs/AMDGPU/gfx940_vdata_c8a58b.rst | 21 + llvm/docs/AMDGPU/gfx940_vdata_cfb402.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_08b5ba.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_0c37de.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_0f48d1.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_180bef.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_260aca.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_5258b4.rst | 19 + llvm/docs/AMDGPU/gfx940_vdst_56baf6.rst | 19 + llvm/docs/AMDGPU/gfx940_vdst_63b743.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_69a144.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_78dd0a.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_89680f.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_8c77d4.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_a32035.rst | 17 + 
llvm/docs/AMDGPU/gfx940_vdst_bce42a.rst | 19 + llvm/docs/AMDGPU/gfx940_vdst_bdb32f.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_c3d63a.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_c8d317.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_d0c0cb.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_d6f4bd.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_d8236e.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_e2898f.rst | 17 + llvm/docs/AMDGPU/gfx940_vdst_fa7dbd.rst | 17 + llvm/docs/AMDGPU/gfx940_vsrc_1027ca.rst | 17 + llvm/docs/AMDGPU/gfx940_vsrc_6802ce.rst | 17 + llvm/docs/AMDGPU/gfx940_vsrc_848ff7.rst | 17 + llvm/docs/AMDGPU/gfx940_vsrc_9ad749.rst | 17 + llvm/docs/AMDGPU/gfx940_vsrc_be4895.rst | 17 + llvm/docs/AMDGPU/gfx940_vsrc_e016a1.rst | 17 + llvm/docs/AMDGPU/gfx940_vsrc_fd235e.rst | 17 + llvm/docs/AMDGPU/gfx940_waitcnt.rst | 64 + llvm/docs/AMDGPUModifierSyntax.rst | 88 + llvm/docs/AMDGPUUsage.rst | 7 + 112 files changed, 4321 insertions(+), 3 deletions(-) create mode 100644 llvm/docs/AMDGPU/AMDGPUAsmGFX940.rst create mode 100644 llvm/docs/AMDGPU/gfx940_dst_4f3f9a.rst create mode 100644 llvm/docs/AMDGPU/gfx940_dst_95761f.rst create mode 100644 llvm/docs/AMDGPU/gfx940_fx_operand.rst create mode 100644 llvm/docs/AMDGPU/gfx940_hwreg.rst create mode 100644 llvm/docs/AMDGPU/gfx940_imask.rst create mode 100644 llvm/docs/AMDGPU/gfx940_imm16_73139a.rst create mode 100644 llvm/docs/AMDGPU/gfx940_imm16_a04fb3.rst create mode 100644 llvm/docs/AMDGPU/gfx940_label.rst create mode 100644 llvm/docs/AMDGPU/gfx940_m_254bcb.rst create mode 100644 llvm/docs/AMDGPU/gfx940_m_f5d306.rst create mode 100644 llvm/docs/AMDGPU/gfx940_msg.rst create mode 100644 llvm/docs/AMDGPU/gfx940_opt_0d447d.rst create mode 100644 llvm/docs/AMDGPU/gfx940_opt_7c211e.rst create mode 100644 llvm/docs/AMDGPU/gfx940_probe.rst create mode 100644 llvm/docs/AMDGPU/gfx940_saddr_22dbc1.rst create mode 100644 llvm/docs/AMDGPU/gfx940_saddr_a37373.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sbase_010ce0.rst create mode 100644 
llvm/docs/AMDGPU/gfx940_sbase_044055.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sbase_0cd545.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdata_595c25.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdata_7cbd60.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdata_aefe00.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdata_c6aec1.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdata_e9f591.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdata_eb6f2a.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_06b266.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_0804b1.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_362c37.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_3bc700.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_59204c.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_718cc4.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_94342d.rst create mode 100644 llvm/docs/AMDGPU/gfx940_sdst_a319e6.rst create mode 100644 llvm/docs/AMDGPU/gfx940_simm32_6f0844.rst create mode 100644 llvm/docs/AMDGPU/gfx940_simm32_a3e80c.rst create mode 100644 llvm/docs/AMDGPU/gfx940_simm32_be0c1c.rst create mode 100644 llvm/docs/AMDGPU/gfx940_soffset_4318ca.rst create mode 100644 llvm/docs/AMDGPU/gfx940_soffset_7b8c50.rst create mode 100644 llvm/docs/AMDGPU/gfx940_soffset_f33c5c.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_4de5c6.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_56ed80.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_64ea89.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_6cfc4e.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_a578ba.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_af08be.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_d578c4.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_d95796.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_e1561c.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_e5cc81.rst create mode 100644 llvm/docs/AMDGPU/gfx940_src_f73668.rst create mode 100644 llvm/docs/AMDGPU/gfx940_srsrc.rst create 
mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_4db4a9.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_57838b.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_595c25.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_65f041.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_aee59c.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_c31902.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_c5d631.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_c8a322.rst create mode 100644 llvm/docs/AMDGPU/gfx940_ssrc_e9f591.rst create mode 100644 llvm/docs/AMDGPU/gfx940_type_deviation.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vaddr_0212e3.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vaddr_6ab80d.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vaddr_9f7133.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vaddr_b73dc0.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vaddr_f20ee4.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vcc.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata0_9ad749.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata0_be4895.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata1_9ad749.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata1_be4895.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata_24882b.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata_5eef12.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata_848ff7.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata_9ad749.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata_be4895.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata_c8a58b.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdata_cfb402.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_08b5ba.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_0c37de.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_0f48d1.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_180bef.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_260aca.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_5258b4.rst create mode 100644 
llvm/docs/AMDGPU/gfx940_vdst_56baf6.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_63b743.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_69a144.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_78dd0a.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_89680f.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_8c77d4.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_a32035.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_bce42a.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_bdb32f.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_c3d63a.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_c8d317.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_d0c0cb.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_d6f4bd.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_d8236e.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_e2898f.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vdst_fa7dbd.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vsrc_1027ca.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vsrc_6802ce.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vsrc_848ff7.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vsrc_9ad749.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vsrc_be4895.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vsrc_e016a1.rst create mode 100644 llvm/docs/AMDGPU/gfx940_vsrc_fd235e.rst create mode 100644 llvm/docs/AMDGPU/gfx940_waitcnt.rst diff --git a/llvm/docs/AMDGPU/AMDGPUAsmGFX900.rst b/llvm/docs/AMDGPU/AMDGPUAsmGFX900.rst index c85163069615fd..b0ab856c442435 100644 --- a/llvm/docs/AMDGPU/AMDGPUAsmGFX900.rst +++ b/llvm/docs/AMDGPU/AMDGPUAsmGFX900.rst @@ -6,7 +6,7 @@ ************************************************** ==================================================================================== -Syntax of gfx900, gfx902 and gfx909 Instructions +Syntax of gfx900, gfx902, gfx909 and gfx90c Instructions ==================================================================================== .. 
contents:: @@ -15,9 +15,9 @@ Syntax of gfx900, gfx902 and gfx909 Instructions Introduction ============ -This document describes the syntax of *instructions specific to gfx900, gfx902 and gfx909*. +This document describes the syntax of *instructions specific to gfx900, gfx902, gfx909 and gfx90c*. -For a description of other gfx900, gfx902 and gfx909 instructions see :doc:`Syntax of Core GFX9 Instructions`. +For a description of other gfx900, gfx902, gfx909 and gfx90c instructions see :doc:`Syntax of Core GFX9 Instructions`. Notation ======== diff --git a/llvm/docs/AMDGPU/AMDGPUAsmGFX940.rst b/llvm/docs/AMDGPU/AMDGPUAsmGFX940.rst new file mode 100644 index 00000000000000..a1089960d7c417 --- /dev/null +++ b/llvm/docs/AMDGPU/AMDGPUAsmGFX940.rst @@ -0,0 +1,2118 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of GFX940 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of GFX940 instructions. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +DS +-- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + ds_add_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_append :ref:`vdst` :ref:`offset` :ref:`gds` + ds_bpermute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_cmpst_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` 
:ref:`offset` :ref:`gds` + ds_cmpst_f64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_condxchg32_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_consume :ref:`vdst` :ref:`offset` :ref:`gds` + ds_dec_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_barrier :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_init :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_br :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_p :ref:`offset` :ref:`gds` + ds_gws_sema_release_all :ref:`offset` :ref:`gds` + ds_gws_sema_v :ref:`offset` :ref:`gds` + ds_inc_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i32 :ref:`vdst`, :ref:`vaddr`, 
:ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_mskor_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_nop + ds_or_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_permute_b32 :ref:`vdst`, 
:ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_pk_add_bf16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_pk_add_f16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_pk_add_rtn_bf16 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_pk_add_rtn_f16 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_read2_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read_addtid_b32 :ref:`vdst` :ref:`offset` :ref:`gds` + ds_read_b128 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b32 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b64 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b96 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u64 :ref:`vaddr`, 
:ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_swizzle_b32 :ref:`vdst`, :ref:`vaddr` :ref:`pattern` :ref:`gds` + ds_wrap_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_write2_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write_addtid_b32 :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b128 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b96 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg2_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, 
:ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + +FLAT +---- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + flat_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_add_f32 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_add_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` 
:ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_max_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_min_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_pk_add_bf16 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_pk_add_f16 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset12` 
:ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_dword :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_dwordx2 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_dwordx3 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_dwordx4 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_sbyte :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` 
:ref:`nt` :ref:`sc1` + flat_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_short_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_short_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_sshort :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_ubyte :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_load_ushort :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_byte :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_dword :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_dwordx2 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_dwordx3 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_dwordx4 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_short :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + flat_store_short_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_add_f32 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_add_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_add_x2 
:ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_max_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_min_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_pk_add_bf16 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` 
:ref:`sc1` + global_atomic_pk_add_f16 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_xor 
:ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_lds_dword :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_lds_sbyte :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_lds_sshort :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_lds_ubyte :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_lds_ushort :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + 
global_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_dword :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + global_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_lds_dword :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_lds_sbyte :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_lds_sshort :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + 
scratch_load_lds_ubyte :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_lds_ushort :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_store_dword :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` 
:ref:`nt` :ref:`sc1` + scratch_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + scratch_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`sc0` :ref:`nt` :ref:`sc1` + +MTBUF +----- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + tbuffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + tbuffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + +MUBUF +----- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + buffer_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_add_f32 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_add_f64 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_add_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_and_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_cmpswap :ref:`vdata`::ref:`dst`::ref:`b32x2`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_cmpswap_x2 :ref:`vdata`::ref:`dst`::ref:`b64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_dec :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_dec_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_inc :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_inc_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_max_f64 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_min_f64 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_or_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_pk_add_f16 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_smax :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_smax_x2 
:ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_smin :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_smin_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_sub_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_swap_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_umax :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_umax_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_umin :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_umin_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` 
:ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_atomic_xor_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_inv + buffer_load_dword :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` :ref:`lds` + buffer_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_d16_hi_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_x :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` :ref:`lds` + buffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_xyz :ref:`vdst`, 
:ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_sbyte :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` :ref:`lds` + buffer_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_sshort :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` :ref:`lds` + buffer_load_ubyte :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` :ref:`lds` + buffer_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_load_ushort :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` :ref:`lds` + buffer_store_byte :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` 
:ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_byte_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_dword :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_dwordx2 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_dwordx3 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_dwordx4 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_d16_hi_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_short :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_store_short_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`sc0` :ref:`nt` :ref:`sc1` + buffer_wbl2 + +SMEM +---- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_atc_probe :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atc_probe_buffer :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec 
:ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, 
:ref:`soffset` :ref:`glc` + s_buffer_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin_x2 
:ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_dcache_discard :ref:`sbase`, :ref:`soffset` + s_dcache_discard_x2 :ref:`sbase`, :ref:`soffset` + s_dcache_inv + s_dcache_inv_vol + s_dcache_wb + s_dcache_wb_vol + s_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_memrealtime :ref:`sdst`::ref:`b64` + s_memtime :ref:`sdst`::ref:`b64` + s_scratch_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` 
+ s_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + +SOP1 +---- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_abs_i32 :ref:`sdst`, :ref:`ssrc` + s_and_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bitreplicate_b64_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_bitset1_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset1_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_brev_b32 :ref:`sdst`, :ref:`ssrc` + s_brev_b64 :ref:`sdst`, :ref:`ssrc` + s_cbranch_join :ref:`ssrc` + s_cmov_b32 :ref:`sdst`, :ref:`ssrc` + s_cmov_b64 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_i64 :ref:`sdst`, :ref:`ssrc` + s_getpc_b64 :ref:`sdst` + s_mov_b32 :ref:`sdst`, :ref:`ssrc` + s_mov_b64 :ref:`sdst`, :ref:`ssrc` + s_movreld_b32 :ref:`sdst`, :ref:`ssrc` + s_movreld_b64 :ref:`sdst`, :ref:`ssrc` + s_movrels_b32 :ref:`sdst`, :ref:`ssrc` + 
s_movrels_b64 :ref:`sdst`, :ref:`ssrc` + s_nand_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_nor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_not_b32 :ref:`sdst`, :ref:`ssrc` + s_not_b64 :ref:`sdst`, :ref:`ssrc` + s_or_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b32 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b64 :ref:`sdst`, :ref:`ssrc` + s_rfe_b64 :ref:`ssrc` + s_set_gpr_idx_idx :ref:`ssrc` + s_setpc_b64 :ref:`ssrc` + s_sext_i32_i16 :ref:`sdst`, :ref:`ssrc` + s_sext_i32_i8 :ref:`sdst`, :ref:`ssrc` + s_swappc_b64 :ref:`sdst`, :ref:`ssrc` + s_wqm_b32 :ref:`sdst`, :ref:`ssrc` + s_wqm_b64 :ref:`sdst`, :ref:`ssrc` + s_xnor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_xor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + +SOP2 +---- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_absdiff_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_addc_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_ashr_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_ashr_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` 
+ s_bfe_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfe_u64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfm_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfm_b64 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_cbranch_g_fork :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl1_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl2_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl3_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl4_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshl_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_max_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_max_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_pack_hh_b32_b16 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_pack_lh_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_pack_ll_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_rfe_restore_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_sub_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + 
s_sub_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_subb_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + +SOPC +---- + +.. parsed-literal:: + + **INSTRUCTION** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_bitcmp0_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp0_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bitcmp1_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp1_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_cmp_eq_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_set_gpr_idx_on :ref:`ssrc`, :ref:`imask` + s_setvskip :ref:`ssrc0`, :ref:`ssrc1` + +SOPK +---- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_addk_i32 :ref:`sdst`, :ref:`imm16` + s_call_b64 :ref:`sdst`, :ref:`label` + s_cbranch_i_fork :ref:`ssrc`, :ref:`label` + s_cmovk_i32 :ref:`sdst`, :ref:`imm16` + s_cmpk_eq_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_eq_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_u32 :ref:`ssrc`, :ref:`imm16` + s_getreg_b32 :ref:`sdst`, :ref:`hwreg` + s_movk_i32 :ref:`sdst`, :ref:`imm16` + s_mulk_i32 :ref:`sdst`, :ref:`imm16` + s_setreg_b32 :ref:`hwreg`, :ref:`ssrc` + s_setreg_imm32_b32 :ref:`hwreg`, :ref:`simm32` + +SOPP +---- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_barrier + s_branch :ref:`label` + s_cbranch_cdbgsys :ref:`label` + s_cbranch_cdbgsys_and_user :ref:`label` + s_cbranch_cdbgsys_or_user :ref:`label` + s_cbranch_cdbguser :ref:`label` + s_cbranch_execnz :ref:`label` + s_cbranch_execz :ref:`label` + s_cbranch_scc0 :ref:`label` + s_cbranch_scc1 :ref:`label` + s_cbranch_vccnz :ref:`label` + s_cbranch_vccz :ref:`label` + s_decperflevel :ref:`imm16` + s_endpgm + s_endpgm_ordered_ps_done + s_endpgm_saved + s_icache_inv + s_incperflevel :ref:`imm16` + s_nop :ref:`imm16` + s_sendmsg :ref:`msg` + s_sendmsghalt :ref:`msg` + s_set_gpr_idx_mode :ref:`imask` + s_set_gpr_idx_off + s_sethalt :ref:`imm16` + s_setkill :ref:`imm16` + s_setprio :ref:`imm16` + s_sleep :ref:`imm16` + s_trap :ref:`imm16` + s_ttracedata + s_waitcnt :ref:`waitcnt` + s_wakeup + +VOP1 +---- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_accvgpr_mov_b32 :ref:`vdst`, :ref:`vsrc` + v_bfrev_b32 :ref:`vdst`, :ref:`src` + v_bfrev_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_bfrev_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f16 :ref:`vdst`, :ref:`src` + v_ceil_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f32 :ref:`vdst`, :ref:`src` + v_ceil_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f64 :ref:`vdst`, :ref:`src` + v_ceil_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_clrexcp + v_cos_f16 :ref:`vdst`, :ref:`src` + v_cos_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cos_f16_sdwa :ref:`vdst`, 
:ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cos_f32 :ref:`vdst`, :ref:`src` + v_cos_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cos_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_f32 :ref:`vdst`, :ref:`src` + v_cvt_f16_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_i16 :ref:`vdst`, :ref:`src` + v_cvt_f16_i16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_i16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_u16 :ref:`vdst`, :ref:`src` + v_cvt_f16_u16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_u16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f16 :ref:`vdst`, :ref:`src` + v_cvt_f32_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f64 :ref:`vdst`, :ref:`src` + v_cvt_f32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_i32 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_u32 :ref:`vdst`, 
:ref:`src` + v_cvt_f32_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte0 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte0_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte0_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte1 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte1_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte1_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte2 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte2_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte2_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte3 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte3_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte3_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f64_f32 :ref:`vdst`, :ref:`src` + v_cvt_f64_i32 :ref:`vdst`, :ref:`src` + v_cvt_f64_u32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_flr_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i16_f16_sdwa 
:ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f64 :ref:`vdst`, :ref:`src` + v_cvt_i32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_norm_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_norm_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_norm_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_off_f32_i4 :ref:`vdst`, :ref:`src` + v_cvt_off_f32_i4_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_off_f32_i4_sdwa :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_rpi_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_rpi_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_rpi_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f32 :ref:`vdst`, 
:ref:`src` + v_cvt_u32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f64 :ref:`vdst`, :ref:`src` + v_cvt_u32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f16 :ref:`vdst`, :ref:`src` + v_exp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_f32 :ref:`vdst`, :ref:`src` + v_exp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_legacy_f32 :ref:`vdst`, :ref:`src` + v_exp_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_legacy_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_i32 :ref:`vdst`, :ref:`src` + v_ffbh_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_u32 :ref:`vdst`, :ref:`src` + v_ffbh_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbl_b32 :ref:`vdst`, :ref:`src` + v_ffbl_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbl_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` 
:ref:`src0_sel` + v_floor_f16 :ref:`vdst`, :ref:`src` + v_floor_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f32 :ref:`vdst`, :ref:`src` + v_floor_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f64 :ref:`vdst`, :ref:`src` + v_floor_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f16 :ref:`vdst`, :ref:`src` + v_fract_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f32 :ref:`vdst`, :ref:`src` + v_fract_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f64 :ref:`vdst`, :ref:`src` + v_fract_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i16_f16 :ref:`vdst`, :ref:`src` + v_frexp_exp_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f32 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` 
:ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f64 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f16 :ref:`vdst`, :ref:`src` + v_frexp_mant_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f32 :ref:`vdst`, :ref:`src` + v_frexp_mant_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f64 :ref:`vdst`, :ref:`src` + v_frexp_mant_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f16 :ref:`vdst`, :ref:`src` + v_log_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_f32 :ref:`vdst`, :ref:`src` + v_log_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_legacy_f32 :ref:`vdst`, :ref:`src` + v_log_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_legacy_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_mov_b32 :ref:`vdst`, :ref:`src` + v_mov_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + 
v_mov_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_mov_b64 :ref:`vdst`, :ref:`src` + v_mov_b64_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_nop + v_not_b32 :ref:`vdst`, :ref:`src` + v_not_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_not_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f16 :ref:`vdst`, :ref:`src` + v_rcp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f32 :ref:`vdst`, :ref:`src` + v_rcp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f64 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_iflag_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_readfirstlane_b32 :ref:`sdst`, :ref:`vsrc` + v_rndne_f16 :ref:`vdst`, :ref:`src` + v_rndne_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f32 :ref:`vdst`, :ref:`src` + v_rndne_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` 
:ref:`src0_sel` + v_rndne_f64 :ref:`vdst`, :ref:`src` + v_rsq_f16 :ref:`vdst`, :ref:`src` + v_rsq_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f32 :ref:`vdst`, :ref:`src` + v_rsq_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f64 :ref:`vdst`, :ref:`src` + v_sat_pk_u8_i16 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_sat_pk_u8_i16_dpp :ref:`vdst`::ref:`u8x4`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sat_pk_u8_i16_sdwa :ref:`vdst`::ref:`u8x4`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_screen_partition_4se_b32 :ref:`vdst`, :ref:`src` + v_screen_partition_4se_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_screen_partition_4se_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f16 :ref:`vdst`, :ref:`src` + v_sin_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f32 :ref:`vdst`, :ref:`src` + v_sin_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f16 :ref:`vdst`, :ref:`src` + v_sqrt_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f16_sdwa :ref:`vdst`, 
:ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f32 :ref:`vdst`, :ref:`src` + v_sqrt_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f64 :ref:`vdst`, :ref:`src` + v_swap_b32 :ref:`vdst`, :ref:`vsrc` + v_trunc_f16 :ref:`vdst`, :ref:`src` + v_trunc_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_trunc_f32 :ref:`vdst`, :ref:`src` + v_trunc_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_trunc_f64 :ref:`vdst`, :ref:`src` + v_trunc_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + +VOP2 +---- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_add_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u16 
:ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_addc_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_addc_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_addc_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_and_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_and_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_and_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_ashrrev_i16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_ashrrev_i32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, 
:ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_cndmask_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_cndmask_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cndmask_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_dot2c_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` + v_dot2c_f32_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot2c_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`vsrc1`::ref:`i16x2` + v_dot2c_i32_i16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i16x2`, :ref:`vsrc1`::ref:`i16x2` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot4c_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` + v_dot4c_i32_i8_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot8c_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`vsrc1`::ref:`i4x8` + v_dot8c_i32_i4_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i4x8`, :ref:`vsrc1`::ref:`i4x8` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fmaak_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_fmac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fmac_f64 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmac_f64_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fmamk_f32 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_ldexp_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`i16` + 
v_ldexp_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`i16` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ldexp_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`i16` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshlrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshlrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshrrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshrrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_madak_f16 :ref:`vdst`, :ref:`src0`, 
:ref:`vsrc1`, :ref:`simm32` + v_madmk_f16 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_hi_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_hi_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_lo_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_lo_u16_sdwa 
:ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_or_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_or_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_pk_fmac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_sub_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_u16_dpp :ref:`vdst`, 
:ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subb_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subb_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subb_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subbrev_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subbrev_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subbrev_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_subrev_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + 
v_subrev_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_xnor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xnor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xnor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_xor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + +VOP3 +---- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_add_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_lshl_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_addc_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_alignbit_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_alignbyte_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_and_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_and_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_ashrrev_i16_e64 :ref:`vdst`, 
:ref:`src0`::ref:`u16`, :ref:`src1` + v_ashrrev_i32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_ashrrev_i64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_bcnt_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfe_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_bfe_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfi_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfrev_b32_e64 :ref:`vdst`, :ref:`src` + v_ceil_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_clrexcp_e64 + v_cmp_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + 
v_cmp_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + 
v_cmp_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f64_e64 
:ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + 
v_cmpx_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u32_e64 :ref:`sdst`, 
:ref:`src0`, :ref:`src1` + v_cmpx_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i64_e64 :ref:`sdst`, 
:ref:`src0`, :ref:`src1` + v_cmpx_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + 
v_cmpx_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cndmask_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`ssrc2` + v_cos_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cos_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubeid_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubema_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubesc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubetc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, 
:ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_i16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f16_u16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte0_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte1_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte2_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte3_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f64_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_flr_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_norm_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_norm_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_off_f32_i4_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_pk_i16_i32 :ref:`vdst`, :ref:`src0`::ref:`i32`, :ref:`src1`::ref:`i32` + v_cvt_pk_u16_u32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1`::ref:`u32` + v_cvt_pk_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_cvt_pkaccum_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32` + v_cvt_pknorm_i16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, 
:ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + v_cvt_pknorm_i16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pknorm_u16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, :ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + v_cvt_pknorm_u16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pkrtz_f16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_rpi_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_div_fixup_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_div_fixup_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_scale_f32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_div_scale_f64 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_exp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ffbh_i32_e64 :ref:`vdst`, :ref:`src` + v_ffbh_u32_e64 :ref:`vdst`, :ref:`src` + v_ffbl_b32_e64 :ref:`vdst`, :ref:`src` + v_floor_f16_e64 :ref:`vdst`, 
:ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_fma_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fmac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_fmac_f64_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_exp_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_mant_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ldexp_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i16` :ref:`clamp` :ref:`omod` + v_ldexp_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_ldexp_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_lerp_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`b32`, :ref:`src1`::ref:`b32`, :ref:`src2`::ref:`b32` + v_log_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + 
v_log_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_lshl_add_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_lshl_add_u64 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2` + v_lshl_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2` + v_lshlrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshlrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshlrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshrrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_mac_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_mad_i64_i32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i64` :ref:`clamp` + v_mad_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_legacy_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_legacy_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mad_u64_u32 :ref:`vdst`, 
:ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u64` :ref:`clamp` + v_max3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_max3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_max3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_hi_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_lo_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_med3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_med3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_med3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_med3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_med3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_med3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_min3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_min3_i16 :ref:`vdst`, 
:ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mov_b32_e64 :ref:`vdst`, :ref:`src` + v_mov_b64_e64 :ref:`vdst`, :ref:`src` + v_mqsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_mqsad_u32_u8 :ref:`vdst`::ref:`u32x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`vsrc2`::ref:`u32x4` :ref:`clamp` + v_msad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mul_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_hi_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_mul_legacy_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_lo_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_lo_u32 :ref:`vdst`, 
:ref:`src0`, :ref:`src1` + v_mul_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_nop_e64 + v_not_b32_e64 :ref:`vdst`, :ref:`src` + v_or3_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_or_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_pack_b32_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`op_sel` + v_perm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_qsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_rcp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_iflag_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_readlane_b32 :ref:`sdst`, :ref:`vsrc0`, :ref:`ssrc1` + v_rndne_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sad_hi_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u16 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_sad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sat_pk_u8_i16_e64 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_screen_partition_4se_b32_e64 :ref:`vdst`, :ref:`src` + v_sin_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + 
v_sin_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_sub_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subb_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subbrev_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subrev_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_trig_preop_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`u32` :ref:`clamp` :ref:`omod` + v_trunc_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_writelane_b32 :ref:`vdst`, :ref:`ssrc0`, :ref:`ssrc1` + v_xad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_xnor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_xor_b32_e64 
:ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOP3P +----- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_accvgpr_read_b32 :ref:`vdst`, :ref:`vsrc` + v_accvgpr_write_b32 :ref:`vdst`, :ref:`src` + v_dot2_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`src1`::ref:`f16x2`, :ref:`src2`::ref:`f32` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_dot2_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`src1`::ref:`i16x2`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot2_u32_u16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot4_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`src1`::ref:`i8x4`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot4_u32_u8 :ref:`vdst`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot8_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`src1`::ref:`i4x8`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot8_u32_u4 :ref:`vdst`, :ref:`src0`::ref:`u4x8`, :ref:`src1`::ref:`u4x8`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_fma_mix_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, 
:ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixhi_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixlo_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_mfma_f32_16x16x16_bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x16_f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x16bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x16bf16_1k :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x16f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x1_4b_f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x1f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4_4b_bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4_4b_f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + 
v_mfma_f32_16x16x4_f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4bf16_1k :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x8_xf32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32x2`, :ref:`vsrc1`::ref:`f32x2`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x8xf32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32x2`, :ref:`vsrc1`::ref:`f32x2`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x1_2b_f32 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x1f32 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x2_f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x2f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4_2b_bf16 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + 
v_mfma_f32_32x32x4_2b_f16 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4_xf32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32x2`, :ref:`vsrc1`::ref:`f32x2`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4bf16 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4bf16_1k :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4f16 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4xf32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32x2`, :ref:`vsrc1`::ref:`f32x2`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8_bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8_f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8bf16_1k :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x1_16b_f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x4` 
:ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x1f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4_16b_bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4_16b_f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4bf16_1k :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f64_16x16x4_f64 :ref:`vdst`::ref:`f64x4`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`src2`::ref:`f64x4` :ref:`cbsz` :ref:`abid` :ref:`neg` + v_mfma_f64_16x16x4f64 :ref:`vdst`::ref:`f64x4`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`src2`::ref:`f64x4` :ref:`cbsz` :ref:`abid` :ref:`neg` + v_mfma_f64_4x4x4_4b_f64 :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`src2` :ref:`cbsz` :ref:`abid` :ref:`neg` + v_mfma_f64_4x4x4f64 :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`src2` :ref:`cbsz` :ref:`abid` :ref:`neg` + v_mfma_i32_16x16x32_i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i8x8`, :ref:`src2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_16x16x32i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i8x8`, :ref:`src2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_16x16x4_4b_i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x16` 
:ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_16x16x4i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x16_i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i8x8`, :ref:`src2`::ref:`i32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x16i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i8x8`, :ref:`src2`::ref:`i32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x4_2b_i8 :ref:`vdst`::ref:`i32x32`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x4i8 :ref:`vdst`::ref:`i32x32`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_4x4x4_16b_i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_4x4x4i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_pk_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_add_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_add_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_fma_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_fma_f32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` 
:ref:`op_sel` :ref:`op_sel_hi` + v_pk_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mov_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_sub_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_smfmac_f32_16x16x32_bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_f32_16x16x32_f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_f32_16x16x32bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + 
v_smfmac_f32_16x16x32f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_f32_32x32x16_bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_f32_32x32x16_f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_f32_32x32x16bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_f32_32x32x16f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x8`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_i32_16x16x64_i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i32x4`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_i32_16x16x64i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i32x4`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_i32_32x32x32_i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i32x4`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + v_smfmac_i32_32x32x32i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x8`, :ref:`vsrc1`::ref:`i32x4`, :ref:`vsrc2`::ref:`b32` :ref:`cbsz` :ref:`abid` + +VOPC +---- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_cmp_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f32_sdwa 
:ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i64 :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + 
v_cmpx_gt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` 
:ref:`src1_sel` + v_cmpx_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16_sdwa :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + +.. |---| unicode:: U+02014 .. em dash + +.. 
toctree:: + :hidden: + + gfx940_dst_4f3f9a + gfx940_dst_95761f + gfx940_fx_operand + gfx940_hwreg + gfx940_imask + gfx940_imm16_73139a + gfx940_imm16_a04fb3 + gfx940_label + gfx940_m_254bcb + gfx940_m_f5d306 + gfx940_msg + gfx940_opt_0d447d + gfx940_opt_7c211e + gfx940_probe + gfx940_saddr_22dbc1 + gfx940_saddr_a37373 + gfx940_sbase_010ce0 + gfx940_sbase_044055 + gfx940_sbase_0cd545 + gfx940_sdata_595c25 + gfx940_sdata_7cbd60 + gfx940_sdata_aefe00 + gfx940_sdata_c6aec1 + gfx940_sdata_e9f591 + gfx940_sdata_eb6f2a + gfx940_sdst_06b266 + gfx940_sdst_0804b1 + gfx940_sdst_362c37 + gfx940_sdst_3bc700 + gfx940_sdst_59204c + gfx940_sdst_718cc4 + gfx940_sdst_94342d + gfx940_sdst_a319e6 + gfx940_simm32_6f0844 + gfx940_simm32_a3e80c + gfx940_simm32_be0c1c + gfx940_soffset_4318ca + gfx940_soffset_7b8c50 + gfx940_soffset_f33c5c + gfx940_src_4de5c6 + gfx940_src_56ed80 + gfx940_src_64ea89 + gfx940_src_6cfc4e + gfx940_src_a578ba + gfx940_src_af08be + gfx940_src_d578c4 + gfx940_src_d95796 + gfx940_src_e1561c + gfx940_src_e5cc81 + gfx940_src_f73668 + gfx940_srsrc + gfx940_ssrc_4db4a9 + gfx940_ssrc_57838b + gfx940_ssrc_595c25 + gfx940_ssrc_65f041 + gfx940_ssrc_aee59c + gfx940_ssrc_c31902 + gfx940_ssrc_c5d631 + gfx940_ssrc_c8a322 + gfx940_ssrc_e9f591 + gfx940_type_deviation + gfx940_vaddr_0212e3 + gfx940_vaddr_6ab80d + gfx940_vaddr_9f7133 + gfx940_vaddr_b73dc0 + gfx940_vaddr_f20ee4 + gfx940_vcc + gfx940_vdata0_9ad749 + gfx940_vdata0_be4895 + gfx940_vdata1_9ad749 + gfx940_vdata1_be4895 + gfx940_vdata_24882b + gfx940_vdata_5eef12 + gfx940_vdata_848ff7 + gfx940_vdata_9ad749 + gfx940_vdata_be4895 + gfx940_vdata_c8a58b + gfx940_vdata_cfb402 + gfx940_vdst_08b5ba + gfx940_vdst_0c37de + gfx940_vdst_0f48d1 + gfx940_vdst_180bef + gfx940_vdst_260aca + gfx940_vdst_5258b4 + gfx940_vdst_56baf6 + gfx940_vdst_63b743 + gfx940_vdst_69a144 + gfx940_vdst_78dd0a + gfx940_vdst_89680f + gfx940_vdst_8c77d4 + gfx940_vdst_a32035 + gfx940_vdst_bce42a + gfx940_vdst_bdb32f + gfx940_vdst_c3d63a + 
gfx940_vdst_c8d317 + gfx940_vdst_d0c0cb + gfx940_vdst_d6f4bd + gfx940_vdst_d8236e + gfx940_vdst_e2898f + gfx940_vdst_fa7dbd + gfx940_vsrc_1027ca + gfx940_vsrc_6802ce + gfx940_vsrc_848ff7 + gfx940_vsrc_9ad749 + gfx940_vsrc_be4895 + gfx940_vsrc_e016a1 + gfx940_vsrc_fd235e + gfx940_waitcnt diff --git a/llvm/docs/AMDGPU/gfx940_dst_4f3f9a.rst b/llvm/docs/AMDGPU/gfx940_dst_4f3f9a.rst new file mode 100644 index 00000000000000..58782d53a70b9d --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_dst_4f3f9a.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_dst_4f3f9a: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`sc0` is specified. diff --git a/llvm/docs/AMDGPU/gfx940_dst_95761f.rst b/llvm/docs/AMDGPU/gfx940_dst_95761f.rst new file mode 100644 index 00000000000000..fcffcea92df0d3 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_dst_95761f.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_dst_95761f: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`glc` is specified. diff --git a/llvm/docs/AMDGPU/gfx940_fx_operand.rst b/llvm/docs/AMDGPU/gfx940_fx_operand.rst new file mode 100644 index 00000000000000..e935c669cb6f61 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_fx_operand.rst @@ -0,0 +1,16 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_fx_operand: + +FX Operand +========== + +This is an *f32* or *f16* operand depending on instruction modifiers: + +* Operand size is controlled by :ref:`m_op_sel_hi`. 
+* Location of 16-bit operand is controlled by :ref:`m_op_sel`. diff --git a/llvm/docs/AMDGPU/gfx940_hwreg.rst b/llvm/docs/AMDGPU/gfx940_hwreg.rst new file mode 100644 index 00000000000000..2ad2bd8932da62 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_hwreg.rst @@ -0,0 +1,82 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_hwreg: + +hwreg +===== + +Bits of a hardware register being accessed. + +The bits of this operand have the following meaning: + + ======= ===================== ============ + Bits Description Value Range + ======= ===================== ============ + 5:0 Register *id*. 0..63 + 10:6 First bit *offset*. 0..31 + 15:11 *Size* in bits. 1..32 + ======= ===================== ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* An *hwreg* value described below. + + ==================================== ============================================================================ + Hwreg Value Syntax Description + ==================================== ============================================================================ + hwreg({0..63}) All bits of a register indicated by its *id*. + hwreg(<*name*>) All bits of a register indicated by its *name*. + hwreg({0..63}, {0..31}, {1..32}) Register bits indicated by register *id*, first bit *offset* and *size*. + hwreg(<*name*>, {0..31}, {1..32}) Register bits indicated by register *name*, first bit *offset* and *size*. + ==================================== ============================================================================ + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. 
+ +Defined register *names* include: + + ============================== ========================================== + Name Description + ============================== ========================================== + HW_REG_MODE Shader writeable mode bits. + HW_REG_STATUS Shader read-only status. + HW_REG_TRAPSTS Trap status. + HW_REG_HW_ID Id of wave, simd, compute unit, etc. + HW_REG_GPR_ALLOC Per-wave SGPR and VGPR allocation. + HW_REG_LDS_ALLOC Per-wave LDS allocation. + HW_REG_IB_STS Counters of outstanding instructions. + HW_REG_SH_MEM_BASES Memory aperture. + HW_REG_TBA_LO tba_lo register. + HW_REG_TBA_HI tba_hi register. + HW_REG_TMA_LO tma_lo register. + HW_REG_TMA_HI tma_hi register. + HW_REG_XCC_ID ID of this XCC (compute accelerator chip). + HW_REG_SQ_PERF_SNAPSHOT_DATA Performance snapshot data (first part). + HW_REG_SQ_PERF_SNAPSHOT_DATA1 Performance snapshot data (second part). + HW_REG_SQ_PERF_SNAPSHOT_PC_LO PC.lo of wave when snapshot was taken. + HW_REG_SQ_PERF_SNAPSHOT_PC_HI PC.hi of wave when snapshot was taken. + ============================== ========================================== + +Examples: + +.. parsed-literal:: + + reg = 1 + offset = 2 + size = 4 + hwreg_enc = reg | (offset << 6) | ((size - 1) << 11) + + s_getreg_b32 s2, 0x1881 + s_getreg_b32 s2, hwreg_enc // the same as above + s_getreg_b32 s2, hwreg(1, 2, 4) // the same as above + s_getreg_b32 s2, hwreg(reg, offset, size) // the same as above + + s_getreg_b32 s2, hwreg(15) + s_getreg_b32 s2, hwreg(51, 1, 31) + s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) diff --git a/llvm/docs/AMDGPU/gfx940_imask.rst b/llvm/docs/AMDGPU/gfx940_imask.rst new file mode 100644 index 00000000000000..d904db56e1c106 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_imask.rst @@ -0,0 +1,65 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx940_imask: + +imask +===== + +This operand is a mask which controls indexing mode for operands of subsequent instructions. +Bits 0, 1 and 2 control indexing of *src0*, *src1* and *src2*, while bit 3 controls indexing of *dst*. +Value 1 enables indexing and value 0 disables it. + + ===== ======================================== + Bit Meaning + ===== ======================================== + 0 Enables or disables *src0* indexing. + 1 Enables or disables *src1* indexing. + 2 Enables or disables *src2* indexing. + 3 Enables or disables *dst* indexing. + ===== ======================================== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..15. +* A *gpr_idx* value described below. + + ==================================== =========================================== + Gpr_idx Value Syntax Description + ==================================== =========================================== + gpr_idx(*<operands>*) Enable indexing for specified *operands* + and disable it for the rest. + *Operands* is a comma-separated list of + values which may include: + + * "SRC0" - enable *src0* indexing. + + * "SRC1" - enable *src1* indexing. + + * "SRC2" - enable *src2* indexing. + + * "DST" - enable *dst* indexing. + + Each of these values may be specified only + once. + + *Operands* list may be empty; this syntax + disables indexing for all operands. + ==================================== =========================================== + +Examples: + +.. 
parsed-literal:: + + s_set_gpr_idx_mode 0 + s_set_gpr_idx_mode gpr_idx() // the same as above + + s_set_gpr_idx_mode 15 + s_set_gpr_idx_mode gpr_idx(DST,SRC0,SRC1,SRC2) // the same as above + s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) // the same as above + + s_set_gpr_idx_mode gpr_idx(DST,SRC1) diff --git a/llvm/docs/AMDGPU/gfx940_imm16_73139a.rst b/llvm/docs/AMDGPU/gfx940_imm16_73139a.rst new file mode 100644 index 00000000000000..c4c0f28eefeb67 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_imm16_73139a.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_imm16_73139a: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. diff --git a/llvm/docs/AMDGPU/gfx940_imm16_a04fb3.rst b/llvm/docs/AMDGPU/gfx940_imm16_a04fb3.rst new file mode 100644 index 00000000000000..c6787df41aca77 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_imm16_a04fb3.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_imm16_a04fb3: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..65535. diff --git a/llvm/docs/AMDGPU/gfx940_label.rst b/llvm/docs/AMDGPU/gfx940_label.rst new file mode 100644 index 00000000000000..be1018b984ec87 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_label.rst @@ -0,0 +1,36 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_label: + +label +===== + +A branch target which is a 16-bit signed integer treated as a PC-relative dword offset. 
+ +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. +* A :ref:`symbol` (for example, a label) representing a relocatable address in the same compilation unit where it is referred from. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker. + +Examples: + +.. parsed-literal:: + + offset = 30 + label_1: + label_2 = . + 4 + + s_branch 32 + s_branch offset + 2 + s_branch label_1 + s_branch label_2 + s_branch label_3 + s_branch label_4 + + label_3 = label_2 + 4 + label_4: diff --git a/llvm/docs/AMDGPU/gfx940_m_254bcb.rst b/llvm/docs/AMDGPU/gfx940_m_254bcb.rst new file mode 100644 index 00000000000000..f8a56f55339e39 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_m_254bcb.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_m_254bcb: + +m += + +This operand may be used with integer operand modifier :ref:`sext`. diff --git a/llvm/docs/AMDGPU/gfx940_m_f5d306.rst b/llvm/docs/AMDGPU/gfx940_m_f5d306.rst new file mode 100644 index 00000000000000..324faf5fff66f7 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_m_f5d306.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_m_f5d306: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff --git a/llvm/docs/AMDGPU/gfx940_msg.rst b/llvm/docs/AMDGPU/gfx940_msg.rst new file mode 100644 index 00000000000000..b357a689b8f1bb --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_msg.rst @@ -0,0 +1,100 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx940_msg: + +msg +=== + +A 16-bit message code. The bits of this operand have the following meaning: + + ============ =============================== =============== + Bits Description Value Range + ============ =============================== =============== + 3:0 Message *type*. 0..15 + 6:4 Optional *operation*. 0..7 + 7:7 Unused. \- + 9:8 Optional *stream*. 0..3 + 15:10 Unused. \- + ============ =============================== =============== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A *sendmsg* value described below. + + ==================================== ==================================================== + Sendmsg Value Syntax Description + ==================================== ==================================================== + sendmsg(<*type*>) A message identified by its *type*. + sendmsg(<*type*>,<*op*>) A message identified by its *type* and *operation*. + sendmsg(<*type*>,<*op*>,<*stream*>) A message identified by its *type* and *operation* + with a stream *id*. + ==================================== ==================================================== + +*Type* may be specified using message *name* or message *id*. + +*Op* may be specified using operation *name* or operation *id*. + +Stream *id* is an integer in the range 0..3. + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. 
+ +Each message type supports specific operations: + + ====================== ========== ============================== ============ ========== + Message name Message Id Supported Operations Operation Id Stream Id + ====================== ========== ============================== ============ ========== + MSG_INTERRUPT 1 \- \- \- + MSG_GS 2 GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_GS_DONE 3 GS_OP_NOP 0 \- + \ GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_SAVEWAVE 4 \- \- \- + MSG_STALL_WAVE_GEN 5 \- \- \- + MSG_HALT_WAVES 6 \- \- \- + MSG_ORDERED_PS_DONE 7 \- \- \- + MSG_EARLY_PRIM_DEALLOC 8 \- \- \- + MSG_GS_ALLOC_REQ 9 \- \- \- + MSG_GET_DOORBELL 10 \- \- \- + MSG_SYSMSG 15 SYSMSG_OP_ECC_ERR_INTERRUPT 1 \- + \ SYSMSG_OP_REG_RD 2 \- + \ SYSMSG_OP_TTRACE_PC 4 \- + ====================== ========== ============================== ============ ========== + +*Sendmsg* arguments are validated depending on how the *type* value is specified: + +* If the message *type* is specified by name, argument values must satisfy the limitations detailed in the table above. +* If the message *type* is specified as a number, each argument must not exceed the corresponding value range (see the first table). + +Examples: + +.. 
parsed-literal:: + + // numeric message code + msg = 0x10 + s_sendmsg 0x12 + s_sendmsg msg + 2 + + // sendmsg with strict arguments validation + s_sendmsg sendmsg(MSG_INTERRUPT) + s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT) + s_sendmsg sendmsg(MSG_GS, 2) + s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1) + s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC) + s_sendmsg sendmsg(MSG_GET_DOORBELL) + + // sendmsg with validation of value range only + msg = 2 + op = 3 + stream = 1 + s_sendmsg sendmsg(msg, op, stream) + s_sendmsg sendmsg(2, GS_OP_CUT) diff --git a/llvm/docs/AMDGPU/gfx940_opt_0d447d.rst b/llvm/docs/AMDGPU/gfx940_opt_0d447d.rst new file mode 100644 index 00000000000000..eef6323915fda8 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_opt_0d447d.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_opt_0d447d: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`lds` is omitted. diff --git a/llvm/docs/AMDGPU/gfx940_opt_7c211e.rst b/llvm/docs/AMDGPU/gfx940_opt_7c211e.rst new file mode 100644 index 00000000000000..5865e678a0fd8d --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_opt_7c211e.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_opt_7c211e: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`sc0` is specified. diff --git a/llvm/docs/AMDGPU/gfx940_probe.rst b/llvm/docs/AMDGPU/gfx940_probe.rst new file mode 100644 index 00000000000000..b5c2061e3cbcdc --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_probe.rst @@ -0,0 +1,24 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx940_probe: + +probe +===== + +A bit mask which indicates the requested permissions. + +This operand must be specified as an :ref:`integer_number` or an :ref:`absolute_expression`. +The value is truncated to 7 bits, but only the 3 low bits are significant. + + ============ ============================== + Bit Number Description + ============ ============================== + 0 Request *read* permission. + 1 Request *write* permission. + 2 Request *execute* permission. + ============ ============================== diff --git a/llvm/docs/AMDGPU/gfx940_saddr_22dbc1.rst b/llvm/docs/AMDGPU/gfx940_saddr_22dbc1.rst new file mode 100644 index 00000000000000..b97ad24dd791b1 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_saddr_22dbc1.rst @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_saddr_22dbc1: + +saddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +* Offset = [:ref:`vaddr`] + [:ref:`saddr`] + :ref:`offset13s`. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff --git a/llvm/docs/AMDGPU/gfx940_saddr_a37373.rst b/llvm/docs/AMDGPU/gfx940_saddr_a37373.rst new file mode 100644 index 00000000000000..75f28d3d4be4a4 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_saddr_a37373.rst @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_saddr_a37373: + +saddr +===== + +An optional 64-bit flat global address. Must be specified as :ref:`off` if not used. + +See :ref:`vaddr` for a description of the available addressing modes. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff --git a/llvm/docs/AMDGPU/gfx940_sbase_010ce0.rst b/llvm/docs/AMDGPU/gfx940_sbase_010ce0.rst new file mode 100644 index 00000000000000..38ba55b6713ee2 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sbase_010ce0.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sbase_010ce0: + +sbase +===== + +A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sbase_044055.rst b/llvm/docs/AMDGPU/gfx940_sbase_044055.rst new file mode 100644 index 00000000000000..7994cf169a5083 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sbase_044055.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sbase_044055: + +sbase +===== + +A 64-bit base address for scalar memory operations. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sbase_0cd545.rst b/llvm/docs/AMDGPU/gfx940_sbase_0cd545.rst new file mode 100644 index 00000000000000..42bfda6ea967f2 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sbase_0cd545.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sbase_0cd545: + +sbase +===== + +This operand is ignored by H/W and :ref:`flat_scratch` is supplied instead. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdata_595c25.rst b/llvm/docs/AMDGPU/gfx940_sdata_595c25.rst new file mode 100644 index 00000000000000..be7b69b053f820 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdata_595c25.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdata_595c25: + +sdata +===== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdata_7cbd60.rst b/llvm/docs/AMDGPU/gfx940_sdata_7cbd60.rst new file mode 100644 index 00000000000000..835bf626acf9ff --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdata_7cbd60.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdata_7cbd60: + +sdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdata_aefe00.rst b/llvm/docs/AMDGPU/gfx940_sdata_aefe00.rst new file mode 100644 index 00000000000000..ee9e6a86526e67 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdata_aefe00.rst @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdata_aefe00: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, this operand receives the value held in memory before the operation. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdata_c6aec1.rst b/llvm/docs/AMDGPU/gfx940_sdata_c6aec1.rst new file mode 100644 index 00000000000000..faf9818bec65eb --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdata_c6aec1.rst @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdata_c6aec1: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, this operand receives the value held in memory before the operation. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdata_e9f591.rst b/llvm/docs/AMDGPU/gfx940_sdata_e9f591.rst new file mode 100644 index 00000000000000..b66886a63b6c3e --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdata_e9f591.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdata_e9f591: + +sdata +===== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdata_eb6f2a.rst b/llvm/docs/AMDGPU/gfx940_sdata_eb6f2a.rst new file mode 100644 index 00000000000000..a7b06c28c30362 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdata_eb6f2a.rst @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdata_eb6f2a: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, this operand receives the value held in memory before the operation. 
+ +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_06b266.rst b/llvm/docs/AMDGPU/gfx940_sdst_06b266.rst new file mode 100644 index 00000000000000..6b05e20a09c236 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_06b266.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_06b266: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_0804b1.rst b/llvm/docs/AMDGPU/gfx940_sdst_0804b1.rst new file mode 100644 index 00000000000000..e08e0d2a9c14f1 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_0804b1.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_0804b1: + +sdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_362c37.rst b/llvm/docs/AMDGPU/gfx940_sdst_362c37.rst new file mode 100644 index 00000000000000..64befcbd19f0d3 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_362c37.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_362c37: + +sdst +==== + +Instruction output. + +*Size:* 8 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_3bc700.rst b/llvm/docs/AMDGPU/gfx940_sdst_3bc700.rst new file mode 100644 index 00000000000000..cc7006d84f7e37 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_3bc700.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_3bc700: + +sdst +==== + +Instruction output. + +*Size:* 16 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_59204c.rst b/llvm/docs/AMDGPU/gfx940_sdst_59204c.rst new file mode 100644 index 00000000000000..afbc0e3da71b73 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_59204c.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_59204c: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_718cc4.rst b/llvm/docs/AMDGPU/gfx940_sdst_718cc4.rst new file mode 100644 index 00000000000000..4f4104edf7b651 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_718cc4.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_718cc4: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_94342d.rst b/llvm/docs/AMDGPU/gfx940_sdst_94342d.rst new file mode 100644 index 00000000000000..5bf066ba1bd482 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_94342d.rst @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_94342d: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_sdst_a319e6.rst b/llvm/docs/AMDGPU/gfx940_sdst_a319e6.rst new file mode 100644 index 00000000000000..f502a1e9ee4b41 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_sdst_a319e6.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_sdst_a319e6: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec` diff --git a/llvm/docs/AMDGPU/gfx940_simm32_6f0844.rst b/llvm/docs/AMDGPU/gfx940_simm32_6f0844.rst new file mode 100644 index 00000000000000..4b049cc85ab524 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_simm32_6f0844.rst @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_simm32_6f0844: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f32* as described :ref:`here`. diff --git a/llvm/docs/AMDGPU/gfx940_simm32_a3e80c.rst b/llvm/docs/AMDGPU/gfx940_simm32_a3e80c.rst new file mode 100644 index 00000000000000..e60446373b8866 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_simm32_a3e80c.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx940_simm32_a3e80c: + +simm32 +====== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value is truncated to 32 bits. diff --git a/llvm/docs/AMDGPU/gfx940_simm32_be0c1c.rst b/llvm/docs/AMDGPU/gfx940_simm32_be0c1c.rst new file mode 100644 index 00000000000000..ea75a8f0055cfb --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_simm32_be0c1c.rst @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_simm32_be0c1c: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f16* as described :ref:`here`. diff --git a/llvm/docs/AMDGPU/gfx940_soffset_4318ca.rst b/llvm/docs/AMDGPU/gfx940_soffset_4318ca.rst new file mode 100644 index 00000000000000..cb7d014f1bef52 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_soffset_4318ca.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_soffset_4318ca: + +soffset +======= + +An unsigned byte offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff --git a/llvm/docs/AMDGPU/gfx940_soffset_7b8c50.rst b/llvm/docs/AMDGPU/gfx940_soffset_7b8c50.rst new file mode 100644 index 00000000000000..b76580d861b476 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_soffset_7b8c50.rst @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_soffset_7b8c50: + +soffset +======= + +An offset added to the base address to get memory address. 
+ +* If offset is specified as a register, it supplies an unsigned byte offset. +* If offset is specified as a 21-bit immediate, it supplies a signed byte offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`simm21` diff --git a/llvm/docs/AMDGPU/gfx940_soffset_f33c5c.rst b/llvm/docs/AMDGPU/gfx940_soffset_f33c5c.rst new file mode 100644 index 00000000000000..9a115009db6af5 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_soffset_f33c5c.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_soffset_f33c5c: + +soffset +======= + +An unsigned 20-bit offset added to the base address to get memory address. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`uimm20` diff --git a/llvm/docs/AMDGPU/gfx940_src_4de5c6.rst b/llvm/docs/AMDGPU/gfx940_src_4de5c6.rst new file mode 100644 index 00000000000000..cb0bbb9577a3b2 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_4de5c6.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_4de5c6: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff --git a/llvm/docs/AMDGPU/gfx940_src_56ed80.rst b/llvm/docs/AMDGPU/gfx940_src_56ed80.rst new file mode 100644 index 00000000000000..292bedd3d418ca --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_56ed80.rst @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_56ed80: + +src +=== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff --git a/llvm/docs/AMDGPU/gfx940_src_64ea89.rst b/llvm/docs/AMDGPU/gfx940_src_64ea89.rst new file mode 100644 index 00000000000000..cd4325949bfec5 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_64ea89.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_64ea89: + +src +=== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff --git a/llvm/docs/AMDGPU/gfx940_src_6cfc4e.rst b/llvm/docs/AMDGPU/gfx940_src_6cfc4e.rst new file mode 100644 index 00000000000000..518e17e65cefdf --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_6cfc4e.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_6cfc4e: + +src +=== + +Instruction input. + +*Size:* 32 dwords. + +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff --git a/llvm/docs/AMDGPU/gfx940_src_a578ba.rst b/llvm/docs/AMDGPU/gfx940_src_a578ba.rst new file mode 100644 index 00000000000000..7a3e530db9e91c --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_a578ba.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx940_src_a578ba: + +src +=== + +Instruction input. + +*Size:* 16 dwords. + +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff --git a/llvm/docs/AMDGPU/gfx940_src_af08be.rst b/llvm/docs/AMDGPU/gfx940_src_af08be.rst new file mode 100644 index 00000000000000..f80d885eee97c3 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_af08be.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_af08be: + +src +=== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff --git a/llvm/docs/AMDGPU/gfx940_src_d578c4.rst b/llvm/docs/AMDGPU/gfx940_src_d578c4.rst new file mode 100644 index 00000000000000..1309bea4b8a437 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_d578c4.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_d578c4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff --git a/llvm/docs/AMDGPU/gfx940_src_d95796.rst b/llvm/docs/AMDGPU/gfx940_src_d95796.rst new file mode 100644 index 00000000000000..bac9554e63e827 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_d95796.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_d95796: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival` diff --git a/llvm/docs/AMDGPU/gfx940_src_e1561c.rst b/llvm/docs/AMDGPU/gfx940_src_e1561c.rst new file mode 100644 index 00000000000000..ae7b4682a56fc9 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_e1561c.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_e1561c: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff --git a/llvm/docs/AMDGPU/gfx940_src_e5cc81.rst b/llvm/docs/AMDGPU/gfx940_src_e5cc81.rst new file mode 100644 index 00000000000000..af1619656c9d3d --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_e5cc81.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_e5cc81: + +src +=== + +Instruction input. + +*Size:* 8 dwords. + +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff --git a/llvm/docs/AMDGPU/gfx940_src_f73668.rst b/llvm/docs/AMDGPU/gfx940_src_f73668.rst new file mode 100644 index 00000000000000..31ab428fb8c45d --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_src_f73668.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_src_f73668: + +src +=== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff --git a/llvm/docs/AMDGPU/gfx940_srsrc.rst b/llvm/docs/AMDGPU/gfx940_srsrc.rst new file mode 100644 index 00000000000000..708f5272c2e775 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_srsrc.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_srsrc: + +srsrc +===== + +Buffer resource constant which defines the address and characteristics of the buffer in memory. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_4db4a9.rst b/llvm/docs/AMDGPU/gfx940_ssrc_4db4a9.rst new file mode 100644 index 00000000000000..05ec538cd8ab0d --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_4db4a9.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_4db4a9: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_57838b.rst b/llvm/docs/AMDGPU/gfx940_ssrc_57838b.rst new file mode 100644 index 00000000000000..fb1787d89be80f --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_57838b.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_57838b: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`iconst` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_595c25.rst b/llvm/docs/AMDGPU/gfx940_ssrc_595c25.rst new file mode 100644 index 00000000000000..cefddc64d61ae6 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_595c25.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_595c25: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_65f041.rst b/llvm/docs/AMDGPU/gfx940_ssrc_65f041.rst new file mode 100644 index 00000000000000..dea559d1601247 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_65f041.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_65f041: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_aee59c.rst b/llvm/docs/AMDGPU/gfx940_ssrc_aee59c.rst new file mode 100644 index 00000000000000..22f7130c6e165e --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_aee59c.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_aee59c: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_c31902.rst b/llvm/docs/AMDGPU/gfx940_ssrc_c31902.rst new file mode 100644 index 00000000000000..c33aec812372b3 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_c31902.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_c31902: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_c5d631.rst b/llvm/docs/AMDGPU/gfx940_ssrc_c5d631.rst new file mode 100644 index 00000000000000..c7c436fa8500ad --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_c5d631.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_c5d631: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_c8a322.rst b/llvm/docs/AMDGPU/gfx940_ssrc_c8a322.rst new file mode 100644 index 00000000000000..020a07a86b8320 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_c8a322.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_c8a322: + +ssrc +==== + +Instruction input. 
+ +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec` diff --git a/llvm/docs/AMDGPU/gfx940_ssrc_e9f591.rst b/llvm/docs/AMDGPU/gfx940_ssrc_e9f591.rst new file mode 100644 index 00000000000000..14138355393aec --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_ssrc_e9f591.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_ssrc_e9f591: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff --git a/llvm/docs/AMDGPU/gfx940_type_deviation.rst b/llvm/docs/AMDGPU/gfx940_type_deviation.rst new file mode 100644 index 00000000000000..95cac5f7d2dd26 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_type_deviation.rst @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff --git a/llvm/docs/AMDGPU/gfx940_vaddr_0212e3.rst b/llvm/docs/AMDGPU/gfx940_vaddr_0212e3.rst new file mode 100644 index 00000000000000..828d62552ec3d9 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vaddr_0212e3.rst @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vaddr_0212e3: + +vaddr +===== + +A 64-bit flat global address or a 32-bit offset depending on addressing mode: + +* Address = :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 64-bit address. 
This mode is indicated by :ref:`saddr` set to :ref:`off`. +* Address = :ref:`saddr` + :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 32-bit offset. This mode is used when :ref:`saddr` is not :ref:`off`. + +*Size:* 1 or 2 dwords. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vaddr_6ab80d.rst b/llvm/docs/AMDGPU/gfx940_vaddr_6ab80d.rst new file mode 100644 index 00000000000000..a4e400766164a5 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vaddr_6ab80d.rst @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vaddr_6ab80d: + +vaddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +* Offset = [:ref:`vaddr`] + [:ref:`saddr`] + :ref:`offset13s`. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`off` diff --git a/llvm/docs/AMDGPU/gfx940_vaddr_9f7133.rst b/llvm/docs/AMDGPU/gfx940_vaddr_9f7133.rst new file mode 100644 index 00000000000000..b021672df12cf0 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vaddr_9f7133.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vaddr_9f7133: + +vaddr +===== + +A 64-bit flat address. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vaddr_b73dc0.rst b/llvm/docs/AMDGPU/gfx940_vaddr_b73dc0.rst new file mode 100644 index 00000000000000..2d307e721a36f7 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vaddr_b73dc0.rst @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vaddr_b73dc0: + +vaddr +===== + +This is an optional operand which may specify offset and/or index. 
+ +*Size:* 0, 1 or 2 dwords. Size is controlled by modifiers :ref:`offen` and :ref:`idxen`: + +* If only :ref:`idxen` is specified, this operand supplies an index. Size is 1 dword. +* If only :ref:`offen` is specified, this operand supplies an offset. Size is 1 dword. +* If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords. +* If none of these modifiers are specified, this operand must be set to :ref:`off`. + +*Operands:* :ref:`v`, :ref:`off` diff --git a/llvm/docs/AMDGPU/gfx940_vaddr_f20ee4.rst b/llvm/docs/AMDGPU/gfx940_vaddr_f20ee4.rst new file mode 100644 index 00000000000000..94901560132c3e --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vaddr_f20ee4.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vaddr_f20ee4: + +vaddr +===== + +An offset from the start of GDS/LDS memory. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vcc.rst b/llvm/docs/AMDGPU/gfx940_vcc.rst new file mode 100644 index 00000000000000..b82480731d30bd --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vcc.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vcc: + +vcc +=== + +Vector condition code. + +*Size:* 2 dwords. + +*Operands:* :ref:`vcc` diff --git a/llvm/docs/AMDGPU/gfx940_vdata0_9ad749.rst b/llvm/docs/AMDGPU/gfx940_vdata0_9ad749.rst new file mode 100644 index 00000000000000..64cb5c6a64e876 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata0_9ad749.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx940_vdata0_9ad749: + +vdata0 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata0_be4895.rst b/llvm/docs/AMDGPU/gfx940_vdata0_be4895.rst new file mode 100644 index 00000000000000..763545c830c1e0 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata0_be4895.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata0_be4895: + +vdata0 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata1_9ad749.rst b/llvm/docs/AMDGPU/gfx940_vdata1_9ad749.rst new file mode 100644 index 00000000000000..1d5cfe2b100b09 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata1_9ad749.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata1_9ad749: + +vdata1 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata1_be4895.rst b/llvm/docs/AMDGPU/gfx940_vdata1_be4895.rst new file mode 100644 index 00000000000000..68f12481c6158e --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata1_be4895.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata1_be4895: + +vdata1 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata_24882b.rst b/llvm/docs/AMDGPU/gfx940_vdata_24882b.rst new file mode 100644 index 00000000000000..f549d27e3fd3d4 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata_24882b.rst @@ -0,0 +1,21 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata_24882b: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`sc0` is specified, gets the memory value before the operation. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata_5eef12.rst b/llvm/docs/AMDGPU/gfx940_vdata_5eef12.rst new file mode 100644 index 00000000000000..279f4a2b527029 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata_5eef12.rst @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata_5eef12: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`sc0` is specified, gets the memory value before the operation. + +*Size:* 4 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata_848ff7.rst b/llvm/docs/AMDGPU/gfx940_vdata_848ff7.rst new file mode 100644 index 00000000000000..31e8402d7ef9ad --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata_848ff7.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata_848ff7: + +vdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata_9ad749.rst b/llvm/docs/AMDGPU/gfx940_vdata_9ad749.rst new file mode 100644 index 00000000000000..145c24fc276f5f --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata_9ad749.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata_9ad749: + +vdata +===== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata_be4895.rst b/llvm/docs/AMDGPU/gfx940_vdata_be4895.rst new file mode 100644 index 00000000000000..e907da7439cbea --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata_be4895.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata_be4895: + +vdata +===== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata_c8a58b.rst b/llvm/docs/AMDGPU/gfx940_vdata_c8a58b.rst new file mode 100644 index 00000000000000..46a39d72438807 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata_c8a58b.rst @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata_c8a58b: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`sc0` is specified, gets the memory value before the operation. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdata_cfb402.rst b/llvm/docs/AMDGPU/gfx940_vdata_cfb402.rst new file mode 100644 index 00000000000000..ed5a54cf721126 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdata_cfb402.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdata_cfb402: + +vdata +===== + +Instruction input. + +*Size:* 3 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_08b5ba.rst b/llvm/docs/AMDGPU/gfx940_vdst_08b5ba.rst new file mode 100644 index 00000000000000..ae0f3618795d6a --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_08b5ba.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_08b5ba: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_0c37de.rst b/llvm/docs/AMDGPU/gfx940_vdst_0c37de.rst new file mode 100644 index 00000000000000..d1bdf9b86ff733 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_0c37de.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_0c37de: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_0f48d1.rst b/llvm/docs/AMDGPU/gfx940_vdst_0f48d1.rst new file mode 100644 index 00000000000000..221ef8bb3e4ddb --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_0f48d1.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_0f48d1: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_180bef.rst b/llvm/docs/AMDGPU/gfx940_vdst_180bef.rst new file mode 100644 index 00000000000000..f022ba745c2009 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_180bef.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_180bef: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_260aca.rst b/llvm/docs/AMDGPU/gfx940_vdst_260aca.rst new file mode 100644 index 00000000000000..147e562dc2f490 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_260aca.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_260aca: + +vdst +==== + +Instruction output. + +*Size:* 3 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_5258b4.rst b/llvm/docs/AMDGPU/gfx940_vdst_5258b4.rst new file mode 100644 index 00000000000000..bd6c7b2520a915 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_5258b4.rst @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_5258b4: + +vdst +==== + +Instruction output: data read from a memory buffer. + +This is an optional operand. It must be used if and only if :ref:`lds` is omitted. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_56baf6.rst b/llvm/docs/AMDGPU/gfx940_vdst_56baf6.rst new file mode 100644 index 00000000000000..a5e524aa2c565f --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_56baf6.rst @@ -0,0 +1,19 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_56baf6: + +vdst +==== + +Data returned by a 32-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`sc0` is specified. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_63b743.rst b/llvm/docs/AMDGPU/gfx940_vdst_63b743.rst new file mode 100644 index 00000000000000..a53055b6113340 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_63b743.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_63b743: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 3 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_69a144.rst b/llvm/docs/AMDGPU/gfx940_vdst_69a144.rst new file mode 100644 index 00000000000000..3f05fed7cd828a --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_69a144.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_69a144: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_78dd0a.rst b/llvm/docs/AMDGPU/gfx940_vdst_78dd0a.rst new file mode 100644 index 00000000000000..0614436a997609 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_78dd0a.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx940_vdst_78dd0a: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_89680f.rst b/llvm/docs/AMDGPU/gfx940_vdst_89680f.rst new file mode 100644 index 00000000000000..f06d4f9016538a --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_89680f.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_89680f: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_8c77d4.rst b/llvm/docs/AMDGPU/gfx940_vdst_8c77d4.rst new file mode 100644 index 00000000000000..e9e560d4e3e739 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_8c77d4.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_8c77d4: + +vdst +==== + +Instruction output. + +*Size:* 32 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_a32035.rst b/llvm/docs/AMDGPU/gfx940_vdst_a32035.rst new file mode 100644 index 00000000000000..fe107d21f000aa --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_a32035.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_a32035: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 4 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_bce42a.rst b/llvm/docs/AMDGPU/gfx940_vdst_bce42a.rst new file mode 100644 index 00000000000000..0df073d458bd95 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_bce42a.rst @@ -0,0 +1,19 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_bce42a: + +vdst +==== + +Data returned by a 64-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`sc0` is specified. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_bdb32f.rst b/llvm/docs/AMDGPU/gfx940_vdst_bdb32f.rst new file mode 100644 index 00000000000000..041977c02a4c7b --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_bdb32f.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_bdb32f: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_c3d63a.rst b/llvm/docs/AMDGPU/gfx940_vdst_c3d63a.rst new file mode 100644 index 00000000000000..7828fb14d50e6e --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_c3d63a.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_c3d63a: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_c8d317.rst b/llvm/docs/AMDGPU/gfx940_vdst_c8d317.rst new file mode 100644 index 00000000000000..b3cce6e25b02c6 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_c8d317.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx940_vdst_c8d317: + +vdst +==== + +Instruction output. + +*Size:* 8 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_d0c0cb.rst b/llvm/docs/AMDGPU/gfx940_vdst_d0c0cb.rst new file mode 100644 index 00000000000000..bccff9d8844958 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_d0c0cb.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_d0c0cb: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_d6f4bd.rst b/llvm/docs/AMDGPU/gfx940_vdst_d6f4bd.rst new file mode 100644 index 00000000000000..6d73d09de6b641 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_d6f4bd.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_d6f4bd: + +vdst +==== + +Instruction output. + +*Size:* 16 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_d8236e.rst b/llvm/docs/AMDGPU/gfx940_vdst_d8236e.rst new file mode 100644 index 00000000000000..d600629fbfa7b7 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_d8236e.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_d8236e: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_e2898f.rst b/llvm/docs/AMDGPU/gfx940_vdst_e2898f.rst new file mode 100644 index 00000000000000..5c8e254d756510 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_e2898f.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_e2898f: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 3 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vdst_fa7dbd.rst b/llvm/docs/AMDGPU/gfx940_vdst_fa7dbd.rst new file mode 100644 index 00000000000000..96dc1e75034090 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vdst_fa7dbd.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vdst_fa7dbd: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vsrc_1027ca.rst b/llvm/docs/AMDGPU/gfx940_vsrc_1027ca.rst new file mode 100644 index 00000000000000..1f5f5e23c648d8 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vsrc_1027ca.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vsrc_1027ca: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vsrc_6802ce.rst b/llvm/docs/AMDGPU/gfx940_vsrc_6802ce.rst new file mode 100644 index 00000000000000..c719c2503b9ec7 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vsrc_6802ce.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vsrc_6802ce: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vsrc_848ff7.rst b/llvm/docs/AMDGPU/gfx940_vsrc_848ff7.rst new file mode 100644 index 00000000000000..8c390c62fbb48a --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vsrc_848ff7.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vsrc_848ff7: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vsrc_9ad749.rst b/llvm/docs/AMDGPU/gfx940_vsrc_9ad749.rst new file mode 100644 index 00000000000000..81f54012e6b79d --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vsrc_9ad749.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vsrc_9ad749: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vsrc_be4895.rst b/llvm/docs/AMDGPU/gfx940_vsrc_be4895.rst new file mode 100644 index 00000000000000..6cdf3686d030c3 --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vsrc_be4895.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vsrc_be4895: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`a` diff --git a/llvm/docs/AMDGPU/gfx940_vsrc_e016a1.rst b/llvm/docs/AMDGPU/gfx940_vsrc_e016a1.rst new file mode 100644 index 00000000000000..a10534b0cb1c8d --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vsrc_e016a1.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vsrc_e016a1: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_vsrc_fd235e.rst b/llvm/docs/AMDGPU/gfx940_vsrc_fd235e.rst new file mode 100644 index 00000000000000..92988838d6b82c --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_vsrc_fd235e.rst @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_vsrc_fd235e: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff --git a/llvm/docs/AMDGPU/gfx940_waitcnt.rst b/llvm/docs/AMDGPU/gfx940_waitcnt.rst new file mode 100644 index 00000000000000..de3bcc5627ae9c --- /dev/null +++ b/llvm/docs/AMDGPU/gfx940_waitcnt.rst @@ -0,0 +1,64 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx940_waitcnt: + +waitcnt +======= + +Counts of outstanding instructions to wait for. + +The bits of this operand have the following meaning: + + ========== ========= ================================================ ============ + High Bits Low Bits Description Value Range + ========== ========= ================================================ ============ + 15:14 3:0 VM_CNT: vector memory operations count. 0..63 + \- 6:4 EXP_CNT: export count. 0..7 + \- 11:8 LGKM_CNT: LDS, GDS, Constant and Message count. 
0..15 + ========== ========= ================================================ ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A combination of *vmcnt*, *expcnt*, *lgkmcnt* and other values described below. + + ====================== ====================================================================== + Syntax Description + ====================== ====================================================================== + vmcnt(<*N*>) A VM_CNT value. *N* must not exceed the largest VM_CNT value. + expcnt(<*N*>) An EXP_CNT value. *N* must not exceed the largest EXP_CNT value. + lgkmcnt(<*N*>) An LGKM_CNT value. *N* must not exceed the largest LGKM_CNT value. + vmcnt_sat(<*N*>) A VM_CNT value computed as min(*N*, the largest VM_CNT value). + expcnt_sat(<*N*>) An EXP_CNT value computed as min(*N*, the largest EXP_CNT value). + lgkmcnt_sat(<*N*>) An LGKM_CNT value computed as min(*N*, the largest LGKM_CNT value). + ====================== ====================================================================== + +These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators. + +*N* is either an +:ref:`integer number` or an +:ref:`absolute expression`. + +Examples: + +.. 
parsed-literal:: + + vm_cnt = 1 + exp_cnt = 2 + lgkm_cnt = 3 + cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8) + + s_waitcnt cnt + s_waitcnt 1 | (2 << 4) | (3 << 8) // the same as above + s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3) // the same as above + s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt) // the same as above + + s_waitcnt vmcnt(1) + s_waitcnt expcnt(2) lgkmcnt(3) + s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3) + s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2) diff --git a/llvm/docs/AMDGPUModifierSyntax.rst b/llvm/docs/AMDGPUModifierSyntax.rst index a39f13051c9a11..97e259517299a3 100644 --- a/llvm/docs/AMDGPUModifierSyntax.rst +++ b/llvm/docs/AMDGPUModifierSyntax.rst @@ -364,6 +364,21 @@ nv See a description :ref:`here`. +sc0 +~~~ + +See a description :ref:`here`. + +sc1 +~~~ + +See a description :ref:`here`. + +nt +~~ + +See a description :ref:`here`. + MIMG Modifiers -------------- @@ -658,6 +673,48 @@ See AMD documentation for details. tfe Set tfe bit to 1. ======================================== ================================================ +.. _amdgpu_synid_sc0: + +sc0 +~~~ + +For atomics, sc0 indicates that the atomic operation returns a value. +For other opcodes it is used together with :ref:`sc1` to specify cache +policy. See AMD documentation for details. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + sc0 Set sc0 bit to 1. + ======================================== ================================================ + +.. _amdgpu_synid_sc1: + +sc1 +~~~ + +This modifier is used together with :ref:`sc0` to specify cache +policy. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + sc1 Set sc1 bit to 1.
+ ======================================== ================================================ + +.. _amdgpu_synid_nt: + +nt +~~ + +Indicates an operation with non-temporal data. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + nt Set nt bit to 1. + ======================================== ================================================ + MUBUF/MTBUF Modifiers --------------------- @@ -1955,3 +2012,34 @@ Note: numeric value may be specified as either an :ref:`integer number` or an :ref:`absolute expression`. +.. _amdgpu_synid_mfma_neg: + +neg +~~~ + +Indicates operands that must be negated before the operation. +The number of values specified by this modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on. + +The value 0 indicates that the corresponding operand value is used unmodified, +the value 1 indicates that the operand value must be negated before the operation. + +By default, operand values are used unmodified. + +This modifier is valid for floating point operands only. + + =============================== ================================================================== + Syntax Description + =============================== ================================================================== + neg:[{0..1},{0..1},{0..1}] Select operands which must be negated before the operation. + =============================== ================================================================== + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. 
parsed-literal:: + + neg:[0,1,1] diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 2427df15e1f850..81dad6a487cce6 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -16,6 +16,7 @@ User Guide for AMDGPU Backend AMDGPU/AMDGPUAsmGFX906 AMDGPU/AMDGPUAsmGFX908 AMDGPU/AMDGPUAsmGFX90a + AMDGPU/AMDGPUAsmGFX940 AMDGPU/AMDGPUAsmGFX10 AMDGPU/AMDGPUAsmGFX1011 AMDGPU/AMDGPUAsmGFX1013 @@ -14242,8 +14243,14 @@ in this description. :doc:`gfx909` + :doc:`gfx90c` + CDNA 1 :doc:`GFX9` :doc:`gfx908` + CDNA 2 :doc:`GFX9` :doc:`gfx90a` + + CDNA 3 :doc:`GFX9` :doc:`gfx940` + RDNA 1 :doc:`GFX10 RDNA1` :doc:`gfx1010` :doc:`gfx1011` From 110a20b70e4384ef98093377dd710823dec4e5a9 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 31 May 2022 13:47:01 +0200 Subject: [PATCH 883/908] [bazel] Port 42c17073fcba --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index b6af8b37502e0f..1f0b030797259e 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4024,6 +4024,10 @@ gentbl_cc_library( ["-gen-llvmir-conversions"], "include/mlir/Dialect/LLVMIR/LLVMConversions.inc", ), + ( + ["-gen-llvmintrinsic-to-llvmirop-pairs"], + "include/mlir/Dialect/LLVMIR/LLVMIntrinsicToLLVMIROpPairs.inc", + ), ( ["-gen-enum-to-llvmir-conversions"], "include/mlir/Dialect/LLVMIR/LLVMConversionEnumsToLLVM.inc", From d8c46eb612e64e48f772ffdbee643cc28af73823 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 30 May 2022 13:56:16 +0000 Subject: [PATCH 884/908] Apply clang-tidy fixes for readability-identifier-naming in SparseTensorUtils.cpp (NFC) --- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp 
b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index ba73af28068805..430ba85affa315 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -867,25 +867,25 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { } else if (src.isCompressedDim(d)) { // Look up the bounds of the `d`-level segment determined by the // `d-1`-level position `parentPos`. - const std::vector

&pointers_d = src.pointers[d]; - assert(parentPos + 1 < pointers_d.size() && + const std::vector

&pointersD = src.pointers[d]; + assert(parentPos + 1 < pointersD.size() && "Parent pointer position is out of bounds"); - const uint64_t pstart = static_cast(pointers_d[parentPos]); - const uint64_t pstop = static_cast(pointers_d[parentPos + 1]); + const uint64_t pstart = static_cast(pointersD[parentPos]); + const uint64_t pstop = static_cast(pointersD[parentPos + 1]); // Loop-invariant code for looking up the `d`-level coordinates/indices. - const std::vector &indices_d = src.indices[d]; - assert(pstop <= indices_d.size() && "Index position is out of bounds"); - uint64_t &cursor_reord_d = this->cursor[this->reord[d]]; + const std::vector &indicesD = src.indices[d]; + assert(pstop <= indicesD.size() && "Index position is out of bounds"); + uint64_t &cursorReordD = this->cursor[this->reord[d]]; for (uint64_t pos = pstart; pos < pstop; pos++) { - cursor_reord_d = static_cast(indices_d[pos]); + cursorReordD = static_cast(indicesD[pos]); forallElements(yield, pos, d + 1); } } else { // Dense dimension. 
const uint64_t sz = src.getDimSizes()[d]; const uint64_t pstart = parentPos * sz; - uint64_t &cursor_reord_d = this->cursor[this->reord[d]]; + uint64_t &cursorReordD = this->cursor[this->reord[d]]; for (uint64_t i = 0; i < sz; i++) { - cursor_reord_d = i; + cursorReordD = i; forallElements(yield, pstart + i, d + 1); } } From 5d93d2a9eb644efddf0afce15c52a8aae85a79c1 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 30 May 2022 14:44:00 +0000 Subject: [PATCH 885/908] Apply clang-tidy fixes for llvm-else-after-return in OpPythonBindingGen.cpp (NFC) --- mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp index d6b3aa60c97f42..34db5bf46953b3 100644 --- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp +++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp @@ -904,8 +904,7 @@ static void emitDefaultOpBuilder(const Operator &op, raw_ostream &os) { return (nattr->attr.isOptional() || nattr->attr.hasDefaultValue()); if (auto *ntype = a.dyn_cast()) return ntype->isOptional(); - else - return false; + return false; }; // StringRefs in functionArgs refer to strings allocated by builderArgs. 
From c7bee26f4fe170a065e48a2650e70802e9fc9ece Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 31 May 2022 13:35:49 +0200 Subject: [PATCH 886/908] [mlir][Bazel] Adjust BUILD.bazel file --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 1f0b030797259e..27c5abae18ba8c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4024,10 +4024,6 @@ gentbl_cc_library( ["-gen-llvmir-conversions"], "include/mlir/Dialect/LLVMIR/LLVMConversions.inc", ), - ( - ["-gen-llvmintrinsic-to-llvmirop-pairs"], - "include/mlir/Dialect/LLVMIR/LLVMIntrinsicToLLVMIROpPairs.inc", - ), ( ["-gen-enum-to-llvmir-conversions"], "include/mlir/Dialect/LLVMIR/LLVMConversionEnumsToLLVM.inc", @@ -4050,6 +4046,10 @@ gentbl_cc_library( ["-gen-llvmir-conversions"], "include/mlir/Dialect/LLVMIR/LLVMIntrinsicConversions.inc", ), + ( + ["-gen-llvmintrinsic-to-llvmirop-pairs"], + "include/mlir/Dialect/LLVMIR/LLVMIntrinsicToLLVMIROpPairs.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td", @@ -6069,6 +6069,7 @@ cc_library( ":IR", ":LLVMConversionIncGen", ":LLVMDialect", + ":LLVMIntrinsicConversionIncGen", ":Support", ":Translation", "//llvm:Core", From 4fb3fd7d82067a6d5ade618db54b6e9b1158204b Mon Sep 17 00:00:00 2001 From: Danila Malyutin Date: Mon, 30 May 2022 16:30:49 +0300 Subject: [PATCH 887/908] [InstCombine] Fix const folding of switches with default case In case phi was in the default block it could lead to multi-edge. Fixes #55721. 
Differential Revision: https://reviews.llvm.org/D126650 --- .../Transforms/InstCombine/InstCombinePHI.cpp | 7 ++ .../InstCombine/simple_phi_condition.ll | 66 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 85363d0f6c9ae6..8b6831152131d2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -1269,6 +1269,12 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, // ... ... // \ / // phi [true] [false] + // and + // switch (cond) + // case v1: / \ case v2: + // ... ... + // \ / + // phi [v1] [v2] // Make sure all inputs are constants. if (!all_of(PN.operands(), [](Value *V) { return isa(V); })) return nullptr; @@ -1297,6 +1303,7 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, AddSucc(ConstantInt::getFalse(Context), BI->getSuccessor(1)); } else if (auto *SI = dyn_cast(IDom->getTerminator())) { Cond = SI->getCondition(); + ++SuccCount[SI->getDefaultDest()]; for (auto Case : SI->cases()) AddSucc(Case.getCaseValue(), Case.getCaseSuccessor()); } else { diff --git a/llvm/test/Transforms/InstCombine/simple_phi_condition.ll b/llvm/test/Transforms/InstCombine/simple_phi_condition.ll index 154d58746650d0..51b5e16627fc31 100644 --- a/llvm/test/Transforms/InstCombine/simple_phi_condition.ll +++ b/llvm/test/Transforms/InstCombine/simple_phi_condition.ll @@ -585,3 +585,69 @@ merge: %ret = phi i8 [ 1, %sw.1 ], [ 7, %sw.7 ], [ 19, %sw.19 ], [ 42, %entry ] ret i8 %ret } + +define i8 @test_switch_default_edge_direct(i8 %cond) { +; CHECK-LABEL: @test_switch_default_edge_direct( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i8 [[COND:%.*]], label [[MERGE:%.*]] [ +; CHECK-NEXT: i8 1, label [[SW_1:%.*]] +; CHECK-NEXT: i8 7, label [[SW_7:%.*]] +; CHECK-NEXT: i8 19, label [[MERGE]] +; CHECK-NEXT: ] +; CHECK: sw.1: +; CHECK-NEXT: br label [[MERGE]] +; 
CHECK: sw.7: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[RET:%.*]] = phi i8 [ 1, [[SW_1]] ], [ 7, [[SW_7]] ], [ 19, [[ENTRY:%.*]] ], [ 19, [[ENTRY]] ] +; CHECK-NEXT: ret i8 [[RET]] +; +entry: + switch i8 %cond, label %merge [ + i8 1, label %sw.1 + i8 7, label %sw.7 + i8 19, label %merge + ] +sw.1: + br label %merge +sw.7: + br label %merge +merge: + %ret = phi i8 [ 1, %sw.1 ], [ 7, %sw.7 ], [ 19, %entry ], [ 19, %entry ] + ret i8 %ret +} + +define i8 @test_switch_default_edge_duplicate(i8 %cond) { +; CHECK-LABEL: @test_switch_default_edge_duplicate( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i8 [[COND:%.*]], label [[SW_19:%.*]] [ +; CHECK-NEXT: i8 1, label [[SW_1:%.*]] +; CHECK-NEXT: i8 7, label [[SW_7:%.*]] +; CHECK-NEXT: i8 19, label [[SW_19]] +; CHECK-NEXT: ] +; CHECK: sw.1: +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: sw.7: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: sw.19: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[RET:%.*]] = phi i8 [ 1, [[SW_1]] ], [ 7, [[SW_7]] ], [ 19, [[SW_19]] ] +; CHECK-NEXT: ret i8 [[RET]] +; +entry: + switch i8 %cond, label %sw.19 [ + i8 1, label %sw.1 + i8 7, label %sw.7 + i8 19, label %sw.19 + ] +sw.1: + br label %merge +sw.7: + br label %merge +sw.19: + br label %merge +merge: + %ret = phi i8 [ 1, %sw.1 ], [ 7, %sw.7 ], [ 19, %sw.19 ] + ret i8 %ret +} From b0fc765350fe8f7f857985718a50463d22758ef1 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Tue, 31 May 2022 05:38:44 -0700 Subject: [PATCH 888/908] [NFC] Change LoopVectorizationCostModel::useOrderedReductions() to be a const function. 
Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D126200 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 579d2dfb357dde..268a90c5d3dace 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1310,7 +1310,7 @@ class LoopVectorizationCostModel { /// RdxDesc. This is true if the -enable-strict-reductions flag is passed, /// the IsOrdered flag of RdxDesc is set and we do not allow reordering /// of FP operations. - bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) { + bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const { return !Hints->allowReordering() && RdxDesc.isOrdered(); } From b8f5732634feaf7a9ce37d33f97f1fb490efeddd Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Tue, 31 May 2022 09:21:34 -0400 Subject: [PATCH 889/908] [libc++abi][AIX] add personality and helper functions for the state table EH Summary: This patch adds the personality and helper functions for the state table based EH used by IBM legacy compilers xlC and xlclang++ on AIX. * A high level description of the state table based EH is provided in the code comments. * Function scan_state_tab() is added to scan the state table. It is invoked by the state table personality routine __xlcxx_personality_v0() and returns scan_results like scan_eh_tab() does. * A couple of EH helper functions used by xlC and xlclang++ generated code are also added, e.g., __xlc_catch_matchv2() which checks whether the thrown object matches the catch handler's exception type. * Debugging macros _LIBCXXABI_TRACE_STATETAB, _LIBCXXABI_TRACE_STATETAB0, and _LIBCXXABI_TRACING_STATETAB are added to dump state table scanning traces if environment variable LIBCXXABI_PRINT_STATTAB is set. 
* The state variable and state table data is the LSDA found from the traceback table of the function during unwinding. Reviewed by: MaskRay, cebowleratibm, libc++abi Differential Revision: https://reviews.llvm.org/D100504 --- libcxxabi/src/aix_state_tab_eh.inc | 681 +++++++++++++++++++++++++++++ libcxxabi/src/cxa_personality.cpp | 6 + 2 files changed, 687 insertions(+) create mode 100644 libcxxabi/src/aix_state_tab_eh.inc diff --git a/libcxxabi/src/aix_state_tab_eh.inc b/libcxxabi/src/aix_state_tab_eh.inc new file mode 100644 index 00000000000000..032a2c8af2f126 --- /dev/null +++ b/libcxxabi/src/aix_state_tab_eh.inc @@ -0,0 +1,681 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// This file implements the personality and helper functions for the state +// table based EH used by IBM legacy compilers xlC and xlclang++ on AIX. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +/* + The legacy IBM xlC and xlclang++ compilers use the state table for EH + instead of the range table. Destructors, or addresses of the possible catch + sites or cleanup code are specified in the state table which is a finite + state machine (FSM). Each function that has a state table also has an + autolocal state variable. The state variable represents the current state + of the function for EH and is found through the traceback table of the + function during unwinding, which is located at the end of each function. + The FSM is an array of state entries. 
Each state entry has the following + fields: + + * offset/address/pointer - the offset used to locate the object, or the + address of a global object, or the address of the next state if it is an + old conditional state change entry; + * dtor/landing pad - address of the destructor function to invoke, + or address of the catch block or cleanup code in the user code to branch to; + * element count/action flag - the number of elements or the flag for actions; + * element size - if the object is an array this is the size of one element + of the array; + * flags - flags used to control how fields in the entry are interpreted; + * next state - the state to execute next after the action for this state is + performed. The value of zero indicates the end of the state for this + function. + + The following is the description of 'element count/action flag' field. ++-----------------------------------------------------------------------------+ +| value | description | action | ++-------+------------------------+--------------------------------------------+ +| > 1 | object is an array | calls __cxa_vec_cleanup to run dtor for | +| | | each member of the array | ++-------+------------------------+--------------------------------------------+ +| 1, 0 | object is a scalar | calls dtor for the object | ++-------+------------------------+--------------------------------------------+ +| -1 | begin catch | branches to the handler which performs | +| | | catch-match.
If there is no catch that | +| | | matches the exception it will be rethrown | ++-------+------------------------+--------------------------------------------+ +| -2 | end catch | ends current catch block and continues | +| | | attempting to catch the exception | ++-------+------------------------+--------------------------------------------+ +| -3 | delete the object | calls the delete function of the object | ++-------+------------------------+--------------------------------------------+ +| -4 | cleanup label | branches to the user code for cleaning up | ++-------+------------------------+--------------------------------------------+ +*/ + +namespace __cxxabiv1 { + +extern "C" { + +// Macros for debugging the state table parsing. +#ifdef NDEBUG +# define _LIBCXXABI_TRACE_STATETAB(msg, ...) +# define _LIBCXXABI_TRACE_STATETAB0(msg) +# define _LIBCXXABI_TRACE_STATETAB1(msg) +# define _LIBCXXABI_TRACING_STATETAB 0 +#else +static bool state_tab_dbg() { + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBCXXABI_PRINT_STATTAB") != NULL); + checked = true; + } + return log; +} + +# define _LIBCXXABI_TRACE_STATETAB(msg, ...) \ + do { \ + if (state_tab_dbg()) \ + fprintf(stderr, "libcxxabi: " msg, __VA_ARGS__); \ + } while (0) +# define _LIBCXXABI_TRACE_STATETAB0(msg) \ + do { \ + if (state_tab_dbg()) \ + fprintf(stderr, "libcxxabi: " msg); \ + } while (0) +# define _LIBCXXABI_TRACE_STATETAB1(msg) \ + do { \ + if (state_tab_dbg()) \ + fprintf(stderr, msg); \ + } while (0) + +# define _LIBCXXABI_TRACING_STATETAB state_tab_dbg() +#endif // NDEBUG + +namespace __state_table_eh { + +using destruct_f = void (*)(void*); + +// Definition of flags for the state table entry field 'action flag'. +enum FSMEntryCount : intptr_t { beginCatch = -1, endCatch = -2, deleteObject = -3, cleanupLabel = -4, terminate = -5 }; + +// Definition of flags for the state table entry field 'flags'. 
+enum FSMEntryFlag : int16_t { + indirect = 0x100, // Object was thrown from a function where + // the return value optimization was used. + oldConditionalStateChange = 0x400, // State table entry is an indirect state + // change, dereference the address in + // offset as int for the target state. + // This is deprecated. This indicates + // the address is direct. (static local). + conditionalStateChange = 0x800, // State table entry is an indirect state + // change, dereference the address in + // offset as int for the target state. + // The temporary is an automatic. State + // change is used in cases such as + // (b?(T1(),foo()):(T2(),foo())),throw 42; + // which causes a conditional state change + // so that we know if T1 or T2 need to be + // destroyed. + thisFlag = 0x01, // The address of the object for the + // cleanup action is based on the + // StateVariable::thisValue. + vBaseFlag = 0x02, // The object is of a virtual base class. + globalObj = 0x04 // FSMEntry::address is the address of + // a global object. +}; + +namespace { +// The finite state machine to be walked. +struct FSMEntry { + union { + // Offset of the object within its stack frame or containing object. + intptr_t offset; + // Address of a global object. + intptr_t address; + // Address of the next state if it is an old conditional state change entry. + intptr_t nextStatePtr; + }; + union { + // Address of the destructor function. + void (*destructor)(void*, size_t); + // The address of the catch block or cleanup code. + void* landingPad; + }; + union { + // The flag for actions (when the value is negative). + FSMEntryCount actionFlag; + // The element count (when the value is positive or zero). + size_t elementCount; + }; + size_t elemSize; + FSMEntryFlag flags; + uint16_t nextState; +}; + +struct FSM { + uint32_t magic; // Magic number of the state table. + int32_t numberOfStates; + FSMEntry table[1]; // Actually table[numberOfStates]. +}; + +// The state variable on the stack. 
+struct StateVariable { + int32_t state; + struct FSM* table; + intptr_t thisValue; + int32_t ignoreVBasePtrs; +}; +} // namespace + +// State table magic number +enum FSMMagic : uint32_t { + number = 0xbeefdead, // State table generated by xlC compiler. + number2 = 0xbeeedead, // State table generated by early version xlC compiler. + number3 = 0x1cedbeef // State table generated by xlclang++ compiler. +}; + +constexpr uint32_t REG_EXCP_OBJ = 14; // Register to pass the address of the exception + // object from the personality to xlclang++ + // compiled code. + +constexpr size_t dtorArgument = 0x02; // Flag to destructor indicating to free + // virtual bases, don't delete object. + +static void invoke_destructor(FSMEntry* fsmEntry, void* addr) { + _LIBCXXABI_TRACE_STATETAB("Destruct object=%p, fsmEntry=%p\n", addr, reinterpret_cast(fsmEntry)); + try { + if (fsmEntry->elementCount == 1) { + _LIBCXXABI_TRACE_STATETAB0("calling scalar destructor\n"); + (*fsmEntry->destructor)(addr, dtorArgument); + _LIBCXXABI_TRACE_STATETAB0("returned from scalar destructor\n"); + } else { + _LIBCXXABI_TRACE_STATETAB0("calling vector destructor\n"); + __cxa_vec_cleanup(addr, reinterpret_cast(fsmEntry->elementCount), fsmEntry->elemSize, + reinterpret_cast(fsmEntry->destructor)); + _LIBCXXABI_TRACE_STATETAB0("returned from vector destructor\n"); + } + } catch (...) { + _LIBCXXABI_TRACE_STATETAB0("Uncaught exception in destructor, terminating\n"); + std::terminate(); + } +} + +static void invoke_delete(FSMEntry* fsmEntry, void* addr) { + char* objectAddress = *reinterpret_cast(addr); + + _LIBCXXABI_TRACE_STATETAB("Delete object=%p, fsmEntry=%p\n", reinterpret_cast(objectAddress), + reinterpret_cast(fsmEntry)); + try { + _LIBCXXABI_TRACE_STATETAB0("..calling delete()\n"); + // 'destructor' holds a function pointer to delete(). + (*fsmEntry->destructor)(objectAddress, fsmEntry->elemSize); + _LIBCXXABI_TRACE_STATETAB0("..returned from delete()\n"); + } catch (...) 
{ + _LIBCXXABI_TRACE_STATETAB0("Uncaught exception in delete(), terminating\n"); + std::terminate(); + } +} + +// Get the frame address of the current function from its traceback table +// which is at the end of each function. +static uintptr_t get_frame_addr(_Unwind_Context* context) { + int framePointerReg = 1; // default frame pointer == SP. + uint32_t* p = reinterpret_cast(_Unwind_GetIP(context)); + + // Keep looking forward until a word of 0 is found. The traceback + // table starts at the following word. + while (*p) + ++p; + tbtable* TBTable = reinterpret_cast(p + 1); + + p = reinterpret_cast(&TBTable->tb_ext); + + // Skip field parminfo if it exists. + if (TBTable->tb.fixedparms || TBTable->tb.floatparms) + ++p; + + // Skip field tb_offset if it exists. + if (TBTable->tb.has_tboff) + ++p; + + // Skip field hand_mask if it exists. + if (TBTable->tb.int_hndl) + ++p; + + // Skip fields ctl_info and ctl_info_disp if they exist. + if (TBTable->tb.has_ctl) + p += 1 + *p; + + // Skip fields name_len and name if exist. + if (TBTable->tb.name_present) { + const uint16_t name_len = *reinterpret_cast(p); + p = reinterpret_cast(reinterpret_cast(p) + name_len + sizeof(uint16_t)); + } + + if (TBTable->tb.uses_alloca) + framePointerReg = *reinterpret_cast(p); + + return _Unwind_GetGR(context, framePointerReg); +} + +// Calculate the object address from the FSM entry. 
+static void* compute_addr_from_table(FSMEntry* fsmEntry, StateVariable* const state, _Unwind_Context* context) { + void* addr; + if (fsmEntry->flags & FSMEntryFlag::globalObj) { + addr = reinterpret_cast(fsmEntry->address); + _LIBCXXABI_TRACE_STATETAB("Address calculation (global obj) addr=fsmEntry->address=%p\n", addr); + } else if (fsmEntry->flags & FSMEntryFlag::thisFlag) { + addr = reinterpret_cast(state->thisValue + fsmEntry->offset); + _LIBCXXABI_TRACE_STATETAB("Address calculation (this obj) fsmEntry->offset=%ld : " + "state->thisValue=%ld addr=(fsmEntry->offset+state->thisValue)=%p\n", + fsmEntry->offset, state->thisValue, addr); + } else if (fsmEntry->flags & FSMEntryFlag::indirect) { + addr = reinterpret_cast( + *reinterpret_cast(get_frame_addr(context) + static_cast(fsmEntry->offset))); + _LIBCXXABI_TRACE_STATETAB("Address calculation (indirect obj) addr=%p, fsmEntry->offset=%ld \n", + addr, fsmEntry->offset); + } else { + addr = reinterpret_cast(get_frame_addr(context) + static_cast(fsmEntry->offset)); + _LIBCXXABI_TRACE_STATETAB("Address calculation. (local obj) addr=fsmEntry->offset=%p\n", + addr); + } + return addr; +} + +static void scan_state_tab(scan_results& results, _Unwind_Action actions, bool native_exception, + _Unwind_Exception* unwind_exception, _Unwind_Context* context) { + // Initialize results to found nothing but an error. + results.ttypeIndex = 0; + results.actionRecord = 0; + results.languageSpecificData = 0; + results.landingPad = 0; + results.adjustedPtr = 0; + results.reason = _URC_FATAL_PHASE1_ERROR; + + // Check for consistent actions. + if (actions & _UA_SEARCH_PHASE) { + // Do Phase 1 + if (actions & (_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME | _UA_FORCE_UNWIND)) { + // None of these flags should be set during Phase 1. 
+ // Client error + results.reason = _URC_FATAL_PHASE1_ERROR; + return; + } + } else if (actions & _UA_CLEANUP_PHASE) { + if ((actions & _UA_HANDLER_FRAME) && (actions & _UA_FORCE_UNWIND)) { + // _UA_HANDLER_FRAME should only be set if phase 1 found a handler. + // If _UA_FORCE_UNWIND is set, phase 1 shouldn't have happened. + // Client error + results.reason = _URC_FATAL_PHASE2_ERROR; + return; + } + } else { + // Neither _UA_SEARCH_PHASE nor _UA_CLEANUP_PHASE is set. + // Client error + results.reason = _URC_FATAL_PHASE1_ERROR; + return; + } + + if (_LIBCXXABI_TRACING_STATETAB) { + _LIBCXXABI_TRACE_STATETAB1("\n"); + _LIBCXXABI_TRACE_STATETAB("%s: actions=%d (", __func__, actions); + + if (_UA_SEARCH_PHASE & actions) + _LIBCXXABI_TRACE_STATETAB1("_UA_SEARCH_PHASE "); + if (_UA_CLEANUP_PHASE & actions) + _LIBCXXABI_TRACE_STATETAB1("_UA_CLEANUP_PHASE "); + if (_UA_HANDLER_FRAME & actions) + _LIBCXXABI_TRACE_STATETAB1("_UA_HANDLER_FRAME "); + if (_UA_FORCE_UNWIND & actions) + _LIBCXXABI_TRACE_STATETAB1("_UA_FORCE_UNWIND "); + _LIBCXXABI_TRACE_STATETAB1(")\n"); + _LIBCXXABI_TRACE_STATETAB(" unwind_exception=%p context=%p\n", reinterpret_cast(unwind_exception), + reinterpret_cast(context)); + } + + // Start scan by getting state table address. + StateVariable* const state = reinterpret_cast(_Unwind_GetLanguageSpecificData(context)); + if (state->state <= 0) { + // The state is not correct - give up on this routine. + _LIBCXXABI_TRACE_STATETAB("state=%d and is <= 0), continue unwinding\n", state->state); + results.reason = _URC_CONTINUE_UNWIND; + return; + } + // Parse the state table. + FSM* const fsm = state->table; + FSMEntry* currFSMEntry; + + if (fsm->magic != FSMMagic::number && fsm->magic != FSMMagic::number2 && fsm->magic != FSMMagic::number3) { + // Something is wrong with the state table we found. 
+ if (_UA_SEARCH_PHASE & actions) { + _LIBCXXABI_TRACE_STATETAB0("Invalid FSM table, return _URC_FATAL_PHASE1_ERROR\n"); + results.reason = _URC_FATAL_PHASE1_ERROR; + } else if (_UA_CLEANUP_PHASE & actions) { + _LIBCXXABI_TRACE_STATETAB0("Invalid FSM table, return _URC_FATAL_PHASE2_ERROR\n"); + results.reason = _URC_FATAL_PHASE2_ERROR; + } else { + // We should never get here. + _LIBCXXABI_TRACE_STATETAB0("Invalid FSM table + RT Internal error, return _URC_FATAL_PHASE2_ERROR\n"); + results.reason = _URC_FATAL_PHASE2_ERROR; + } + return; + } + + if (_LIBCXXABI_TRACING_STATETAB) { + // Print the state table for debugging purposes. + _LIBCXXABI_TRACE_STATETAB("state->state=%d, state->ignoreVBasePtrs=%d\n", state->state, state->ignoreVBasePtrs); + _LIBCXXABI_TRACE_STATETAB("fsm->magic=%#x, fsm->numberOfStates=%d\n", fsm->magic, fsm->numberOfStates); + // Print out the FSM table. + _LIBCXXABI_TRACE_STATETAB0("FSM table:\n"); + _LIBCXXABI_TRACE_STATETAB("%12s %10s %8s %10s %7s %7s %7s %7s\n", "Entry Addr", "state", "Offset", "DTR/lpad", + "count", "el_size", "flags", "next"); + for (int i = 0; i < fsm->numberOfStates; i++) { + currFSMEntry = &fsm->table[i]; + _LIBCXXABI_TRACE_STATETAB("%12p (%8d) %8ld %10p %7ld " + "%7ld %#7x %7d\n", + reinterpret_cast(&currFSMEntry), i + 1, currFSMEntry->offset, + reinterpret_cast(currFSMEntry->destructor), + currFSMEntry->elementCount, currFSMEntry->elemSize, currFSMEntry->flags, + currFSMEntry->nextState); + } + } + + if (_UA_SEARCH_PHASE & actions) { + // Start walking the state table. Use a local copy of state->state so when + // we return from search phase we don't change the state number. + int currState = state->state; + + while (currState > 0) { + currFSMEntry = &fsm->table[currState - 1]; + _LIBCXXABI_TRACE_STATETAB("Processing state=%d, flags=0x%hx\n", currState, currFSMEntry->flags); + + if (currFSMEntry->actionFlag == FSMEntryCount::beginCatch) { + // Found a catch handler. 
+ if (fsm->magic == FSMMagic::number) { + _LIBCXXABI_TRACE_STATETAB0("Found a xlC catch handler, return _URC_FATAL_PHASE1_ERROR\n"); + // xlC catch handlers cannot be entered because they use a + // proprietary EH runtime that is not interoperable. + results.reason = _URC_FATAL_PHASE1_ERROR; + return; + } + // xlclang++ compiled frames use CXA-abi EH calls and any catch + // block will include a catch(...) block so it is safe to assume that + // the handler is found without checking the catch match. The + // catch(...) block will rethrow the exception if there isn't a + // match. + _LIBCXXABI_TRACE_STATETAB0("Found a catch handler, return _URC_HANDLER_FOUND\n"); + results.reason = _URC_HANDLER_FOUND; + return; + } + if (currFSMEntry->actionFlag == FSMEntryCount::terminate) { + _LIBCXXABI_TRACE_STATETAB0("Found the terminate state, return _URC_HANDLER_FOUND\n"); + results.reason = _URC_HANDLER_FOUND; + return; + } + if (currFSMEntry->flags & FSMEntryFlag::oldConditionalStateChange) { + // Deprecated conditional expression. + currState = *reinterpret_cast(currFSMEntry->nextStatePtr); + _LIBCXXABI_TRACE_STATETAB("Flag: FSMEntryFlag::oldConditionalStateChange, dereference " + "currFSMEntry->nextStatePtr(%ld), set state=%d\n", + currFSMEntry->nextStatePtr, currState); + continue; // We are done this iteration of the loop, since + // we changed a state. + } + if (currFSMEntry->flags & FSMEntryFlag::conditionalStateChange) { + void* addr = compute_addr_from_table(currFSMEntry, state, context); + currState = *reinterpret_cast(addr); + _LIBCXXABI_TRACE_STATETAB("Flag: FSMEntryFlag::conditionalStateChange, dereference " + "addr(%p), set state=%d\n", addr, currState); + continue; // We are done this iteration of the loop, since we + // changed the state. + } + // Go to the next state. 
+ currState = currFSMEntry->nextState; + } + _LIBCXXABI_TRACE_STATETAB0("No catch handler found, return _URC_CONTINUE_UNWIND\n"); + results.reason = _URC_CONTINUE_UNWIND; + return; + } + if (_UA_CLEANUP_PHASE & actions) { + // Start walking the state table. + while (state->state > 0) { + currFSMEntry = &fsm->table[state->state - 1]; + + if (currFSMEntry->actionFlag == FSMEntryCount::terminate) { + _LIBCXXABI_TRACE_STATETAB0("Reached terminate state. Call terminate.\n"); + std::terminate(); + } + // Perform action according to the currFSMEntry->actionFlag, + // except when flag is FSMEntryFlag::conditionalStateChange or + // FSMEntryFlag::oldConditionalStateChange. + _LIBCXXABI_TRACE_STATETAB("Processing state=%d, flags=0x%hx\n", state->state, currFSMEntry->flags); + if (currFSMEntry->flags & FSMEntryFlag::oldConditionalStateChange) { + state->state = *reinterpret_cast(currFSMEntry->nextStatePtr); + _LIBCXXABI_TRACE_STATETAB("Flag: FSMEntryFlag::oldConditionalStateChange, dereference " + "currFSMEntry->nextStatePtr(%ld), set state=%d\n", + currFSMEntry->nextStatePtr, state->state); + continue; // We are done with this iteration of the loop, since we changed a state. + } + if (currFSMEntry->flags & FSMEntryFlag::conditionalStateChange) { + // A conditional state table entry holds the address of a local + // that holds the next state. + void* addr = compute_addr_from_table(currFSMEntry, state, context); + state->state = *reinterpret_cast(addr); + _LIBCXXABI_TRACE_STATETAB("Flag: FSMEntryFlag::conditionalStateChange, dereference " + "addr(%p), set state=%d\n", addr, state->state); + continue; // We are done with this iteration of the loop, since we changed a state. 
+ } + if (currFSMEntry->actionFlag == FSMEntryCount::beginCatch || currFSMEntry->actionFlag == FSMEntryCount::endCatch || + currFSMEntry->actionFlag == FSMEntryCount::cleanupLabel) { + + _LIBCXXABI_TRACE_STATETAB( + "FSMEntryCount::%s: handler %p/%p, return _URC_HANDLER_FOUND\n", + (currFSMEntry->actionFlag == FSMEntryCount::beginCatch + ? "beginCatch" + : (currFSMEntry->actionFlag == FSMEntryCount::endCatch ? "endCatch" : "cleanupLabel")), + currFSMEntry->landingPad, *reinterpret_cast(currFSMEntry->landingPad)); + + state->state = currFSMEntry->nextState; + results.landingPad = reinterpret_cast(*reinterpret_cast(currFSMEntry->landingPad)); + results.reason = _URC_HANDLER_FOUND; + return; + } + if (currFSMEntry->elementCount > 0) { + if (currFSMEntry->flags & FSMEntryFlag::vBaseFlag && state->ignoreVBasePtrs) { + _LIBCXXABI_TRACE_STATETAB0("Ignoring virtual base dtor.\n"); + } else { + // We need to invoke the virtual base destructor. This must be + // a frame from the legacy xlC compiler as the xlclang++ compiler + // generates inline cleanup code rather than specifying + // the destructor via the state table. + void* addr = compute_addr_from_table(currFSMEntry, state, context); + + // An extra indirect to get to the object according to the object + // model used by the xlC compiler. + addr = reinterpret_cast(*reinterpret_cast(addr)); + _LIBCXXABI_TRACE_STATETAB("Invoke dtor for object=%p\n", addr); + invoke_destructor(currFSMEntry, addr); + } + } else if (currFSMEntry->actionFlag == FSMEntryCount::deleteObject) { + void* addr = compute_addr_from_table(currFSMEntry, state, context); + if (currFSMEntry->flags & FSMEntryFlag::vBaseFlag) { + // We need to invoke the virtual base delete function. This must be + // a frame from the legacy xlC compiler as the xlclang++ compiler + // generates inline cleanup code rather than specifying + // the delete function via the state table. 
+ + // An extra indirect to get to the object according to the object + // model used by the xlC compiler. + addr = reinterpret_cast(*reinterpret_cast(addr)); + } + _LIBCXXABI_TRACE_STATETAB("Delete object at %p\n", addr); + invoke_delete(currFSMEntry, addr); + } else { + _LIBCXXABI_TRACE_STATETAB("Unknown entry in FSM (count=%ld), ignored\n", + currFSMEntry->elementCount); + } // End of action switching. + + // Go to next state. + state->state = currFSMEntry->nextState; + } + _LIBCXXABI_TRACE_STATETAB0("No catch handler, return _URC_CONTINUE_UNWIND\n"); + results.reason = _URC_CONTINUE_UNWIND; + return; + } + _LIBCXXABI_TRACE_STATETAB0("No state table entry for this exception, call_terminate()\n"); + // It is possible that no state table entry specify how to handle + // this exception. By spec, terminate it immediately. + call_terminate(native_exception, unwind_exception); +} + +// Personality routine for EH using the state table. +_Unwind_Reason_Code __xlcxx_personality_v0(int version, _Unwind_Action actions, uint64_t exceptionClass, + _Unwind_Exception* unwind_exception, _Unwind_Context* context) { + if (version != 1 || unwind_exception == 0 || context == 0) + return _URC_FATAL_PHASE1_ERROR; + + bool native_exception = (exceptionClass & get_vendor_and_language) == (kOurExceptionClass & get_vendor_and_language); + scan_results results; + scan_state_tab(results, actions, native_exception, unwind_exception, context); + if (actions & _UA_SEARCH_PHASE) { + // Phase 1 search: All we're looking for in phase 1 is a handler that + // halts unwinding + return results.reason; + } + if (actions & _UA_CLEANUP_PHASE) { + // Phase 2 cleanup: + if (results.reason == _URC_HANDLER_FOUND) { + // Jump to the handler. + _Unwind_SetGR(context, REG_EXCP_OBJ, reinterpret_cast(unwind_exception)); + _Unwind_SetIP(context, results.landingPad); + return _URC_INSTALL_CONTEXT; + } + // Did not find a handler. Return the results of the scan. 
Normally + // _URC_CONTINUE_UNWIND, but could have been _URC_FATAL_PHASE2_ERROR. + return results.reason; + } + // We were called improperly: neither a phase 1 nor a phase 2 search. + return _URC_FATAL_PHASE1_ERROR; +} +} // namespace __state_table_eh + +// The following are EH helper functions for xlclang++ compiled code. + +// __xlc_catch_matchv2 +// Check whether the thrown object matches the catch handler's exception +// declaration. If there is a match, the function returns true with adjusted +// address of the thrown object. Otherwise, returns false. +bool __xlc_catch_matchv2(_Unwind_Exception* exceptionObject, std::type_info* catchTypeInfo, void*& obj) { + _LIBCXXABI_TRACE_STATETAB("Entering %s, exceptionObject=%p\n", __func__, reinterpret_cast(exceptionObject)); + + if (!__isOurExceptionClass(exceptionObject)) { + _LIBCXXABI_TRACE_STATETAB0("No match, not a C++ exception\n"); + return false; + } + + __cxa_exception* exceptionHeader = 0; + + if (__getExceptionClass(exceptionObject) == kOurDependentExceptionClass) { + // Walk to the __cxa_dependent_exception primary exception for the + // exception object and its type_info. 
+ __cxa_dependent_exception* dependentExceptionHeader = + reinterpret_cast<__cxa_dependent_exception*>(exceptionObject + 1) - 1; + exceptionHeader = reinterpret_cast<__cxa_exception*>(dependentExceptionHeader->primaryException) - 1; + _LIBCXXABI_TRACE_STATETAB("exceptionObject 0x%p is a dependent, primary 0x%p\n", + reinterpret_cast(exceptionObject), + reinterpret_cast(&exceptionHeader->unwindHeader)); + exceptionObject = &exceptionHeader->unwindHeader; + } else { + _LIBCXXABI_TRACE_STATETAB("exceptionObject %p is NOT a dependent\n", reinterpret_cast(exceptionObject)); + exceptionHeader = reinterpret_cast<__cxa_exception*>(exceptionObject + 1) - 1; + } + + void* thrownObject = reinterpret_cast(exceptionObject + 1); + std::type_info* throwTypeInfo = exceptionHeader->exceptionType; + + // Get the type info for the thrown type and this catch clause and + // see if the catch clause can catch that type. + + __cxxabiv1::__shim_type_info* catchType = reinterpret_cast<__cxxabiv1::__shim_type_info*>(catchTypeInfo); + __cxxabiv1::__shim_type_info* throwType = reinterpret_cast<__cxxabiv1::__shim_type_info*>(throwTypeInfo); + _LIBCXXABI_TRACE_STATETAB("UnwindException=%p, thrownObject=%p, throwTypeInfo=%p(%s), catchTypeInfo=%p(%s)\n", + reinterpret_cast(exceptionObject), thrownObject, reinterpret_cast(throwType), + throwType->name(), reinterpret_cast(catchType), catchType->name()); + if (catchType->can_catch(throwType, thrownObject)) { + exceptionHeader->adjustedPtr = thrownObject; + obj = thrownObject; + _LIBCXXABI_TRACE_STATETAB("Match found for thrownObject=%p\n", thrownObject); + return true; + } + _LIBCXXABI_TRACE_STATETAB0("No match\n"); + return false; +} + +// __xlc_throw_badexception +// This function is for xlclang++. It allocates and throws a bad_exception. +// During unwinding for this bad_exception, the previous exception which is +// not matching the throw spec will be cleaned up. 
This has the same +// effect as replacing the topmost exception (which is bad) with a bad_exception. +void __xlc_throw_badexception() { + _LIBCXXABI_TRACE_STATETAB("Entering function: %s\n\n", __func__); + void* newexception = new (__cxa_allocate_exception(sizeof(std::bad_exception))) std::bad_exception; + __cxa_throw(newexception, const_cast(&typeid(std::bad_exception)), 0); +} + +// __xlc_exception_handle +// This function is for xlclang++. It returns the address of the exception +// object set in gpr14 by the personality routine for xlclang++ compiled code. +uintptr_t __xlc_exception_handle() { + uintptr_t exceptionObject; + asm("mr %0, 14" : "=r"(exceptionObject)); + return exceptionObject; +} + +// xlclang++ may generate calls to __Deleted_Virtual. +void __Deleted_Virtual() { abort(); } + +// __catchThrownException is called during AIX library initialization and +// termination to handle exceptions. An implementation is also provided in +// libC.a(shrcore.o). This implementation is provided for applications that +// link with -lc++ (the xlclang++ or ibm-clang++ link default.) +int __catchThrownException(void (*cdfunc)(void), // function which may fail + void (*cleanup)(void*), // cleanup function + void* cleanuparg, // parameter to cleanup function + int action) { // control exception throwing and termination + enum Action : int { None = 0, Rethrow = 1, Terminate = 2 }; + if (!cdfunc) + return 0; + if (action == Action::Rethrow && !cleanup) { + // No cleanup and rethrow is effectively no-op. + // Avoid the catch handler when possible to allow exceptions generated + // from xlC binaries to flow through. + (*cdfunc)(); + return 0; + } + try { + (*cdfunc)(); + } catch (...) 
{ + if (action == Action::Terminate) + std::terminate(); + if (cleanup) + (*cleanup)(cleanuparg); + if (action == Action::Rethrow) + throw; + assert(action == Action::None); + return -1; // FAILED + } + return 0; +} + +} // extern "C" + +} // __cxxabiv1 diff --git a/libcxxabi/src/cxa_personality.cpp b/libcxxabi/src/cxa_personality.cpp index b7ff4c217e16ec..f58d4de774e10c 100644 --- a/libcxxabi/src/cxa_personality.cpp +++ b/libcxxabi/src/cxa_personality.cpp @@ -1305,3 +1305,9 @@ _LIBCXXABI_FUNC_VIS _Unwind_Reason_Code __xlcxx_personality_v1( } // extern "C" } // __cxxabiv1 + +#if defined(_AIX) +// Include implementation of the personality and helper functions for the +// state table based EH used by IBM legacy compilers xlC and xlclang++ on AIX. +# include "aix_state_tab_eh.inc" +#endif From ed0303aa2251e4484a2b4ff7f236c9f7cdfb2092 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 31 May 2022 14:30:28 +0100 Subject: [PATCH 890/908] [X86] LowerTRUNCATE - avoid creating extract_subvector(bitcast(vec)) patterns We have a generic DAG combine to attempt to fold extract_subvector(bitcast(vec)) -> bitcast(extract_subvector(vec)) but if we create these patterns late in lowering then we often miss them. Noticed while investigating Issue #55648 which gets caught in an infinite loop trying to split extract_subvector(bitcast(vselect()) patterns - this doesn't fix the issue yet but reduces the regressions from the WIP fix. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 37 +++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b38e14704c74d..324161e590d971 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21934,27 +21934,25 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { - In = DAG.getBitcast(MVT::v8i32, In); - // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget.hasInt256()) { static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; + In = DAG.getBitcast(MVT::v8i32, In); In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, DAG.getIntPtrConstant(0, DL)); } - SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, + SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, - DAG.getIntPtrConstant(4, DL)); + SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(2, DL)); static const int ShufMask[] = {0, 2, 4, 6}; - return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); + return DAG.getVectorShuffle(VT, DL, DAG.getBitcast(MVT::v4i32, OpLo), + DAG.getBitcast(MVT::v4i32, OpHi), ShufMask); } if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { - In = DAG.getBitcast(MVT::v32i8, In); - // On AVX2, v8i32 -> v8i16 becomes PSHUFB. 
if (Subtarget.hasInt256()) { // The PSHUFB mask: @@ -21962,27 +21960,30 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { -1, -1, -1, -1, -1, -1, -1, -1, 16, 17, 20, 21, 24, 25, 28, 29, -1, -1, -1, -1, -1, -1, -1, -1 }; + In = DAG.getBitcast(MVT::v32i8, In); In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1); In = DAG.getBitcast(MVT::v4i64, In); static const int ShufMask2[] = {0, 2, -1, -1}; In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, - DAG.getBitcast(MVT::v16i16, In), - DAG.getIntPtrConstant(0, DL)); + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(0, DL)); + return DAG.getBitcast(MVT::v8i16, In); } - SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In, + SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i32, In, DAG.getIntPtrConstant(0, DL)); - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In, - DAG.getIntPtrConstant(16, DL)); + SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i32, In, + DAG.getIntPtrConstant(4, DL)); // The PSHUFB mask: - static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1}; + static const int ShufMask1[] = {0, 2, 4, 6, -1, -1, -1, -1}; + + OpLo = DAG.getBitcast(MVT::v8i16, OpLo); + OpHi = DAG.getBitcast(MVT::v8i16, OpHi); - OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1); - OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1); + OpLo = DAG.getVectorShuffle(MVT::v8i16, DL, OpLo, OpLo, ShufMask1); + OpHi = DAG.getVectorShuffle(MVT::v8i16, DL, OpHi, OpHi, ShufMask1); OpLo = DAG.getBitcast(MVT::v4i32, OpLo); OpHi = DAG.getBitcast(MVT::v4i32, OpHi); From 858e6273d938cc4d08ee053ddff3fe7b19eb302a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 31 May 2022 15:40:46 +0200 Subject: [PATCH 891/908] [Clang] Always set opaque pointers mode Always set the 
opaque pointers mode, to make sure that -no-opaque-pointers continues working when the default on the LLVM side is flipped. --- clang/lib/CodeGen/CodeGenAction.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index af7a60c179a677..4ffbecdf274118 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -1017,8 +1017,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (BA != Backend_EmitNothing && !OS) return nullptr; - if (CI.getCodeGenOpts().OpaquePointers) - VMContext->setOpaquePointers(true); + VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers); // Load bitcode modules to link with, if we need to. if (LinkModules.empty()) From 36cbdaa163bd4250923eff0812f0e84ca129347e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 May 2022 17:09:40 +0200 Subject: [PATCH 892/908] [InstCombine] Fix inbounds preservation when swapping GEPs (PR44206) When reassociating GEPs, we can only keep inbounds if both original GEPs were inbounds, and their offsets have the same sign. For the sake of simplicity, I only handle the case where both offsets are non-negative here. It would probably be fine to just not preserve inbounds at all here, but as I don't see a compile-time impact for adding the isKnownNonNegative() calls I went with this more conservative approach. Fixes https://github.com/llvm/llvm-project/issues/44206. 
Differential Revision: https://reviews.llvm.org/D126687 --- .../InstCombine/InstructionCombining.cpp | 7 +++- .../InstCombine/gep-combine-loop-invariant.ll | 12 +++--- .../LoopVectorize/ARM/mve-reductions.ll | 4 +- .../x86-interleaved-accesses-masked-group.ll | 4 +- ...86-interleaved-store-accesses-with-gaps.ll | 8 ++-- .../LoopVectorize/consecutive-ptr-uniforms.ll | 4 +- .../LoopVectorize/interleaved-accesses.ll | 42 +++++++++---------- 7 files changed, 43 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 07f44036cce5aa..ac196ba00c2ade 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1969,7 +1969,12 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { - bool IsInBounds = Src->isInBounds() && GEP.isInBounds(); + // The swapped GEPs are inbounds if both original GEPs are inbounds + // and the sign of the offsets is the same. For simplicity, only + // handle both offsets being non-negative. + bool IsInBounds = Src->isInBounds() && GEP.isInBounds() && + isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) && + isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT); // Put NewSrc at same location as %src. 
Builder.SetInsertPoint(cast(Src)); Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(), diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index b17a9774c59ffc..bbb4709233f5e8 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -21,8 +21,8 @@ define i32 @foo(i8* nocapture readnone %match, i32 %cur_match, i32 %best_len, i3 ; CHECK: do.body: ; CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[TMP4:%.*]] to i64 ; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, i8* [[WIN]], i64 [[IDX_EXT1]] -; CHECK-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR1]], i64 -1 -; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR22]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR22:%.*]] = getelementptr i8, i8* [[ADD_PTR1]], i64 -1 +; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, i8* [[ADD_PTR22]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ADD_PTR3]] to i32* ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], [[SCAN_END]] @@ -195,7 +195,7 @@ define void @PR51485(<2 x i64> %v) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], -; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, i8* getelementptr inbounds (i8, i8* bitcast (void (<2 x i64>)* @PR51485 to i8*), i64 80), <2 x i64> [[SL1]] +; CHECK-NEXT: [[E6:%.*]] = getelementptr i8, i8* getelementptr (i8, i8* bitcast (void (<2 x i64>)* @PR51485 to i8*), i64 80), <2 x i64> [[SL1]] ; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]]) ; CHECK-NEXT: br label [[LOOP]] ; @@ -292,9 +292,9 @@ define void @both_inbounds_one_neg(i8* %ptr, i1 %c, i32 noundef %arg) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG:%.*]] to i64 -; 
CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 -1 -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, i8* [[PTR21]], i64 [[ARG_EXT]] -; CHECK-NEXT: call void @use(i8* nonnull [[PTR3]]) +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i64 -1 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, i8* [[PTR21]], i64 [[ARG_EXT]] +; CHECK-NEXT: call void @use(i8* [[PTR3]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll index a699304abf9dfb..0e96422d34c508 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -1370,8 +1370,8 @@ define i32 @reduction_interleave_group(i32 %n, i32* %arr) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i32 -1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 -1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP4]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index f848da61e50868..9beae34700f8d9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ 
b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -3077,8 +3077,8 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias noca ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]]) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = sub <8 x i8> zeroinitializer, [[TMP5]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 [[TMP4]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[TMP7]], i32 [[TMP4]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP9]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index 00b4fe382e9a92..8100c42ffd7d93 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -74,7 +74,7 @@ define dso_local void @test1(i16* noalias nocapture %points, i16* noalias nocapt ; ; ENABLED_MASKED_STRIDED-LABEL: @test1( ; ENABLED_MASKED_STRIDED-NEXT: entry: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 -1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr i16, i16* [[POINTS:%.*]], i64 -1 ; 
ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -86,7 +86,7 @@ define dso_local void @test1(i16* noalias nocapture %points, i16* noalias nocapt ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 [[TMP6]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP0]], i64 [[TMP6]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <16 x i16>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> [[INTERLEAVED_VEC]], <16 x i16>* [[TMP8]], i32 2, <16 x i1> ) @@ -246,7 +246,7 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16 ; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 -1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr i16, i16* [[POINTS:%.*]], i64 -1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] @@ -262,7 +262,7 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <4 x i16>* ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP6]], i32 2, <4 x i1> [[TMP1]], <4 x i16> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 [[TMP7]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP0]], i64 [[TMP7]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <16 x i16>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 3611d8bc2c3e56..5c94355baba979 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -50,8 +50,8 @@ for.end: ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %offset.idx = sub i64 %n, %index ; CHECK-NOT: getelementptr -; CHECK: %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 -3 -; CHECK: getelementptr inbounds i32, i32* %[[G0]], i64 %offset.idx +; CHECK: %[[G0:.+]] = getelementptr i32, i32* %a, i64 -3 +; CHECK: getelementptr i32, i32* %[[G0]], i64 %offset.idx ; CHECK-NOT: getelementptr ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 54ccd7f0700033..0afa6f7f5673d2 100644 
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -115,11 +115,11 @@ define void @test_struct_array_load3_store3() { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC5]], -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC6]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 [[INDEX]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 -2 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>* @@ -354,7 +354,7 @@ define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] 
= sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.ST2* [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 -6 @@ -362,19 +362,19 @@ define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC3]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND1]] -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE4]], [[VEC_IND1]] +; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ST2]], %struct.ST2* [[B:%.*]], i64 [[OFFSET_IDX]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 -7 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* -; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE5]], <4 x i32> [[REVERSE6]], <8 x i32> +; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: 
[[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -691,8 +691,8 @@ define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 -1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 -1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[TMP5]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP7]], align 4 @@ -759,12 +759,12 @@ define void @mixed_load3_store3(i32* nocapture %A) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> 
[[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2 -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[VEC_IND]] -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[VEC_IND]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> @@ -1324,12 +1324,12 @@ define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[A]], i64 -1 ; CHECK-NEXT: store i32 [[X:%.*]], i32* [[TMP7]], align 4 ; CHECK-NEXT: store i32 [[X]], i32* [[TMP8]], align 4 ; CHECK-NEXT: store i32 [[X]], i32* [[TMP9]], align 4 ; CHECK-NEXT: store i32 [[X]], i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>* ; 
CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP13]], align 4 From b9443cb6fa6ba0be030cb0ed5f556d69c7f7de56 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 31 May 2022 14:57:10 +0100 Subject: [PATCH 893/908] [X86] narrowExtractedVectorSelect - don't peek through bitcasts to find source vector We don't seem to need this for any test coverage and it was making tracking of the uses() of the source vector more difficult Noticed while investigating Issue #55648 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 324161e590d971..fda8a22c1991bb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53722,7 +53722,7 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, /// This function should only be called with legal types (otherwise, the calls /// to get simple value types will assert). static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) { - SDValue Sel = peekThroughBitcasts(Ext->getOperand(0)); + SDValue Sel = Ext->getOperand(0); SmallVector CatOps; if (Sel.getOpcode() != ISD::VSELECT || !collectConcatOps(Sel.getOperand(0).getNode(), CatOps, DAG)) From 42861faa8e17058f0b4a027bba13ef59a600e051 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 29 Mar 2022 11:14:07 -0400 Subject: [PATCH 894/908] attributes: introduce allockind attr for describing allocator fn behavior I chose to encode the allockind information in a string constant because otherwise we would get a bit of an explosion of keywords to deal with the possible permutations of allocation function types. 
I'm not sure that CodeGen.h is the correct place for this enum, but it seemed to kind of match the UWTableKind enum so I put it in the same place. Constructive suggestions on a better location most certainly encouraged. Differential Revision: https://reviews.llvm.org/D123088 --- llvm/docs/LangRef.rst | 21 ++++++++++ llvm/include/llvm/AsmParser/LLParser.h | 1 + llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 + llvm/include/llvm/IR/Attributes.h | 22 +++++++++++ llvm/include/llvm/IR/Attributes.td | 3 ++ llvm/lib/AsmParser/LLParser.cpp | 41 +++++++++++++++++++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 ++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 + llvm/lib/IR/AttributeImpl.h | 1 + llvm/lib/IR/Attributes.cpp | 44 +++++++++++++++++++++ llvm/lib/IR/Verifier.cpp | 19 +++++++++ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 + llvm/test/Assembler/allockind-missing.ll | 4 ++ llvm/test/Assembler/allockind.ll | 7 ++++ llvm/test/Bitcode/compatibility.ll | 8 +++- llvm/test/Verifier/allockind.ll | 16 ++++++++ 16 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Assembler/allockind-missing.ll create mode 100644 llvm/test/Assembler/allockind.ll create mode 100644 llvm/test/Verifier/allockind.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index cf97401afb3616..f7ebf8aadd47b4 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1584,6 +1584,27 @@ example: "_ZnwmSt11align_val_t" for aligned ``::operator::new`` and ``::operator::delete``. Matching malloc/realloc/free calls within a family can be optimized, but mismatched ones will be left alone. +``allockind("KIND")`` + Describes the behavior of an allocation function. The KIND string contains comma + separated entries from the following options: + * "alloc": the function returns a new block of memory or null. + * "realloc": the function returns a new block of memory or null. 
If the + result is non-null the memory contents from the start of the block up to + the smaller of the original allocation size and the new allocation size + will match that of the ``allocptr`` argument and the ``allocptr`` + argument is invalidated, even if the function returns the same address. + * "free": the function frees the block of memory specified by ``allocptr``. + * "uninitialized": Any newly-allocated memory (either a new block from + an "alloc" function or the enlarged capacity from a "realloc" function) + will be uninitialized. + * "zeroed": Any newly-allocated memory (either a new block from an "alloc" + function or the enlarged capacity from a "realloc" function) will be + zeroed. + * "aligned": the function returns memory aligned according to the + ``allocalign`` parameter. + The first three options are mutually exclusive, and the remaining options + describe more details of how the function behaves. The remaining options + are invalid for "free"-type functions. ``allocsize(<EltSizeParam>[, <NumEltsParam>])`` This attribute indicates that the annotated function will always return at least a given number of bytes (or null).
Its arguments are zero-indexed diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 8fa110c1d8c67f..dd8d4daee51337 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -274,6 +274,7 @@ namespace llvm { bool AllowParens = false); bool parseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); bool parseOptionalUWTableKind(UWTableKind &Kind); + bool parseAllocKind(AllocFnKind &Kind); bool parseScopeAndOrdering(bool IsAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); bool parseScope(SyncScope::ID &SSID); diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 0ca294a332791a..5a45d6cf597475 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -684,6 +684,7 @@ enum AttributeKindCodes { ATTR_KIND_NO_SANITIZE_BOUNDS = 79, ATTR_KIND_ALLOC_ALIGN = 80, ATTR_KIND_ALLOCATED_POINTER = 81, + ATTR_KIND_ALLOC_KIND = 82, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index 5663f7a69d7213..6a4e6d63a973ab 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -17,6 +17,7 @@ #include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -43,6 +44,18 @@ class Function; class LLVMContext; class Type; +enum class AllocFnKind : uint64_t { + Unknown = 0, + Alloc = 1 << 0, // Allocator function returns a new allocation + Realloc = 1 << 1, // Allocator function resizes the `allocptr` argument + Free = 1 << 2, // Allocator function frees the `allocptr` argument + Uninitialized = 1 << 3, // Allocator function returns uninitialized memory + Zeroed = 1 << 4, // Allocator function returns zeroed memory + Aligned = 1 << 5, // Allocator function aligns allocations 
per the + // `allocalign` argument + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Aligned) +}; + //===----------------------------------------------------------------------===// /// \class /// Functions, function parameters, and return types can have attributes @@ -228,6 +241,9 @@ class Attribute { // Returns the unwind table kind. UWTableKind getUWTableKind() const; + // Returns the allocator function kind. + AllocFnKind getAllocKind() const; + /// The Attribute is converted to a string of equivalent mnemonic. This /// is, presumably, for writing out the mnemonics for the assembly writer. std::string getAsString(bool InAttrGrp = false) const; @@ -359,6 +375,7 @@ class AttributeSet { unsigned getVScaleRangeMin() const; Optional getVScaleRangeMax() const; UWTableKind getUWTableKind() const; + AllocFnKind getAllocKind() const; std::string getAsString(bool InAttrGrp = false) const; /// Return true if this attribute set belongs to the LLVMContext. @@ -850,6 +867,8 @@ class AttributeList { /// Get the unwind table kind requested for the function. UWTableKind getUWTableKind() const; + AllocFnKind getAllocKind() const; + /// Return the attributes at the index as a string. std::string getAsString(unsigned Index, bool InAttrGrp = false) const; @@ -1203,6 +1222,9 @@ class AttrBuilder { /// Attribute. AttrBuilder &addUWTableAttr(UWTableKind Kind); + // This turns the allocator kind into the form used internally in Attribute. + AttrBuilder &addAllocKindAttr(AllocFnKind Kind); + ArrayRef attrs() const { return Attrs; } bool operator==(const AttrBuilder &B) const; diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 6bb093b3715f52..00b8a0ca9e86f2 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -51,6 +51,9 @@ def Alignment : IntAttr<"align", [ParamAttr, RetAttr]>; /// aligned_alloc and aligned ::operator::new. 
def AllocAlign: EnumAttr<"allocalign", [ParamAttr]>; +/// Describes behavior of an allocator function in terms of known properties. +def AllocKind: IntAttr<"allockind", [FnAttr]>; + /// Parameter is the pointer to be manipulated by the allocator function. def AllocatedPointer : EnumAttr<"allocptr", [ParamAttr]>; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 1f8d68cca3dacb..aac1f01eeed5e9 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1362,6 +1362,13 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B, B.addUWTableAttr(Kind); return false; } + case Attribute::AllocKind: { + AllocFnKind Kind = AllocFnKind::Unknown; + if (parseAllocKind(Kind)) + return true; + B.addAllocKindAttr(Kind); + return false; + } default: B.addAttribute(Attr); Lex.Lex(); @@ -2041,6 +2048,40 @@ bool LLParser::parseOptionalUWTableKind(UWTableKind &Kind) { return parseToken(lltok::rparen, "expected ')'"); } +bool LLParser::parseAllocKind(AllocFnKind &Kind) { + Lex.Lex(); + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return error(ParenLoc, "expected '('"); + LocTy KindLoc = Lex.getLoc(); + std::string Arg; + if (parseStringConstant(Arg)) + return error(KindLoc, "expected allockind value"); + for (StringRef A : llvm::split(Arg, ",")) { + if (A == "alloc") { + Kind |= AllocFnKind::Alloc; + } else if (A == "realloc") { + Kind |= AllocFnKind::Realloc; + } else if (A == "free") { + Kind |= AllocFnKind::Free; + } else if (A == "uninitialized") { + Kind |= AllocFnKind::Uninitialized; + } else if (A == "zeroed") { + Kind |= AllocFnKind::Zeroed; + } else if (A == "aligned") { + Kind |= AllocFnKind::Aligned; + } else { + return error(KindLoc, Twine("unknown allockind ") + A); + } + } + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return error(ParenLoc, "expected ')'"); + if (Kind == AllocFnKind::Unknown) + return error(KindLoc, "expected allockind value"); + 
return false; +} + /// parseOptionalCommaAlign /// ::= /// ::= ',' align 4 diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index cc5892959d0ad8..9dd2b1286552d6 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1536,6 +1536,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::DereferenceableOrNull; case bitc::ATTR_KIND_ALLOC_ALIGN: return Attribute::AllocAlign; + case bitc::ATTR_KIND_ALLOC_KIND: + return Attribute::AllocKind; case bitc::ATTR_KIND_ALLOC_SIZE: return Attribute::AllocSize; case bitc::ATTR_KIND_ALLOCATED_POINTER: @@ -1736,6 +1738,8 @@ Error BitcodeReader::parseAttributeGroupBlock() { B.addVScaleRangeAttrFromRawRepr(Record[++i]); else if (Kind == Attribute::UWTable) B.addUWTableAttr(UWTableKind(Record[++i])); + else if (Kind == Attribute::AllocKind) + B.addAllocKindAttr(static_cast(Record[++i])); } else if (Record[i] == 3 || Record[i] == 4) { // String attribute bool HasValue = (Record[i++] == 4); SmallString<64> KindStr; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index b4cafd49ba072d..66445c8346759b 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -650,6 +650,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_MIN_SIZE; case Attribute::AllocatedPointer: return bitc::ATTR_KIND_ALLOCATED_POINTER; + case Attribute::AllocKind: + return bitc::ATTR_KIND_ALLOC_KIND; case Attribute::Naked: return bitc::ATTR_KIND_NAKED; case Attribute::Nest: diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h index adf8a4d34a0a76..5eb958f5786a99 100644 --- a/llvm/lib/IR/AttributeImpl.h +++ b/llvm/lib/IR/AttributeImpl.h @@ -256,6 +256,7 @@ class AttributeSetNode final unsigned getVScaleRangeMin() const; Optional getVScaleRangeMax() const; UWTableKind getUWTableKind() const; + 
AllocFnKind getAllocKind() const; std::string getAsString(bool InAttrGrp) const; Type *getAttributeType(Attribute::AttrKind Kind) const; diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 1aa2d44308c476..11308c33b7db25 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -376,6 +376,12 @@ UWTableKind Attribute::getUWTableKind() const { return UWTableKind(pImpl->getValueAsInt()); } +AllocFnKind Attribute::getAllocKind() const { + assert(hasAttribute(Attribute::AllocKind) && + "Trying to get allockind value from non-allockind attribute"); + return AllocFnKind(pImpl->getValueAsInt()); +} + std::string Attribute::getAsString(bool InAttrGrp) const { if (!pImpl) return {}; @@ -447,6 +453,26 @@ std::string Attribute::getAsString(bool InAttrGrp) const { } } + if (hasAttribute(Attribute::AllocKind)) { + AllocFnKind Kind = getAllocKind(); + SmallVector parts; + if ((Kind & AllocFnKind::Alloc) != AllocFnKind::Unknown) + parts.push_back("alloc"); + if ((Kind & AllocFnKind::Realloc) != AllocFnKind::Unknown) + parts.push_back("realloc"); + if ((Kind & AllocFnKind::Free) != AllocFnKind::Unknown) + parts.push_back("free"); + if ((Kind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + parts.push_back("uninitialized"); + if ((Kind & AllocFnKind::Zeroed) != AllocFnKind::Unknown) + parts.push_back("zeroed"); + if ((Kind & AllocFnKind::Aligned) != AllocFnKind::Unknown) + parts.push_back("aligned"); + return ("allockind(\"" + + Twine(llvm::join(parts.begin(), parts.end(), ",")) + "\")") + .str(); + } + // Convert target-dependent attributes to strings of the form: // // "kind" @@ -735,6 +761,10 @@ UWTableKind AttributeSet::getUWTableKind() const { return SetNode ? SetNode->getUWTableKind() : UWTableKind::None; } +AllocFnKind AttributeSet::getAllocKind() const { + return SetNode ? SetNode->getAllocKind() : AllocFnKind::Unknown; +} + std::string AttributeSet::getAsString(bool InAttrGrp) const { return SetNode ? 
SetNode->getAsString(InAttrGrp) : ""; } @@ -907,6 +937,12 @@ UWTableKind AttributeSetNode::getUWTableKind() const { return UWTableKind::None; } +AllocFnKind AttributeSetNode::getAllocKind() const { + if (auto A = findEnumAttribute(Attribute::AllocKind)) + return A->getAllocKind(); + return AllocFnKind::Unknown; +} + std::string AttributeSetNode::getAsString(bool InAttrGrp) const { std::string Str; for (iterator I = begin(), E = end(); I != E; ++I) { @@ -1463,6 +1499,10 @@ UWTableKind AttributeList::getUWTableKind() const { return getFnAttrs().getUWTableKind(); } +AllocFnKind AttributeList::getAllocKind() const { + return getFnAttrs().getAllocKind(); +} + std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const { return getAttributes(Index).getAsString(InAttrGrp); } @@ -1690,6 +1730,10 @@ AttrBuilder &AttrBuilder::addUWTableAttr(UWTableKind Kind) { return addRawIntAttr(Attribute::UWTable, uint64_t(Kind)); } +AttrBuilder &AttrBuilder::addAllocKindAttr(AllocFnKind Kind) { + return addRawIntAttr(Attribute::AllocKind, static_cast(Kind)); +} + Type *AttrBuilder::getTypeAttr(Attribute::AttrKind Kind) const { assert(Attribute::isTypeAttrKind(Kind) && "Not a type attribute"); Attribute A = getAttribute(Kind); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 93063a50b39b7e..49522793e4ae11 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2080,6 +2080,25 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, return; } + if (Attrs.hasFnAttr(Attribute::AllocKind)) { + AllocFnKind K = Attrs.getAllocKind(); + AllocFnKind Type = + K & (AllocFnKind::Alloc | AllocFnKind::Realloc | AllocFnKind::Free); + if (!is_contained( + {AllocFnKind::Alloc, AllocFnKind::Realloc, AllocFnKind::Free}, + Type)) + CheckFailed( + "'allockind()' requires exactly one of alloc, realloc, and free"); + if ((Type == AllocFnKind::Free) && + ((K & (AllocFnKind::Uninitialized | AllocFnKind::Zeroed | + AllocFnKind::Aligned)) 
!= AllocFnKind::Unknown)) + CheckFailed("'allockind(\"free\")' doesn't allow uninitialized, zeroed, " + "or aligned modifiers."); + AllocFnKind ZeroedUninit = AllocFnKind::Uninitialized | AllocFnKind::Zeroed; + if ((K & ZeroedUninit) == ZeroedUninit) + CheckFailed("'allockind()' can't be both zeroed and uninitialized"); + } + if (Attrs.hasFnAttr(Attribute::VScaleRange)) { unsigned VScaleMin = Attrs.getFnAttrs().getVScaleRangeMin(); if (VScaleMin == 0) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 2895b242ca4747..6e392dcee1eae5 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -920,6 +920,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StackAlignment: case Attribute::WillReturn: case Attribute::WriteOnly: + case Attribute::AllocKind: continue; // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: diff --git a/llvm/test/Assembler/allockind-missing.ll b/llvm/test/Assembler/allockind-missing.ll new file mode 100644 index 00000000000000..e8672fe9db032a --- /dev/null +++ b/llvm/test/Assembler/allockind-missing.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare void @f0() allockind() +; CHECK: :[[#@LINE-1]]:30: error: expected allockind value diff --git a/llvm/test/Assembler/allockind.ll b/llvm/test/Assembler/allockind.ll new file mode 100644 index 00000000000000..78f810e3cbc2fc --- /dev/null +++ b/llvm/test/Assembler/allockind.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare void @f0() allockind("free") +declare void @f1() allockind("alloc,aligned,uninitialized") +declare void @f2() allockind("realloc,zeroed,aligned") +declare void @f3() allockind("fjord") +; CHECK: :[[#@LINE-1]]:30: error: unknown allockind fjord diff --git a/llvm/test/Bitcode/compatibility.ll 
b/llvm/test/Bitcode/compatibility.ll index a87e11e0fe0d6e..05de3e294862c3 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1520,7 +1520,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #49 + ; CHECK: call void @f.nobuiltin() #50 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1943,6 +1943,9 @@ declare void @f.allocsize_two(i32, i32) allocsize(1, 0) declare void @f.nosanitize_bounds() nosanitize_bounds ; CHECK: declare void @f.nosanitize_bounds() #48 +declare void @f.allockind() allockind("alloc,uninitialized") +; CHECK: declare void @f.allockind() #49 + ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } ; CHECK: attributes #2 = { alwaysinline } @@ -1992,7 +1995,8 @@ declare void @f.nosanitize_bounds() nosanitize_bounds ; CHECK: attributes #46 = { allocsize(0) } ; CHECK: attributes #47 = { allocsize(1,0) } ; CHECK: attributes #48 = { nosanitize_bounds } -; CHECK: attributes #49 = { builtin } +; CHECK: attributes #49 = { allockind("alloc,uninitialized") } +; CHECK: attributes #50 = { builtin } ;; Metadata diff --git a/llvm/test/Verifier/allockind.ll b/llvm/test/Verifier/allockind.ll new file mode 100644 index 00000000000000..00e0fb72345630 --- /dev/null +++ b/llvm/test/Verifier/allockind.ll @@ -0,0 +1,16 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: 'allockind()' requires exactly one of alloc, realloc, and free +declare i8* @a(i32) allockind("aligned") + +; CHECK: 'allockind()' requires exactly one of alloc, realloc, and free +declare i8* @b(i32*) allockind("free,realloc") + +; CHECK: 'allockind("free")' doesn't allow uninitialized, zeroed, or aligned modifiers. 
+declare i8* @c(i32) allockind("free,zeroed") + +; CHECK: 'allockind()' can't be both zeroed and uninitialized +declare i8* @d(i32, i32*) allockind("realloc,uninitialized,zeroed") + +; CHECK: 'allockind()' requires exactly one of alloc, realloc, and free +declare i8* @e(i32, i32) allockind("alloc,free") From 73f664601c105e5897e12bf9766c2f7e79c92e62 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 29 Mar 2022 14:27:30 -0400 Subject: [PATCH 895/908] BuildLibCalls: infer allockind attributes on relevant functions Differential Revision: https://reviews.llvm.org/D123089 --- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 15 +++++++++ .../Transforms/InferFunctionAttrs/annotate.ll | 32 +++++++++---------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 1caddfad208693..666b2767f1954f 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -246,6 +246,14 @@ static bool setAllocFamily(Function &F, StringRef Family) { return true; } +static bool setAllocKind(Function &F, AllocFnKind K) { + if (F.hasFnAttribute(Attribute::AllocKind)) + return false; + F.addFnAttr( + Attribute::get(F.getContext(), Attribute::AllocKind, uint64_t(K))); + return true; +} + bool llvm::inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI) { Function *F = M->getFunction(Name); @@ -442,12 +450,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_aligned_alloc: Changed |= setAlignedAllocParam(F, 0); Changed |= setAllocSize(F, 1, None); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned); LLVM_FALLTHROUGH; case LibFunc_valloc: case LibFunc_malloc: case LibFunc_vec_malloc: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_malloc ? 
"vec_malloc" : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized); Changed |= setAllocSize(F, 0, None); Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetAndArgsNoUndef(F); @@ -512,6 +522,8 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, return Changed; case LibFunc_memalign: Changed |= setAllocFamily(F, "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned | + AllocFnKind::Uninitialized); Changed |= setAllocSize(F, 1, None); Changed |= setAlignedAllocParam(F, 0); Changed |= setOnlyAccessesInaccessibleMemory(F); @@ -537,6 +549,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_vec_realloc: Changed |= setAllocFamily( F, TheLibFunc == LibFunc_vec_realloc ? "vec_malloc" : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Realloc); Changed |= setAllocatedPointerParam(F, 0); Changed |= setAllocSize(F, 1, None); Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); @@ -614,6 +627,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_vec_calloc: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? "vec_malloc" : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Zeroed); Changed |= setAllocSize(F, 0, 1); Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetAndArgsNoUndef(F); @@ -675,6 +689,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_vec_free: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? 
"vec_malloc" : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Free); Changed |= setAllocatedPointerParam(F, 0); Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); Changed |= setArgsNoUndef(F); diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 5b737654bd3039..4cead43e9d456a 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -220,7 +220,7 @@ declare x86_fp80 @acoshl(x86_fp80) ; CHECK: declare x86_fp80 @acosl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @acosl(x86_fp80) -; CHECK: declare noalias noundef i8* @aligned_alloc(i64 allocalign noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE1_FAMILY_MALLOC:#[0-9]+]] +; CHECK: declare noalias noundef i8* @aligned_alloc(i64 allocalign noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE1_FAMILY_MALLOC:#[0-9]+]] declare i8* @aligned_alloc(i64, i64) ; CHECK: declare double @asin(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] @@ -290,7 +290,7 @@ declare void @bcopy(i8*, i8*, i64) ; CHECK: declare void @bzero(i8* nocapture writeonly, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] declare void @bzero(i8*, i64) -; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_MALLOC:#[0-9]+]] +; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCZEROED_ALLOCSIZE01_FAMILY_MALLOC:#[0-9]+]] declare i8* @calloc(i64, i64) ; CHECK-AIX: declare noalias noundef i8* @vec_calloc(i64 noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC:#[0-9]+]] @@ -493,7 +493,7 @@ declare i32 @fputs(i8*, %opaque*) ; CHECK: declare noundef i64 @fread(i8* nocapture noundef, i64 noundef, i64 noundef, 
%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare i64 @fread(i8*, i64, i64, %opaque*) -; CHECK: declare void @free(i8* allocptr nocapture noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_MALLOC:#[0-9]+]] +; CHECK: declare void @free(i8* allocptr nocapture noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_FREE_FAMILY_MALLOC:#[0-9]+]] declare void @free(i8*) ; CHECK-AIX: declare void @vec_free(i8* allocptr nocapture noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC:#[0-9]+]] @@ -660,7 +660,7 @@ declare i32 @lstat(i8*, %opaque*) ; CHECK-LINUX: declare noundef i32 @lstat64(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @lstat64(i8*, %opaque*) -; CHECK: declare noalias noundef i8* @malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC:#[0-9]+]] +; CHECK: declare noalias noundef i8* @malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC:#[0-9]+]] declare i8* @malloc(i64) ; CHECK-AIX: declare noalias noundef i8* @vec_malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC:#[0-9]+]] @@ -784,10 +784,10 @@ declare i64 @read(i32, i8*, i64) ; CHECK: declare noundef i64 @readlink(i8* nocapture noundef readonly, i8* nocapture noundef, i64 noundef) [[NOFREE_NOUNWIND]] declare i64 @readlink(i8*, i8*, i64) -; CHECK: declare noalias noundef i8* @realloc(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC:#[0-9]+]] +; CHECK: declare noalias noundef i8* @realloc(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC:#[0-9]+]] declare i8* @realloc(i8*, i64) -; CHECK: declare noalias noundef i8* @reallocf(i8* allocptr nocapture, i64 noundef) 
[[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC]] +; CHECK: declare noalias noundef i8* @reallocf(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC]] declare i8* @reallocf(i8*, i64) ; CHECK-AIX: declare noalias noundef i8* @vec_realloc(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC:#[0-9]+]] @@ -1039,7 +1039,7 @@ declare i32 @utime(i8*, %opaque*) ; CHECK: declare noundef i32 @utimes(i8* nocapture noundef readonly, %opaque* nocapture noundef readonly) [[NOFREE_NOUNWIND]] declare i32 @utimes(i8*, %opaque*) -; CHECK: declare noalias noundef i8* @valloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC]] +; CHECK: declare noalias noundef i8* @valloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC]] declare i8* @valloc(i64) ; CHECK: declare noundef i32 @vfprintf(%opaque* nocapture noundef, i8* nocapture noundef readonly, %opaque* noundef) [[NOFREE_NOUNWIND]] @@ -1078,23 +1078,23 @@ declare void @memset_pattern16(i8*, i8*, i64) ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn writeonly } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(1) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1) "alloc-family"="malloc" } +; CHECK-DAG: attributes 
[[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCZEROED_ALLOCSIZE01_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind readonly willreturn } ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { argmemonly mustprogress nofree nounwind willreturn } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind readonly } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_FREE_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" } ; CHECK-DAG: attributes [[NOFREE_WILLRETURN]] = { mustprogress nofree willreturn } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly mustprogress nofree nounwind readonly willreturn } ; CHECK-DAG: attributes [[NOFREE]] = { nofree } ; CHECK-DAG: attributes 
[[ARGMEMONLY_NOFREE_NOUNWIND]] = { argmemonly nofree nounwind } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allocsize(1) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("realloc") allocsize(1) "alloc-family"="malloc" } ; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nofree nounwind willreturn "alloc-family"="malloc" } ; CHECK-NVPTX-DAG: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nosync nounwind readnone } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0) "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allocsize(1) "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes 
[[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("realloc") allocsize(1) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="vec_malloc" } From ae766526777ae5ff34d2dadb99038b596e3c715f Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 31 May 2022 10:14:51 -0400 Subject: [PATCH 896/908] Revert "[Libomptarget] Add `leaf` attribute to `vprintf` declaration" This is preventing users from calling `printf` on NVPTX code. Revert for now until there is a fix. This reverts commit eda4ef3add4d25345e0b29580776f1576040c525. 
--- openmp/libomptarget/DeviceRTL/src/Debug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp index c6cf5071b95d1d..45e08fa5b16bfb 100644 --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -36,7 +36,7 @@ int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t); #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) -int32_t __attribute__((leaf)) vprintf(const char *, void *); +int32_t vprintf(const char *, void *); namespace impl { int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { return vprintf(Format, Arguments); From 9c38fc111b9e1319ffa8e4a165e40ff7c7814406 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Wed, 18 May 2022 07:26:50 +0100 Subject: [PATCH 897/908] [AArch64] Remove references to Streaming SVE from target features. Following discussion on D120261 and D121208 it seems better to remove the concept of Streaming SVE from the subtarget/assembler predicates and instead reason about 'SVE' and 'SME' as its higher level features, rather than trying to model this runtime mode through explicit feature flags. This patch is largely NFC. 
Reviewed By: paulwalker-arm, david-arm Differential Revision: https://reviews.llvm.org/D125977 --- llvm/lib/Target/AArch64/AArch64.td | 7 +- .../Target/AArch64/AArch64ISelLowering.cpp | 5 +- .../lib/Target/AArch64/AArch64InstrFormats.td | 4 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 10 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 34 +++-- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 124 +++++++++--------- llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 2 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 6 +- .../sve-intrinsics-contiguous-prefetches.ll | 2 +- .../AArch64/sve-intrinsics-conversion.ll | 2 +- .../AArch64/sve-intrinsics-counting-bits.ll | 2 +- .../AArch64/sve-intrinsics-counting-elems.ll | 4 +- .../AArch64/sve-intrinsics-create-tuple.ll | 2 +- .../AArch64/sve-intrinsics-fp-converts.ll | 2 +- .../sve-intrinsics-insert-extract-tuple.ll | 2 +- .../sve-intrinsics-ldN-reg+imm-addr-mode.ll | 2 +- .../sve-intrinsics-ldN-reg+reg-addr-mode.ll | 2 +- ...e-intrinsics-ldN-sret-reg+imm-addr-mode.ll | 2 +- ...e-intrinsics-ldN-sret-reg+reg-addr-mode.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-logical.ll | 2 +- .../AArch64/sve-intrinsics-pred-creation.ll | 2 +- .../AArch64/sve-intrinsics-pred-operations.ll | 2 +- .../AArch64/sve-intrinsics-pred-testing.ll | 2 +- .../AArch64/sve-intrinsics-reinterpret.ll | 2 +- .../AArch64/sve-intrinsics-reversal.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-sel.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-sqdec.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-sqinc.ll | 2 +- ...-intrinsics-st1-addressing-mode-reg-imm.ll | 2 +- ...-intrinsics-st1-addressing-mode-reg-reg.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-st1.ll | 4 +- .../sve-intrinsics-stN-reg-imm-addr-mode.ll | 2 +- .../sve-intrinsics-stN-reg-reg-addr-mode.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-stores.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-uqdec.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-uqinc.ll | 2 +- .../CodeGen/AArch64/sve-intrinsics-while.ll | 2 +- 
...ve2-intrinsics-binary-narrowing-add-sub.ll | 2 +- .../sve2-intrinsics-binary-narrowing-shr.ll | 2 +- .../AArch64/sve2-intrinsics-complex-dot.ll | 2 +- ...ntrinsics-contiguous-conflict-detection.ll | 2 +- .../AArch64/sve2-intrinsics-fp-converts.ll | 2 +- ...sve2-intrinsics-fp-int-binary-logarithm.ll | 2 +- .../sve2-intrinsics-fp-widening-mul-acc.ll | 2 +- .../AArch64/sve2-intrinsics-int-mul-lane.ll | 2 +- ...-intrinsics-non-widening-pairwise-arith.ll | 2 +- .../sve2-intrinsics-polynomial-arithmetic.ll | 2 +- .../sve2-intrinsics-unary-narrowing.ll | 2 +- .../sve2-intrinsics-uniform-complex-arith.ll | 2 +- .../CodeGen/AArch64/sve2-intrinsics-while.ll | 2 +- ...2-intrinsics-widening-complex-int-arith.ll | 2 +- .../AArch64/sve2-intrinsics-widening-dsp.ll | 2 +- ...sve2-intrinsics-widening-pairwise-arith.ll | 2 +- llvm/test/MC/AArch64/SME/feature.s | 2 +- .../MC/AArch64/SME/streaming-mode-neon-bf16.s | 10 +- .../MC/AArch64/SME/streaming-mode-neon-fp16.s | 8 +- .../test/MC/AArch64/SME/streaming-mode-neon.s | 8 +- llvm/test/MC/AArch64/SVE/abs.s | 2 +- llvm/test/MC/AArch64/SVE/add.s | 2 +- llvm/test/MC/AArch64/SVE/addpl.s | 2 +- llvm/test/MC/AArch64/SVE/addvl.s | 2 +- llvm/test/MC/AArch64/SVE/and.s | 2 +- llvm/test/MC/AArch64/SVE/ands.s | 2 +- llvm/test/MC/AArch64/SVE/andv.s | 2 +- llvm/test/MC/AArch64/SVE/asr.s | 2 +- llvm/test/MC/AArch64/SVE/asrd.s | 2 +- llvm/test/MC/AArch64/SVE/asrr.s | 2 +- llvm/test/MC/AArch64/SVE/bfcvt.s | 2 +- llvm/test/MC/AArch64/SVE/bfcvtnt.s | 2 +- llvm/test/MC/AArch64/SVE/bfdot.s | 2 +- llvm/test/MC/AArch64/SVE/bfmlal.s | 2 +- llvm/test/MC/AArch64/SVE/bic.s | 2 +- llvm/test/MC/AArch64/SVE/bics.s | 2 +- llvm/test/MC/AArch64/SVE/brka.s | 2 +- llvm/test/MC/AArch64/SVE/brkas.s | 2 +- llvm/test/MC/AArch64/SVE/brkb.s | 2 +- llvm/test/MC/AArch64/SVE/brkbs.s | 2 +- llvm/test/MC/AArch64/SVE/brkn.s | 2 +- llvm/test/MC/AArch64/SVE/brkns.s | 2 +- llvm/test/MC/AArch64/SVE/brkpa.s | 2 +- llvm/test/MC/AArch64/SVE/brkpas.s | 2 +- 
llvm/test/MC/AArch64/SVE/brkpb.s | 2 +- llvm/test/MC/AArch64/SVE/brkpbs.s | 2 +- llvm/test/MC/AArch64/SVE/clasta.s | 2 +- llvm/test/MC/AArch64/SVE/clastb.s | 2 +- llvm/test/MC/AArch64/SVE/cls.s | 2 +- llvm/test/MC/AArch64/SVE/clz.s | 2 +- llvm/test/MC/AArch64/SVE/cmpeq.s | 2 +- llvm/test/MC/AArch64/SVE/cmpge.s | 2 +- llvm/test/MC/AArch64/SVE/cmpgt.s | 2 +- llvm/test/MC/AArch64/SVE/cmphi.s | 2 +- llvm/test/MC/AArch64/SVE/cmphs.s | 2 +- llvm/test/MC/AArch64/SVE/cmple.s | 2 +- llvm/test/MC/AArch64/SVE/cmplo.s | 2 +- llvm/test/MC/AArch64/SVE/cmpls.s | 2 +- llvm/test/MC/AArch64/SVE/cmplt.s | 2 +- llvm/test/MC/AArch64/SVE/cmpne.s | 2 +- llvm/test/MC/AArch64/SVE/cnot.s | 2 +- llvm/test/MC/AArch64/SVE/cnt.s | 2 +- llvm/test/MC/AArch64/SVE/cntb.s | 2 +- llvm/test/MC/AArch64/SVE/cntd.s | 2 +- llvm/test/MC/AArch64/SVE/cnth.s | 2 +- llvm/test/MC/AArch64/SVE/cntp.s | 2 +- llvm/test/MC/AArch64/SVE/cntw.s | 2 +- llvm/test/MC/AArch64/SVE/compact.s | 2 +- llvm/test/MC/AArch64/SVE/cpy.s | 2 +- llvm/test/MC/AArch64/SVE/ctermeq.s | 2 +- llvm/test/MC/AArch64/SVE/ctermne.s | 2 +- llvm/test/MC/AArch64/SVE/decb.s | 2 +- llvm/test/MC/AArch64/SVE/decd.s | 2 +- llvm/test/MC/AArch64/SVE/dech.s | 2 +- llvm/test/MC/AArch64/SVE/decp.s | 2 +- llvm/test/MC/AArch64/SVE/decw.s | 2 +- llvm/test/MC/AArch64/SVE/dup.s | 2 +- llvm/test/MC/AArch64/SVE/dupm.s | 2 +- llvm/test/MC/AArch64/SVE/eon.s | 2 +- llvm/test/MC/AArch64/SVE/eor.s | 2 +- llvm/test/MC/AArch64/SVE/eors.s | 2 +- llvm/test/MC/AArch64/SVE/eorv.s | 2 +- llvm/test/MC/AArch64/SVE/ext.s | 2 +- llvm/test/MC/AArch64/SVE/fabd.s | 2 +- llvm/test/MC/AArch64/SVE/fabs.s | 2 +- llvm/test/MC/AArch64/SVE/facge.s | 2 +- llvm/test/MC/AArch64/SVE/facgt.s | 2 +- llvm/test/MC/AArch64/SVE/facle.s | 2 +- llvm/test/MC/AArch64/SVE/faclt.s | 2 +- llvm/test/MC/AArch64/SVE/fadd.s | 2 +- llvm/test/MC/AArch64/SVE/fadda.s | 2 +- llvm/test/MC/AArch64/SVE/faddv.s | 2 +- llvm/test/MC/AArch64/SVE/fcadd.s | 2 +- llvm/test/MC/AArch64/SVE/fcmeq.s | 2 +- 
llvm/test/MC/AArch64/SVE/fcmge.s | 2 +- llvm/test/MC/AArch64/SVE/fcmgt.s | 2 +- llvm/test/MC/AArch64/SVE/fcmla.s | 2 +- llvm/test/MC/AArch64/SVE/fcmle.s | 2 +- llvm/test/MC/AArch64/SVE/fcmlt.s | 2 +- llvm/test/MC/AArch64/SVE/fcmne.s | 2 +- llvm/test/MC/AArch64/SVE/fcmuo.s | 2 +- llvm/test/MC/AArch64/SVE/fcpy.s | 2 +- llvm/test/MC/AArch64/SVE/fcvt.s | 2 +- llvm/test/MC/AArch64/SVE/fcvtzs.s | 2 +- llvm/test/MC/AArch64/SVE/fcvtzu.s | 2 +- llvm/test/MC/AArch64/SVE/fdiv.s | 2 +- llvm/test/MC/AArch64/SVE/fdivr.s | 2 +- llvm/test/MC/AArch64/SVE/fdup.s | 2 +- llvm/test/MC/AArch64/SVE/fexpa.s | 2 +- llvm/test/MC/AArch64/SVE/fmad.s | 2 +- llvm/test/MC/AArch64/SVE/fmax.s | 2 +- llvm/test/MC/AArch64/SVE/fmaxnm.s | 2 +- llvm/test/MC/AArch64/SVE/fmaxnmv.s | 2 +- llvm/test/MC/AArch64/SVE/fmaxv.s | 2 +- llvm/test/MC/AArch64/SVE/fmin.s | 2 +- llvm/test/MC/AArch64/SVE/fminnm.s | 2 +- llvm/test/MC/AArch64/SVE/fminnmv.s | 2 +- llvm/test/MC/AArch64/SVE/fminv.s | 2 +- llvm/test/MC/AArch64/SVE/fmla.s | 2 +- llvm/test/MC/AArch64/SVE/fmls.s | 2 +- llvm/test/MC/AArch64/SVE/fmov.s | 2 +- llvm/test/MC/AArch64/SVE/fmsb.s | 2 +- llvm/test/MC/AArch64/SVE/fmul.s | 2 +- llvm/test/MC/AArch64/SVE/fmulx.s | 2 +- llvm/test/MC/AArch64/SVE/fneg.s | 2 +- llvm/test/MC/AArch64/SVE/fnmad.s | 2 +- llvm/test/MC/AArch64/SVE/fnmla.s | 2 +- llvm/test/MC/AArch64/SVE/fnmls.s | 2 +- llvm/test/MC/AArch64/SVE/fnmsb.s | 2 +- llvm/test/MC/AArch64/SVE/frecpe.s | 2 +- llvm/test/MC/AArch64/SVE/frecps.s | 2 +- llvm/test/MC/AArch64/SVE/frecpx.s | 2 +- llvm/test/MC/AArch64/SVE/frinta.s | 2 +- llvm/test/MC/AArch64/SVE/frinti.s | 2 +- llvm/test/MC/AArch64/SVE/frintm.s | 2 +- llvm/test/MC/AArch64/SVE/frintn.s | 2 +- llvm/test/MC/AArch64/SVE/frintp.s | 2 +- llvm/test/MC/AArch64/SVE/frintx.s | 2 +- llvm/test/MC/AArch64/SVE/frintz.s | 2 +- llvm/test/MC/AArch64/SVE/frsqrte.s | 2 +- llvm/test/MC/AArch64/SVE/frsqrts.s | 2 +- llvm/test/MC/AArch64/SVE/fscale.s | 2 +- llvm/test/MC/AArch64/SVE/fsqrt.s | 2 +- 
llvm/test/MC/AArch64/SVE/fsub.s | 2 +- llvm/test/MC/AArch64/SVE/fsubr.s | 2 +- llvm/test/MC/AArch64/SVE/ftsmul.s | 2 +- llvm/test/MC/AArch64/SVE/ftssel.s | 2 +- llvm/test/MC/AArch64/SVE/incb.s | 2 +- llvm/test/MC/AArch64/SVE/incd.s | 2 +- llvm/test/MC/AArch64/SVE/inch.s | 2 +- llvm/test/MC/AArch64/SVE/incp.s | 2 +- llvm/test/MC/AArch64/SVE/incw.s | 2 +- llvm/test/MC/AArch64/SVE/index.s | 2 +- llvm/test/MC/AArch64/SVE/insr.s | 2 +- llvm/test/MC/AArch64/SVE/lasta.s | 2 +- llvm/test/MC/AArch64/SVE/lastb.s | 2 +- llvm/test/MC/AArch64/SVE/ld1b-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/ld1b.s | 2 +- llvm/test/MC/AArch64/SVE/ld1d-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/ld1d.s | 2 +- llvm/test/MC/AArch64/SVE/ld1h-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/ld1h.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rb.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rd.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rh.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rqb.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rqd.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rqh.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rqw.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rsb.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rsh.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rsw.s | 2 +- llvm/test/MC/AArch64/SVE/ld1rw.s | 2 +- llvm/test/MC/AArch64/SVE/ld1sb-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/ld1sb.s | 2 +- llvm/test/MC/AArch64/SVE/ld1sh-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/ld1sh.s | 2 +- llvm/test/MC/AArch64/SVE/ld1sw-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/ld1sw.s | 2 +- llvm/test/MC/AArch64/SVE/ld1w-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/ld1w.s | 2 +- llvm/test/MC/AArch64/SVE/ld2b.s | 2 +- llvm/test/MC/AArch64/SVE/ld2d.s | 2 +- llvm/test/MC/AArch64/SVE/ld2h.s | 2 +- llvm/test/MC/AArch64/SVE/ld2w.s | 2 +- llvm/test/MC/AArch64/SVE/ld3b.s | 2 +- llvm/test/MC/AArch64/SVE/ld3d.s | 2 +- llvm/test/MC/AArch64/SVE/ld3h.s | 2 +- llvm/test/MC/AArch64/SVE/ld3w.s | 2 +- llvm/test/MC/AArch64/SVE/ld4b.s | 2 +- llvm/test/MC/AArch64/SVE/ld4d.s | 2 +- llvm/test/MC/AArch64/SVE/ld4h.s 
| 2 +- llvm/test/MC/AArch64/SVE/ld4w.s | 2 +- llvm/test/MC/AArch64/SVE/ldff1b.s | 2 +- llvm/test/MC/AArch64/SVE/ldff1d.s | 2 +- llvm/test/MC/AArch64/SVE/ldff1h.s | 2 +- llvm/test/MC/AArch64/SVE/ldff1sb.s | 2 +- llvm/test/MC/AArch64/SVE/ldff1sh.s | 2 +- llvm/test/MC/AArch64/SVE/ldff1sw.s | 2 +- llvm/test/MC/AArch64/SVE/ldff1w.s | 2 +- llvm/test/MC/AArch64/SVE/ldnf1b.s | 2 +- llvm/test/MC/AArch64/SVE/ldnf1d.s | 2 +- llvm/test/MC/AArch64/SVE/ldnf1h.s | 2 +- llvm/test/MC/AArch64/SVE/ldnf1sb.s | 2 +- llvm/test/MC/AArch64/SVE/ldnf1sh.s | 2 +- llvm/test/MC/AArch64/SVE/ldnf1sw.s | 2 +- llvm/test/MC/AArch64/SVE/ldnf1w.s | 2 +- llvm/test/MC/AArch64/SVE/ldnt1b.s | 2 +- llvm/test/MC/AArch64/SVE/ldnt1d.s | 2 +- llvm/test/MC/AArch64/SVE/ldnt1h.s | 2 +- llvm/test/MC/AArch64/SVE/ldnt1w.s | 2 +- llvm/test/MC/AArch64/SVE/ldr.s | 2 +- llvm/test/MC/AArch64/SVE/lsl.s | 2 +- llvm/test/MC/AArch64/SVE/lslr.s | 2 +- llvm/test/MC/AArch64/SVE/lsr.s | 2 +- llvm/test/MC/AArch64/SVE/lsrr.s | 2 +- llvm/test/MC/AArch64/SVE/mad.s | 2 +- .../MC/AArch64/SVE/matrix-multiply-fp32.s | 2 +- llvm/test/MC/AArch64/SVE/mla.s | 2 +- llvm/test/MC/AArch64/SVE/mls.s | 2 +- llvm/test/MC/AArch64/SVE/mov.s | 2 +- llvm/test/MC/AArch64/SVE/movprfx.s | 2 +- llvm/test/MC/AArch64/SVE/movs.s | 2 +- llvm/test/MC/AArch64/SVE/msb.s | 2 +- llvm/test/MC/AArch64/SVE/mul.s | 2 +- llvm/test/MC/AArch64/SVE/nand.s | 2 +- llvm/test/MC/AArch64/SVE/nands.s | 2 +- llvm/test/MC/AArch64/SVE/neg.s | 2 +- llvm/test/MC/AArch64/SVE/nor.s | 2 +- llvm/test/MC/AArch64/SVE/nors.s | 2 +- llvm/test/MC/AArch64/SVE/not.s | 2 +- llvm/test/MC/AArch64/SVE/nots.s | 2 +- llvm/test/MC/AArch64/SVE/orn.s | 2 +- llvm/test/MC/AArch64/SVE/orns.s | 2 +- llvm/test/MC/AArch64/SVE/orr.s | 2 +- llvm/test/MC/AArch64/SVE/orrs.s | 2 +- llvm/test/MC/AArch64/SVE/orv.s | 2 +- llvm/test/MC/AArch64/SVE/pfalse.s | 2 +- llvm/test/MC/AArch64/SVE/pfirst.s | 2 +- llvm/test/MC/AArch64/SVE/pnext.s | 2 +- llvm/test/MC/AArch64/SVE/prfb-sve-only.s | 2 +- 
llvm/test/MC/AArch64/SVE/prfb.s | 2 +- llvm/test/MC/AArch64/SVE/prfd-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/prfd.s | 2 +- llvm/test/MC/AArch64/SVE/prfh-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/prfh.s | 2 +- llvm/test/MC/AArch64/SVE/prfw-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/prfw.s | 2 +- llvm/test/MC/AArch64/SVE/ptest.s | 2 +- llvm/test/MC/AArch64/SVE/ptrue.s | 2 +- llvm/test/MC/AArch64/SVE/ptrues.s | 2 +- llvm/test/MC/AArch64/SVE/punpkhi.s | 2 +- llvm/test/MC/AArch64/SVE/punpklo.s | 2 +- llvm/test/MC/AArch64/SVE/rbit.s | 2 +- llvm/test/MC/AArch64/SVE/rdffr.s | 2 +- llvm/test/MC/AArch64/SVE/rdffrs.s | 2 +- llvm/test/MC/AArch64/SVE/rdvl.s | 2 +- llvm/test/MC/AArch64/SVE/rev.s | 2 +- llvm/test/MC/AArch64/SVE/revb.s | 2 +- llvm/test/MC/AArch64/SVE/revh.s | 2 +- llvm/test/MC/AArch64/SVE/revw.s | 2 +- llvm/test/MC/AArch64/SVE/sabd.s | 2 +- llvm/test/MC/AArch64/SVE/saddv.s | 2 +- llvm/test/MC/AArch64/SVE/scvtf.s | 2 +- llvm/test/MC/AArch64/SVE/sdiv.s | 2 +- llvm/test/MC/AArch64/SVE/sdivr.s | 2 +- llvm/test/MC/AArch64/SVE/sdot.s | 2 +- llvm/test/MC/AArch64/SVE/sel.s | 2 +- llvm/test/MC/AArch64/SVE/setffr.s | 2 +- llvm/test/MC/AArch64/SVE/smax.s | 2 +- llvm/test/MC/AArch64/SVE/smaxv.s | 2 +- llvm/test/MC/AArch64/SVE/smin.s | 2 +- llvm/test/MC/AArch64/SVE/sminv.s | 2 +- llvm/test/MC/AArch64/SVE/smulh.s | 2 +- llvm/test/MC/AArch64/SVE/splice.s | 2 +- llvm/test/MC/AArch64/SVE/sqadd.s | 2 +- llvm/test/MC/AArch64/SVE/sqdecb.s | 2 +- llvm/test/MC/AArch64/SVE/sqdecd.s | 2 +- llvm/test/MC/AArch64/SVE/sqdech.s | 2 +- llvm/test/MC/AArch64/SVE/sqdecp.s | 2 +- llvm/test/MC/AArch64/SVE/sqdecw.s | 2 +- llvm/test/MC/AArch64/SVE/sqincb.s | 2 +- llvm/test/MC/AArch64/SVE/sqincd.s | 2 +- llvm/test/MC/AArch64/SVE/sqinch.s | 2 +- llvm/test/MC/AArch64/SVE/sqincp.s | 2 +- llvm/test/MC/AArch64/SVE/sqincw.s | 2 +- llvm/test/MC/AArch64/SVE/sqsub.s | 2 +- llvm/test/MC/AArch64/SVE/st1b-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/st1b.s | 2 +- llvm/test/MC/AArch64/SVE/st1d-sve-only.s | 2 +- 
llvm/test/MC/AArch64/SVE/st1d.s | 2 +- llvm/test/MC/AArch64/SVE/st1h-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/st1h.s | 2 +- llvm/test/MC/AArch64/SVE/st1w-sve-only.s | 2 +- llvm/test/MC/AArch64/SVE/st1w.s | 2 +- llvm/test/MC/AArch64/SVE/st2b.s | 2 +- llvm/test/MC/AArch64/SVE/st2d.s | 2 +- llvm/test/MC/AArch64/SVE/st2h.s | 2 +- llvm/test/MC/AArch64/SVE/st2w.s | 2 +- llvm/test/MC/AArch64/SVE/st3b.s | 2 +- llvm/test/MC/AArch64/SVE/st3d.s | 2 +- llvm/test/MC/AArch64/SVE/st3h.s | 2 +- llvm/test/MC/AArch64/SVE/st3w.s | 2 +- llvm/test/MC/AArch64/SVE/st4b.s | 2 +- llvm/test/MC/AArch64/SVE/st4d.s | 2 +- llvm/test/MC/AArch64/SVE/st4h.s | 2 +- llvm/test/MC/AArch64/SVE/st4w.s | 2 +- llvm/test/MC/AArch64/SVE/stnt1b.s | 2 +- llvm/test/MC/AArch64/SVE/stnt1d.s | 2 +- llvm/test/MC/AArch64/SVE/stnt1h.s | 2 +- llvm/test/MC/AArch64/SVE/stnt1w.s | 2 +- llvm/test/MC/AArch64/SVE/str.s | 2 +- llvm/test/MC/AArch64/SVE/sub.s | 2 +- llvm/test/MC/AArch64/SVE/subr.s | 2 +- llvm/test/MC/AArch64/SVE/sunpkhi.s | 2 +- llvm/test/MC/AArch64/SVE/sunpklo.s | 2 +- llvm/test/MC/AArch64/SVE/sxtb.s | 2 +- llvm/test/MC/AArch64/SVE/sxth.s | 2 +- llvm/test/MC/AArch64/SVE/sxtw.s | 2 +- llvm/test/MC/AArch64/SVE/tbl.s | 2 +- llvm/test/MC/AArch64/SVE/trn1.s | 2 +- llvm/test/MC/AArch64/SVE/trn2.s | 2 +- llvm/test/MC/AArch64/SVE/uabd.s | 2 +- llvm/test/MC/AArch64/SVE/uaddv.s | 2 +- llvm/test/MC/AArch64/SVE/ucvtf.s | 2 +- llvm/test/MC/AArch64/SVE/udiv.s | 2 +- llvm/test/MC/AArch64/SVE/udivr.s | 2 +- llvm/test/MC/AArch64/SVE/udot.s | 2 +- llvm/test/MC/AArch64/SVE/umax.s | 2 +- llvm/test/MC/AArch64/SVE/umaxv.s | 2 +- llvm/test/MC/AArch64/SVE/umin.s | 2 +- llvm/test/MC/AArch64/SVE/uminv.s | 2 +- llvm/test/MC/AArch64/SVE/umulh.s | 2 +- llvm/test/MC/AArch64/SVE/uqadd.s | 2 +- llvm/test/MC/AArch64/SVE/uqdecb.s | 2 +- llvm/test/MC/AArch64/SVE/uqdecd.s | 2 +- llvm/test/MC/AArch64/SVE/uqdech.s | 2 +- llvm/test/MC/AArch64/SVE/uqdecp.s | 2 +- llvm/test/MC/AArch64/SVE/uqdecw.s | 2 +- llvm/test/MC/AArch64/SVE/uqincb.s | 2 +- 
llvm/test/MC/AArch64/SVE/uqincd.s | 2 +- llvm/test/MC/AArch64/SVE/uqinch.s | 2 +- llvm/test/MC/AArch64/SVE/uqincp.s | 2 +- llvm/test/MC/AArch64/SVE/uqincw.s | 2 +- llvm/test/MC/AArch64/SVE/uqsub.s | 2 +- llvm/test/MC/AArch64/SVE/uunpkhi.s | 2 +- llvm/test/MC/AArch64/SVE/uunpklo.s | 2 +- llvm/test/MC/AArch64/SVE/uxtb.s | 2 +- llvm/test/MC/AArch64/SVE/uxth.s | 2 +- llvm/test/MC/AArch64/SVE/uxtw.s | 2 +- llvm/test/MC/AArch64/SVE/uzp1.s | 2 +- llvm/test/MC/AArch64/SVE/uzp2.s | 2 +- llvm/test/MC/AArch64/SVE/whilele.s | 2 +- llvm/test/MC/AArch64/SVE/whilelo.s | 2 +- llvm/test/MC/AArch64/SVE/whilels.s | 2 +- llvm/test/MC/AArch64/SVE/whilelt.s | 2 +- llvm/test/MC/AArch64/SVE/wrffr.s | 2 +- llvm/test/MC/AArch64/SVE/zip1.s | 2 +- llvm/test/MC/AArch64/SVE/zip2.s | 2 +- llvm/test/MC/AArch64/SVE2/adclb.s | 2 +- llvm/test/MC/AArch64/SVE2/adclt.s | 2 +- llvm/test/MC/AArch64/SVE2/addhnb.s | 2 +- llvm/test/MC/AArch64/SVE2/addhnt.s | 2 +- llvm/test/MC/AArch64/SVE2/addp.s | 2 +- llvm/test/MC/AArch64/SVE2/aesd.s | 2 +- llvm/test/MC/AArch64/SVE2/aese.s | 2 +- llvm/test/MC/AArch64/SVE2/aesimc.s | 2 +- llvm/test/MC/AArch64/SVE2/aesmc.s | 2 +- llvm/test/MC/AArch64/SVE2/bcax.s | 2 +- llvm/test/MC/AArch64/SVE2/bdep.s | 2 +- llvm/test/MC/AArch64/SVE2/bext.s | 2 +- llvm/test/MC/AArch64/SVE2/bgrp.s | 2 +- llvm/test/MC/AArch64/SVE2/bsl.s | 2 +- llvm/test/MC/AArch64/SVE2/bsl1n.s | 2 +- llvm/test/MC/AArch64/SVE2/bsl2n.s | 2 +- llvm/test/MC/AArch64/SVE2/cadd.s | 2 +- llvm/test/MC/AArch64/SVE2/cdot.s | 2 +- llvm/test/MC/AArch64/SVE2/cmla.s | 2 +- llvm/test/MC/AArch64/SVE2/eor3.s | 2 +- llvm/test/MC/AArch64/SVE2/eorbt.s | 2 +- llvm/test/MC/AArch64/SVE2/eortb.s | 2 +- llvm/test/MC/AArch64/SVE2/ext.s | 2 +- llvm/test/MC/AArch64/SVE2/faddp.s | 2 +- llvm/test/MC/AArch64/SVE2/fcvtlt.s | 2 +- llvm/test/MC/AArch64/SVE2/fcvtnt.s | 2 +- llvm/test/MC/AArch64/SVE2/fcvtx.s | 2 +- llvm/test/MC/AArch64/SVE2/fcvtxnt.s | 2 +- llvm/test/MC/AArch64/SVE2/flogb.s | 2 +- llvm/test/MC/AArch64/SVE2/fmaxnmp.s | 2 +- 
llvm/test/MC/AArch64/SVE2/fmaxp.s | 2 +- llvm/test/MC/AArch64/SVE2/fminnmp.s | 2 +- llvm/test/MC/AArch64/SVE2/fminp.s | 2 +- llvm/test/MC/AArch64/SVE2/fmlalb.s | 2 +- llvm/test/MC/AArch64/SVE2/fmlalt.s | 2 +- llvm/test/MC/AArch64/SVE2/fmlslb.s | 2 +- llvm/test/MC/AArch64/SVE2/fmlslt.s | 2 +- llvm/test/MC/AArch64/SVE2/histcnt.s | 2 +- llvm/test/MC/AArch64/SVE2/histseg.s | 2 +- llvm/test/MC/AArch64/SVE2/ldnt1b.s | 2 +- llvm/test/MC/AArch64/SVE2/ldnt1d.s | 2 +- llvm/test/MC/AArch64/SVE2/ldnt1h.s | 2 +- llvm/test/MC/AArch64/SVE2/ldnt1sb.s | 2 +- llvm/test/MC/AArch64/SVE2/ldnt1sh.s | 2 +- llvm/test/MC/AArch64/SVE2/ldnt1sw.s | 2 +- llvm/test/MC/AArch64/SVE2/ldnt1w.s | 2 +- llvm/test/MC/AArch64/SVE2/match.s | 2 +- llvm/test/MC/AArch64/SVE2/mla.s | 2 +- llvm/test/MC/AArch64/SVE2/mls.s | 2 +- llvm/test/MC/AArch64/SVE2/mul.s | 2 +- llvm/test/MC/AArch64/SVE2/nbsl.s | 2 +- llvm/test/MC/AArch64/SVE2/nmatch.s | 2 +- llvm/test/MC/AArch64/SVE2/pmul.s | 2 +- llvm/test/MC/AArch64/SVE2/pmullb-128.s | 2 +- llvm/test/MC/AArch64/SVE2/pmullb.s | 2 +- llvm/test/MC/AArch64/SVE2/pmullt-128.s | 2 +- llvm/test/MC/AArch64/SVE2/pmullt.s | 2 +- llvm/test/MC/AArch64/SVE2/raddhnb.s | 2 +- llvm/test/MC/AArch64/SVE2/raddhnt.s | 2 +- llvm/test/MC/AArch64/SVE2/rax1.s | 2 +- llvm/test/MC/AArch64/SVE2/rshrnb.s | 2 +- llvm/test/MC/AArch64/SVE2/rshrnt.s | 2 +- llvm/test/MC/AArch64/SVE2/rsubhnb.s | 2 +- llvm/test/MC/AArch64/SVE2/rsubhnt.s | 2 +- llvm/test/MC/AArch64/SVE2/saba.s | 2 +- llvm/test/MC/AArch64/SVE2/sabalb.s | 2 +- llvm/test/MC/AArch64/SVE2/sabalt.s | 2 +- llvm/test/MC/AArch64/SVE2/sabdlb.s | 2 +- llvm/test/MC/AArch64/SVE2/sabdlt.s | 2 +- llvm/test/MC/AArch64/SVE2/sadalp.s | 2 +- llvm/test/MC/AArch64/SVE2/saddlb.s | 2 +- llvm/test/MC/AArch64/SVE2/saddlbt.s | 2 +- llvm/test/MC/AArch64/SVE2/saddlt.s | 2 +- llvm/test/MC/AArch64/SVE2/saddwb.s | 2 +- llvm/test/MC/AArch64/SVE2/saddwt.s | 2 +- llvm/test/MC/AArch64/SVE2/sbclb.s | 2 +- llvm/test/MC/AArch64/SVE2/sbclt.s | 2 +- 
llvm/test/MC/AArch64/SVE2/shadd.s | 2 +- llvm/test/MC/AArch64/SVE2/shrnb.s | 2 +- llvm/test/MC/AArch64/SVE2/shrnt.s | 2 +- llvm/test/MC/AArch64/SVE2/shsub.s | 2 +- llvm/test/MC/AArch64/SVE2/shsubr.s | 2 +- llvm/test/MC/AArch64/SVE2/sli.s | 2 +- llvm/test/MC/AArch64/SVE2/sm4e.s | 2 +- llvm/test/MC/AArch64/SVE2/sm4ekey.s | 2 +- llvm/test/MC/AArch64/SVE2/smaxp.s | 2 +- llvm/test/MC/AArch64/SVE2/sminp.s | 2 +- llvm/test/MC/AArch64/SVE2/smlalb.s | 2 +- llvm/test/MC/AArch64/SVE2/smlalt.s | 2 +- llvm/test/MC/AArch64/SVE2/smlslb.s | 2 +- llvm/test/MC/AArch64/SVE2/smlslt.s | 2 +- llvm/test/MC/AArch64/SVE2/smulh.s | 2 +- llvm/test/MC/AArch64/SVE2/smullb.s | 2 +- llvm/test/MC/AArch64/SVE2/smullt.s | 2 +- llvm/test/MC/AArch64/SVE2/splice.s | 2 +- llvm/test/MC/AArch64/SVE2/sqabs.s | 2 +- llvm/test/MC/AArch64/SVE2/sqadd.s | 2 +- llvm/test/MC/AArch64/SVE2/sqcadd.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmlalb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmlalbt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmlalt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmlslb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmlslbt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmlslt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmulh.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmullb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqdmullt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqneg.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrdcmlah.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrdmlah.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrdmlsh.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrdmulh.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrshl.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrshlr.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrshrnb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrshrnt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrshrunb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqrshrunt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqshl.s | 2 +- llvm/test/MC/AArch64/SVE2/sqshlr.s | 2 +- llvm/test/MC/AArch64/SVE2/sqshlu.s | 2 +- llvm/test/MC/AArch64/SVE2/sqshrnb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqshrnt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqshrunb.s | 2 
+- llvm/test/MC/AArch64/SVE2/sqshrunt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqsub.s | 2 +- llvm/test/MC/AArch64/SVE2/sqsubr.s | 2 +- llvm/test/MC/AArch64/SVE2/sqxtnb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqxtnt.s | 2 +- llvm/test/MC/AArch64/SVE2/sqxtunb.s | 2 +- llvm/test/MC/AArch64/SVE2/sqxtunt.s | 2 +- llvm/test/MC/AArch64/SVE2/srhadd.s | 2 +- llvm/test/MC/AArch64/SVE2/sri.s | 2 +- llvm/test/MC/AArch64/SVE2/srshl.s | 2 +- llvm/test/MC/AArch64/SVE2/srshlr.s | 2 +- llvm/test/MC/AArch64/SVE2/srshr.s | 2 +- llvm/test/MC/AArch64/SVE2/srsra.s | 2 +- llvm/test/MC/AArch64/SVE2/sshllb.s | 2 +- llvm/test/MC/AArch64/SVE2/sshllt.s | 2 +- llvm/test/MC/AArch64/SVE2/ssra.s | 2 +- llvm/test/MC/AArch64/SVE2/ssublb.s | 2 +- llvm/test/MC/AArch64/SVE2/ssublbt.s | 2 +- llvm/test/MC/AArch64/SVE2/ssublt.s | 2 +- llvm/test/MC/AArch64/SVE2/ssubltb.s | 2 +- llvm/test/MC/AArch64/SVE2/ssubwb.s | 2 +- llvm/test/MC/AArch64/SVE2/ssubwt.s | 2 +- llvm/test/MC/AArch64/SVE2/stnt1b.s | 2 +- llvm/test/MC/AArch64/SVE2/stnt1d.s | 2 +- llvm/test/MC/AArch64/SVE2/stnt1h.s | 2 +- llvm/test/MC/AArch64/SVE2/stnt1w.s | 2 +- llvm/test/MC/AArch64/SVE2/subhnb.s | 2 +- llvm/test/MC/AArch64/SVE2/subhnt.s | 2 +- llvm/test/MC/AArch64/SVE2/suqadd.s | 2 +- llvm/test/MC/AArch64/SVE2/tbl.s | 2 +- llvm/test/MC/AArch64/SVE2/tbx.s | 2 +- llvm/test/MC/AArch64/SVE2/uaba.s | 2 +- llvm/test/MC/AArch64/SVE2/uabalb.s | 2 +- llvm/test/MC/AArch64/SVE2/uabalt.s | 2 +- llvm/test/MC/AArch64/SVE2/uabdlb.s | 2 +- llvm/test/MC/AArch64/SVE2/uabdlt.s | 2 +- llvm/test/MC/AArch64/SVE2/uadalp.s | 2 +- llvm/test/MC/AArch64/SVE2/uaddlb.s | 2 +- llvm/test/MC/AArch64/SVE2/uaddlt.s | 2 +- llvm/test/MC/AArch64/SVE2/uaddwb.s | 2 +- llvm/test/MC/AArch64/SVE2/uaddwt.s | 2 +- llvm/test/MC/AArch64/SVE2/uhadd.s | 2 +- llvm/test/MC/AArch64/SVE2/uhsub.s | 2 +- llvm/test/MC/AArch64/SVE2/uhsubr.s | 2 +- llvm/test/MC/AArch64/SVE2/umaxp.s | 2 +- llvm/test/MC/AArch64/SVE2/uminp.s | 2 +- llvm/test/MC/AArch64/SVE2/umlalb.s | 2 +- llvm/test/MC/AArch64/SVE2/umlalt.s | 
2 +- llvm/test/MC/AArch64/SVE2/umlslb.s | 2 +- llvm/test/MC/AArch64/SVE2/umlslt.s | 2 +- llvm/test/MC/AArch64/SVE2/umulh.s | 2 +- llvm/test/MC/AArch64/SVE2/umullb.s | 2 +- llvm/test/MC/AArch64/SVE2/umullt.s | 2 +- llvm/test/MC/AArch64/SVE2/uqadd.s | 2 +- llvm/test/MC/AArch64/SVE2/uqrshl.s | 2 +- llvm/test/MC/AArch64/SVE2/uqrshlr.s | 2 +- llvm/test/MC/AArch64/SVE2/uqrshrnb.s | 2 +- llvm/test/MC/AArch64/SVE2/uqrshrnt.s | 2 +- llvm/test/MC/AArch64/SVE2/uqshl.s | 2 +- llvm/test/MC/AArch64/SVE2/uqshlr.s | 2 +- llvm/test/MC/AArch64/SVE2/uqshrnb.s | 2 +- llvm/test/MC/AArch64/SVE2/uqshrnt.s | 2 +- llvm/test/MC/AArch64/SVE2/uqsub.s | 2 +- llvm/test/MC/AArch64/SVE2/uqsubr.s | 2 +- llvm/test/MC/AArch64/SVE2/uqxtnb.s | 2 +- llvm/test/MC/AArch64/SVE2/uqxtnt.s | 2 +- llvm/test/MC/AArch64/SVE2/urecpe.s | 2 +- llvm/test/MC/AArch64/SVE2/urhadd.s | 2 +- llvm/test/MC/AArch64/SVE2/urshl.s | 2 +- llvm/test/MC/AArch64/SVE2/urshlr.s | 2 +- llvm/test/MC/AArch64/SVE2/urshr.s | 2 +- llvm/test/MC/AArch64/SVE2/ursqrte.s | 2 +- llvm/test/MC/AArch64/SVE2/ursra.s | 2 +- llvm/test/MC/AArch64/SVE2/ushllb.s | 2 +- llvm/test/MC/AArch64/SVE2/ushllt.s | 2 +- llvm/test/MC/AArch64/SVE2/usqadd.s | 2 +- llvm/test/MC/AArch64/SVE2/usra.s | 2 +- llvm/test/MC/AArch64/SVE2/usublb.s | 2 +- llvm/test/MC/AArch64/SVE2/usublt.s | 2 +- llvm/test/MC/AArch64/SVE2/usubwb.s | 2 +- llvm/test/MC/AArch64/SVE2/usubwt.s | 2 +- llvm/test/MC/AArch64/SVE2/whilege.s | 2 +- llvm/test/MC/AArch64/SVE2/whilegt.s | 2 +- llvm/test/MC/AArch64/SVE2/whilehi.s | 2 +- llvm/test/MC/AArch64/SVE2/whilehs.s | 2 +- llvm/test/MC/AArch64/SVE2/whilerw.s | 2 +- llvm/test/MC/AArch64/SVE2/whilewr.s | 2 +- llvm/test/MC/AArch64/SVE2/xar.s | 2 +- 608 files changed, 703 insertions(+), 713 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 2682b9b61d8e49..ddbd762c1a4c7d 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -446,11 +446,8 @@ def 
FeatureEnhancedCounterVirtualization : def FeatureRME : SubtargetFeature<"rme", "HasRME", "true", "Enable Realm Management Extension">; -def FeatureStreamingSVE : SubtargetFeature<"streaming-sve", "HasStreamingSVE", "true", - "Enable instructions that are valid in Streaming SVE execution mode">; - def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", - "Enable Scalable Matrix Extension (SME)", [FeatureStreamingSVE, FeatureBF16]>; + "Enable Scalable Matrix Extension (SME)", [FeatureBF16, FeatureUseScalarIncVL]>; def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true", "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; @@ -612,7 +609,7 @@ class AArch64Unsupported { list F; } def SVEUnsupported : AArch64Unsupported { let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, - HasSVE2BitPerm, HasSVEorStreamingSVE, HasSVE2orStreamingSVE]; + HasSVE2BitPerm, HasSVEorSME, HasSVE2orSME]; } def PAUnsupported : AArch64Unsupported { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2652e98fb30b2b..00c83814721831 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -289,7 +289,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addQRTypeForNEON(MVT::v8bf16); } - if (Subtarget->hasSVE() || Subtarget->hasStreamingSVE()) { + if (Subtarget->hasSVE() || Subtarget->hasSME()) { // Add legal sve predicate types addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); @@ -18867,8 +18867,7 @@ static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); // Don't expand for SVE2 - if (!VT.isScalableVector() || Subtarget->hasSVE2() || - Subtarget->hasStreamingSVE()) + if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME()) return SDValue(); // Don't expand for NEON diff --git 
a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 6132e1f42831ac..f7c2f30d1183f2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7410,7 +7410,7 @@ class SIMDMovAlias { let Inst{20-16} = 0b00001; } @@ -7457,7 +7457,7 @@ multiclass SMov { multiclass UMov { // UMOV with vector index of 0 are legal in Scalable Matrix Extension (SME) // streaming mode. - let Predicates = [HasNEONorStreamingSVE] in { + let Predicates = [HasNEONorSME] in { def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> { let Inst{20-16} = 0b00001; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index bb3f31cf87ff89..1c44976814de37 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3462,7 +3462,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Predicate register by ORRing with itself. if (AArch64::PPRRegClass.contains(DestReg) && AArch64::PPRRegClass.contains(SrcReg)) { - assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) && + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && "Unexpected SVE register."); BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) .addReg(SrcReg) // Pg @@ -3474,7 +3474,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register by ORRing with itself. if (AArch64::ZPRRegClass.contains(DestReg) && AArch64::ZPRRegClass.contains(SrcReg)) { - assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) && + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && "Unexpected SVE register."); BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) .addReg(SrcReg) @@ -3485,7 +3485,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register pair by copying the individual sub-registers. 
if (AArch64::ZPR2RegClass.contains(DestReg) && AArch64::ZPR2RegClass.contains(SrcReg)) { - assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) && + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && "Unexpected SVE register."); static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, @@ -3496,7 +3496,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register triple by copying the individual sub-registers. if (AArch64::ZPR3RegClass.contains(DestReg) && AArch64::ZPR3RegClass.contains(SrcReg)) { - assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) && + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && "Unexpected SVE register."); static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, AArch64::zsub2}; @@ -3508,7 +3508,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a Z register quad by copying the individual sub-registers. if (AArch64::ZPR4RegClass.contains(DestReg) && AArch64::ZPR4RegClass.contains(SrcReg)) { - assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) && + assert((Subtarget.hasSVE() || Subtarget.hasSME()) && "Unexpected SVE register."); static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, AArch64::zsub2, AArch64::zsub3}; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 5292dda9240b69..ca6d3f20547cd2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -142,23 +142,21 @@ def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">, AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">; def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">, AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">; -def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(all_of FeatureStreamingSVE), "sme">; // A subset of SVE(2) instructions are legal in 
Streaming SVE execution mode, // they should be enabled if either has been specified. -def HasSVEorStreamingSVE - : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE), +def HasSVEorSME + : Predicate<"Subtarget->hasSVE() || Subtarget->hasSME()">, + AssemblerPredicate<(any_of FeatureSVE, FeatureSME), "sve or sme">; -def HasSVE2orStreamingSVE - : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE), +def HasSVE2orSME + : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">, + AssemblerPredicate<(any_of FeatureSVE2, FeatureSME), "sve2 or sme">; // A subset of NEON instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. -def HasNEONorStreamingSVE - : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">, - AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE), +def HasNEONorSME + : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">, + AssemblerPredicate<(any_of FeatureNEON, FeatureSME), "neon or sme">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicate<(all_of FeatureRCPC), "rcpc">; @@ -943,7 +941,7 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot VectorIndexS:$idx)>; } -let Predicates = [HasNEONorStreamingSVE, HasBF16] in { +let Predicates = [HasNEONorSME, HasBF16] in { def BFCVT : BF16ToSinglePrecision<"bfcvt">; } @@ -4795,9 +4793,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; -defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>; -defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>; -defm FRSQRTS : 
SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>; defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; @@ -4892,9 +4890,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>; -defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>; -defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>; +defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>; +defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>; +defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index eee713ec3857c1..5e60e0fe2cdb1b 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -323,7 +323,7 @@ let Predicates = [HasSVE] in { def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm ADD_ZZZ : 
sve_int_bin_cons_arit_0<0b000, "add", add>; defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>; defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>; @@ -344,9 +344,9 @@ let Predicates = [HasSVEorStreamingSVE] in { defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", int_aarch64_sve_eor, DestructiveBinaryComm>; defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", "AND_ZPZZ", int_aarch64_sve_and, DestructiveBinaryComm>; defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", "BIC_ZPZZ", int_aarch64_sve_bic, DestructiveBinary>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; @@ -355,9 +355,9 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos +} // End HasSVEorSME, UseExperimentalZeroingPseudos -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>; defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>; defm SUBR_ZI : sve_int_arith_imm0_subr<0b011, "subr", sub>; @@ -501,9 +501,9 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FMIN_ZPZZ : sve_fp_bin_pred_hfd; defm FABD_ZPZZ : sve_fp_bin_pred_hfd; defm FDIV_ZPZZ : sve_fp_bin_pred_hfd; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FMUL_ZPZZ : 
sve_fp_2op_p_zds_zeroing_hsd; @@ -516,28 +516,28 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos +} // End HasSVEorSME, UseExperimentalZeroingPseudos -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>; defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", AArch64frecps>; defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", AArch64frsqrts>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>; defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>; @@ -593,26 +593,26 @@ let Predicates = [HasSVEorStreamingSVE] in { defm : fma; defm : fma; defm : fma; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>; defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>; defm 
FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>; defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // SVE floating point reductions. defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>; defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>; defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>; @@ -706,13 +706,13 @@ let Predicates = [HasSVEorStreamingSVE] in { defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>; @@ -736,13 +736,13 @@ let Predicates = [HasSVEorStreamingSVE] in { defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>; defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>; defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", 
int_aarch64_sve_brkpb_z>; @@ -854,7 +854,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // non-faulting continuous load with reg+immediate @@ -894,7 +894,7 @@ let Predicates = [HasSVE] in { defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // LD(2|3|4) structured loads with reg+immediate defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>; defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>; @@ -922,7 +922,7 @@ let Predicates = [HasSVEorStreamingSVE] in { def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>; def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>; def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // Gathers using unscaled 32-bit offsets, e.g. 
@@ -1038,7 +1038,7 @@ let Predicates = [HasSVE] in { defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // Non-temporal contiguous loads (register + immediate) defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>; @@ -1074,7 +1074,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // Scatters using unpacked, unscaled 32-bit offsets, e.g. @@ -1128,7 +1128,7 @@ let Predicates = [HasSVE] in { defm SST1D : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // ST(2|3|4) structured stores (register + immediate) defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>; defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>; @@ -1209,7 +1209,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm : sve_prefetch; defm : sve_prefetch; defm : sve_prefetch; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { // Gather prefetch using scaled 32-bit offsets, e.g. 
@@ -1315,7 +1315,7 @@ let Predicates = [HasSVE] in { defm : adrXtwShiftPat; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>; defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>; @@ -1545,7 +1545,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>; defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>; -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>; defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>; defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>; @@ -1642,16 +1642,16 @@ let Predicates = [HasSVEorStreamingSVE] in { defm ASR_ZPZI : sve_int_shift_pred_bhsd; defm LSR_ZPZI : sve_int_shift_pred_bhsd; defm LSL_ZPZI : sve_int_shift_pred_bhsd; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME -let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; -} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos +} // End HasSVEorSME, UseExperimentalZeroingPseudos -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; @@ 
-1767,27 +1767,27 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>; defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>; defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME -let Predicates = [HasBF16, HasSVEorStreamingSVE] in { +let Predicates = [HasBF16, HasSVEorSME] in { defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>; defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>; -} // End HasBF16, HasSVEorStreamingSVE +} // End HasBF16, HasSVEorSME let Predicates = [HasBF16, HasSVE] in { defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>; } // End HasBF16, HasSVE -let Predicates = [HasBF16, HasSVEorStreamingSVE] in { +let Predicates = [HasBF16, HasSVEorSME] in { defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>; defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>; defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>; defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>; defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>; defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>; -} // End HasBF16, HasSVEorStreamingSVE +} // End HasBF16, HasSVEorSME -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { // InstAliases def : InstAlias<"mov $Zd, $Zn", (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>; @@ -2002,7 +2002,7 @@ let Predicates = [HasSVEorStreamingSVE] in { (DECD_ZPiI ZPR:$op, 31, $imm)>; } - let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL], AddedComplexity = 5 in { + let Predicates = [HasSVEorSME, UseScalarIncVL], AddedComplexity = 5 in { def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm 
i32:$imm))), (INCH_XPiI GPR64:$op, 31, $imm)>; def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))), @@ -2421,7 +2421,7 @@ let Predicates = [HasSVEorStreamingSVE] in { // 16-element contiguous loads defm : ld1; -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE] in { multiclass ldnf1 { @@ -2506,7 +2506,7 @@ let Predicates = [HasSVE] in { defm : ldff1; } // End HasSVE -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { multiclass st1 { // reg + reg @@ -2740,7 +2740,7 @@ let Predicates = [HasSVEorStreamingSVE] in { def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; } -} // End HasSVEorStreamingSVE +} // End HasSVEorSME let Predicates = [HasSVE, HasMatMulInt8] in { defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>; @@ -2748,11 +2748,11 @@ let Predicates = [HasSVE, HasMatMulInt8] in { defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>; } // End HasSVE, HasMatMulInt8 -let Predicates = [HasSVEorStreamingSVE, HasMatMulInt8] in { +let Predicates = [HasSVEorSME, HasMatMulInt8] in { defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>; defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>; defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>; -} // End HasSVEorStreamingSVE, HasMatMulInt8 +} // End HasSVEorSME, HasMatMulInt8 let Predicates = [HasSVE, HasMatMulFP32] in { defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>; @@ -2770,16 +2770,16 @@ let Predicates = [HasSVE, HasMatMulFP64] in { defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>; } // End HasSVE, HasMatMulFP64 -let Predicates = [HasSVEorStreamingSVE, HasMatMulFP64] in { +let Predicates = [HasSVEorSME, HasMatMulFP64] in { defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, 
"zip1", int_aarch64_sve_zip1q>; defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>; defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>; defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>; defm TRN1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>; defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>; -} // End HasSVEorStreamingSVE, HasMatMulFP64 +} // End HasSVEorSME, HasMatMulFP64 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 integer multiply-add (indexed) defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>; defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>; @@ -2927,17 +2927,17 @@ let Predicates = [HasSVE2orStreamingSVE] in { defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME -let Predicates = [HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos] in { +let Predicates = [HasSVE2orSME, UseExperimentalZeroingPseudos] in { defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; -} // End HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos +} // End HasSVE2orSME, UseExperimentalZeroingPseudos -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 predicated shifts defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl", "SQSHL_ZPZI", int_aarch64_sve_sqshl>; defm UQSHL_ZPmI : 
sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl", "UQSHL_ZPZI", int_aarch64_sve_uqshl>; @@ -3050,7 +3050,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>; defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>; defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 character match @@ -3058,7 +3058,7 @@ let Predicates = [HasSVE2] in { defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 bitwise exclusive-or interleaved defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>; defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>; @@ -3073,7 +3073,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>; defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>; defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 histogram generation (segment) @@ -3083,7 +3083,7 @@ let Predicates = [HasSVE2] in { defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 floating-point base 2 logarithm as integer defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>; @@ -3125,7 +3125,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : 
sve2_int_perm_extract_i_cons<"ext">; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 non-temporal gather loads @@ -3144,10 +3144,10 @@ let Predicates = [HasSVE2] in { defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 vector splice (constructive) defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2] in { // SVE2 non-temporal scatter stores @@ -3161,7 +3161,7 @@ let Predicates = [HasSVE2] in { defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>; } // End HasSVE2 -let Predicates = [HasSVE2orStreamingSVE] in { +let Predicates = [HasSVE2orSME] in { // SVE2 table lookup (three sources) defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>; defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>; @@ -3180,7 +3180,7 @@ let Predicates = [HasSVE2orStreamingSVE] in { // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">; defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">; -} // End HasSVE2orStreamingSVE +} // End HasSVE2orSME let Predicates = [HasSVE2AES] in { // SVE2 crypto destructive binary operations diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index c40672e4a5f7d9..d81f2c17c80224 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -22,7 +22,7 @@ def A64FXModel : SchedMachineModel { list UnsupportedFeatures = [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth, - HasSVE2orStreamingSVE]; + HasSVE2orSME]; let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 48af5a7b90654b..29d0a8e6282eab 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -332,7 +332,7 @@ multiclass sve_int_ptrue opc, string asm, SDPatternOperator op> { def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>; -let Predicates = [HasSVEorStreamingSVE] in { +let Predicates = [HasSVEorSME] in { defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>; defm PTRUES : sve_int_ptrue<0b001, "ptrues", null_frag>; @@ -995,7 +995,7 @@ class sve_int_pred_pattern_a opc, string asm> multiclass sve_int_pred_pattern_a opc, string asm, SDPatternOperator op, SDPatternOperator opcnt> { - let Predicates = [HasSVEorStreamingSVE] in { + let Predicates = [HasSVEorSME] in { def NAME : sve_int_pred_pattern_a; def : InstAlias opc, string asm, (!cast(NAME) GPR64:$Rdn, 0b11111, 1), 2>; } - let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in { + let Predicates = [HasSVEorSME, UseScalarIncVL] in { def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))), (!cast(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll index ab3561c61f4f7a..52ceec011420d7 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; Testing prfop encodings diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll index 78523479dadad8..ecb1c6bc00ef3b 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; SXTB diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll index 72a13029050bf3..dbf3474ae07ae9 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; CLS diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll index c6ba8435c31b1a..9a579dadeb4378 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl < %s | FileCheck %s -check-prefix=USE_SCALAR_INC -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -mattr=+use-scalar-inc-vl < %s | FileCheck %s -check-prefix=USE_SCALAR_INC +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -check-prefix=USE_SCALAR_INC +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s -check-prefix=USE_SCALAR_INC ; ; CNTB diff --git 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll index f846e6a345a019..61c31ea6786560 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s -; RUN: llc -mtriple aarch64 -mattr=+streaming-sve -asm-verbose=1 < %s | FileCheck %s +; RUN: llc -mtriple aarch64 -mattr=+sme -asm-verbose=1 < %s | FileCheck %s ; ; SVCREATE2 (i8) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll index 3665ad0073b6dc..656b8a47ec159b 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; FCVT diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll index 113ddc912f41c3..6ad5afe9f430c6 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple aarch64 -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple aarch64 -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; All these tests create a vector tuple, insert z5 into one of the elements, ; and finally extracts that element from the wide vector to return it. 
These diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll index ff0edbbc35e8db..0e8cf3d842ce2b 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s ; NOTE: invalid, upper and lower bound immediate values of the regimm ; addressing mode are checked only for the byte version of each diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll index bf496f2b41fb5d..dd2c71befccd2f 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s ; ld2b define @ld2.nxv32i8( %Pg, i8 *%addr, i64 %a) { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll index d28d6ad65a4064..78abe8aabc497c 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | 
FileCheck %s ; NOTE: invalid, upper and lower bound immediate values of the regimm ; addressing mode are checked only for the byte version of each diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll index 57ae39ea91fffa..2c0cd68ffc9cc0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s ; ld2b define { , } @ld2.nxv32i8( %Pg, i8 *%addr, i64 %a) { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll index 86088f959f20c7..59b546f62328d2 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; CNOT diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll index fc9b7e837df056..43749ad37d8c83 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; PTRUE diff --git 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll index 533d941f170030..8341e8f68aca66 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; BRKA diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll index 6005928ae107db..58d4e9e944710c 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; PTEST diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll index 3e9a21da0eb750..e19fbe57f96956 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; Converting to svbool_t () diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll index e981742ec61f50..73512a6bb5cc40 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; RBIT diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll index 01e2027b49b3de..55201f6745cc0d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; SEL (Vectors) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll index 91ead1c07cee69..f89cad65791729 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for ; * @llvm.aarch64.sve.sqinc{b|h|w|d|p}, and diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll index f32f157cbce88c..4631eb7fe700e8 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc 
-mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for ; * @llvm.aarch64.sve.sqinc{b|h|w|d|p}, and diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll index 40c4eb292712df..91486dcc57b985 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; ST1B diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll index e46b0574fe34ad..7593b81e57ef73 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; ST1B diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll index 7bd6d88f3c2986..5c4db6ec7e131f 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s -; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | 
FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s +; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; ST1B diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll index 43182549e10add..fe435300d9e3d3 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; NOTE: invalid, upper and lower bound immediate values of the reg+imm ; addressing mode are checked only for the byte version of each diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll index ced9a7f88e0aba..e034f0d47b704a 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; ; ST2B diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll index 54805e52ea57d2..ecd12f53c430a7 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: 
llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; ST2B diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll index 69f7a520a4ce20..d7e6ac42291aa0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; Since UQDEC{B|H|W|D|P} and UQINC{B|H|W|D|P} have identical semantics, the tests for ; * @llvm.aarch64.sve.uqinc{b|h|w|d|p}, and diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll index 9aaa4c4000bb38..96324d90d7ef37 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; Since UQDEC{B|H|W|D|P} and UQINC{B|H|W|D|P} have identical semantics, the tests for ; * @llvm.aarch64.sve.uqinc{b|h|w|d|p}, and diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll index 05c7dbe5e1ec1c..4fefc05cc322df 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; WHILELE diff --git 
a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll index 15a11936685355..053c6cb3ebf2c2 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ADDHNB diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll index 656c68fae49662..d79e767664cbb8 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; SHRNB diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll index bf2fb8c42de701..04c0cd3e9a1f6a 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll index c0c595ef74ef50..8ca1584bc17925 100644 --- 
a/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; ; WHILERW diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll index 9af957f4964222..1c3f52de5afb36 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; FCVTLT diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll index ea923cf744eb38..8c678135abf759 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s ; ; FLOGB diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll index e780d96a0d2f8e..44da020403b39b 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll @@ -1,5 +1,5 @@ ; RUN: 
llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; FMLALB (Vectors) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll index 8378b8b9a147bf..2c7b50c28f13c7 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; MUL diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll index 332d9f0c5606fd..a15267adc77261 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; ADDP diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll index 16e70504d159c7..8b725f43d20e40 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme 
-asm-verbose=0 < %s | FileCheck %s ; ; EORBT diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll index b26f4cdfe21b78..4d16a00f07bddb 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; SQXTNB diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll index d3e897ee950285..70bd77994a6d5f 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; CADD diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll index 6a8ff4bda0456e..ced16443cc92f7 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; WHILEGE diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll index cdfb59b509d284..caa53fdc1d9eb6 100644 --- 
a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; SADDLBT diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll index 2e461f469c48e9..f16a7dabea5664 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; SABALB diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll index 19607e9455f642..5edf240bbee6bb 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s ; ; SADALP diff --git a/llvm/test/MC/AArch64/SME/feature.s b/llvm/test/MC/AArch64/SME/feature.s index 1276f60fe36e42..092ebb4b978e82 100644 --- a/llvm/test/MC/AArch64/SME/feature.s +++ b/llvm/test/MC/AArch64/SME/feature.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme-f64 < %s | FileCheck %s // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme-i64 < %s | FileCheck %s -// Verify +sme flags 
imply +streaming-sve +// Verify +sme flags imply streaming compatible SVE instructions. tbx z0.b, z1.b, z2.b // CHECK: tbx z0.b, z1.b, z2.b diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s index 6aa983ba1093c7..41868a8c790f1a 100644 --- a/llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s +++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s @@ -1,15 +1,13 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+streaming-sve,+bf16 < %s \ -// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=-neon,+streaming-sve,+bf16 < %s \ -// RUN: | llvm-objdump --mattr=+bf16 -d - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=-neon,+sme < %s \ +// RUN: | llvm-objdump --mattr=-neon,+sme -d - | FileCheck %s --check-prefix=CHECK-INST // Disassemble encoding and check the re-encoding (-show-encoding) matches. 
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+streaming-sve,+bf16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=-neon,+streaming-sve,+bf16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=-neon,+sme -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfcvt h5, s3 diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s index 9d838880c56098..6e1d6b8d4d4b7b 100644 --- a/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s +++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-fp16.s @@ -1,13 +1,13 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+fullfp16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+fullfp16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+streaming-sve,+fullfp16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme,+fullfp16 < %s \ // RUN: | llvm-objdump --mattr=+fullfp16 -d - | FileCheck %s --check-prefix=CHECK-INST // Disassemble encoding and check the re-encoding (-show-encoding) matches. 
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+fullfp16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+fullfp16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+streaming-sve,+fullfp16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme,+fullfp16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // Scalar FP instructions diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s index b744b49766351d..138a1fe0bb8e94 100644 --- a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s +++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s @@ -1,15 +1,13 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+streaming-sve < %s \ -// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=-neon,+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=-neon,+sme < %s \ // RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST // Disassemble encoding and check the re-encoding (-show-encoding) matches. 
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=-neon,+streaming-sve -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=-neon,+sme -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // Scalar FP instructions diff --git a/llvm/test/MC/AArch64/SVE/abs.s b/llvm/test/MC/AArch64/SVE/abs.s index 58143bc128f0d6..787aa0926ded68 100644 --- a/llvm/test/MC/AArch64/SVE/abs.s +++ b/llvm/test/MC/AArch64/SVE/abs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/add.s b/llvm/test/MC/AArch64/SVE/add.s index 1a57e499c1a78f..867378b2c482c9 100644 --- a/llvm/test/MC/AArch64/SVE/add.s +++ b/llvm/test/MC/AArch64/SVE/add.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/addpl.s b/llvm/test/MC/AArch64/SVE/addpl.s index 7a11345065990d..7610fd1312b5a3 100644 --- a/llvm/test/MC/AArch64/SVE/addpl.s +++ 
b/llvm/test/MC/AArch64/SVE/addpl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/addvl.s b/llvm/test/MC/AArch64/SVE/addvl.s index 8ac14fae10a221..508d425ee81f95 100644 --- a/llvm/test/MC/AArch64/SVE/addvl.s +++ b/llvm/test/MC/AArch64/SVE/addvl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/and.s b/llvm/test/MC/AArch64/SVE/and.s index ccdc8ebfb0d7a5..dcbd8567b4f53c 100644 --- a/llvm/test/MC/AArch64/SVE/and.s +++ b/llvm/test/MC/AArch64/SVE/and.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ands.s b/llvm/test/MC/AArch64/SVE/ands.s index c5b12f80798148..e4e47eab7815f6 100644 --- 
a/llvm/test/MC/AArch64/SVE/ands.s +++ b/llvm/test/MC/AArch64/SVE/ands.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/andv.s b/llvm/test/MC/AArch64/SVE/andv.s index ed4f57e5008506..328af774dfa3c8 100644 --- a/llvm/test/MC/AArch64/SVE/andv.s +++ b/llvm/test/MC/AArch64/SVE/andv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/asr.s b/llvm/test/MC/AArch64/SVE/asr.s index 3b9f4f354c5042..6b3bdd328d9192 100644 --- a/llvm/test/MC/AArch64/SVE/asr.s +++ b/llvm/test/MC/AArch64/SVE/asr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/asrd.s b/llvm/test/MC/AArch64/SVE/asrd.s index e5f10f21eb3dbb..ecb190639e695e 
100644 --- a/llvm/test/MC/AArch64/SVE/asrd.s +++ b/llvm/test/MC/AArch64/SVE/asrd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/asrr.s b/llvm/test/MC/AArch64/SVE/asrr.s index 5a7b6333bfa524..96d729b5ec6466 100644 --- a/llvm/test/MC/AArch64/SVE/asrr.s +++ b/llvm/test/MC/AArch64/SVE/asrr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/bfcvt.s b/llvm/test/MC/AArch64/SVE/bfcvt.s index d370d2b7cccda0..3ae7028c36bad5 100644 --- a/llvm/test/MC/AArch64/SVE/bfcvt.s +++ b/llvm/test/MC/AArch64/SVE/bfcvt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+bf16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+bf16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt.s 
b/llvm/test/MC/AArch64/SVE/bfcvtnt.s index 8751b6f4ed2d8f..5f3b71e28b91e0 100644 --- a/llvm/test/MC/AArch64/SVE/bfcvtnt.s +++ b/llvm/test/MC/AArch64/SVE/bfcvtnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+bf16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+bf16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/bfdot.s b/llvm/test/MC/AArch64/SVE/bfdot.s index c2b48e6a9a48cf..407bd9c9631b6e 100644 --- a/llvm/test/MC/AArch64/SVE/bfdot.s +++ b/llvm/test/MC/AArch64/SVE/bfdot.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+bf16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+bf16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/bfmlal.s b/llvm/test/MC/AArch64/SVE/bfmlal.s index 4395fe89282477..b97fd2edc9890d 100644 --- a/llvm/test/MC/AArch64/SVE/bfmlal.s +++ b/llvm/test/MC/AArch64/SVE/bfmlal.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -o - -triple=aarch64 -show-encoding -mattr=+sve,+bf16 %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve,+bf16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+bf16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -o - -triple=aarch64 -show-encoding %s 2>&1 \ // 
RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/bic.s b/llvm/test/MC/AArch64/SVE/bic.s index af7db10944ceac..1be86b0c0f9130 100644 --- a/llvm/test/MC/AArch64/SVE/bic.s +++ b/llvm/test/MC/AArch64/SVE/bic.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/bics.s b/llvm/test/MC/AArch64/SVE/bics.s index 6605480cc473c4..f93e3ac43c4001 100644 --- a/llvm/test/MC/AArch64/SVE/bics.s +++ b/llvm/test/MC/AArch64/SVE/bics.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brka.s b/llvm/test/MC/AArch64/SVE/brka.s index 753299a860c44d..4bf66b8bba920a 100644 --- a/llvm/test/MC/AArch64/SVE/brka.s +++ b/llvm/test/MC/AArch64/SVE/brka.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 
2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkas.s b/llvm/test/MC/AArch64/SVE/brkas.s index 7de38c524c4586..7e9e464e7c4040 100644 --- a/llvm/test/MC/AArch64/SVE/brkas.s +++ b/llvm/test/MC/AArch64/SVE/brkas.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkb.s b/llvm/test/MC/AArch64/SVE/brkb.s index c28727dc609c0a..f6f471b699bd04 100644 --- a/llvm/test/MC/AArch64/SVE/brkb.s +++ b/llvm/test/MC/AArch64/SVE/brkb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkbs.s b/llvm/test/MC/AArch64/SVE/brkbs.s index d72c5960765174..c1aeb25bd904c4 100644 --- a/llvm/test/MC/AArch64/SVE/brkbs.s +++ b/llvm/test/MC/AArch64/SVE/brkbs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 
-show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkn.s b/llvm/test/MC/AArch64/SVE/brkn.s index 8f8095fc61619e..134cb3e0cbd9f0 100644 --- a/llvm/test/MC/AArch64/SVE/brkn.s +++ b/llvm/test/MC/AArch64/SVE/brkn.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkns.s b/llvm/test/MC/AArch64/SVE/brkns.s index c380e1c2ba39d1..a154d227463ba4 100644 --- a/llvm/test/MC/AArch64/SVE/brkns.s +++ b/llvm/test/MC/AArch64/SVE/brkns.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkpa.s b/llvm/test/MC/AArch64/SVE/brkpa.s index e384b805e0f4c4..3afa3ea042be3c 100644 --- a/llvm/test/MC/AArch64/SVE/brkpa.s +++ b/llvm/test/MC/AArch64/SVE/brkpa.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not 
llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkpas.s b/llvm/test/MC/AArch64/SVE/brkpas.s index 5cd7863fe36d0c..8aa413cd527df2 100644 --- a/llvm/test/MC/AArch64/SVE/brkpas.s +++ b/llvm/test/MC/AArch64/SVE/brkpas.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkpb.s b/llvm/test/MC/AArch64/SVE/brkpb.s index 5b0d3ec5acce56..8f4109a4ef3f85 100644 --- a/llvm/test/MC/AArch64/SVE/brkpb.s +++ b/llvm/test/MC/AArch64/SVE/brkpb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/brkpbs.s b/llvm/test/MC/AArch64/SVE/brkpbs.s index f7c9d408709fcb..153b6c59e63c98 100644 --- a/llvm/test/MC/AArch64/SVE/brkpbs.s +++ b/llvm/test/MC/AArch64/SVE/brkpbs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/clasta.s b/llvm/test/MC/AArch64/SVE/clasta.s index bed54441dc0c30..deb1ee9d754409 100644 --- a/llvm/test/MC/AArch64/SVE/clasta.s +++ b/llvm/test/MC/AArch64/SVE/clasta.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/clastb.s b/llvm/test/MC/AArch64/SVE/clastb.s index c564d1d100a4d5..a919ed7e4cea68 100644 --- a/llvm/test/MC/AArch64/SVE/clastb.s +++ b/llvm/test/MC/AArch64/SVE/clastb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cls.s b/llvm/test/MC/AArch64/SVE/cls.s index 127fb2a8678482..0c08556184b570 100644 --- a/llvm/test/MC/AArch64/SVE/cls.s +++ b/llvm/test/MC/AArch64/SVE/cls.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ 
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/clz.s b/llvm/test/MC/AArch64/SVE/clz.s index 07d1f9f4b33049..b81efce222a2f6 100644 --- a/llvm/test/MC/AArch64/SVE/clz.s +++ b/llvm/test/MC/AArch64/SVE/clz.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmpeq.s b/llvm/test/MC/AArch64/SVE/cmpeq.s index d5c79f8248a568..c9b1d3cb3cd383 100644 --- a/llvm/test/MC/AArch64/SVE/cmpeq.s +++ b/llvm/test/MC/AArch64/SVE/cmpeq.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmpge.s b/llvm/test/MC/AArch64/SVE/cmpge.s index 14fcfc40f2ac06..8b7990e4975935 100644 --- a/llvm/test/MC/AArch64/SVE/cmpge.s +++ b/llvm/test/MC/AArch64/SVE/cmpge.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmpgt.s b/llvm/test/MC/AArch64/SVE/cmpgt.s index 34366e2110e3c8..3f70d4b8d50191 100644 --- a/llvm/test/MC/AArch64/SVE/cmpgt.s +++ b/llvm/test/MC/AArch64/SVE/cmpgt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmphi.s b/llvm/test/MC/AArch64/SVE/cmphi.s index e2682d3b89ab39..618bdefa4f5cfc 100644 --- a/llvm/test/MC/AArch64/SVE/cmphi.s +++ b/llvm/test/MC/AArch64/SVE/cmphi.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmphs.s b/llvm/test/MC/AArch64/SVE/cmphs.s index bbe3ec323c8206..1082dc03b62a18 100644 --- a/llvm/test/MC/AArch64/SVE/cmphs.s +++ b/llvm/test/MC/AArch64/SVE/cmphs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmple.s b/llvm/test/MC/AArch64/SVE/cmple.s index 8de87478f04b21..0401809bfcfdac 100644 --- a/llvm/test/MC/AArch64/SVE/cmple.s +++ b/llvm/test/MC/AArch64/SVE/cmple.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmplo.s b/llvm/test/MC/AArch64/SVE/cmplo.s index 30a6120c021443..6a12c202001b36 100644 --- a/llvm/test/MC/AArch64/SVE/cmplo.s +++ b/llvm/test/MC/AArch64/SVE/cmplo.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmpls.s b/llvm/test/MC/AArch64/SVE/cmpls.s index 195a4099d2d267..4affbb45971836 100644 --- a/llvm/test/MC/AArch64/SVE/cmpls.s +++ b/llvm/test/MC/AArch64/SVE/cmpls.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < 
%s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmplt.s b/llvm/test/MC/AArch64/SVE/cmplt.s index 93af97cf23f622..5d5605de289246 100644 --- a/llvm/test/MC/AArch64/SVE/cmplt.s +++ b/llvm/test/MC/AArch64/SVE/cmplt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cmpne.s b/llvm/test/MC/AArch64/SVE/cmpne.s index ff7982e41df711..846ea71cb874db 100644 --- a/llvm/test/MC/AArch64/SVE/cmpne.s +++ b/llvm/test/MC/AArch64/SVE/cmpne.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cnot.s b/llvm/test/MC/AArch64/SVE/cnot.s index 29d1c4021be1bc..60521bcdcd3957 100644 --- a/llvm/test/MC/AArch64/SVE/cnot.s +++ b/llvm/test/MC/AArch64/SVE/cnot.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cnt.s b/llvm/test/MC/AArch64/SVE/cnt.s index c63436a1547e2c..7539a35bb0d2d9 100644 --- a/llvm/test/MC/AArch64/SVE/cnt.s +++ b/llvm/test/MC/AArch64/SVE/cnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cntb.s b/llvm/test/MC/AArch64/SVE/cntb.s index f1575f09099f64..22a8fd0e127b8f 100644 --- a/llvm/test/MC/AArch64/SVE/cntb.s +++ b/llvm/test/MC/AArch64/SVE/cntb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cntd.s b/llvm/test/MC/AArch64/SVE/cntd.s index f06544ba9ffa6e..e42aa406764922 100644 --- a/llvm/test/MC/AArch64/SVE/cntd.s +++ b/llvm/test/MC/AArch64/SVE/cntd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cnth.s b/llvm/test/MC/AArch64/SVE/cnth.s index e2ed0e237720f1..d11d88fdcbcda4 100644 --- a/llvm/test/MC/AArch64/SVE/cnth.s +++ b/llvm/test/MC/AArch64/SVE/cnth.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cntp.s b/llvm/test/MC/AArch64/SVE/cntp.s index 07abd09c3128c5..d9fd7ea1f5fef4 100644 --- a/llvm/test/MC/AArch64/SVE/cntp.s +++ b/llvm/test/MC/AArch64/SVE/cntp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/cntw.s b/llvm/test/MC/AArch64/SVE/cntw.s index 6a3d84e84b89e4..282fa45e9982de 100644 --- a/llvm/test/MC/AArch64/SVE/cntw.s +++ b/llvm/test/MC/AArch64/SVE/cntw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/compact.s b/llvm/test/MC/AArch64/SVE/compact.s index 77ebb11643bbc7..0ce3a0b1375def 100644 --- a/llvm/test/MC/AArch64/SVE/compact.s +++ b/llvm/test/MC/AArch64/SVE/compact.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/cpy.s b/llvm/test/MC/AArch64/SVE/cpy.s index a60ece8e188a3f..5095da117ef2d8 100644 --- a/llvm/test/MC/AArch64/SVE/cpy.s +++ b/llvm/test/MC/AArch64/SVE/cpy.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ctermeq.s b/llvm/test/MC/AArch64/SVE/ctermeq.s index a9da41cd58b77f..9e2f1ba25100df 100644 --- a/llvm/test/MC/AArch64/SVE/ctermeq.s +++ b/llvm/test/MC/AArch64/SVE/ctermeq.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ctermne.s b/llvm/test/MC/AArch64/SVE/ctermne.s index 35092ead6a8ecc..762925627ee06c 100644 --- a/llvm/test/MC/AArch64/SVE/ctermne.s +++ b/llvm/test/MC/AArch64/SVE/ctermne.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/decb.s b/llvm/test/MC/AArch64/SVE/decb.s index 02d12bb4f8b8e8..5d31df10df0ac5 100644 --- a/llvm/test/MC/AArch64/SVE/decb.s +++ b/llvm/test/MC/AArch64/SVE/decb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/decd.s b/llvm/test/MC/AArch64/SVE/decd.s index 6a1a2cd2c89e68..5510a1c31fea05 100644 --- a/llvm/test/MC/AArch64/SVE/decd.s +++ b/llvm/test/MC/AArch64/SVE/decd.s @@ -1,6 +1,6 @@ // RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/dech.s b/llvm/test/MC/AArch64/SVE/dech.s index f89ad118f525b6..20602876dc22c0 100644 --- a/llvm/test/MC/AArch64/SVE/dech.s +++ b/llvm/test/MC/AArch64/SVE/dech.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/decp.s b/llvm/test/MC/AArch64/SVE/decp.s index bae5fabbd351f8..dd01bafd2c51c0 100644 --- a/llvm/test/MC/AArch64/SVE/decp.s +++ b/llvm/test/MC/AArch64/SVE/decp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/decw.s b/llvm/test/MC/AArch64/SVE/decw.s index d064c39193b59e..0f336e00e76ba6 100644 --- a/llvm/test/MC/AArch64/SVE/decw.s +++ b/llvm/test/MC/AArch64/SVE/decw.s @@ -1,6 +1,6 
@@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/dup.s b/llvm/test/MC/AArch64/SVE/dup.s index dd7077e9d79071..b5207d22ab472e 100644 --- a/llvm/test/MC/AArch64/SVE/dup.s +++ b/llvm/test/MC/AArch64/SVE/dup.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/dupm.s b/llvm/test/MC/AArch64/SVE/dupm.s index ad4678ff656f08..01ef42651abfbe 100644 --- a/llvm/test/MC/AArch64/SVE/dupm.s +++ b/llvm/test/MC/AArch64/SVE/dupm.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/eon.s b/llvm/test/MC/AArch64/SVE/eon.s index cf66bb6deacd9e..7842dfcbd77d80 100644 --- a/llvm/test/MC/AArch64/SVE/eon.s +++ b/llvm/test/MC/AArch64/SVE/eon.s @@ -1,6 
+1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/eor.s b/llvm/test/MC/AArch64/SVE/eor.s index 5b38312b84fdc9..9827eddc00ba23 100644 --- a/llvm/test/MC/AArch64/SVE/eor.s +++ b/llvm/test/MC/AArch64/SVE/eor.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/eors.s b/llvm/test/MC/AArch64/SVE/eors.s index 4ec189c867bc80..97fd3c3025ce4f 100644 --- a/llvm/test/MC/AArch64/SVE/eors.s +++ b/llvm/test/MC/AArch64/SVE/eors.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/eorv.s b/llvm/test/MC/AArch64/SVE/eorv.s index 3a85be1bdf67bf..c76cf2d01a6c26 100644 --- a/llvm/test/MC/AArch64/SVE/eorv.s +++ b/llvm/test/MC/AArch64/SVE/eorv.s 
@@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ext.s b/llvm/test/MC/AArch64/SVE/ext.s index 733e5ee8ca7bda..d07a40497607c3 100644 --- a/llvm/test/MC/AArch64/SVE/ext.s +++ b/llvm/test/MC/AArch64/SVE/ext.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fabd.s b/llvm/test/MC/AArch64/SVE/fabd.s index d215cc4542e5ce..0d8586c9f7dd8b 100644 --- a/llvm/test/MC/AArch64/SVE/fabd.s +++ b/llvm/test/MC/AArch64/SVE/fabd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fabs.s b/llvm/test/MC/AArch64/SVE/fabs.s index c83db482d53778..2c55138587fb55 100644 --- a/llvm/test/MC/AArch64/SVE/fabs.s +++ 
b/llvm/test/MC/AArch64/SVE/fabs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/facge.s b/llvm/test/MC/AArch64/SVE/facge.s index e08deb567b3e51..b0d8f41fb09052 100644 --- a/llvm/test/MC/AArch64/SVE/facge.s +++ b/llvm/test/MC/AArch64/SVE/facge.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/facgt.s b/llvm/test/MC/AArch64/SVE/facgt.s index 4c2a01bbf61401..bcd842284681df 100644 --- a/llvm/test/MC/AArch64/SVE/facgt.s +++ b/llvm/test/MC/AArch64/SVE/facgt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/facle.s b/llvm/test/MC/AArch64/SVE/facle.s index a1b1340caf9a38..61e33b5805ae36 100644 --- 
a/llvm/test/MC/AArch64/SVE/facle.s +++ b/llvm/test/MC/AArch64/SVE/facle.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/faclt.s b/llvm/test/MC/AArch64/SVE/faclt.s index 7ba7e2cea92374..07e9aca0e8acfb 100644 --- a/llvm/test/MC/AArch64/SVE/faclt.s +++ b/llvm/test/MC/AArch64/SVE/faclt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fadd.s b/llvm/test/MC/AArch64/SVE/fadd.s index e44e4e9bf25fb9..46d0f0f5f35dc4 100644 --- a/llvm/test/MC/AArch64/SVE/fadd.s +++ b/llvm/test/MC/AArch64/SVE/fadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fadda.s b/llvm/test/MC/AArch64/SVE/fadda.s index 
92e0f61a751d6b..aa4c2f21106768 100644 --- a/llvm/test/MC/AArch64/SVE/fadda.s +++ b/llvm/test/MC/AArch64/SVE/fadda.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/faddv.s b/llvm/test/MC/AArch64/SVE/faddv.s index 9a6056d91889ec..777898773a5262 100644 --- a/llvm/test/MC/AArch64/SVE/faddv.s +++ b/llvm/test/MC/AArch64/SVE/faddv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcadd.s b/llvm/test/MC/AArch64/SVE/fcadd.s index d10888de89956f..d7a2661ae73185 100644 --- a/llvm/test/MC/AArch64/SVE/fcadd.s +++ b/llvm/test/MC/AArch64/SVE/fcadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmeq.s b/llvm/test/MC/AArch64/SVE/fcmeq.s index a523546141eb91..f7bc88619d2937 100644 --- a/llvm/test/MC/AArch64/SVE/fcmeq.s +++ b/llvm/test/MC/AArch64/SVE/fcmeq.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmge.s b/llvm/test/MC/AArch64/SVE/fcmge.s index 9500c79affbc0d..8fd6b74a6d67e0 100644 --- a/llvm/test/MC/AArch64/SVE/fcmge.s +++ b/llvm/test/MC/AArch64/SVE/fcmge.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmgt.s b/llvm/test/MC/AArch64/SVE/fcmgt.s index e352a5e17f0762..cd63009f5a21d0 100644 --- a/llvm/test/MC/AArch64/SVE/fcmgt.s +++ b/llvm/test/MC/AArch64/SVE/fcmgt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ 
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmla.s b/llvm/test/MC/AArch64/SVE/fcmla.s index e3952e1943bef8..b7adae1905ef99 100644 --- a/llvm/test/MC/AArch64/SVE/fcmla.s +++ b/llvm/test/MC/AArch64/SVE/fcmla.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmle.s b/llvm/test/MC/AArch64/SVE/fcmle.s index 95ed55a87f885a..66ac60a139debf 100644 --- a/llvm/test/MC/AArch64/SVE/fcmle.s +++ b/llvm/test/MC/AArch64/SVE/fcmle.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmlt.s b/llvm/test/MC/AArch64/SVE/fcmlt.s index a145b386aef04e..f0d8c84facea4e 100644 --- a/llvm/test/MC/AArch64/SVE/fcmlt.s +++ b/llvm/test/MC/AArch64/SVE/fcmlt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 
-show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmne.s b/llvm/test/MC/AArch64/SVE/fcmne.s index 355044df20fb30..a54d59c5ee810a 100644 --- a/llvm/test/MC/AArch64/SVE/fcmne.s +++ b/llvm/test/MC/AArch64/SVE/fcmne.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcmuo.s b/llvm/test/MC/AArch64/SVE/fcmuo.s index d75ab77c18ca8b..890949b3315cfe 100644 --- a/llvm/test/MC/AArch64/SVE/fcmuo.s +++ b/llvm/test/MC/AArch64/SVE/fcmuo.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcpy.s b/llvm/test/MC/AArch64/SVE/fcpy.s index f2c4776ccf5289..82cbbcbc817071 100644 --- a/llvm/test/MC/AArch64/SVE/fcpy.s +++ b/llvm/test/MC/AArch64/SVE/fcpy.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not 
llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcvt.s b/llvm/test/MC/AArch64/SVE/fcvt.s index d68d4f94945068..6aef58d8579bd8 100644 --- a/llvm/test/MC/AArch64/SVE/fcvt.s +++ b/llvm/test/MC/AArch64/SVE/fcvt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcvtzs.s b/llvm/test/MC/AArch64/SVE/fcvtzs.s index 84ad36694ce809..70f95e020da21c 100644 --- a/llvm/test/MC/AArch64/SVE/fcvtzs.s +++ b/llvm/test/MC/AArch64/SVE/fcvtzs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fcvtzu.s b/llvm/test/MC/AArch64/SVE/fcvtzu.s index bcab336eb3f8e0..324febd7c4ec22 100644 --- a/llvm/test/MC/AArch64/SVE/fcvtzu.s +++ b/llvm/test/MC/AArch64/SVE/fcvtzu.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fdiv.s b/llvm/test/MC/AArch64/SVE/fdiv.s index 8b137bdeb7b6c9..3a85d98d9602ce 100644 --- a/llvm/test/MC/AArch64/SVE/fdiv.s +++ b/llvm/test/MC/AArch64/SVE/fdiv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fdivr.s b/llvm/test/MC/AArch64/SVE/fdivr.s index 864e46271710b0..209c8568b1ea34 100644 --- a/llvm/test/MC/AArch64/SVE/fdivr.s +++ b/llvm/test/MC/AArch64/SVE/fdivr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fdup.s b/llvm/test/MC/AArch64/SVE/fdup.s index 7e68f5aaf72b88..5940000a4b3b0b 100644 --- a/llvm/test/MC/AArch64/SVE/fdup.s +++ b/llvm/test/MC/AArch64/SVE/fdup.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: 
| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fexpa.s b/llvm/test/MC/AArch64/SVE/fexpa.s index e2d9c1883867c5..02cd159ab7a3dc 100644 --- a/llvm/test/MC/AArch64/SVE/fexpa.s +++ b/llvm/test/MC/AArch64/SVE/fexpa.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/fmad.s b/llvm/test/MC/AArch64/SVE/fmad.s index 3b8bdedc378d48..460b1e1ef18a65 100644 --- a/llvm/test/MC/AArch64/SVE/fmad.s +++ b/llvm/test/MC/AArch64/SVE/fmad.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmax.s b/llvm/test/MC/AArch64/SVE/fmax.s index be2f012f40d9f3..39be0865706f5c 100644 --- a/llvm/test/MC/AArch64/SVE/fmax.s +++ b/llvm/test/MC/AArch64/SVE/fmax.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmaxnm.s b/llvm/test/MC/AArch64/SVE/fmaxnm.s index f5b10bf20c94c2..e96eec5706a600 100644 --- a/llvm/test/MC/AArch64/SVE/fmaxnm.s +++ b/llvm/test/MC/AArch64/SVE/fmaxnm.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmaxnmv.s b/llvm/test/MC/AArch64/SVE/fmaxnmv.s index 92f4fd7294a847..0ce176af2165dc 100644 --- a/llvm/test/MC/AArch64/SVE/fmaxnmv.s +++ b/llvm/test/MC/AArch64/SVE/fmaxnmv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmaxv.s b/llvm/test/MC/AArch64/SVE/fmaxv.s index 09ea407e8b2aef..7d700cc4eb526d 100644 --- a/llvm/test/MC/AArch64/SVE/fmaxv.s +++ b/llvm/test/MC/AArch64/SVE/fmaxv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmin.s b/llvm/test/MC/AArch64/SVE/fmin.s index 16717fd704a420..c238a242bff4bf 100644 --- a/llvm/test/MC/AArch64/SVE/fmin.s +++ b/llvm/test/MC/AArch64/SVE/fmin.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fminnm.s b/llvm/test/MC/AArch64/SVE/fminnm.s index 968a4d3cf00a68..62844267ad47da 100644 --- a/llvm/test/MC/AArch64/SVE/fminnm.s +++ b/llvm/test/MC/AArch64/SVE/fminnm.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fminnmv.s b/llvm/test/MC/AArch64/SVE/fminnmv.s index 0c152446bb109a..a6aa3ac16ff4eb 100644 --- a/llvm/test/MC/AArch64/SVE/fminnmv.s +++ b/llvm/test/MC/AArch64/SVE/fminnmv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fminv.s b/llvm/test/MC/AArch64/SVE/fminv.s index 67079ebee72010..df364f44f9071e 100644 --- a/llvm/test/MC/AArch64/SVE/fminv.s +++ b/llvm/test/MC/AArch64/SVE/fminv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmla.s b/llvm/test/MC/AArch64/SVE/fmla.s index 17f32f4b5c4002..44c52dee488137 100644 --- a/llvm/test/MC/AArch64/SVE/fmla.s +++ b/llvm/test/MC/AArch64/SVE/fmla.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmls.s b/llvm/test/MC/AArch64/SVE/fmls.s index 3725ee1a4cf971..a8077ab50e6761 100644 --- a/llvm/test/MC/AArch64/SVE/fmls.s +++ b/llvm/test/MC/AArch64/SVE/fmls.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: 
| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmov.s b/llvm/test/MC/AArch64/SVE/fmov.s index 41453acab3813c..e4f23ae723c07c 100644 --- a/llvm/test/MC/AArch64/SVE/fmov.s +++ b/llvm/test/MC/AArch64/SVE/fmov.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmsb.s b/llvm/test/MC/AArch64/SVE/fmsb.s index 58413f2387a2fa..5999f24116efe4 100644 --- a/llvm/test/MC/AArch64/SVE/fmsb.s +++ b/llvm/test/MC/AArch64/SVE/fmsb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmul.s b/llvm/test/MC/AArch64/SVE/fmul.s index 9d5c7ff683ecc0..9a098ce08ba69b 100644 --- a/llvm/test/MC/AArch64/SVE/fmul.s +++ b/llvm/test/MC/AArch64/SVE/fmul.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s 
\ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fmulx.s b/llvm/test/MC/AArch64/SVE/fmulx.s index e2f5a008959762..16ada6d6d15e8d 100644 --- a/llvm/test/MC/AArch64/SVE/fmulx.s +++ b/llvm/test/MC/AArch64/SVE/fmulx.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fneg.s b/llvm/test/MC/AArch64/SVE/fneg.s index 96b4ebd2fbddfc..16a0614adf91a9 100644 --- a/llvm/test/MC/AArch64/SVE/fneg.s +++ b/llvm/test/MC/AArch64/SVE/fneg.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fnmad.s b/llvm/test/MC/AArch64/SVE/fnmad.s index c2efaf73a44566..8e0cb93dce83c5 100644 --- a/llvm/test/MC/AArch64/SVE/fnmad.s +++ b/llvm/test/MC/AArch64/SVE/fnmad.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fnmla.s b/llvm/test/MC/AArch64/SVE/fnmla.s index 5dc4da4960fa13..a3ad56842e13f4 100644 --- a/llvm/test/MC/AArch64/SVE/fnmla.s +++ b/llvm/test/MC/AArch64/SVE/fnmla.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fnmls.s b/llvm/test/MC/AArch64/SVE/fnmls.s index 09b53ae18d4089..8b7643ac69b8cf 100644 --- a/llvm/test/MC/AArch64/SVE/fnmls.s +++ b/llvm/test/MC/AArch64/SVE/fnmls.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fnmsb.s b/llvm/test/MC/AArch64/SVE/fnmsb.s index c114e1e646820d..70aaa332618286 100644 --- a/llvm/test/MC/AArch64/SVE/fnmsb.s +++ b/llvm/test/MC/AArch64/SVE/fnmsb.s @@ -1,6 +1,6 @@ // RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frecpe.s b/llvm/test/MC/AArch64/SVE/frecpe.s index 61883653cfd969..201ef50e7005cd 100644 --- a/llvm/test/MC/AArch64/SVE/frecpe.s +++ b/llvm/test/MC/AArch64/SVE/frecpe.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frecps.s b/llvm/test/MC/AArch64/SVE/frecps.s index 97ac6d1fa88351..6260073aff2857 100644 --- a/llvm/test/MC/AArch64/SVE/frecps.s +++ b/llvm/test/MC/AArch64/SVE/frecps.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frecpx.s b/llvm/test/MC/AArch64/SVE/frecpx.s index 66c717ec005c61..ce57bf8ec4ea8a 100644 --- a/llvm/test/MC/AArch64/SVE/frecpx.s +++ 
b/llvm/test/MC/AArch64/SVE/frecpx.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frinta.s b/llvm/test/MC/AArch64/SVE/frinta.s index e30b83f417b307..ac4ba259d97414 100644 --- a/llvm/test/MC/AArch64/SVE/frinta.s +++ b/llvm/test/MC/AArch64/SVE/frinta.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frinti.s b/llvm/test/MC/AArch64/SVE/frinti.s index 82392b67abb8c5..ec1775588c52db 100644 --- a/llvm/test/MC/AArch64/SVE/frinti.s +++ b/llvm/test/MC/AArch64/SVE/frinti.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frintm.s b/llvm/test/MC/AArch64/SVE/frintm.s index b5271fc52f1110..3ea992d3292b68 100644 --- 
a/llvm/test/MC/AArch64/SVE/frintm.s +++ b/llvm/test/MC/AArch64/SVE/frintm.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frintn.s b/llvm/test/MC/AArch64/SVE/frintn.s index 47666251ebff49..5efced437e2224 100644 --- a/llvm/test/MC/AArch64/SVE/frintn.s +++ b/llvm/test/MC/AArch64/SVE/frintn.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frintp.s b/llvm/test/MC/AArch64/SVE/frintp.s index 06da956fcc1b44..10c4c276b85c15 100644 --- a/llvm/test/MC/AArch64/SVE/frintp.s +++ b/llvm/test/MC/AArch64/SVE/frintp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frintx.s b/llvm/test/MC/AArch64/SVE/frintx.s index 
e0099ed03f875c..bdac6dac85bdb0 100644 --- a/llvm/test/MC/AArch64/SVE/frintx.s +++ b/llvm/test/MC/AArch64/SVE/frintx.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frintz.s b/llvm/test/MC/AArch64/SVE/frintz.s index 35e6df4c39c8d4..edb1b7d8c09c31 100644 --- a/llvm/test/MC/AArch64/SVE/frintz.s +++ b/llvm/test/MC/AArch64/SVE/frintz.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frsqrte.s b/llvm/test/MC/AArch64/SVE/frsqrte.s index 49a81f4419a2d9..305e67985fe369 100644 --- a/llvm/test/MC/AArch64/SVE/frsqrte.s +++ b/llvm/test/MC/AArch64/SVE/frsqrte.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/frsqrts.s 
b/llvm/test/MC/AArch64/SVE/frsqrts.s index 2064fc330ac80d..84565e507d7c85 100644 --- a/llvm/test/MC/AArch64/SVE/frsqrts.s +++ b/llvm/test/MC/AArch64/SVE/frsqrts.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fscale.s b/llvm/test/MC/AArch64/SVE/fscale.s index 68f24d75a7e869..62ec9acd470c97 100644 --- a/llvm/test/MC/AArch64/SVE/fscale.s +++ b/llvm/test/MC/AArch64/SVE/fscale.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fsqrt.s b/llvm/test/MC/AArch64/SVE/fsqrt.s index 235bd63af45229..fa284d09288838 100644 --- a/llvm/test/MC/AArch64/SVE/fsqrt.s +++ b/llvm/test/MC/AArch64/SVE/fsqrt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE/fsub.s b/llvm/test/MC/AArch64/SVE/fsub.s index 51daccc437fad4..144ceb99f3976e 100644 --- a/llvm/test/MC/AArch64/SVE/fsub.s +++ b/llvm/test/MC/AArch64/SVE/fsub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/fsubr.s b/llvm/test/MC/AArch64/SVE/fsubr.s index 975a9285c92e0b..3437dd1b3d2683 100644 --- a/llvm/test/MC/AArch64/SVE/fsubr.s +++ b/llvm/test/MC/AArch64/SVE/fsubr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ftsmul.s b/llvm/test/MC/AArch64/SVE/ftsmul.s index 5af0438076edcf..a4ef17c41f6386 100644 --- a/llvm/test/MC/AArch64/SVE/ftsmul.s +++ b/llvm/test/MC/AArch64/SVE/ftsmul.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj 
-mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ftssel.s b/llvm/test/MC/AArch64/SVE/ftssel.s index c463c035eb0ea4..6db707684013fb 100644 --- a/llvm/test/MC/AArch64/SVE/ftssel.s +++ b/llvm/test/MC/AArch64/SVE/ftssel.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/incb.s b/llvm/test/MC/AArch64/SVE/incb.s index 6dbdb8b2ac91ce..0e768fa4f03c16 100644 --- a/llvm/test/MC/AArch64/SVE/incb.s +++ b/llvm/test/MC/AArch64/SVE/incb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/incd.s b/llvm/test/MC/AArch64/SVE/incd.s index 5275ed749947a9..2e98d98b21b391 100644 --- a/llvm/test/MC/AArch64/SVE/incd.s +++ b/llvm/test/MC/AArch64/SVE/incd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/inch.s b/llvm/test/MC/AArch64/SVE/inch.s index 6032830045a3d5..9537c417911a00 100644 --- a/llvm/test/MC/AArch64/SVE/inch.s +++ b/llvm/test/MC/AArch64/SVE/inch.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/incp.s b/llvm/test/MC/AArch64/SVE/incp.s index 902befe4f03d82..47c3e26c57cd2c 100644 --- a/llvm/test/MC/AArch64/SVE/incp.s +++ b/llvm/test/MC/AArch64/SVE/incp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/incw.s b/llvm/test/MC/AArch64/SVE/incw.s index c4514f369fcc8a..815449ce4405e7 100644 --- a/llvm/test/MC/AArch64/SVE/incw.s +++ b/llvm/test/MC/AArch64/SVE/incw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/index.s b/llvm/test/MC/AArch64/SVE/index.s index f4aed61b328636..ec49fb9a3f2932 100644 --- a/llvm/test/MC/AArch64/SVE/index.s +++ b/llvm/test/MC/AArch64/SVE/index.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/insr.s b/llvm/test/MC/AArch64/SVE/insr.s index 37b00db8cdd9f9..1211ebaf3545b3 100644 --- a/llvm/test/MC/AArch64/SVE/insr.s +++ b/llvm/test/MC/AArch64/SVE/insr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/lasta.s b/llvm/test/MC/AArch64/SVE/lasta.s index d6beda10b8b10b..f8087a973202ab 100644 --- a/llvm/test/MC/AArch64/SVE/lasta.s +++ b/llvm/test/MC/AArch64/SVE/lasta.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 
\ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/lastb.s b/llvm/test/MC/AArch64/SVE/lastb.s index 7f581fcb42c4c4..12cbccdf399297 100644 --- a/llvm/test/MC/AArch64/SVE/lastb.s +++ b/llvm/test/MC/AArch64/SVE/lastb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1b-sve-only.s b/llvm/test/MC/AArch64/SVE/ld1b-sve-only.s index 2e6dd362d224fe..9e2f0b0a202338 100644 --- a/llvm/test/MC/AArch64/SVE/ld1b-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/ld1b-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ld1b.s b/llvm/test/MC/AArch64/SVE/ld1b.s index cd07e49c2fa1b2..8d401c3d0378d4 100644 --- a/llvm/test/MC/AArch64/SVE/ld1b.s +++ b/llvm/test/MC/AArch64/SVE/ld1b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: 
| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1d-sve-only.s b/llvm/test/MC/AArch64/SVE/ld1d-sve-only.s index 667ba2b98a095a..7d6844f0df2fc4 100644 --- a/llvm/test/MC/AArch64/SVE/ld1d-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/ld1d-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ld1d.s b/llvm/test/MC/AArch64/SVE/ld1d.s index e3ce674fc0bb13..b58d6014cec4b1 100644 --- a/llvm/test/MC/AArch64/SVE/ld1d.s +++ b/llvm/test/MC/AArch64/SVE/ld1d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1h-sve-only.s b/llvm/test/MC/AArch64/SVE/ld1h-sve-only.s index 16215c768e57ea..22f09b45ad3699 100644 --- 
a/llvm/test/MC/AArch64/SVE/ld1h-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/ld1h-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ld1h.s b/llvm/test/MC/AArch64/SVE/ld1h.s index 45cfabfb84f91f..94d6368135126a 100644 --- a/llvm/test/MC/AArch64/SVE/ld1h.s +++ b/llvm/test/MC/AArch64/SVE/ld1h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rb.s b/llvm/test/MC/AArch64/SVE/ld1rb.s index 536b9841bf3c0f..c2e61912435cee 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rb.s +++ b/llvm/test/MC/AArch64/SVE/ld1rb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE/ld1rd.s b/llvm/test/MC/AArch64/SVE/ld1rd.s index 9a32d2dc6e7742..4b01c69f2c1925 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rd.s +++ b/llvm/test/MC/AArch64/SVE/ld1rd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rh.s b/llvm/test/MC/AArch64/SVE/ld1rh.s index 9d5544e6c364f0..acc3385af28dbe 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rh.s +++ b/llvm/test/MC/AArch64/SVE/ld1rh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rqb.s b/llvm/test/MC/AArch64/SVE/ld1rqb.s index 9ed90e8bdaa2d0..9e51902fe97eac 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rqb.s +++ b/llvm/test/MC/AArch64/SVE/ld1rqb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rqd.s b/llvm/test/MC/AArch64/SVE/ld1rqd.s index f9a1abe0df8ef9..e73ffaae7b243c 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rqd.s +++ b/llvm/test/MC/AArch64/SVE/ld1rqd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rqh.s b/llvm/test/MC/AArch64/SVE/ld1rqh.s index 87fb94a0154f23..5d7e5d9f9cf7aa 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rqh.s +++ b/llvm/test/MC/AArch64/SVE/ld1rqh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rqw.s b/llvm/test/MC/AArch64/SVE/ld1rqw.s index ed2d25da5a8cdc..498d31a0d2f9ae 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rqw.s +++ b/llvm/test/MC/AArch64/SVE/ld1rqw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding 
< %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rsb.s b/llvm/test/MC/AArch64/SVE/ld1rsb.s index 923869afacd08c..2b36d9e0d2fd6d 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rsb.s +++ b/llvm/test/MC/AArch64/SVE/ld1rsb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rsh.s b/llvm/test/MC/AArch64/SVE/ld1rsh.s index 5f5ca153691ba2..239c84fc85ca96 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rsh.s +++ b/llvm/test/MC/AArch64/SVE/ld1rsh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rsw.s b/llvm/test/MC/AArch64/SVE/ld1rsw.s index c7f9be9cfbafd6..cdb125bfe44e65 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rsw.s +++ b/llvm/test/MC/AArch64/SVE/ld1rsw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not 
llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1rw.s b/llvm/test/MC/AArch64/SVE/ld1rw.s index f632f6cb5e0944..a58e6a77975357 100644 --- a/llvm/test/MC/AArch64/SVE/ld1rw.s +++ b/llvm/test/MC/AArch64/SVE/ld1rw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1sb-sve-only.s b/llvm/test/MC/AArch64/SVE/ld1sb-sve-only.s index d37c3a126f8b17..1c29c373bd522a 100644 --- a/llvm/test/MC/AArch64/SVE/ld1sb-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/ld1sb-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ld1sb.s b/llvm/test/MC/AArch64/SVE/ld1sb.s index 9f1f47654c5ce4..2c32742dbff071 100644 --- a/llvm/test/MC/AArch64/SVE/ld1sb.s +++ b/llvm/test/MC/AArch64/SVE/ld1sb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// 
RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1sh-sve-only.s b/llvm/test/MC/AArch64/SVE/ld1sh-sve-only.s index 6ff35165dc1958..403ff85a2367cb 100644 --- a/llvm/test/MC/AArch64/SVE/ld1sh-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/ld1sh-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ld1sh.s b/llvm/test/MC/AArch64/SVE/ld1sh.s index 872e03fe10b1ed..78474e0337f67c 100644 --- a/llvm/test/MC/AArch64/SVE/ld1sh.s +++ b/llvm/test/MC/AArch64/SVE/ld1sh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1sw-sve-only.s b/llvm/test/MC/AArch64/SVE/ld1sw-sve-only.s index 8da9f90a0695f8..81b5eacd907b2c 100644 --- a/llvm/test/MC/AArch64/SVE/ld1sw-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/ld1sw-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ld1sw.s b/llvm/test/MC/AArch64/SVE/ld1sw.s index 645f6b4fed8a07..5e59edef66bfdc 100644 --- a/llvm/test/MC/AArch64/SVE/ld1sw.s +++ b/llvm/test/MC/AArch64/SVE/ld1sw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld1w-sve-only.s b/llvm/test/MC/AArch64/SVE/ld1w-sve-only.s index 7d683177ffe7d8..d78d54705c645a 100644 --- a/llvm/test/MC/AArch64/SVE/ld1w-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/ld1w-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git 
a/llvm/test/MC/AArch64/SVE/ld1w.s b/llvm/test/MC/AArch64/SVE/ld1w.s index 8f82b3a433fcf4..31948666501046 100644 --- a/llvm/test/MC/AArch64/SVE/ld1w.s +++ b/llvm/test/MC/AArch64/SVE/ld1w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld2b.s b/llvm/test/MC/AArch64/SVE/ld2b.s index a756b4afb55973..b12d904652d7f9 100644 --- a/llvm/test/MC/AArch64/SVE/ld2b.s +++ b/llvm/test/MC/AArch64/SVE/ld2b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld2d.s b/llvm/test/MC/AArch64/SVE/ld2d.s index 6142a3a664b11c..fcf8bbf39af403 100644 --- a/llvm/test/MC/AArch64/SVE/ld2d.s +++ b/llvm/test/MC/AArch64/SVE/ld2d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR 
diff --git a/llvm/test/MC/AArch64/SVE/ld2h.s b/llvm/test/MC/AArch64/SVE/ld2h.s index f92efb7099235b..af6c74bedbc014 100644 --- a/llvm/test/MC/AArch64/SVE/ld2h.s +++ b/llvm/test/MC/AArch64/SVE/ld2h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld2w.s b/llvm/test/MC/AArch64/SVE/ld2w.s index 355071df8b53ac..db74d2e3847c06 100644 --- a/llvm/test/MC/AArch64/SVE/ld2w.s +++ b/llvm/test/MC/AArch64/SVE/ld2w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld3b.s b/llvm/test/MC/AArch64/SVE/ld3b.s index b16e7b0e3e60ed..610b69b3c45b4c 100644 --- a/llvm/test/MC/AArch64/SVE/ld3b.s +++ b/llvm/test/MC/AArch64/SVE/ld3b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld3d.s b/llvm/test/MC/AArch64/SVE/ld3d.s index 7840a3e0f8e4aa..1400c308ae11ae 100644 --- a/llvm/test/MC/AArch64/SVE/ld3d.s +++ b/llvm/test/MC/AArch64/SVE/ld3d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld3h.s b/llvm/test/MC/AArch64/SVE/ld3h.s index 5304481ebc5150..820a7fac2366c4 100644 --- a/llvm/test/MC/AArch64/SVE/ld3h.s +++ b/llvm/test/MC/AArch64/SVE/ld3h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld3w.s b/llvm/test/MC/AArch64/SVE/ld3w.s index 46d514ab95aae8..1fca86d72f9e40 100644 --- a/llvm/test/MC/AArch64/SVE/ld3w.s +++ b/llvm/test/MC/AArch64/SVE/ld3w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | 
FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld4b.s b/llvm/test/MC/AArch64/SVE/ld4b.s index 768d3668829885..fd9668a8456087 100644 --- a/llvm/test/MC/AArch64/SVE/ld4b.s +++ b/llvm/test/MC/AArch64/SVE/ld4b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld4d.s b/llvm/test/MC/AArch64/SVE/ld4d.s index 9e077d78613fb1..181dfb43069397 100644 --- a/llvm/test/MC/AArch64/SVE/ld4d.s +++ b/llvm/test/MC/AArch64/SVE/ld4d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld4h.s b/llvm/test/MC/AArch64/SVE/ld4h.s index db73e6c825f1b6..14a20b75a090cc 100644 --- a/llvm/test/MC/AArch64/SVE/ld4h.s +++ b/llvm/test/MC/AArch64/SVE/ld4h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ 
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ld4w.s b/llvm/test/MC/AArch64/SVE/ld4w.s index 130309c44f8d70..af584d24848078 100644 --- a/llvm/test/MC/AArch64/SVE/ld4w.s +++ b/llvm/test/MC/AArch64/SVE/ld4w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ldff1b.s b/llvm/test/MC/AArch64/SVE/ldff1b.s index a69edaf4202f33..d3195604d3935e 100644 --- a/llvm/test/MC/AArch64/SVE/ldff1b.s +++ b/llvm/test/MC/AArch64/SVE/ldff1b.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldff1d.s b/llvm/test/MC/AArch64/SVE/ldff1d.s index 38cd6d55ec2895..0b90f307edd6fb 100644 --- a/llvm/test/MC/AArch64/SVE/ldff1d.s +++ b/llvm/test/MC/AArch64/SVE/ldff1d.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldff1h.s b/llvm/test/MC/AArch64/SVE/ldff1h.s index 0901a5d194f2d9..63ad62eddd00d3 100644 --- a/llvm/test/MC/AArch64/SVE/ldff1h.s +++ b/llvm/test/MC/AArch64/SVE/ldff1h.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldff1sb.s b/llvm/test/MC/AArch64/SVE/ldff1sb.s index 9b2b92c4a2f2b1..dbc3c9dd88744c 100644 --- a/llvm/test/MC/AArch64/SVE/ldff1sb.s +++ b/llvm/test/MC/AArch64/SVE/ldff1sb.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldff1sh.s b/llvm/test/MC/AArch64/SVE/ldff1sh.s index 6ac8c0eba1f62b..640ccc491bc800 100644 --- a/llvm/test/MC/AArch64/SVE/ldff1sh.s +++ b/llvm/test/MC/AArch64/SVE/ldff1sh.s @@ -2,7 
+2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldff1sw.s b/llvm/test/MC/AArch64/SVE/ldff1sw.s index 1ffbc41c8796f4..36246c06b8d62d 100644 --- a/llvm/test/MC/AArch64/SVE/ldff1sw.s +++ b/llvm/test/MC/AArch64/SVE/ldff1sw.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldff1w.s b/llvm/test/MC/AArch64/SVE/ldff1w.s index b220e29f0f86ae..f6bbecdcdfc910 100644 --- a/llvm/test/MC/AArch64/SVE/ldff1w.s +++ b/llvm/test/MC/AArch64/SVE/ldff1w.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | 
llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnf1b.s b/llvm/test/MC/AArch64/SVE/ldnf1b.s index d6c0802b21856a..272f1f5c37d7a1 100644 --- a/llvm/test/MC/AArch64/SVE/ldnf1b.s +++ b/llvm/test/MC/AArch64/SVE/ldnf1b.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnf1d.s b/llvm/test/MC/AArch64/SVE/ldnf1d.s index da731793e2c0a6..82082bd316f65b 100644 --- a/llvm/test/MC/AArch64/SVE/ldnf1d.s +++ b/llvm/test/MC/AArch64/SVE/ldnf1d.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnf1h.s b/llvm/test/MC/AArch64/SVE/ldnf1h.s index ea57a3328a6626..6a9442470a09f7 100644 --- a/llvm/test/MC/AArch64/SVE/ldnf1h.s +++ b/llvm/test/MC/AArch64/SVE/ldnf1h.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// 
RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnf1sb.s b/llvm/test/MC/AArch64/SVE/ldnf1sb.s index 1f5ca72a94b4b6..391e05648183d2 100644 --- a/llvm/test/MC/AArch64/SVE/ldnf1sb.s +++ b/llvm/test/MC/AArch64/SVE/ldnf1sb.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnf1sh.s b/llvm/test/MC/AArch64/SVE/ldnf1sh.s index 19d61de9271cf1..afbfce265a89a0 100644 --- a/llvm/test/MC/AArch64/SVE/ldnf1sh.s +++ b/llvm/test/MC/AArch64/SVE/ldnf1sh.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnf1sw.s b/llvm/test/MC/AArch64/SVE/ldnf1sw.s index 
6291b2b69db67c..5b43521853661c 100644 --- a/llvm/test/MC/AArch64/SVE/ldnf1sw.s +++ b/llvm/test/MC/AArch64/SVE/ldnf1sw.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnf1w.s b/llvm/test/MC/AArch64/SVE/ldnf1w.s index 3e903049044404..98420219af34ef 100644 --- a/llvm/test/MC/AArch64/SVE/ldnf1w.s +++ b/llvm/test/MC/AArch64/SVE/ldnf1w.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/ldnt1b.s b/llvm/test/MC/AArch64/SVE/ldnt1b.s index 55fd4139198f2f..e9254e705743c3 100644 --- a/llvm/test/MC/AArch64/SVE/ldnt1b.s +++ b/llvm/test/MC/AArch64/SVE/ldnt1b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST 
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ldnt1d.s b/llvm/test/MC/AArch64/SVE/ldnt1d.s index 627bec99170720..0d9e920a960a6e 100644 --- a/llvm/test/MC/AArch64/SVE/ldnt1d.s +++ b/llvm/test/MC/AArch64/SVE/ldnt1d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ldnt1h.s b/llvm/test/MC/AArch64/SVE/ldnt1h.s index 51e597ae1ed44c..9f41fc2613b747 100644 --- a/llvm/test/MC/AArch64/SVE/ldnt1h.s +++ b/llvm/test/MC/AArch64/SVE/ldnt1h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ldnt1w.s b/llvm/test/MC/AArch64/SVE/ldnt1w.s index be21f828ca7631..612164e0d95f15 100644 --- a/llvm/test/MC/AArch64/SVE/ldnt1w.s +++ b/llvm/test/MC/AArch64/SVE/ldnt1w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ldr.s b/llvm/test/MC/AArch64/SVE/ldr.s index 857e9f37c6fc44..5ef8b34f5bb80f 100644 --- a/llvm/test/MC/AArch64/SVE/ldr.s +++ b/llvm/test/MC/AArch64/SVE/ldr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/lsl.s b/llvm/test/MC/AArch64/SVE/lsl.s index 3befa0bd5c1640..68ec9b88fd32e9 100644 --- a/llvm/test/MC/AArch64/SVE/lsl.s +++ b/llvm/test/MC/AArch64/SVE/lsl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/lslr.s b/llvm/test/MC/AArch64/SVE/lslr.s index 5fbb8d62f7d11f..508aa630e50bab 100644 --- a/llvm/test/MC/AArch64/SVE/lslr.s +++ b/llvm/test/MC/AArch64/SVE/lslr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck 
%s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/lsr.s b/llvm/test/MC/AArch64/SVE/lsr.s index d0edca12d55176..32fe2641c58b11 100644 --- a/llvm/test/MC/AArch64/SVE/lsr.s +++ b/llvm/test/MC/AArch64/SVE/lsr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/lsrr.s b/llvm/test/MC/AArch64/SVE/lsrr.s index b5d4fca5382baa..3954943664aa5b 100644 --- a/llvm/test/MC/AArch64/SVE/lsrr.s +++ b/llvm/test/MC/AArch64/SVE/lsrr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/mad.s b/llvm/test/MC/AArch64/SVE/mad.s index a92e26262b6870..ba8add576120bc 100644 --- a/llvm/test/MC/AArch64/SVE/mad.s +++ b/llvm/test/MC/AArch64/SVE/mad.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp32.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp32.s index e3a79381b10bb8..201b786893a972 100644 --- a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp32.s +++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp32.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+f32mm < %s \ // RUN: | llvm-objdump -d --mattr=+sve,+f32mm - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/mla.s b/llvm/test/MC/AArch64/SVE/mla.s index 5d21bb92b3402f..cfa27f92828b39 100644 --- a/llvm/test/MC/AArch64/SVE/mla.s +++ b/llvm/test/MC/AArch64/SVE/mla.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/mls.s b/llvm/test/MC/AArch64/SVE/mls.s index 3068c2da049cd4..5ba2fd0f3dfaa6 100644 --- a/llvm/test/MC/AArch64/SVE/mls.s +++ b/llvm/test/MC/AArch64/SVE/mls.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/mov.s b/llvm/test/MC/AArch64/SVE/mov.s index f6af81375d9d88..f6fd154ec8b8e3 100644 --- a/llvm/test/MC/AArch64/SVE/mov.s +++ b/llvm/test/MC/AArch64/SVE/mov.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/movprfx.s b/llvm/test/MC/AArch64/SVE/movprfx.s index 52111b6f1073ae..6cf93be78003e4 100644 --- a/llvm/test/MC/AArch64/SVE/movprfx.s +++ b/llvm/test/MC/AArch64/SVE/movprfx.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/movs.s b/llvm/test/MC/AArch64/SVE/movs.s index 465ae9d4f53226..fabc2ff409df74 100644 --- a/llvm/test/MC/AArch64/SVE/movs.s +++ b/llvm/test/MC/AArch64/SVE/movs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // 
RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/msb.s b/llvm/test/MC/AArch64/SVE/msb.s index 9a3179d80e910d..afa6a2d4fd0bdc 100644 --- a/llvm/test/MC/AArch64/SVE/msb.s +++ b/llvm/test/MC/AArch64/SVE/msb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/mul.s b/llvm/test/MC/AArch64/SVE/mul.s index 77ee314c177015..ce5692adafe14e 100644 --- a/llvm/test/MC/AArch64/SVE/mul.s +++ b/llvm/test/MC/AArch64/SVE/mul.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/nand.s b/llvm/test/MC/AArch64/SVE/nand.s index 0e6cef662a6fe7..66b54e65389ce2 100644 --- a/llvm/test/MC/AArch64/SVE/nand.s +++ b/llvm/test/MC/AArch64/SVE/nand.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ 
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/nands.s b/llvm/test/MC/AArch64/SVE/nands.s index 0b80f861ef980e..c1750840e5abc2 100644 --- a/llvm/test/MC/AArch64/SVE/nands.s +++ b/llvm/test/MC/AArch64/SVE/nands.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/neg.s b/llvm/test/MC/AArch64/SVE/neg.s index b69bb5d22d8ad0..3e17d1a17426e3 100644 --- a/llvm/test/MC/AArch64/SVE/neg.s +++ b/llvm/test/MC/AArch64/SVE/neg.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/nor.s b/llvm/test/MC/AArch64/SVE/nor.s index 70d6a67c7b0e19..eb7b12690c66fc 100644 --- a/llvm/test/MC/AArch64/SVE/nor.s +++ b/llvm/test/MC/AArch64/SVE/nor.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 
< %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/nors.s b/llvm/test/MC/AArch64/SVE/nors.s index 2551b30e77216d..9bdaf9f206b8b2 100644 --- a/llvm/test/MC/AArch64/SVE/nors.s +++ b/llvm/test/MC/AArch64/SVE/nors.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/not.s b/llvm/test/MC/AArch64/SVE/not.s index 0fc7b368811721..2e9bd0dcf71bda 100644 --- a/llvm/test/MC/AArch64/SVE/not.s +++ b/llvm/test/MC/AArch64/SVE/not.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/nots.s b/llvm/test/MC/AArch64/SVE/nots.s index 4b6c79c36fa905..9a27fe4ab4ca97 100644 --- a/llvm/test/MC/AArch64/SVE/nots.s +++ b/llvm/test/MC/AArch64/SVE/nots.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/orn.s b/llvm/test/MC/AArch64/SVE/orn.s index ce1b9c9f6fcd86..695822a4094a47 100644 --- a/llvm/test/MC/AArch64/SVE/orn.s +++ b/llvm/test/MC/AArch64/SVE/orn.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/orns.s b/llvm/test/MC/AArch64/SVE/orns.s index de8aa4a4b736a6..1b9614e7afffdd 100644 --- a/llvm/test/MC/AArch64/SVE/orns.s +++ b/llvm/test/MC/AArch64/SVE/orns.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/orr.s b/llvm/test/MC/AArch64/SVE/orr.s index c848a34654be21..80d88e2885630d 100644 --- a/llvm/test/MC/AArch64/SVE/orr.s +++ b/llvm/test/MC/AArch64/SVE/orr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/orrs.s b/llvm/test/MC/AArch64/SVE/orrs.s index 2c6a4881b384f1..9d0911957db695 100644 --- a/llvm/test/MC/AArch64/SVE/orrs.s +++ b/llvm/test/MC/AArch64/SVE/orrs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/orv.s b/llvm/test/MC/AArch64/SVE/orv.s index fefdf238d37579..0a446b81a70228 100644 --- a/llvm/test/MC/AArch64/SVE/orv.s +++ b/llvm/test/MC/AArch64/SVE/orv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/pfalse.s b/llvm/test/MC/AArch64/SVE/pfalse.s index d87b12344561a0..199cb46ef9ba5b 100644 --- a/llvm/test/MC/AArch64/SVE/pfalse.s +++ b/llvm/test/MC/AArch64/SVE/pfalse.s @@ -1,6 +1,6 @@ // RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/pfirst.s b/llvm/test/MC/AArch64/SVE/pfirst.s index 89db229d52663b..d47d0a6d45c1fd 100644 --- a/llvm/test/MC/AArch64/SVE/pfirst.s +++ b/llvm/test/MC/AArch64/SVE/pfirst.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/pnext.s b/llvm/test/MC/AArch64/SVE/pnext.s index 74e9830c991d13..630bdcbbfca963 100644 --- a/llvm/test/MC/AArch64/SVE/pnext.s +++ b/llvm/test/MC/AArch64/SVE/pnext.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/prfb-sve-only.s b/llvm/test/MC/AArch64/SVE/prfb-sve-only.s index 4bb39ea267845a..9600470c4f25b8 100644 --- a/llvm/test/MC/AArch64/SVE/prfb-sve-only.s +++ 
b/llvm/test/MC/AArch64/SVE/prfb-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/prfb.s b/llvm/test/MC/AArch64/SVE/prfb.s index ea7e43b963f76d..525df1843cc69d 100644 --- a/llvm/test/MC/AArch64/SVE/prfb.s +++ b/llvm/test/MC/AArch64/SVE/prfb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/prfd-sve-only.s b/llvm/test/MC/AArch64/SVE/prfd-sve-only.s index 50df1b9ff21e15..8578d9489e28ee 100644 --- a/llvm/test/MC/AArch64/SVE/prfd-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/prfd-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d 
--mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/prfd.s b/llvm/test/MC/AArch64/SVE/prfd.s index df2dfb5825f2bf..42be5e5c81dfdd 100644 --- a/llvm/test/MC/AArch64/SVE/prfd.s +++ b/llvm/test/MC/AArch64/SVE/prfd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/prfh-sve-only.s b/llvm/test/MC/AArch64/SVE/prfh-sve-only.s index 6aebe59343a9c5..3b43a9431ee865 100644 --- a/llvm/test/MC/AArch64/SVE/prfh-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/prfh-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/prfh.s b/llvm/test/MC/AArch64/SVE/prfh.s index 211de5096eaf31..1301f918314a8b 100644 --- a/llvm/test/MC/AArch64/SVE/prfh.s +++ b/llvm/test/MC/AArch64/SVE/prfh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme 
< %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/prfw-sve-only.s b/llvm/test/MC/AArch64/SVE/prfw-sve-only.s index a64977929225af..dc752e78ac297d 100644 --- a/llvm/test/MC/AArch64/SVE/prfw-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/prfw-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/prfw.s b/llvm/test/MC/AArch64/SVE/prfw.s index 46d1ee2dc79101..8d760ee49ee3fc 100644 --- a/llvm/test/MC/AArch64/SVE/prfw.s +++ b/llvm/test/MC/AArch64/SVE/prfw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ptest.s b/llvm/test/MC/AArch64/SVE/ptest.s index 42ed050848dc24..294749b1121b33 100644 --- a/llvm/test/MC/AArch64/SVE/ptest.s +++ b/llvm/test/MC/AArch64/SVE/ptest.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// 
RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ptrue.s b/llvm/test/MC/AArch64/SVE/ptrue.s index f230b1feec6d89..0d3971f5fe99da 100644 --- a/llvm/test/MC/AArch64/SVE/ptrue.s +++ b/llvm/test/MC/AArch64/SVE/ptrue.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ptrues.s b/llvm/test/MC/AArch64/SVE/ptrues.s index b8cff9e678cc56..a7d5265df86185 100644 --- a/llvm/test/MC/AArch64/SVE/ptrues.s +++ b/llvm/test/MC/AArch64/SVE/ptrues.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/punpkhi.s b/llvm/test/MC/AArch64/SVE/punpkhi.s index 9a368b87c2e96f..804e49fd5b0004 100644 --- a/llvm/test/MC/AArch64/SVE/punpkhi.s +++ b/llvm/test/MC/AArch64/SVE/punpkhi.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/punpklo.s b/llvm/test/MC/AArch64/SVE/punpklo.s index 8ca8bb2f75678b..091328eaa5fead 100644 --- a/llvm/test/MC/AArch64/SVE/punpklo.s +++ b/llvm/test/MC/AArch64/SVE/punpklo.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/rbit.s b/llvm/test/MC/AArch64/SVE/rbit.s index ac299bf1cbe089..a41e328fc4c8d5 100644 --- a/llvm/test/MC/AArch64/SVE/rbit.s +++ b/llvm/test/MC/AArch64/SVE/rbit.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/rdffr.s b/llvm/test/MC/AArch64/SVE/rdffr.s index ec1a48b88497e4..2e152aaab204b9 100644 --- a/llvm/test/MC/AArch64/SVE/rdffr.s +++ b/llvm/test/MC/AArch64/SVE/rdffr.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/rdffrs.s b/llvm/test/MC/AArch64/SVE/rdffrs.s index 1ebcd6f25740cc..057e9cd4a5e5fa 100644 --- a/llvm/test/MC/AArch64/SVE/rdffrs.s +++ b/llvm/test/MC/AArch64/SVE/rdffrs.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/rdvl.s b/llvm/test/MC/AArch64/SVE/rdvl.s index 50877f5c7342dd..f028597873af95 100644 --- a/llvm/test/MC/AArch64/SVE/rdvl.s +++ b/llvm/test/MC/AArch64/SVE/rdvl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/rev.s 
b/llvm/test/MC/AArch64/SVE/rev.s index f562401e9567c5..ea0b75a8776a09 100644 --- a/llvm/test/MC/AArch64/SVE/rev.s +++ b/llvm/test/MC/AArch64/SVE/rev.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/revb.s b/llvm/test/MC/AArch64/SVE/revb.s index d8e490dbe8eb6a..26be0ba8bb2787 100644 --- a/llvm/test/MC/AArch64/SVE/revb.s +++ b/llvm/test/MC/AArch64/SVE/revb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/revh.s b/llvm/test/MC/AArch64/SVE/revh.s index 687fe9e08e7cea..b8e57f935d38cd 100644 --- a/llvm/test/MC/AArch64/SVE/revh.s +++ b/llvm/test/MC/AArch64/SVE/revh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE/revw.s b/llvm/test/MC/AArch64/SVE/revw.s index 3f50f4a95f4c7a..f211765c628852 100644 --- a/llvm/test/MC/AArch64/SVE/revw.s +++ b/llvm/test/MC/AArch64/SVE/revw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sabd.s b/llvm/test/MC/AArch64/SVE/sabd.s index e03136aa640216..35a6d2b86a5701 100644 --- a/llvm/test/MC/AArch64/SVE/sabd.s +++ b/llvm/test/MC/AArch64/SVE/sabd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/saddv.s b/llvm/test/MC/AArch64/SVE/saddv.s index f2ce4d91220c4b..c181cbba70afe2 100644 --- a/llvm/test/MC/AArch64/SVE/saddv.s +++ b/llvm/test/MC/AArch64/SVE/saddv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/scvtf.s b/llvm/test/MC/AArch64/SVE/scvtf.s index a7cd2079453f53..dbfb8efbb9f050 100644 --- a/llvm/test/MC/AArch64/SVE/scvtf.s +++ b/llvm/test/MC/AArch64/SVE/scvtf.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sdiv.s b/llvm/test/MC/AArch64/SVE/sdiv.s index d75e449292cf41..204030986e4c88 100644 --- a/llvm/test/MC/AArch64/SVE/sdiv.s +++ b/llvm/test/MC/AArch64/SVE/sdiv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sdivr.s b/llvm/test/MC/AArch64/SVE/sdivr.s index 6f8a75b9fa78f9..d66e39e2fb6c0b 100644 --- a/llvm/test/MC/AArch64/SVE/sdivr.s +++ b/llvm/test/MC/AArch64/SVE/sdivr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // 
RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sdot.s b/llvm/test/MC/AArch64/SVE/sdot.s index 0fe300c3993799..2ef6babaf9f027 100644 --- a/llvm/test/MC/AArch64/SVE/sdot.s +++ b/llvm/test/MC/AArch64/SVE/sdot.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sel.s b/llvm/test/MC/AArch64/SVE/sel.s index 3cb4dbab39f437..8803a5df42b145 100644 --- a/llvm/test/MC/AArch64/SVE/sel.s +++ b/llvm/test/MC/AArch64/SVE/sel.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/setffr.s b/llvm/test/MC/AArch64/SVE/setffr.s index 2f6a91b8e7db38..f0e23ecae666db 100644 --- a/llvm/test/MC/AArch64/SVE/setffr.s +++ b/llvm/test/MC/AArch64/SVE/setffr.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // 
RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/smax.s b/llvm/test/MC/AArch64/SVE/smax.s index 1de6ccf907ba1e..98dbb62b4079ce 100644 --- a/llvm/test/MC/AArch64/SVE/smax.s +++ b/llvm/test/MC/AArch64/SVE/smax.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/smaxv.s b/llvm/test/MC/AArch64/SVE/smaxv.s index 01d864c8f5e643..96a729723746fa 100644 --- a/llvm/test/MC/AArch64/SVE/smaxv.s +++ b/llvm/test/MC/AArch64/SVE/smaxv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/smin.s b/llvm/test/MC/AArch64/SVE/smin.s index 5df5aeeb05a89d..3a4ed6187a3a06 100644 --- a/llvm/test/MC/AArch64/SVE/smin.s +++ b/llvm/test/MC/AArch64/SVE/smin.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck 
%s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sminv.s b/llvm/test/MC/AArch64/SVE/sminv.s index c0ab800e73df40..420d5f74c6cf82 100644 --- a/llvm/test/MC/AArch64/SVE/sminv.s +++ b/llvm/test/MC/AArch64/SVE/sminv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/smulh.s b/llvm/test/MC/AArch64/SVE/smulh.s index 3c531620e0f682..0e498e68d07d11 100644 --- a/llvm/test/MC/AArch64/SVE/smulh.s +++ b/llvm/test/MC/AArch64/SVE/smulh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/splice.s b/llvm/test/MC/AArch64/SVE/splice.s index 64c7959cc2f88b..846f0a46ecdfa4 100644 --- a/llvm/test/MC/AArch64/SVE/splice.s +++ b/llvm/test/MC/AArch64/SVE/splice.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme 
< %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqadd.s b/llvm/test/MC/AArch64/SVE/sqadd.s index d20343d5fa70d8..4be310ad44118d 100644 --- a/llvm/test/MC/AArch64/SVE/sqadd.s +++ b/llvm/test/MC/AArch64/SVE/sqadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqdecb.s b/llvm/test/MC/AArch64/SVE/sqdecb.s index 9c9a75d3158427..89809d3f7ebb17 100644 --- a/llvm/test/MC/AArch64/SVE/sqdecb.s +++ b/llvm/test/MC/AArch64/SVE/sqdecb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqdecd.s b/llvm/test/MC/AArch64/SVE/sqdecd.s index a1074388890247..c61ef52458da8b 100644 --- a/llvm/test/MC/AArch64/SVE/sqdecd.s +++ b/llvm/test/MC/AArch64/SVE/sqdecd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqdech.s b/llvm/test/MC/AArch64/SVE/sqdech.s index 44ab11d90da5fb..d6155a25abf9c5 100644 --- a/llvm/test/MC/AArch64/SVE/sqdech.s +++ b/llvm/test/MC/AArch64/SVE/sqdech.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqdecp.s b/llvm/test/MC/AArch64/SVE/sqdecp.s index 16c6e984293ed2..85265259f53a60 100644 --- a/llvm/test/MC/AArch64/SVE/sqdecp.s +++ b/llvm/test/MC/AArch64/SVE/sqdecp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqdecw.s b/llvm/test/MC/AArch64/SVE/sqdecw.s index 9ce9e47498f298..ea66063b22d616 100644 --- a/llvm/test/MC/AArch64/SVE/sqdecw.s +++ b/llvm/test/MC/AArch64/SVE/sqdecw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqincb.s b/llvm/test/MC/AArch64/SVE/sqincb.s index 2ad48e30353660..3b840883134332 100644 --- a/llvm/test/MC/AArch64/SVE/sqincb.s +++ b/llvm/test/MC/AArch64/SVE/sqincb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqincd.s b/llvm/test/MC/AArch64/SVE/sqincd.s index 1dd8b62c32c2ba..def49151a06eda 100644 --- a/llvm/test/MC/AArch64/SVE/sqincd.s +++ b/llvm/test/MC/AArch64/SVE/sqincd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqinch.s b/llvm/test/MC/AArch64/SVE/sqinch.s index aeae6791491ba0..9672c3e07d0172 100644 --- a/llvm/test/MC/AArch64/SVE/sqinch.s +++ b/llvm/test/MC/AArch64/SVE/sqinch.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqincp.s b/llvm/test/MC/AArch64/SVE/sqincp.s index f8f279e40de4c5..758f75a0334520 100644 --- a/llvm/test/MC/AArch64/SVE/sqincp.s +++ b/llvm/test/MC/AArch64/SVE/sqincp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqincw.s b/llvm/test/MC/AArch64/SVE/sqincw.s index c602ba1a286724..28bcb020a667d2 100644 --- a/llvm/test/MC/AArch64/SVE/sqincw.s +++ b/llvm/test/MC/AArch64/SVE/sqincw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sqsub.s b/llvm/test/MC/AArch64/SVE/sqsub.s index 88ade7e1f160bd..9eff27b3e900e7 100644 --- a/llvm/test/MC/AArch64/SVE/sqsub.s +++ b/llvm/test/MC/AArch64/SVE/sqsub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st1b-sve-only.s b/llvm/test/MC/AArch64/SVE/st1b-sve-only.s index e9ab95cd5bb52b..c30700bfa046c7 100644 --- a/llvm/test/MC/AArch64/SVE/st1b-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/st1b-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/st1b.s b/llvm/test/MC/AArch64/SVE/st1b.s index c221652794da2b..ce30e26ccccc7a 100644 --- a/llvm/test/MC/AArch64/SVE/st1b.s +++ b/llvm/test/MC/AArch64/SVE/st1b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st1d-sve-only.s b/llvm/test/MC/AArch64/SVE/st1d-sve-only.s index e106e9f50f1431..f0039971a0d546 100644 --- 
a/llvm/test/MC/AArch64/SVE/st1d-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/st1d-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/st1d.s b/llvm/test/MC/AArch64/SVE/st1d.s index 2f2911c14afa5e..8683aa288bfc35 100644 --- a/llvm/test/MC/AArch64/SVE/st1d.s +++ b/llvm/test/MC/AArch64/SVE/st1d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st1h-sve-only.s b/llvm/test/MC/AArch64/SVE/st1h-sve-only.s index 4071a829fb7166..587c218d90e884 100644 --- a/llvm/test/MC/AArch64/SVE/st1h-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/st1h-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/st1h.s b/llvm/test/MC/AArch64/SVE/st1h.s index 18ca96d5975e72..da3ffaa5b10cbe 100644 --- a/llvm/test/MC/AArch64/SVE/st1h.s +++ b/llvm/test/MC/AArch64/SVE/st1h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st1w-sve-only.s b/llvm/test/MC/AArch64/SVE/st1w-sve-only.s index dc86903d2268c7..ac4f7696d9b9db 100644 --- a/llvm/test/MC/AArch64/SVE/st1w-sve-only.s +++ b/llvm/test/MC/AArch64/SVE/st1w-sve-only.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/st1w.s b/llvm/test/MC/AArch64/SVE/st1w.s index 503c7fe82db76a..0194566ef31fa0 100644 --- a/llvm/test/MC/AArch64/SVE/st1w.s +++ b/llvm/test/MC/AArch64/SVE/st1w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st2b.s b/llvm/test/MC/AArch64/SVE/st2b.s index c8e45eabcf6b0f..aa94325a5b3b5a 100644 --- a/llvm/test/MC/AArch64/SVE/st2b.s +++ b/llvm/test/MC/AArch64/SVE/st2b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st2d.s b/llvm/test/MC/AArch64/SVE/st2d.s index afe72786feacb3..8336bf38af272c 100644 --- a/llvm/test/MC/AArch64/SVE/st2d.s +++ b/llvm/test/MC/AArch64/SVE/st2d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st2h.s b/llvm/test/MC/AArch64/SVE/st2h.s index c60b0acf70c50d..71e4af962ea308 100644 --- a/llvm/test/MC/AArch64/SVE/st2h.s +++ b/llvm/test/MC/AArch64/SVE/st2h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st2w.s b/llvm/test/MC/AArch64/SVE/st2w.s index 1407cd2600d97b..a0790d514be2f6 100644 --- a/llvm/test/MC/AArch64/SVE/st2w.s +++ b/llvm/test/MC/AArch64/SVE/st2w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st3b.s b/llvm/test/MC/AArch64/SVE/st3b.s index 7331e8cb21dd45..d937d53cd1caa1 100644 --- a/llvm/test/MC/AArch64/SVE/st3b.s +++ b/llvm/test/MC/AArch64/SVE/st3b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st3d.s b/llvm/test/MC/AArch64/SVE/st3d.s index 0ea0fb8ca53c2b..5e1930112e1dd1 100644 --- a/llvm/test/MC/AArch64/SVE/st3d.s +++ b/llvm/test/MC/AArch64/SVE/st3d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st3h.s b/llvm/test/MC/AArch64/SVE/st3h.s index a5edb651c6408c..96a9ae103ba897 100644 --- a/llvm/test/MC/AArch64/SVE/st3h.s +++ b/llvm/test/MC/AArch64/SVE/st3h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st3w.s b/llvm/test/MC/AArch64/SVE/st3w.s index d106184ba135d1..c5f41dcac5a6f5 100644 --- a/llvm/test/MC/AArch64/SVE/st3w.s +++ b/llvm/test/MC/AArch64/SVE/st3w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st4b.s b/llvm/test/MC/AArch64/SVE/st4b.s index ea270f42cce69d..ae40fb95ac2fa5 100644 --- a/llvm/test/MC/AArch64/SVE/st4b.s +++ b/llvm/test/MC/AArch64/SVE/st4b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st4d.s b/llvm/test/MC/AArch64/SVE/st4d.s index dab7d40bf0347f..1f7065f00990ff 100644 --- a/llvm/test/MC/AArch64/SVE/st4d.s +++ b/llvm/test/MC/AArch64/SVE/st4d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st4h.s b/llvm/test/MC/AArch64/SVE/st4h.s index cd1c17e6d89902..3319a4bf533466 100644 --- a/llvm/test/MC/AArch64/SVE/st4h.s +++ b/llvm/test/MC/AArch64/SVE/st4h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/st4w.s b/llvm/test/MC/AArch64/SVE/st4w.s index 4a05eb604ac3f8..4a6da2bc10f240 100644 --- a/llvm/test/MC/AArch64/SVE/st4w.s +++ b/llvm/test/MC/AArch64/SVE/st4w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// 
RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/stnt1b.s b/llvm/test/MC/AArch64/SVE/stnt1b.s index bb75df59f78c72..4a1d2325c4829b 100644 --- a/llvm/test/MC/AArch64/SVE/stnt1b.s +++ b/llvm/test/MC/AArch64/SVE/stnt1b.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/stnt1d.s b/llvm/test/MC/AArch64/SVE/stnt1d.s index 467d2d753d2ce1..753dee1ba2b67a 100644 --- a/llvm/test/MC/AArch64/SVE/stnt1d.s +++ b/llvm/test/MC/AArch64/SVE/stnt1d.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/stnt1h.s b/llvm/test/MC/AArch64/SVE/stnt1h.s index c98076e982dc46..fde85342be3af5 100644 --- a/llvm/test/MC/AArch64/SVE/stnt1h.s +++ b/llvm/test/MC/AArch64/SVE/stnt1h.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/stnt1w.s b/llvm/test/MC/AArch64/SVE/stnt1w.s index bdde90686822eb..97eaf599fbca9d 100644 --- a/llvm/test/MC/AArch64/SVE/stnt1w.s +++ b/llvm/test/MC/AArch64/SVE/stnt1w.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/str.s b/llvm/test/MC/AArch64/SVE/str.s index 0294a52b81aed7..d9b399951cb8bb 100644 --- a/llvm/test/MC/AArch64/SVE/str.s +++ b/llvm/test/MC/AArch64/SVE/str.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sub.s b/llvm/test/MC/AArch64/SVE/sub.s index 7a639156357874..3f71a85ce0b894 100644 --- a/llvm/test/MC/AArch64/SVE/sub.s +++ b/llvm/test/MC/AArch64/SVE/sub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/subr.s b/llvm/test/MC/AArch64/SVE/subr.s index b433bd2559dc56..471b02a0ec08d3 100644 --- a/llvm/test/MC/AArch64/SVE/subr.s +++ b/llvm/test/MC/AArch64/SVE/subr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sunpkhi.s b/llvm/test/MC/AArch64/SVE/sunpkhi.s index 7d122c07c4eef5..30f4633b15f98b 100644 --- a/llvm/test/MC/AArch64/SVE/sunpkhi.s +++ b/llvm/test/MC/AArch64/SVE/sunpkhi.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sunpklo.s b/llvm/test/MC/AArch64/SVE/sunpklo.s index 3dbfbbf87bd58c..16c84d191a3172 100644 --- a/llvm/test/MC/AArch64/SVE/sunpklo.s +++ b/llvm/test/MC/AArch64/SVE/sunpklo.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sxtb.s b/llvm/test/MC/AArch64/SVE/sxtb.s index 17694b4b3eae73..844ef8410d3874 100644 --- a/llvm/test/MC/AArch64/SVE/sxtb.s +++ b/llvm/test/MC/AArch64/SVE/sxtb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sxth.s b/llvm/test/MC/AArch64/SVE/sxth.s index a90f8338835e8d..1c866dc87df36f 100644 --- a/llvm/test/MC/AArch64/SVE/sxth.s +++ b/llvm/test/MC/AArch64/SVE/sxth.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/sxtw.s b/llvm/test/MC/AArch64/SVE/sxtw.s index 5c40fe3caa7257..c9231641cb62eb 100644 --- a/llvm/test/MC/AArch64/SVE/sxtw.s +++ b/llvm/test/MC/AArch64/SVE/sxtw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/tbl.s b/llvm/test/MC/AArch64/SVE/tbl.s index bbfbdceb11be49..5a29285b09eeee 100644 --- a/llvm/test/MC/AArch64/SVE/tbl.s +++ b/llvm/test/MC/AArch64/SVE/tbl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/trn1.s b/llvm/test/MC/AArch64/SVE/trn1.s index e573ad85901e62..49061f0707ecaa 100644 --- a/llvm/test/MC/AArch64/SVE/trn1.s +++ b/llvm/test/MC/AArch64/SVE/trn1.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/trn2.s b/llvm/test/MC/AArch64/SVE/trn2.s index 9ae60ffb99f969..2e94c1083f3f00 100644 --- a/llvm/test/MC/AArch64/SVE/trn2.s +++ b/llvm/test/MC/AArch64/SVE/trn2.s @@ -1,6 +1,6 @@ // RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uabd.s b/llvm/test/MC/AArch64/SVE/uabd.s index 39745f9f09b3cf..f1a8d2fcd0dede 100644 --- a/llvm/test/MC/AArch64/SVE/uabd.s +++ b/llvm/test/MC/AArch64/SVE/uabd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uaddv.s b/llvm/test/MC/AArch64/SVE/uaddv.s index 449c6dc4f85c01..6af2a6097919a9 100644 --- a/llvm/test/MC/AArch64/SVE/uaddv.s +++ b/llvm/test/MC/AArch64/SVE/uaddv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/ucvtf.s b/llvm/test/MC/AArch64/SVE/ucvtf.s index 7b380eda0997b7..1026abeb66ac9b 100644 --- a/llvm/test/MC/AArch64/SVE/ucvtf.s +++ b/llvm/test/MC/AArch64/SVE/ucvtf.s @@ 
-1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/udiv.s b/llvm/test/MC/AArch64/SVE/udiv.s index 836fe20add50b2..80391328507617 100644 --- a/llvm/test/MC/AArch64/SVE/udiv.s +++ b/llvm/test/MC/AArch64/SVE/udiv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/udivr.s b/llvm/test/MC/AArch64/SVE/udivr.s index c179d992e99b6d..4bd42f6e58d722 100644 --- a/llvm/test/MC/AArch64/SVE/udivr.s +++ b/llvm/test/MC/AArch64/SVE/udivr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/udot.s b/llvm/test/MC/AArch64/SVE/udot.s index 405457f54d3df8..93f22e2cf02293 100644 --- a/llvm/test/MC/AArch64/SVE/udot.s +++ 
b/llvm/test/MC/AArch64/SVE/udot.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/umax.s b/llvm/test/MC/AArch64/SVE/umax.s index facb1f94c5a00b..3518a80e7cf958 100644 --- a/llvm/test/MC/AArch64/SVE/umax.s +++ b/llvm/test/MC/AArch64/SVE/umax.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/umaxv.s b/llvm/test/MC/AArch64/SVE/umaxv.s index e9d1d5998512da..2763d642e7bee6 100644 --- a/llvm/test/MC/AArch64/SVE/umaxv.s +++ b/llvm/test/MC/AArch64/SVE/umaxv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/umin.s b/llvm/test/MC/AArch64/SVE/umin.s index 7fbce57e8a3a0e..fae9d556fc42d6 100644 --- 
a/llvm/test/MC/AArch64/SVE/umin.s +++ b/llvm/test/MC/AArch64/SVE/umin.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uminv.s b/llvm/test/MC/AArch64/SVE/uminv.s index c43833300f58b1..127104438ae06f 100644 --- a/llvm/test/MC/AArch64/SVE/uminv.s +++ b/llvm/test/MC/AArch64/SVE/uminv.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/umulh.s b/llvm/test/MC/AArch64/SVE/umulh.s index 2e9d2a89459da9..2c2ce8d0fab035 100644 --- a/llvm/test/MC/AArch64/SVE/umulh.s +++ b/llvm/test/MC/AArch64/SVE/umulh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqadd.s b/llvm/test/MC/AArch64/SVE/uqadd.s index 
047c0699dec5a2..37aa4d8e2712ec 100644 --- a/llvm/test/MC/AArch64/SVE/uqadd.s +++ b/llvm/test/MC/AArch64/SVE/uqadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqdecb.s b/llvm/test/MC/AArch64/SVE/uqdecb.s index 10f7f893b58355..b323ca97238bfb 100644 --- a/llvm/test/MC/AArch64/SVE/uqdecb.s +++ b/llvm/test/MC/AArch64/SVE/uqdecb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqdecd.s b/llvm/test/MC/AArch64/SVE/uqdecd.s index 0c13af33f2338a..4a8bc381d3cc57 100644 --- a/llvm/test/MC/AArch64/SVE/uqdecd.s +++ b/llvm/test/MC/AArch64/SVE/uqdecd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqdech.s 
b/llvm/test/MC/AArch64/SVE/uqdech.s index c5c4a40490fede..a35f79fdb132e4 100644 --- a/llvm/test/MC/AArch64/SVE/uqdech.s +++ b/llvm/test/MC/AArch64/SVE/uqdech.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqdecp.s b/llvm/test/MC/AArch64/SVE/uqdecp.s index 2deec983a3ddb6..54902e05e0c4f7 100644 --- a/llvm/test/MC/AArch64/SVE/uqdecp.s +++ b/llvm/test/MC/AArch64/SVE/uqdecp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqdecw.s b/llvm/test/MC/AArch64/SVE/uqdecw.s index f137aaea5da458..1deb1f088c0bf7 100644 --- a/llvm/test/MC/AArch64/SVE/uqdecw.s +++ b/llvm/test/MC/AArch64/SVE/uqdecw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE/uqincb.s b/llvm/test/MC/AArch64/SVE/uqincb.s index b728cb75340d91..4e13bd8432d193 100644 --- a/llvm/test/MC/AArch64/SVE/uqincb.s +++ b/llvm/test/MC/AArch64/SVE/uqincb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqincd.s b/llvm/test/MC/AArch64/SVE/uqincd.s index cebea9327984d1..5e202d530f61e5 100644 --- a/llvm/test/MC/AArch64/SVE/uqincd.s +++ b/llvm/test/MC/AArch64/SVE/uqincd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqinch.s b/llvm/test/MC/AArch64/SVE/uqinch.s index a98cf2bd0083b5..d0ec9cef7ca26f 100644 --- a/llvm/test/MC/AArch64/SVE/uqinch.s +++ b/llvm/test/MC/AArch64/SVE/uqinch.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqincp.s b/llvm/test/MC/AArch64/SVE/uqincp.s index 07c4143abc3c48..a9bff830404553 100644 --- a/llvm/test/MC/AArch64/SVE/uqincp.s +++ b/llvm/test/MC/AArch64/SVE/uqincp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqincw.s b/llvm/test/MC/AArch64/SVE/uqincw.s index 51208942570475..221155e149d1fd 100644 --- a/llvm/test/MC/AArch64/SVE/uqincw.s +++ b/llvm/test/MC/AArch64/SVE/uqincw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uqsub.s b/llvm/test/MC/AArch64/SVE/uqsub.s index b1e219662d1086..43813f1a86fd38 100644 --- a/llvm/test/MC/AArch64/SVE/uqsub.s +++ b/llvm/test/MC/AArch64/SVE/uqsub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 
2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uunpkhi.s b/llvm/test/MC/AArch64/SVE/uunpkhi.s index 32ea44637d231b..c10b4a6e6b74a5 100644 --- a/llvm/test/MC/AArch64/SVE/uunpkhi.s +++ b/llvm/test/MC/AArch64/SVE/uunpkhi.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uunpklo.s b/llvm/test/MC/AArch64/SVE/uunpklo.s index e9fa49ce315ecb..f7f5bc6b2d6511 100644 --- a/llvm/test/MC/AArch64/SVE/uunpklo.s +++ b/llvm/test/MC/AArch64/SVE/uunpklo.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uxtb.s b/llvm/test/MC/AArch64/SVE/uxtb.s index 23548252a4273a..00e82f79112ac5 100644 --- a/llvm/test/MC/AArch64/SVE/uxtb.s +++ b/llvm/test/MC/AArch64/SVE/uxtb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc 
-triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uxth.s b/llvm/test/MC/AArch64/SVE/uxth.s index 43bfa54f708c26..11c5fc623b77e3 100644 --- a/llvm/test/MC/AArch64/SVE/uxth.s +++ b/llvm/test/MC/AArch64/SVE/uxth.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uxtw.s b/llvm/test/MC/AArch64/SVE/uxtw.s index 435f733c9d10bd..f5fa363b1c3cd9 100644 --- a/llvm/test/MC/AArch64/SVE/uxtw.s +++ b/llvm/test/MC/AArch64/SVE/uxtw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uzp1.s b/llvm/test/MC/AArch64/SVE/uzp1.s index 08f6300fcbae36..a694eacf845664 100644 --- a/llvm/test/MC/AArch64/SVE/uzp1.s +++ b/llvm/test/MC/AArch64/SVE/uzp1.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: 
not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/uzp2.s b/llvm/test/MC/AArch64/SVE/uzp2.s index 5a9285042811a0..196f8787364751 100644 --- a/llvm/test/MC/AArch64/SVE/uzp2.s +++ b/llvm/test/MC/AArch64/SVE/uzp2.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/whilele.s b/llvm/test/MC/AArch64/SVE/whilele.s index a6cd350e5a4f14..7048429ccf6ff2 100644 --- a/llvm/test/MC/AArch64/SVE/whilele.s +++ b/llvm/test/MC/AArch64/SVE/whilele.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/whilelo.s b/llvm/test/MC/AArch64/SVE/whilelo.s index 4d083d24aec586..90d09d30c439bb 100644 --- a/llvm/test/MC/AArch64/SVE/whilelo.s +++ b/llvm/test/MC/AArch64/SVE/whilelo.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/whilels.s b/llvm/test/MC/AArch64/SVE/whilels.s index 4ad0e3ef99f7b9..d6cfa9b8c04870 100644 --- a/llvm/test/MC/AArch64/SVE/whilels.s +++ b/llvm/test/MC/AArch64/SVE/whilels.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/whilelt.s b/llvm/test/MC/AArch64/SVE/whilelt.s index 5b7f4f16efc005..f77223af8a015d 100644 --- a/llvm/test/MC/AArch64/SVE/whilelt.s +++ b/llvm/test/MC/AArch64/SVE/whilelt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/wrffr.s b/llvm/test/MC/AArch64/SVE/wrffr.s index 197dce74aa82bb..b0ddecaee277ec 100644 --- a/llvm/test/MC/AArch64/SVE/wrffr.s +++ b/llvm/test/MC/AArch64/SVE/wrffr.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 
2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \ // RUN: | llvm-objdump -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE/zip1.s b/llvm/test/MC/AArch64/SVE/zip1.s index c416bbc563a217..1d7ec98332cf32 100644 --- a/llvm/test/MC/AArch64/SVE/zip1.s +++ b/llvm/test/MC/AArch64/SVE/zip1.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE/zip2.s b/llvm/test/MC/AArch64/SVE/zip2.s index 7944b3804b8b85..39a5c16419dfaf 100644 --- a/llvm/test/MC/AArch64/SVE/zip2.s +++ b/llvm/test/MC/AArch64/SVE/zip2.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/adclb.s b/llvm/test/MC/AArch64/SVE2/adclb.s index e3150a458a927d..1cba6b1fce82d9 100644 --- a/llvm/test/MC/AArch64/SVE2/adclb.s +++ b/llvm/test/MC/AArch64/SVE2/adclb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/adclt.s b/llvm/test/MC/AArch64/SVE2/adclt.s index 57cfbd888e3723..d2ebec029b98cf 100644 --- a/llvm/test/MC/AArch64/SVE2/adclt.s +++ b/llvm/test/MC/AArch64/SVE2/adclt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/addhnb.s b/llvm/test/MC/AArch64/SVE2/addhnb.s index 5071e3bff0977d..7f23e35777b04d 100644 --- a/llvm/test/MC/AArch64/SVE2/addhnb.s +++ b/llvm/test/MC/AArch64/SVE2/addhnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/addhnt.s b/llvm/test/MC/AArch64/SVE2/addhnt.s index 9b69b71fe8bc6e..5c5bc96f7b9834 100644 --- a/llvm/test/MC/AArch64/SVE2/addhnt.s +++ b/llvm/test/MC/AArch64/SVE2/addhnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/addp.s b/llvm/test/MC/AArch64/SVE2/addp.s index 9e90f8997e3b25..5477424023b21d 100644 --- a/llvm/test/MC/AArch64/SVE2/addp.s +++ b/llvm/test/MC/AArch64/SVE2/addp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/aesd.s b/llvm/test/MC/AArch64/SVE2/aesd.s index 9d108136ae76e9..3172fbae327854 100644 --- a/llvm/test/MC/AArch64/SVE2/aesd.s +++ b/llvm/test/MC/AArch64/SVE2/aesd.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/aese.s b/llvm/test/MC/AArch64/SVE2/aese.s index b435d3378f0dc6..49592c79105215 100644 --- a/llvm/test/MC/AArch64/SVE2/aese.s +++ 
b/llvm/test/MC/AArch64/SVE2/aese.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/aesimc.s b/llvm/test/MC/AArch64/SVE2/aesimc.s index 32862590d56338..d70f13d30c0422 100644 --- a/llvm/test/MC/AArch64/SVE2/aesimc.s +++ b/llvm/test/MC/AArch64/SVE2/aesimc.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/aesmc.s b/llvm/test/MC/AArch64/SVE2/aesmc.s index d9cb9525fc57f3..bcb9811e099dd9 100644 --- a/llvm/test/MC/AArch64/SVE2/aesmc.s +++ b/llvm/test/MC/AArch64/SVE2/aesmc.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: 
llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/bcax.s b/llvm/test/MC/AArch64/SVE2/bcax.s index 36752595c822d3..54311607d9f38b 100644 --- a/llvm/test/MC/AArch64/SVE2/bcax.s +++ b/llvm/test/MC/AArch64/SVE2/bcax.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/bdep.s b/llvm/test/MC/AArch64/SVE2/bdep.s index c8388baf9de8c9..04d2b7de795a89 100644 --- a/llvm/test/MC/AArch64/SVE2/bdep.s +++ b/llvm/test/MC/AArch64/SVE2/bdep.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-bitperm < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-bitperm - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/bext.s b/llvm/test/MC/AArch64/SVE2/bext.s index a9e7b3dbbc935c..80a42676223074 100644 --- a/llvm/test/MC/AArch64/SVE2/bext.s +++ b/llvm/test/MC/AArch64/SVE2/bext.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR 
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-bitperm < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-bitperm - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/bgrp.s b/llvm/test/MC/AArch64/SVE2/bgrp.s index d73c925c16ce7a..4e9f8b05a99ed9 100644 --- a/llvm/test/MC/AArch64/SVE2/bgrp.s +++ b/llvm/test/MC/AArch64/SVE2/bgrp.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-bitperm < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-bitperm - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/bsl.s b/llvm/test/MC/AArch64/SVE2/bsl.s index a3ec691c4fdfb9..55fdb48b47f245 100644 --- a/llvm/test/MC/AArch64/SVE2/bsl.s +++ b/llvm/test/MC/AArch64/SVE2/bsl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/bsl1n.s b/llvm/test/MC/AArch64/SVE2/bsl1n.s index 15438341b3affa..e1bedf19981b61 100644 --- a/llvm/test/MC/AArch64/SVE2/bsl1n.s +++ 
b/llvm/test/MC/AArch64/SVE2/bsl1n.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/bsl2n.s b/llvm/test/MC/AArch64/SVE2/bsl2n.s index 623f6e3605a2ff..35bdf55f28213f 100644 --- a/llvm/test/MC/AArch64/SVE2/bsl2n.s +++ b/llvm/test/MC/AArch64/SVE2/bsl2n.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/cadd.s b/llvm/test/MC/AArch64/SVE2/cadd.s index a27fc6c623bb70..9ba1544d362df9 100644 --- a/llvm/test/MC/AArch64/SVE2/cadd.s +++ b/llvm/test/MC/AArch64/SVE2/cadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/cdot.s b/llvm/test/MC/AArch64/SVE2/cdot.s index 1a9386fa486f89..dbd23d970c0bdc 100644 --- 
a/llvm/test/MC/AArch64/SVE2/cdot.s +++ b/llvm/test/MC/AArch64/SVE2/cdot.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/cmla.s b/llvm/test/MC/AArch64/SVE2/cmla.s index 4e93d1b384ca5f..7e9b96d32ea7b5 100644 --- a/llvm/test/MC/AArch64/SVE2/cmla.s +++ b/llvm/test/MC/AArch64/SVE2/cmla.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/eor3.s b/llvm/test/MC/AArch64/SVE2/eor3.s index f5ac2dba9ba05d..695b43791eba16 100644 --- a/llvm/test/MC/AArch64/SVE2/eor3.s +++ b/llvm/test/MC/AArch64/SVE2/eor3.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/eorbt.s b/llvm/test/MC/AArch64/SVE2/eorbt.s index 
8f71c71a37f23f..e9bf7d9d9b8607 100644 --- a/llvm/test/MC/AArch64/SVE2/eorbt.s +++ b/llvm/test/MC/AArch64/SVE2/eorbt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/eortb.s b/llvm/test/MC/AArch64/SVE2/eortb.s index 1d3016e90c7bcd..0f35ef1b54d1b1 100644 --- a/llvm/test/MC/AArch64/SVE2/eortb.s +++ b/llvm/test/MC/AArch64/SVE2/eortb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ext.s b/llvm/test/MC/AArch64/SVE2/ext.s index 262b5ad0def275..e7849e3915f2d9 100644 --- a/llvm/test/MC/AArch64/SVE2/ext.s +++ b/llvm/test/MC/AArch64/SVE2/ext.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/faddp.s 
b/llvm/test/MC/AArch64/SVE2/faddp.s index 77b6dea6ff4566..e815402f76c70d 100644 --- a/llvm/test/MC/AArch64/SVE2/faddp.s +++ b/llvm/test/MC/AArch64/SVE2/faddp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fcvtlt.s b/llvm/test/MC/AArch64/SVE2/fcvtlt.s index 5120a74b19712f..d437863541bef3 100644 --- a/llvm/test/MC/AArch64/SVE2/fcvtlt.s +++ b/llvm/test/MC/AArch64/SVE2/fcvtlt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fcvtnt.s b/llvm/test/MC/AArch64/SVE2/fcvtnt.s index 1d314ef65ab1bb..e4c728bbbaac29 100644 --- a/llvm/test/MC/AArch64/SVE2/fcvtnt.s +++ b/llvm/test/MC/AArch64/SVE2/fcvtnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR 
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtx.s b/llvm/test/MC/AArch64/SVE2/fcvtx.s index f22ba0f2a0bc10..6508456bee4abc 100644 --- a/llvm/test/MC/AArch64/SVE2/fcvtx.s +++ b/llvm/test/MC/AArch64/SVE2/fcvtx.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fcvtxnt.s b/llvm/test/MC/AArch64/SVE2/fcvtxnt.s index 66317d2e6f58fe..830110e46b3f2c 100644 --- a/llvm/test/MC/AArch64/SVE2/fcvtxnt.s +++ b/llvm/test/MC/AArch64/SVE2/fcvtxnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/flogb.s b/llvm/test/MC/AArch64/SVE2/flogb.s index 4e7e6a6668002a..507d7e059532c2 100644 --- a/llvm/test/MC/AArch64/SVE2/flogb.s +++ b/llvm/test/MC/AArch64/SVE2/flogb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // 
RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fmaxnmp.s b/llvm/test/MC/AArch64/SVE2/fmaxnmp.s index a006ffbcb6bd4b..3bc2aa556566ec 100644 --- a/llvm/test/MC/AArch64/SVE2/fmaxnmp.s +++ b/llvm/test/MC/AArch64/SVE2/fmaxnmp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fmaxp.s b/llvm/test/MC/AArch64/SVE2/fmaxp.s index 0e507191445bc6..f24257099cef02 100644 --- a/llvm/test/MC/AArch64/SVE2/fmaxp.s +++ b/llvm/test/MC/AArch64/SVE2/fmaxp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fminnmp.s b/llvm/test/MC/AArch64/SVE2/fminnmp.s index 5a48e6dce466d0..fb7bf757549a46 100644 --- a/llvm/test/MC/AArch64/SVE2/fminnmp.s +++ b/llvm/test/MC/AArch64/SVE2/fminnmp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not 
llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fminp.s b/llvm/test/MC/AArch64/SVE2/fminp.s index a7e5b3a3af15e3..57bf42210c68ff 100644 --- a/llvm/test/MC/AArch64/SVE2/fminp.s +++ b/llvm/test/MC/AArch64/SVE2/fminp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fmlalb.s b/llvm/test/MC/AArch64/SVE2/fmlalb.s index 02edbcf9e1b045..ef2f44f1876aab 100644 --- a/llvm/test/MC/AArch64/SVE2/fmlalb.s +++ b/llvm/test/MC/AArch64/SVE2/fmlalb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fmlalt.s b/llvm/test/MC/AArch64/SVE2/fmlalt.s index 953b05a2042b7e..692a3b9af99e4e 100644 --- a/llvm/test/MC/AArch64/SVE2/fmlalt.s +++ b/llvm/test/MC/AArch64/SVE2/fmlalt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fmlslb.s b/llvm/test/MC/AArch64/SVE2/fmlslb.s index e6db85233d176c..238ad17b4c8959 100644 --- a/llvm/test/MC/AArch64/SVE2/fmlslb.s +++ b/llvm/test/MC/AArch64/SVE2/fmlslb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/fmlslt.s b/llvm/test/MC/AArch64/SVE2/fmlslt.s index 96b1901d138f7d..d02184d711a4d6 100644 --- a/llvm/test/MC/AArch64/SVE2/fmlslt.s +++ b/llvm/test/MC/AArch64/SVE2/fmlslt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/histcnt.s b/llvm/test/MC/AArch64/SVE2/histcnt.s index ab2fc87f5821b6..d0d092734260ea 100644 --- a/llvm/test/MC/AArch64/SVE2/histcnt.s +++ b/llvm/test/MC/AArch64/SVE2/histcnt.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/histseg.s b/llvm/test/MC/AArch64/SVE2/histseg.s index 514d708321aa53..52192e6e3e3227 100644 --- a/llvm/test/MC/AArch64/SVE2/histseg.s +++ b/llvm/test/MC/AArch64/SVE2/histseg.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1b.s b/llvm/test/MC/AArch64/SVE2/ldnt1b.s index cbfceaddfc9a61..a94939a30acd93 100644 --- a/llvm/test/MC/AArch64/SVE2/ldnt1b.s +++ b/llvm/test/MC/AArch64/SVE2/ldnt1b.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1d.s b/llvm/test/MC/AArch64/SVE2/ldnt1d.s index 8c125773265dca..4ffb837b3dcc94 100644 --- 
a/llvm/test/MC/AArch64/SVE2/ldnt1d.s +++ b/llvm/test/MC/AArch64/SVE2/ldnt1d.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1h.s b/llvm/test/MC/AArch64/SVE2/ldnt1h.s index 253c8895e90e87..d9dcf06e1e441d 100644 --- a/llvm/test/MC/AArch64/SVE2/ldnt1h.s +++ b/llvm/test/MC/AArch64/SVE2/ldnt1h.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sb.s b/llvm/test/MC/AArch64/SVE2/ldnt1sb.s index 8d7464391f5471..9ea60a8a02a4b2 100644 --- a/llvm/test/MC/AArch64/SVE2/ldnt1sb.s +++ b/llvm/test/MC/AArch64/SVE2/ldnt1sb.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sh.s b/llvm/test/MC/AArch64/SVE2/ldnt1sh.s index b487d841e4c4b5..19afe13d43aca1 100644 --- a/llvm/test/MC/AArch64/SVE2/ldnt1sh.s +++ b/llvm/test/MC/AArch64/SVE2/ldnt1sh.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sw.s b/llvm/test/MC/AArch64/SVE2/ldnt1sw.s index aa94ca3adca2f9..0b3d768bb99083 100644 --- a/llvm/test/MC/AArch64/SVE2/ldnt1sw.s +++ b/llvm/test/MC/AArch64/SVE2/ldnt1sw.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1w.s b/llvm/test/MC/AArch64/SVE2/ldnt1w.s index a43e0b9ee5a66b..de2deb92915d54 100644 --- a/llvm/test/MC/AArch64/SVE2/ldnt1w.s +++ b/llvm/test/MC/AArch64/SVE2/ldnt1w.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/match.s b/llvm/test/MC/AArch64/SVE2/match.s index 5289e180f75f1c..564d1caf11eef1 100644 --- a/llvm/test/MC/AArch64/SVE2/match.s +++ b/llvm/test/MC/AArch64/SVE2/match.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/mla.s b/llvm/test/MC/AArch64/SVE2/mla.s index ff5df0b9f4eced..c43a2684a95051 100644 --- a/llvm/test/MC/AArch64/SVE2/mla.s +++ b/llvm/test/MC/AArch64/SVE2/mla.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/mls.s 
b/llvm/test/MC/AArch64/SVE2/mls.s index c4ef42da09d19f..9aa9587d966229 100644 --- a/llvm/test/MC/AArch64/SVE2/mls.s +++ b/llvm/test/MC/AArch64/SVE2/mls.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/mul.s b/llvm/test/MC/AArch64/SVE2/mul.s index 120ece160176a1..184b7fefd1dafd 100644 --- a/llvm/test/MC/AArch64/SVE2/mul.s +++ b/llvm/test/MC/AArch64/SVE2/mul.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/nbsl.s b/llvm/test/MC/AArch64/SVE2/nbsl.s index af6ad419f02392..e1c3eaf67f0f18 100644 --- a/llvm/test/MC/AArch64/SVE2/nbsl.s +++ b/llvm/test/MC/AArch64/SVE2/nbsl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE2/nmatch.s b/llvm/test/MC/AArch64/SVE2/nmatch.s index 11cafbac73f6be..2d298abb749483 100644 --- a/llvm/test/MC/AArch64/SVE2/nmatch.s +++ b/llvm/test/MC/AArch64/SVE2/nmatch.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/pmul.s b/llvm/test/MC/AArch64/SVE2/pmul.s index 20599a9902de8b..db12120f7fa553 100644 --- a/llvm/test/MC/AArch64/SVE2/pmul.s +++ b/llvm/test/MC/AArch64/SVE2/pmul.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/pmullb-128.s b/llvm/test/MC/AArch64/SVE2/pmullb-128.s index 0db1b735612002..2c4f43131fccf6 100644 --- a/llvm/test/MC/AArch64/SVE2/pmullb-128.s +++ b/llvm/test/MC/AArch64/SVE2/pmullb-128.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/pmullb.s b/llvm/test/MC/AArch64/SVE2/pmullb.s index edd2a0907d37fc..c0c56854c0acaf 100644 --- a/llvm/test/MC/AArch64/SVE2/pmullb.s +++ b/llvm/test/MC/AArch64/SVE2/pmullb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/pmullt-128.s b/llvm/test/MC/AArch64/SVE2/pmullt-128.s index 68da4f2c81fe95..54f175d4ca217e 100644 --- a/llvm/test/MC/AArch64/SVE2/pmullt-128.s +++ b/llvm/test/MC/AArch64/SVE2/pmullt-128.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/pmullt.s b/llvm/test/MC/AArch64/SVE2/pmullt.s index 4a2328edf4e1ac..5c7ad12a4a4df7 100644 --- a/llvm/test/MC/AArch64/SVE2/pmullt.s +++ b/llvm/test/MC/AArch64/SVE2/pmullt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ 
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/raddhnb.s b/llvm/test/MC/AArch64/SVE2/raddhnb.s index 8f7678593a6863..c7ada1bc12da6a 100644 --- a/llvm/test/MC/AArch64/SVE2/raddhnb.s +++ b/llvm/test/MC/AArch64/SVE2/raddhnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/raddhnt.s b/llvm/test/MC/AArch64/SVE2/raddhnt.s index 2c8d7fa2c9592c..61a6c2bf5c04c0 100644 --- a/llvm/test/MC/AArch64/SVE2/raddhnt.s +++ b/llvm/test/MC/AArch64/SVE2/raddhnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/rax1.s b/llvm/test/MC/AArch64/SVE2/rax1.s index 6481f465fa295e..1a9a5d52a7cd30 100644 --- a/llvm/test/MC/AArch64/SVE2/rax1.s +++ b/llvm/test/MC/AArch64/SVE2/rax1.s @@ -2,7 +2,7 @@ // RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sha3 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-sha3 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/rshrnb.s b/llvm/test/MC/AArch64/SVE2/rshrnb.s index aa54e46d9b5f91..954e2dd028c1bb 100644 --- a/llvm/test/MC/AArch64/SVE2/rshrnb.s +++ b/llvm/test/MC/AArch64/SVE2/rshrnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/rshrnt.s b/llvm/test/MC/AArch64/SVE2/rshrnt.s index f5ad3df778e4d0..af5ed79055316f 100644 --- a/llvm/test/MC/AArch64/SVE2/rshrnt.s +++ b/llvm/test/MC/AArch64/SVE2/rshrnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/rsubhnb.s b/llvm/test/MC/AArch64/SVE2/rsubhnb.s index 
5804679c55fea5..2da9065b02bbc2 100644 --- a/llvm/test/MC/AArch64/SVE2/rsubhnb.s +++ b/llvm/test/MC/AArch64/SVE2/rsubhnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/rsubhnt.s b/llvm/test/MC/AArch64/SVE2/rsubhnt.s index 7aa7dcd15e92bd..6f8923e057e9fc 100644 --- a/llvm/test/MC/AArch64/SVE2/rsubhnt.s +++ b/llvm/test/MC/AArch64/SVE2/rsubhnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/saba.s b/llvm/test/MC/AArch64/SVE2/saba.s index c07649fdcd25eb..142bc80d0490ae 100644 --- a/llvm/test/MC/AArch64/SVE2/saba.s +++ b/llvm/test/MC/AArch64/SVE2/saba.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE2/sabalb.s b/llvm/test/MC/AArch64/SVE2/sabalb.s index 18ccaa0626c1df..2612eb631a54f1 100644 --- a/llvm/test/MC/AArch64/SVE2/sabalb.s +++ b/llvm/test/MC/AArch64/SVE2/sabalb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sabalt.s b/llvm/test/MC/AArch64/SVE2/sabalt.s index 59597f67d70311..829ea8db708596 100644 --- a/llvm/test/MC/AArch64/SVE2/sabalt.s +++ b/llvm/test/MC/AArch64/SVE2/sabalt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sabdlb.s b/llvm/test/MC/AArch64/SVE2/sabdlb.s index e651430aed3642..492a697f812851 100644 --- a/llvm/test/MC/AArch64/SVE2/sabdlb.s +++ b/llvm/test/MC/AArch64/SVE2/sabdlb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | 
FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sabdlt.s b/llvm/test/MC/AArch64/SVE2/sabdlt.s index 6682d1b6fb3f82..61fe7befd605c2 100644 --- a/llvm/test/MC/AArch64/SVE2/sabdlt.s +++ b/llvm/test/MC/AArch64/SVE2/sabdlt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sadalp.s b/llvm/test/MC/AArch64/SVE2/sadalp.s index a7e4f6a6ae8feb..082f794416b785 100644 --- a/llvm/test/MC/AArch64/SVE2/sadalp.s +++ b/llvm/test/MC/AArch64/SVE2/sadalp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/saddlb.s b/llvm/test/MC/AArch64/SVE2/saddlb.s index b425afe8bf0cea..4ecc05bc7e9aa4 100644 --- a/llvm/test/MC/AArch64/SVE2/saddlb.s +++ b/llvm/test/MC/AArch64/SVE2/saddlb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc 
-triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/saddlbt.s b/llvm/test/MC/AArch64/SVE2/saddlbt.s index 1f285f70567ee0..bd0215ab606623 100644 --- a/llvm/test/MC/AArch64/SVE2/saddlbt.s +++ b/llvm/test/MC/AArch64/SVE2/saddlbt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/saddlt.s b/llvm/test/MC/AArch64/SVE2/saddlt.s index ec481e873a0bd4..8ecd5007e0acc3 100644 --- a/llvm/test/MC/AArch64/SVE2/saddlt.s +++ b/llvm/test/MC/AArch64/SVE2/saddlt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/saddwb.s b/llvm/test/MC/AArch64/SVE2/saddwb.s index a8a7f643af3128..b02cdc1fbc797b 100644 --- a/llvm/test/MC/AArch64/SVE2/saddwb.s +++ b/llvm/test/MC/AArch64/SVE2/saddwb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/saddwt.s b/llvm/test/MC/AArch64/SVE2/saddwt.s index f1eeacc36e2ad3..5c3f594800763f 100644 --- a/llvm/test/MC/AArch64/SVE2/saddwt.s +++ b/llvm/test/MC/AArch64/SVE2/saddwt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sbclb.s b/llvm/test/MC/AArch64/SVE2/sbclb.s index 36c9f61b7d0864..3bc2dab47ee5cd 100644 --- a/llvm/test/MC/AArch64/SVE2/sbclb.s +++ b/llvm/test/MC/AArch64/SVE2/sbclb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sbclt.s b/llvm/test/MC/AArch64/SVE2/sbclt.s index 056a6a94e7d129..cce571738bb754 100644 --- a/llvm/test/MC/AArch64/SVE2/sbclt.s +++ b/llvm/test/MC/AArch64/SVE2/sbclt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/shadd.s b/llvm/test/MC/AArch64/SVE2/shadd.s index d54cbe6ebc70e7..c62429005b9752 100644 --- a/llvm/test/MC/AArch64/SVE2/shadd.s +++ b/llvm/test/MC/AArch64/SVE2/shadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/shrnb.s b/llvm/test/MC/AArch64/SVE2/shrnb.s index 61386522d63af0..2fb938cb7058e9 100644 --- a/llvm/test/MC/AArch64/SVE2/shrnb.s +++ b/llvm/test/MC/AArch64/SVE2/shrnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/shrnt.s b/llvm/test/MC/AArch64/SVE2/shrnt.s index 070646d51a69cb..523b928fdf56fd 100644 --- a/llvm/test/MC/AArch64/SVE2/shrnt.s +++ b/llvm/test/MC/AArch64/SVE2/shrnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/shsub.s b/llvm/test/MC/AArch64/SVE2/shsub.s index 1b9f8740e7856e..ec951e513edb44 100644 --- a/llvm/test/MC/AArch64/SVE2/shsub.s +++ b/llvm/test/MC/AArch64/SVE2/shsub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/shsubr.s b/llvm/test/MC/AArch64/SVE2/shsubr.s index 61251738686c0a..ef43a2d35c5ae1 100644 --- a/llvm/test/MC/AArch64/SVE2/shsubr.s +++ b/llvm/test/MC/AArch64/SVE2/shsubr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sli.s b/llvm/test/MC/AArch64/SVE2/sli.s index a835eadfd6516f..c9fc4f5cc01cd0 100644 --- a/llvm/test/MC/AArch64/SVE2/sli.s +++ b/llvm/test/MC/AArch64/SVE2/sli.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sm4e.s b/llvm/test/MC/AArch64/SVE2/sm4e.s index 5d252ec2008112..064fbf396eee03 100644 --- a/llvm/test/MC/AArch64/SVE2/sm4e.s +++ b/llvm/test/MC/AArch64/SVE2/sm4e.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sm4 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-sm4 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/sm4ekey.s b/llvm/test/MC/AArch64/SVE2/sm4ekey.s index 254539a11ed8c0..62673ed48eb9bb 100644 --- a/llvm/test/MC/AArch64/SVE2/sm4ekey.s +++ b/llvm/test/MC/AArch64/SVE2/sm4ekey.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sm4 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2-sm4 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/smaxp.s b/llvm/test/MC/AArch64/SVE2/smaxp.s index a28af2e33c6a68..b7de4a6665bcdb 100644 --- a/llvm/test/MC/AArch64/SVE2/smaxp.s +++ 
b/llvm/test/MC/AArch64/SVE2/smaxp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sminp.s b/llvm/test/MC/AArch64/SVE2/sminp.s index 60f9eebe751b25..2915230b029df3 100644 --- a/llvm/test/MC/AArch64/SVE2/sminp.s +++ b/llvm/test/MC/AArch64/SVE2/sminp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/smlalb.s b/llvm/test/MC/AArch64/SVE2/smlalb.s index d0d173185f48fc..724c480ee284aa 100644 --- a/llvm/test/MC/AArch64/SVE2/smlalb.s +++ b/llvm/test/MC/AArch64/SVE2/smlalb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/smlalt.s b/llvm/test/MC/AArch64/SVE2/smlalt.s index 6358271bcf342b..e35ce36e667249 100644 
--- a/llvm/test/MC/AArch64/SVE2/smlalt.s +++ b/llvm/test/MC/AArch64/SVE2/smlalt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/smlslb.s b/llvm/test/MC/AArch64/SVE2/smlslb.s index b73fe155bd67c8..41956d0ac42a28 100644 --- a/llvm/test/MC/AArch64/SVE2/smlslb.s +++ b/llvm/test/MC/AArch64/SVE2/smlslb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/smlslt.s b/llvm/test/MC/AArch64/SVE2/smlslt.s index 689708d99ebd3b..f88be00b8d1303 100644 --- a/llvm/test/MC/AArch64/SVE2/smlslt.s +++ b/llvm/test/MC/AArch64/SVE2/smlslt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/smulh.s 
b/llvm/test/MC/AArch64/SVE2/smulh.s index dcb3696234438d..0e31bfb1917938 100644 --- a/llvm/test/MC/AArch64/SVE2/smulh.s +++ b/llvm/test/MC/AArch64/SVE2/smulh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/smullb.s b/llvm/test/MC/AArch64/SVE2/smullb.s index 78299c9e156cac..84d848413da841 100644 --- a/llvm/test/MC/AArch64/SVE2/smullb.s +++ b/llvm/test/MC/AArch64/SVE2/smullb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/smullt.s b/llvm/test/MC/AArch64/SVE2/smullt.s index f25a510d978f72..774443afc3afc8 100644 --- a/llvm/test/MC/AArch64/SVE2/smullt.s +++ b/llvm/test/MC/AArch64/SVE2/smullt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR 
diff --git a/llvm/test/MC/AArch64/SVE2/splice.s b/llvm/test/MC/AArch64/SVE2/splice.s index d23071ad600e15..34b5fe33674438 100644 --- a/llvm/test/MC/AArch64/SVE2/splice.s +++ b/llvm/test/MC/AArch64/SVE2/splice.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqabs.s b/llvm/test/MC/AArch64/SVE2/sqabs.s index 437819b79031c8..574961c834b891 100644 --- a/llvm/test/MC/AArch64/SVE2/sqabs.s +++ b/llvm/test/MC/AArch64/SVE2/sqabs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqadd.s b/llvm/test/MC/AArch64/SVE2/sqadd.s index 8e76ec6c78b5ea..d08c802dd06ffe 100644 --- a/llvm/test/MC/AArch64/SVE2/sqadd.s +++ b/llvm/test/MC/AArch64/SVE2/sqadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: 
| FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqcadd.s b/llvm/test/MC/AArch64/SVE2/sqcadd.s index 0349ddda6b1242..01e6b7222de64f 100644 --- a/llvm/test/MC/AArch64/SVE2/sqcadd.s +++ b/llvm/test/MC/AArch64/SVE2/sqcadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmlalb.s b/llvm/test/MC/AArch64/SVE2/sqdmlalb.s index 7d05dcaf4cf008..8c54b1e4d14d7f 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmlalb.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmlalb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmlalbt.s b/llvm/test/MC/AArch64/SVE2/sqdmlalbt.s index 1fabef483cb1d7..7250d95686b22f 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmlalbt.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmlalbt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST 
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmlalt.s b/llvm/test/MC/AArch64/SVE2/sqdmlalt.s index c88da9931a7448..b3339853923256 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmlalt.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmlalt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmlslb.s b/llvm/test/MC/AArch64/SVE2/sqdmlslb.s index 69937c8ef2444f..1cb3774debf281 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmlslb.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmlslb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmlslbt.s b/llvm/test/MC/AArch64/SVE2/sqdmlslbt.s index a166523ae67fa5..18c5ecc36fc802 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmlslbt.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmlslbt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmlslt.s b/llvm/test/MC/AArch64/SVE2/sqdmlslt.s index 1eb7d15dd5c985..986f8f49b588c2 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmlslt.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmlslt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmulh.s b/llvm/test/MC/AArch64/SVE2/sqdmulh.s index e833f4c3bcfd98..9f6c0a6069f5ac 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmulh.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmulh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmullb.s b/llvm/test/MC/AArch64/SVE2/sqdmullb.s index 6b06b2515186b2..272a9719c8040d 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmullb.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmullb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqdmullt.s b/llvm/test/MC/AArch64/SVE2/sqdmullt.s index a051d3fefd2b88..7bcce2b833e826 100644 --- a/llvm/test/MC/AArch64/SVE2/sqdmullt.s +++ b/llvm/test/MC/AArch64/SVE2/sqdmullt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqneg.s b/llvm/test/MC/AArch64/SVE2/sqneg.s index 42d84836425a4f..21c56b753212a3 100644 --- a/llvm/test/MC/AArch64/SVE2/sqneg.s +++ b/llvm/test/MC/AArch64/SVE2/sqneg.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrdcmlah.s b/llvm/test/MC/AArch64/SVE2/sqrdcmlah.s index 267ac0b4cadbc6..81af1dc58c9152 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrdcmlah.s +++ b/llvm/test/MC/AArch64/SVE2/sqrdcmlah.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrdmlah.s b/llvm/test/MC/AArch64/SVE2/sqrdmlah.s index af31b9b9b64be8..d7ccc9ce0199f2 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrdmlah.s +++ b/llvm/test/MC/AArch64/SVE2/sqrdmlah.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrdmlsh.s b/llvm/test/MC/AArch64/SVE2/sqrdmlsh.s index 25d7b2efd8527a..050b02245ded67 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrdmlsh.s +++ b/llvm/test/MC/AArch64/SVE2/sqrdmlsh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrdmulh.s b/llvm/test/MC/AArch64/SVE2/sqrdmulh.s index 3791327d40abeb..8dbc2558150179 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrdmulh.s +++ b/llvm/test/MC/AArch64/SVE2/sqrdmulh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrshl.s b/llvm/test/MC/AArch64/SVE2/sqrshl.s index 0a8c9514a6a66d..0cb431fba3eaaf 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrshl.s +++ b/llvm/test/MC/AArch64/SVE2/sqrshl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrshlr.s b/llvm/test/MC/AArch64/SVE2/sqrshlr.s index 70644cfea42f6b..bcbaf28214e0d6 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrshlr.s +++ b/llvm/test/MC/AArch64/SVE2/sqrshlr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrnb.s b/llvm/test/MC/AArch64/SVE2/sqrshrnb.s index 9dde9933121554..d53aa99964f69c 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrshrnb.s +++ 
b/llvm/test/MC/AArch64/SVE2/sqrshrnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrnt.s b/llvm/test/MC/AArch64/SVE2/sqrshrnt.s index afd518ea41c02f..f5a609f8b5dc5f 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrshrnt.s +++ b/llvm/test/MC/AArch64/SVE2/sqrshrnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrunb.s b/llvm/test/MC/AArch64/SVE2/sqrshrunb.s index 471271cb36defa..12ad7a8d9e8220 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrshrunb.s +++ b/llvm/test/MC/AArch64/SVE2/sqrshrunb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrunt.s b/llvm/test/MC/AArch64/SVE2/sqrshrunt.s index 
1c71e5240ce8b0..9314d45894e6b5 100644 --- a/llvm/test/MC/AArch64/SVE2/sqrshrunt.s +++ b/llvm/test/MC/AArch64/SVE2/sqrshrunt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqshl.s b/llvm/test/MC/AArch64/SVE2/sqshl.s index 9548d99ba704b8..9c409c2dd8b755 100644 --- a/llvm/test/MC/AArch64/SVE2/sqshl.s +++ b/llvm/test/MC/AArch64/SVE2/sqshl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqshlr.s b/llvm/test/MC/AArch64/SVE2/sqshlr.s index 819ea1757237d2..07867121f32837 100644 --- a/llvm/test/MC/AArch64/SVE2/sqshlr.s +++ b/llvm/test/MC/AArch64/SVE2/sqshlr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE2/sqshlu.s b/llvm/test/MC/AArch64/SVE2/sqshlu.s index cf4138027929c3..13667f6021806f 100644 --- a/llvm/test/MC/AArch64/SVE2/sqshlu.s +++ b/llvm/test/MC/AArch64/SVE2/sqshlu.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqshrnb.s b/llvm/test/MC/AArch64/SVE2/sqshrnb.s index ffe672501fb568..30e5c17e2f3d6b 100644 --- a/llvm/test/MC/AArch64/SVE2/sqshrnb.s +++ b/llvm/test/MC/AArch64/SVE2/sqshrnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqshrnt.s b/llvm/test/MC/AArch64/SVE2/sqshrnt.s index 24bfcf4f829cca..71f731d17c32e0 100644 --- a/llvm/test/MC/AArch64/SVE2/sqshrnt.s +++ b/llvm/test/MC/AArch64/SVE2/sqshrnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // 
RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqshrunb.s b/llvm/test/MC/AArch64/SVE2/sqshrunb.s index 8811900c5ed27d..a48d270ca0b3a1 100644 --- a/llvm/test/MC/AArch64/SVE2/sqshrunb.s +++ b/llvm/test/MC/AArch64/SVE2/sqshrunb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqshrunt.s b/llvm/test/MC/AArch64/SVE2/sqshrunt.s index 50005c2badc7d3..4d5f1977d1aa6f 100644 --- a/llvm/test/MC/AArch64/SVE2/sqshrunt.s +++ b/llvm/test/MC/AArch64/SVE2/sqshrunt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqsub.s b/llvm/test/MC/AArch64/SVE2/sqsub.s index 4717bb3fc2d3bf..4ab71b00bb7d30 100644 --- a/llvm/test/MC/AArch64/SVE2/sqsub.s +++ b/llvm/test/MC/AArch64/SVE2/sqsub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // 
RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqsubr.s b/llvm/test/MC/AArch64/SVE2/sqsubr.s index 6b911b6b584484..34532f90184b95 100644 --- a/llvm/test/MC/AArch64/SVE2/sqsubr.s +++ b/llvm/test/MC/AArch64/SVE2/sqsubr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqxtnb.s b/llvm/test/MC/AArch64/SVE2/sqxtnb.s index 95a7eb6ce167f9..2cb979336bbfb4 100644 --- a/llvm/test/MC/AArch64/SVE2/sqxtnb.s +++ b/llvm/test/MC/AArch64/SVE2/sqxtnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqxtnt.s b/llvm/test/MC/AArch64/SVE2/sqxtnt.s index 7db4905d71c317..17124ea11322b6 100644 --- a/llvm/test/MC/AArch64/SVE2/sqxtnt.s +++ b/llvm/test/MC/AArch64/SVE2/sqxtnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck 
%s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqxtunb.s b/llvm/test/MC/AArch64/SVE2/sqxtunb.s index 6dcecc8646c70f..df20b9ce0f2fc1 100644 --- a/llvm/test/MC/AArch64/SVE2/sqxtunb.s +++ b/llvm/test/MC/AArch64/SVE2/sqxtunb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sqxtunt.s b/llvm/test/MC/AArch64/SVE2/sqxtunt.s index 2ba103a66fa287..3ac584e2b286bb 100644 --- a/llvm/test/MC/AArch64/SVE2/sqxtunt.s +++ b/llvm/test/MC/AArch64/SVE2/sqxtunt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/srhadd.s b/llvm/test/MC/AArch64/SVE2/srhadd.s index 5d0375a7f8a219..1d342f1042dee4 100644 --- a/llvm/test/MC/AArch64/SVE2/srhadd.s +++ b/llvm/test/MC/AArch64/SVE2/srhadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sri.s b/llvm/test/MC/AArch64/SVE2/sri.s index 107bae5267502f..54fdccff629b94 100644 --- a/llvm/test/MC/AArch64/SVE2/sri.s +++ b/llvm/test/MC/AArch64/SVE2/sri.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/srshl.s b/llvm/test/MC/AArch64/SVE2/srshl.s index 323ce653beee77..349297467eeab1 100644 --- a/llvm/test/MC/AArch64/SVE2/srshl.s +++ b/llvm/test/MC/AArch64/SVE2/srshl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/srshlr.s b/llvm/test/MC/AArch64/SVE2/srshlr.s index e2f488f1d262c8..5c4ee8f14a7d1a 100644 --- a/llvm/test/MC/AArch64/SVE2/srshlr.s +++ b/llvm/test/MC/AArch64/SVE2/srshlr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/srshr.s b/llvm/test/MC/AArch64/SVE2/srshr.s index 6a5bab3c882926..2771769b6d1727 100644 --- a/llvm/test/MC/AArch64/SVE2/srshr.s +++ b/llvm/test/MC/AArch64/SVE2/srshr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/srsra.s b/llvm/test/MC/AArch64/SVE2/srsra.s index ef6f95efa0ef31..3d01a3a979b255 100644 --- a/llvm/test/MC/AArch64/SVE2/srsra.s +++ b/llvm/test/MC/AArch64/SVE2/srsra.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sshllb.s b/llvm/test/MC/AArch64/SVE2/sshllb.s index 8b486050b9f08c..f8685ee06ebae5 100644 --- a/llvm/test/MC/AArch64/SVE2/sshllb.s +++ b/llvm/test/MC/AArch64/SVE2/sshllb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// 
RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/sshllt.s b/llvm/test/MC/AArch64/SVE2/sshllt.s index 70e766ff466801..828c1251092afe 100644 --- a/llvm/test/MC/AArch64/SVE2/sshllt.s +++ b/llvm/test/MC/AArch64/SVE2/sshllt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ssra.s b/llvm/test/MC/AArch64/SVE2/ssra.s index ab836a1b8a6290..49b99d2a25eed8 100644 --- a/llvm/test/MC/AArch64/SVE2/ssra.s +++ b/llvm/test/MC/AArch64/SVE2/ssra.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ssublb.s b/llvm/test/MC/AArch64/SVE2/ssublb.s index c006b0b2758a6c..924b135d28bef9 100644 --- a/llvm/test/MC/AArch64/SVE2/ssublb.s +++ b/llvm/test/MC/AArch64/SVE2/ssublb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ssublbt.s b/llvm/test/MC/AArch64/SVE2/ssublbt.s index c9fd11f35f932c..584decded4bbcc 100644 --- a/llvm/test/MC/AArch64/SVE2/ssublbt.s +++ b/llvm/test/MC/AArch64/SVE2/ssublbt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ssublt.s b/llvm/test/MC/AArch64/SVE2/ssublt.s index d92135ea3abc36..bb2e6a77fe7159 100644 --- a/llvm/test/MC/AArch64/SVE2/ssublt.s +++ b/llvm/test/MC/AArch64/SVE2/ssublt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ssubltb.s b/llvm/test/MC/AArch64/SVE2/ssubltb.s index 889ea63a9b6e5f..4eaa3ff66fd2ea 100644 --- a/llvm/test/MC/AArch64/SVE2/ssubltb.s +++ b/llvm/test/MC/AArch64/SVE2/ssubltb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ssubwb.s b/llvm/test/MC/AArch64/SVE2/ssubwb.s index ae625fc83d8ab2..f0bbbbddcffe6b 100644 --- a/llvm/test/MC/AArch64/SVE2/ssubwb.s +++ b/llvm/test/MC/AArch64/SVE2/ssubwb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ssubwt.s b/llvm/test/MC/AArch64/SVE2/ssubwt.s index 13bad073db887c..115d68e15b7102 100644 --- a/llvm/test/MC/AArch64/SVE2/ssubwt.s +++ b/llvm/test/MC/AArch64/SVE2/ssubwt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/stnt1b.s b/llvm/test/MC/AArch64/SVE2/stnt1b.s index cdbd2d489d950f..3725c4443a586e 100644 --- a/llvm/test/MC/AArch64/SVE2/stnt1b.s +++ 
b/llvm/test/MC/AArch64/SVE2/stnt1b.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/stnt1d.s b/llvm/test/MC/AArch64/SVE2/stnt1d.s index 346f4a9e22b49a..453548b5eb459e 100644 --- a/llvm/test/MC/AArch64/SVE2/stnt1d.s +++ b/llvm/test/MC/AArch64/SVE2/stnt1d.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/stnt1h.s b/llvm/test/MC/AArch64/SVE2/stnt1h.s index 63b1c3e90ff984..ba604bc03ae49f 100644 --- a/llvm/test/MC/AArch64/SVE2/stnt1h.s +++ b/llvm/test/MC/AArch64/SVE2/stnt1h.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc 
-triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/stnt1w.s b/llvm/test/MC/AArch64/SVE2/stnt1w.s index 6aa7a0b39174a2..e95b6a252abe7a 100644 --- a/llvm/test/MC/AArch64/SVE2/stnt1w.s +++ b/llvm/test/MC/AArch64/SVE2/stnt1w.s @@ -2,7 +2,7 @@ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s 2>&1 \ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST diff --git a/llvm/test/MC/AArch64/SVE2/subhnb.s b/llvm/test/MC/AArch64/SVE2/subhnb.s index 8a3a434a158dd0..041ac90e5dc455 100644 --- a/llvm/test/MC/AArch64/SVE2/subhnb.s +++ b/llvm/test/MC/AArch64/SVE2/subhnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/subhnt.s b/llvm/test/MC/AArch64/SVE2/subhnt.s index ec3599dffb459f..92614f704f90bc 100644 --- a/llvm/test/MC/AArch64/SVE2/subhnt.s +++ b/llvm/test/MC/AArch64/SVE2/subhnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/suqadd.s b/llvm/test/MC/AArch64/SVE2/suqadd.s index e450fa7e20179f..b210773c83cd56 100644 --- a/llvm/test/MC/AArch64/SVE2/suqadd.s +++ b/llvm/test/MC/AArch64/SVE2/suqadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/tbl.s b/llvm/test/MC/AArch64/SVE2/tbl.s index e3f764a78acd70..0c52a20f42acfa 100644 --- a/llvm/test/MC/AArch64/SVE2/tbl.s +++ b/llvm/test/MC/AArch64/SVE2/tbl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/tbx.s b/llvm/test/MC/AArch64/SVE2/tbx.s index 45f3c5d0ce2a60..0596b51a054a6b 100644 --- a/llvm/test/MC/AArch64/SVE2/tbx.s +++ b/llvm/test/MC/AArch64/SVE2/tbx.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uaba.s b/llvm/test/MC/AArch64/SVE2/uaba.s index ff241366f78799..0fec7521d9d72e 100644 --- a/llvm/test/MC/AArch64/SVE2/uaba.s +++ b/llvm/test/MC/AArch64/SVE2/uaba.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uabalb.s b/llvm/test/MC/AArch64/SVE2/uabalb.s index 964b9f79edd788..b16b1b99961b41 100644 --- a/llvm/test/MC/AArch64/SVE2/uabalb.s +++ b/llvm/test/MC/AArch64/SVE2/uabalb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uabalt.s b/llvm/test/MC/AArch64/SVE2/uabalt.s index db529fed0f0a2f..9483adeb76aafa 100644 --- a/llvm/test/MC/AArch64/SVE2/uabalt.s +++ b/llvm/test/MC/AArch64/SVE2/uabalt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uabdlb.s b/llvm/test/MC/AArch64/SVE2/uabdlb.s index d38b1b0db43031..4d5aea0f5d2ef0 100644 --- a/llvm/test/MC/AArch64/SVE2/uabdlb.s +++ b/llvm/test/MC/AArch64/SVE2/uabdlb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uabdlt.s b/llvm/test/MC/AArch64/SVE2/uabdlt.s index 70b746e88bb99b..04a71f26325c3a 100644 --- a/llvm/test/MC/AArch64/SVE2/uabdlt.s +++ b/llvm/test/MC/AArch64/SVE2/uabdlt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uadalp.s b/llvm/test/MC/AArch64/SVE2/uadalp.s index 31d2d8ace3373e..8f044824ebed10 100644 --- a/llvm/test/MC/AArch64/SVE2/uadalp.s +++ b/llvm/test/MC/AArch64/SVE2/uadalp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uaddlb.s b/llvm/test/MC/AArch64/SVE2/uaddlb.s index 9f44ab38e99cc6..482e2c3d7b3ea4 100644 --- a/llvm/test/MC/AArch64/SVE2/uaddlb.s +++ b/llvm/test/MC/AArch64/SVE2/uaddlb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uaddlt.s b/llvm/test/MC/AArch64/SVE2/uaddlt.s index 19626f08aae641..4d7e7a32a4ef6b 100644 --- a/llvm/test/MC/AArch64/SVE2/uaddlt.s +++ b/llvm/test/MC/AArch64/SVE2/uaddlt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uaddwb.s b/llvm/test/MC/AArch64/SVE2/uaddwb.s index ce7395152ddd33..7334516176137b 100644 --- a/llvm/test/MC/AArch64/SVE2/uaddwb.s +++ 
b/llvm/test/MC/AArch64/SVE2/uaddwb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uaddwt.s b/llvm/test/MC/AArch64/SVE2/uaddwt.s index 7adab15d8a14d2..668fccbe997c93 100644 --- a/llvm/test/MC/AArch64/SVE2/uaddwt.s +++ b/llvm/test/MC/AArch64/SVE2/uaddwt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uhadd.s b/llvm/test/MC/AArch64/SVE2/uhadd.s index 5fbffc13f4cd79..277d08956558e1 100644 --- a/llvm/test/MC/AArch64/SVE2/uhadd.s +++ b/llvm/test/MC/AArch64/SVE2/uhadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uhsub.s b/llvm/test/MC/AArch64/SVE2/uhsub.s index 1a12620c940f83..650b18270250ab 100644 
--- a/llvm/test/MC/AArch64/SVE2/uhsub.s +++ b/llvm/test/MC/AArch64/SVE2/uhsub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uhsubr.s b/llvm/test/MC/AArch64/SVE2/uhsubr.s index d8eeb4124527bf..101eaeb0945a90 100644 --- a/llvm/test/MC/AArch64/SVE2/uhsubr.s +++ b/llvm/test/MC/AArch64/SVE2/uhsubr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/umaxp.s b/llvm/test/MC/AArch64/SVE2/umaxp.s index 65e9555a6e02ad..305341d371f5f5 100644 --- a/llvm/test/MC/AArch64/SVE2/umaxp.s +++ b/llvm/test/MC/AArch64/SVE2/umaxp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uminp.s b/llvm/test/MC/AArch64/SVE2/uminp.s 
index 3b9033f4758cc3..bf47d30e756ae7 100644 --- a/llvm/test/MC/AArch64/SVE2/uminp.s +++ b/llvm/test/MC/AArch64/SVE2/uminp.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/umlalb.s b/llvm/test/MC/AArch64/SVE2/umlalb.s index 82660736b385e4..b48a692c704010 100644 --- a/llvm/test/MC/AArch64/SVE2/umlalb.s +++ b/llvm/test/MC/AArch64/SVE2/umlalb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/umlalt.s b/llvm/test/MC/AArch64/SVE2/umlalt.s index 4077df70545f57..989d33748b0d74 100644 --- a/llvm/test/MC/AArch64/SVE2/umlalt.s +++ b/llvm/test/MC/AArch64/SVE2/umlalt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git 
a/llvm/test/MC/AArch64/SVE2/umlslb.s b/llvm/test/MC/AArch64/SVE2/umlslb.s index 9dd1f977bb50d0..824232e5ff2efa 100644 --- a/llvm/test/MC/AArch64/SVE2/umlslb.s +++ b/llvm/test/MC/AArch64/SVE2/umlslb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/umlslt.s b/llvm/test/MC/AArch64/SVE2/umlslt.s index 3f8de7dff2cad5..a112adb0db3d2d 100644 --- a/llvm/test/MC/AArch64/SVE2/umlslt.s +++ b/llvm/test/MC/AArch64/SVE2/umlslt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/umulh.s b/llvm/test/MC/AArch64/SVE2/umulh.s index 749c27571efa1f..d1c0ed976ebf1d 100644 --- a/llvm/test/MC/AArch64/SVE2/umulh.s +++ b/llvm/test/MC/AArch64/SVE2/umulh.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | 
FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/umullb.s b/llvm/test/MC/AArch64/SVE2/umullb.s index 37cd499cd2e1b7..15846bd97e640d 100644 --- a/llvm/test/MC/AArch64/SVE2/umullb.s +++ b/llvm/test/MC/AArch64/SVE2/umullb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/umullt.s b/llvm/test/MC/AArch64/SVE2/umullt.s index 3acaaae293e540..cf2fe2ede98aec 100644 --- a/llvm/test/MC/AArch64/SVE2/umullt.s +++ b/llvm/test/MC/AArch64/SVE2/umullt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqadd.s b/llvm/test/MC/AArch64/SVE2/uqadd.s index be127c4333395d..41937fb6a342a7 100644 --- a/llvm/test/MC/AArch64/SVE2/uqadd.s +++ b/llvm/test/MC/AArch64/SVE2/uqadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc 
-triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqrshl.s b/llvm/test/MC/AArch64/SVE2/uqrshl.s index d25e6d80779a42..ef7e29968291ac 100644 --- a/llvm/test/MC/AArch64/SVE2/uqrshl.s +++ b/llvm/test/MC/AArch64/SVE2/uqrshl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqrshlr.s b/llvm/test/MC/AArch64/SVE2/uqrshlr.s index 68aee0cafdb6de..29688c4d47769e 100644 --- a/llvm/test/MC/AArch64/SVE2/uqrshlr.s +++ b/llvm/test/MC/AArch64/SVE2/uqrshlr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqrshrnb.s b/llvm/test/MC/AArch64/SVE2/uqrshrnb.s index 7da923d55b0af0..ec27abc2cc8e17 100644 --- a/llvm/test/MC/AArch64/SVE2/uqrshrnb.s +++ b/llvm/test/MC/AArch64/SVE2/uqrshrnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqrshrnt.s b/llvm/test/MC/AArch64/SVE2/uqrshrnt.s index ce831f5f26cce8..2c0c443a870988 100644 --- a/llvm/test/MC/AArch64/SVE2/uqrshrnt.s +++ b/llvm/test/MC/AArch64/SVE2/uqrshrnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqshl.s b/llvm/test/MC/AArch64/SVE2/uqshl.s index fbe8501651d15f..4344a6a1f64db1 100644 --- a/llvm/test/MC/AArch64/SVE2/uqshl.s +++ b/llvm/test/MC/AArch64/SVE2/uqshl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqshlr.s b/llvm/test/MC/AArch64/SVE2/uqshlr.s index 1d41a4beb51d32..f8fce2eaa5ac4a 100644 --- a/llvm/test/MC/AArch64/SVE2/uqshlr.s +++ b/llvm/test/MC/AArch64/SVE2/uqshlr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqshrnb.s b/llvm/test/MC/AArch64/SVE2/uqshrnb.s index 23a0e30803f367..59d869062961ee 100644 --- a/llvm/test/MC/AArch64/SVE2/uqshrnb.s +++ b/llvm/test/MC/AArch64/SVE2/uqshrnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqshrnt.s b/llvm/test/MC/AArch64/SVE2/uqshrnt.s index d3a0ffab3cfe3f..62cf76dcf55727 100644 --- a/llvm/test/MC/AArch64/SVE2/uqshrnt.s +++ b/llvm/test/MC/AArch64/SVE2/uqshrnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqsub.s b/llvm/test/MC/AArch64/SVE2/uqsub.s index ecb108acb69f8f..dd7700f7dc5692 100644 --- a/llvm/test/MC/AArch64/SVE2/uqsub.s +++ b/llvm/test/MC/AArch64/SVE2/uqsub.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqsubr.s b/llvm/test/MC/AArch64/SVE2/uqsubr.s index ede46a645ad57d..eb924dea24e3f3 100644 --- a/llvm/test/MC/AArch64/SVE2/uqsubr.s +++ b/llvm/test/MC/AArch64/SVE2/uqsubr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqxtnb.s b/llvm/test/MC/AArch64/SVE2/uqxtnb.s index 9dea65c2d2e6c7..8df5de50dbfcd0 100644 --- a/llvm/test/MC/AArch64/SVE2/uqxtnb.s +++ b/llvm/test/MC/AArch64/SVE2/uqxtnb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/uqxtnt.s b/llvm/test/MC/AArch64/SVE2/uqxtnt.s index c12405805f8885..60e7ee66381f69 100644 --- a/llvm/test/MC/AArch64/SVE2/uqxtnt.s +++ b/llvm/test/MC/AArch64/SVE2/uqxtnt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/urecpe.s b/llvm/test/MC/AArch64/SVE2/urecpe.s index ff91435f60d75a..b834b6fc7e3ee4 100644 --- a/llvm/test/MC/AArch64/SVE2/urecpe.s +++ b/llvm/test/MC/AArch64/SVE2/urecpe.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/urhadd.s b/llvm/test/MC/AArch64/SVE2/urhadd.s index 7fc35f598de578..067a1ff1f0e195 100644 --- a/llvm/test/MC/AArch64/SVE2/urhadd.s +++ b/llvm/test/MC/AArch64/SVE2/urhadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/urshl.s b/llvm/test/MC/AArch64/SVE2/urshl.s index fd425df9533bdf..f765f8a7fcd489 100644 --- a/llvm/test/MC/AArch64/SVE2/urshl.s +++ b/llvm/test/MC/AArch64/SVE2/urshl.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/urshlr.s b/llvm/test/MC/AArch64/SVE2/urshlr.s index fee2aaf4c4fdd0..58593f409c37fb 100644 --- a/llvm/test/MC/AArch64/SVE2/urshlr.s +++ b/llvm/test/MC/AArch64/SVE2/urshlr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/urshr.s b/llvm/test/MC/AArch64/SVE2/urshr.s index f2e95b98002389..bb07c1ea4acc46 100644 --- a/llvm/test/MC/AArch64/SVE2/urshr.s +++ b/llvm/test/MC/AArch64/SVE2/urshr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ursqrte.s b/llvm/test/MC/AArch64/SVE2/ursqrte.s index ac66a15ab566e3..872646968953ea 100644 --- a/llvm/test/MC/AArch64/SVE2/ursqrte.s +++ 
b/llvm/test/MC/AArch64/SVE2/ursqrte.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ursra.s b/llvm/test/MC/AArch64/SVE2/ursra.s index 005e5511e7087b..dad04c5c44f146 100644 --- a/llvm/test/MC/AArch64/SVE2/ursra.s +++ b/llvm/test/MC/AArch64/SVE2/ursra.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ushllb.s b/llvm/test/MC/AArch64/SVE2/ushllb.s index ea1d85052f42d7..644e11e123d165 100644 --- a/llvm/test/MC/AArch64/SVE2/ushllb.s +++ b/llvm/test/MC/AArch64/SVE2/ushllb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/ushllt.s b/llvm/test/MC/AArch64/SVE2/ushllt.s index c4f351fb72429e..67daa63f57ceb3 
100644 --- a/llvm/test/MC/AArch64/SVE2/ushllt.s +++ b/llvm/test/MC/AArch64/SVE2/ushllt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/usqadd.s b/llvm/test/MC/AArch64/SVE2/usqadd.s index 4fa372118547da..bf0a2d532956d4 100644 --- a/llvm/test/MC/AArch64/SVE2/usqadd.s +++ b/llvm/test/MC/AArch64/SVE2/usqadd.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/usra.s b/llvm/test/MC/AArch64/SVE2/usra.s index 51e942a49e09e5..b39599b7b051b7 100644 --- a/llvm/test/MC/AArch64/SVE2/usra.s +++ b/llvm/test/MC/AArch64/SVE2/usra.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/usublb.s 
b/llvm/test/MC/AArch64/SVE2/usublb.s index 86ab0485f82ece..518910261f4dee 100644 --- a/llvm/test/MC/AArch64/SVE2/usublb.s +++ b/llvm/test/MC/AArch64/SVE2/usublb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/usublt.s b/llvm/test/MC/AArch64/SVE2/usublt.s index 5cc9f7eeb255df..4e7db387bec6e4 100644 --- a/llvm/test/MC/AArch64/SVE2/usublt.s +++ b/llvm/test/MC/AArch64/SVE2/usublt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/usubwb.s b/llvm/test/MC/AArch64/SVE2/usubwb.s index 34ce06e520f0ba..a093a061cbb3f8 100644 --- a/llvm/test/MC/AArch64/SVE2/usubwb.s +++ b/llvm/test/MC/AArch64/SVE2/usubwb.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/usubwt.s b/llvm/test/MC/AArch64/SVE2/usubwt.s index 3fa8ef34cab475..d857b743c85599 100644 --- a/llvm/test/MC/AArch64/SVE2/usubwt.s +++ b/llvm/test/MC/AArch64/SVE2/usubwt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/whilege.s b/llvm/test/MC/AArch64/SVE2/whilege.s index 566699a17a2a13..720b88977d2875 100644 --- a/llvm/test/MC/AArch64/SVE2/whilege.s +++ b/llvm/test/MC/AArch64/SVE2/whilege.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/whilegt.s b/llvm/test/MC/AArch64/SVE2/whilegt.s index 4c14d50a40d179..dcaa93b3e67bfb 100644 --- a/llvm/test/MC/AArch64/SVE2/whilegt.s +++ b/llvm/test/MC/AArch64/SVE2/whilegt.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc 
-triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/whilehi.s b/llvm/test/MC/AArch64/SVE2/whilehi.s index ae6404e07e53da..052261b58557ce 100644 --- a/llvm/test/MC/AArch64/SVE2/whilehi.s +++ b/llvm/test/MC/AArch64/SVE2/whilehi.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/whilehs.s b/llvm/test/MC/AArch64/SVE2/whilehs.s index 390d61cf792790..e5cbab41e76bc9 100644 --- a/llvm/test/MC/AArch64/SVE2/whilehs.s +++ b/llvm/test/MC/AArch64/SVE2/whilehs.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/whilerw.s b/llvm/test/MC/AArch64/SVE2/whilerw.s index 4772eff64a8fd8..6ec594a30feda1 100644 --- a/llvm/test/MC/AArch64/SVE2/whilerw.s +++ b/llvm/test/MC/AArch64/SVE2/whilerw.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/whilewr.s b/llvm/test/MC/AArch64/SVE2/whilewr.s index c4b4c40ae4cfee..4dd9e372596e40 100644 --- a/llvm/test/MC/AArch64/SVE2/whilewr.s +++ b/llvm/test/MC/AArch64/SVE2/whilewr.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/AArch64/SVE2/xar.s b/llvm/test/MC/AArch64/SVE2/xar.s index 1e40aba3fae5e1..c88ff8b1df9e22 100644 --- a/llvm/test/MC/AArch64/SVE2/xar.s +++ b/llvm/test/MC/AArch64/SVE2/xar.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+streaming-sve < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR From 35b1cfc76f08faabf3f27c0bd054acc7c854a6ca Mon Sep 17 00:00:00 2001 From: Daniel McIntosh Date: Mon, 30 May 2022 13:13:29 -0400 Subject: [PATCH 898/908] [Driver][Modules] Remove dependence on linking support from clang/test/Driver/modules.cpp The new tests in clang/test/Driver/modules.cpp added by D120540 will fail if the toolchain getting tested doesn't support linking. 
This reduces the utility of the test since we would like a failure of this test to reflect a problem with modules. We should already have other tests that validate linking support. Reviewed By: ChuanqiXu Differential Revision: https://reviews.llvm.org/D126669 --- clang/test/Driver/modules.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/clang/test/Driver/modules.cpp b/clang/test/Driver/modules.cpp index 472b07c6579927..f532b5b3c775fd 100644 --- a/clang/test/Driver/modules.cpp +++ b/clang/test/Driver/modules.cpp @@ -76,11 +76,8 @@ FOO; // Check the independent use of -fcxx-modules // -// RUN: %clang -fcxx-modules -std=c++17 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX17-MODULES -// CHECK-CXX17-MODULES: "-fcxx-modules" -// RUN: %clang -fcxx-modules -std=c++14 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX14-MODULES -// CHECK-CXX14-MODULES: "-fcxx-modules" -// RUN: %clang -fcxx-modules -std=c++11 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX11-MODULES -// CHECK-CXX11-MODULES: "-fcxx-modules" -// RUN: %clang -fcxx-modules -std=c++03 -### %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX03-MODULES -// CHECK-CXX03-MODULES: "-fcxx-modules" +// RUN: %clang -fcxx-modules -std=c++17 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX-MODULES +// RUN: %clang -fcxx-modules -std=c++14 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX-MODULES +// RUN: %clang -fcxx-modules -std=c++11 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX-MODULES +// RUN: %clang -fcxx-modules -std=c++03 -### -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-CXX-MODULES +// CHECK-CXX-MODULES: "-fcxx-modules" From b0a1a308f268249565823b7992249393f2a52887 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Tue, 31 May 2022 11:06:15 -0400 Subject: [PATCH 899/908] LangRef: fix bad indentation in allockind bullets --- llvm/docs/LangRef.rst | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git 
a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index f7ebf8aadd47b4..d5540229a4b2a1 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1587,21 +1587,23 @@ example: ``allockind("KIND")`` Describes the behavior of an allocation function. The KIND string contains comma separated entries from the following options: - * "alloc": the function returns a new block of memory or null. - * "realloc": the function returns a new block of memory or null. If the - result is non-null the memory contents from the start of the block up to - the smaller of the original allocation size and the new allocation size - will match that of the ``allocptr`` argument and the ``allocptr`` - argument is invalidated, even if the function returns the same address. - * "free": the function frees the block of memory specified by ``allocptr``. - * "uninitialized": Any newly-allocated memory (either a new block from - a "alloc" function or the enlarged capacity from a "realloc" function) - will be uninitialized. - * "zeroed": Any newly-allocated memory (either a new block from a "alloc" - function or the enlarged capacity from a "realloc" function) will be - zeroed. - * "aligned": the function returns memory aligned according to the - ``allocalign`` parameter. + + * "alloc": the function returns a new block of memory or null. + * "realloc": the function returns a new block of memory or null. If the + result is non-null the memory contents from the start of the block up to + the smaller of the original allocation size and the new allocation size + will match that of the ``allocptr`` argument and the ``allocptr`` + argument is invalidated, even if the function returns the same address. + * "free": the function frees the block of memory specified by ``allocptr``. + * "uninitialized": Any newly-allocated memory (either a new block from + a "alloc" function or the enlarged capacity from a "realloc" function) + will be uninitialized. 
+ * "zeroed": Any newly-allocated memory (either a new block from a "alloc" + function or the enlarged capacity from a "realloc" function) will be + zeroed. + * "aligned": the function returns memory aligned according to the + ``allocalign`` parameter. + The first three options are mutually exclusive, and the remaining options describe more details of how the function behaves. The remaining options are invalid for "free"-type functions. From af0113cf77b3df93495182507cb41e54927188c7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 31 May 2022 16:13:41 +0100 Subject: [PATCH 900/908] [X86] combineEXTRACT_SUBVECTOR - pull out repeated getVectorNumElements() calls. NFC. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fda8a22c1991bb..d24ffeea775e5f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53796,6 +53796,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, EVT InVecVT = InVec.getValueType(); unsigned SizeInBits = VT.getSizeInBits(); unsigned InSizeInBits = InVecVT.getSizeInBits(); + unsigned NumSubElts = VT.getVectorNumElements(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && @@ -53833,9 +53834,8 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, } if (InVec.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getBuildVector( - VT, SDLoc(N), - InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements())); + return DAG.getBuildVector(VT, SDLoc(N), + InVec->ops().slice(IdxVal, NumSubElts)); // If we are extracting from an insert into a larger vector, replace with a // smaller insert if we don't access less than the original subvector. 
Don't @@ -53868,8 +53868,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits); // Attempt to extract from the source of a shuffle vector. - if ((InSizeInBits % SizeInBits) == 0 && - (IdxVal % VT.getVectorNumElements()) == 0) { + if ((InSizeInBits % SizeInBits) == 0 && (IdxVal % NumSubElts) == 0) { SmallVector ShuffleMask; SmallVector ScaledMask; SmallVector ShuffleInputs; @@ -53877,7 +53876,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, // Decode the shuffle mask and scale it so its shuffling subvectors. if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) && scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) { - unsigned SubVecIdx = IdxVal / VT.getVectorNumElements(); + unsigned SubVecIdx = IdxVal / NumSubElts; if (ScaledMask[SubVecIdx] == SM_SentinelUndef) return DAG.getUNDEF(VT); if (ScaledMask[SubVecIdx] == SM_SentinelZero) @@ -53885,7 +53884,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs]; if (Src.getValueSizeInBits() == InSizeInBits) { unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs; - unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements(); + unsigned SrcEltIdx = SrcSubVecIdx * NumSubElts; return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG, SDLoc(N), SizeInBits); } From ae67984ca6d89c7ccdbdca258cd05c151d8b6431 Mon Sep 17 00:00:00 2001 From: David Goldman Date: Tue, 26 Apr 2022 15:30:50 -0400 Subject: [PATCH 901/908] [clangd] ExtractVariable support for C and Objective-C - Use the expression's type for non-C++ as the variable type. This works well, but might not preserve the typedefs due to type canonicalization. - Improve support for Objective-C property references which are represented using `ObjCPropertyRefExpr` and `BuiltinType::PseudoObject`. 
Differential Revision: https://reviews.llvm.org/D124486 --- .../refactor/tweaks/ExtractVariable.cpp | 63 ++++++++-- .../unittests/tweaks/ExtractVariableTests.cpp | 118 +++++++++++++++++- 2 files changed, 164 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp index 942d7eec6dbd85..5efe09fb3cb277 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "AST.h" #include "ParsedAST.h" #include "Protocol.h" #include "Selection.h" @@ -50,6 +51,7 @@ class ExtractionContext { private: bool Extractable = false; const clang::Expr *Expr; + QualType VarType; const SelectionTree::Node *ExprNode; // Stmt before which we will extract const clang::Stmt *InsertionPoint = nullptr; @@ -81,6 +83,31 @@ computeReferencedDecls(const clang::Expr *Expr) { return Visitor.ReferencedDecls; } +static QualType computeVariableType(const Expr *Expr, const ASTContext &Ctx) { + if (Ctx.getLangOpts().CPlusPlus11) + return Ctx.getAutoDeductType(); + + if (Expr->hasPlaceholderType(BuiltinType::PseudoObject)) { + if (const auto *PR = dyn_cast(Expr)) { + if (PR->isMessagingSetter()) { + // Don't support extracting a compound reference like `self.prop += 1` + // since the meaning changes after extraction since we'll no longer call + // the setter. Non compound access like `self.prop = 1` is invalid since + // it returns nil (setter method must have a void return type). 
+ return QualType(); + } else if (PR->isMessagingGetter()) { + if (PR->isExplicitProperty()) + return PR->getExplicitProperty()->getType(); + else + return PR->getImplicitPropertyGetter()->getReturnType(); + } + } else { + return QualType(); + } + } + return Expr->getType(); +} + ExtractionContext::ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM, const ASTContext &Ctx) @@ -90,6 +117,12 @@ ExtractionContext::ExtractionContext(const SelectionTree::Node *Node, InsertionPoint = computeInsertionPoint(); if (InsertionPoint) Extractable = true; + VarType = computeVariableType(Expr, Ctx); + if (VarType.isNull()) + Extractable = false; + else + // Strip the outer nullability since it's not common for local variables. + AttributedType::stripOuterNullability(VarType); } // checks whether extracting before InsertionPoint will take a @@ -173,9 +206,9 @@ ExtractionContext::insertDeclaration(llvm::StringRef VarName, toHalfOpenFileRange(SM, Ctx.getLangOpts(), InsertionPoint->getSourceRange()) ->getBegin(); - // FIXME: Replace auto with explicit type and add &/&& as necessary - std::string ExtractedVarDecl = std::string("auto ") + VarName.str() + " = " + - ExtractionCode.str() + "; "; + std::string ExtractedVarDecl = + printType(VarType, ExprNode->getDeclContext(), VarName) + " = " + + ExtractionCode.str() + "; "; return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl); } @@ -365,15 +398,27 @@ bool eligibleForExtraction(const SelectionTree::Node *N) { return false; // Void expressions can't be assigned to variables. - if (const Type *ExprType = E->getType().getTypePtrOrNull()) - if (ExprType->isVoidType()) - return false; + const Type *ExprType = E->getType().getTypePtrOrNull(); + if (!ExprType || ExprType->isVoidType()) + return false; + + // Must know the type of the result in order to spell it, or instead use + // `auto` in C++. 
+ if (!N->getDeclContext().getParentASTContext().getLangOpts().CPlusPlus11 && + !ExprType) + return false; // A plain reference to a name (e.g. variable) isn't worth extracting. // FIXME: really? What if it's e.g. `std::is_same::value`? - if (llvm::isa(E) || llvm::isa(E)) + if (llvm::isa(E)) return false; + // Similarly disallow extraction for member exprs with an implicit `this`. + if (const auto *ME = dyn_cast(E)) + if (const auto *TE = dyn_cast(ME->getBase()->IgnoreImpCasts())) + if (TE->isImplicit()) + return false; + // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful. // FIXME: we could still hoist the assignment, and leave the variable there? ParsedBinaryOperator BinOp; @@ -460,10 +505,6 @@ bool ExtractVariable::prepare(const Selection &Inputs) { if (Inputs.SelectionBegin == Inputs.SelectionEnd) return false; const ASTContext &Ctx = Inputs.AST->getASTContext(); - // FIXME: Enable non-C++ cases once we start spelling types explicitly instead - // of making use of auto. 
- if (!Ctx.getLangOpts().CPlusPlus) - return false; const SourceManager &SM = Inputs.AST->getSourceManager(); if (const SelectionTree::Node *N = computeExtractedExpr(Inputs.ASTSelection.commonAncestor())) diff --git a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp index 9740ea2d7098c6..bd06f318b2a991 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp @@ -24,7 +24,7 @@ TEST_F(ExtractVariableTest, Test) { int z; } t; // return statement - return [[[[t.b[[a]]r]](t.z)]]; + return [[[[t.b[[a]]r]]([[t.z]])]]; } void f() { int a = [[5 +]] [[4 * [[[[xyz]]()]]]]; @@ -59,11 +59,22 @@ TEST_F(ExtractVariableTest, Test) { EXPECT_AVAILABLE(AvailableCases); ExtraArgs = {"-xc"}; - const char *AvailableButC = R"cpp( + const char *AvailableC = R"cpp( void foo() { int x = [[1]]; })cpp"; - EXPECT_UNAVAILABLE(AvailableButC); + EXPECT_AVAILABLE(AvailableC); + ExtraArgs = {"-xobjective-c"}; + const char *AvailableObjC = R"cpp( + __attribute__((objc_root_class)) + @interface Foo + @end + @implementation Foo + - (void)method { + int x = [[1 + 2]]; + } + @end)cpp"; + EXPECT_AVAILABLE(AvailableObjC); ExtraArgs = {}; const char *NoCrashCases = R"cpp( @@ -79,10 +90,12 @@ TEST_F(ExtractVariableTest, Test) { const char *UnavailableCases = R"cpp( int xyz(int a = [[1]]) { struct T { - int bar(int a = [[1]]); + int bar(int a = [[1]]) { + int b = [[z]]; + } int z = [[1]]; } t; - return [[t]].bar([[[[t]].z]]); + return [[t]].bar([[t]].z); } void v() { return; } // function default argument @@ -123,7 +136,7 @@ TEST_F(ExtractVariableTest, Test) { EXPECT_UNAVAILABLE(UnavailableCases); // vector of pairs of input and output strings - const std::vector> InputOutputs = { + std::vector> InputOutputs = { // extraction from variable declaration/assignment {R"cpp(void varDecl() { int a = 5 * (4 + (3 [[- 1)]]); @@ -291,6 
+304,99 @@ TEST_F(ExtractVariableTest, Test) { for (const auto &IO : InputOutputs) { EXPECT_EQ(IO.second, apply(IO.first)) << IO.first; } + + ExtraArgs = {"-xobjective-c"}; + EXPECT_UNAVAILABLE(R"cpp( + __attribute__((objc_root_class)) + @interface Foo + - (void)setMethod1:(int)a; + - (int)method1; + @property int prop1; + @end + @implementation Foo + - (void)method { + [[self.method1]] = 1; + [[self.method1]] += 1; + [[self.prop1]] = 1; + [[self.prop1]] += 1; + } + @end)cpp"); + InputOutputs = { + // Function Pointers + {R"cpp(struct Handlers { + void (*handlerFunc)(int); + }; + void runFunction(void (*func)(int)) {} + void f(struct Handlers *handler) { + runFunction([[handler->handlerFunc]]); + })cpp", + R"cpp(struct Handlers { + void (*handlerFunc)(int); + }; + void runFunction(void (*func)(int)) {} + void f(struct Handlers *handler) { + void (*placeholder)(int) = handler->handlerFunc; runFunction(placeholder); + })cpp"}, + {R"cpp(int (*foo(char))(int); + void bar() { + (void)[[foo('c')]]; + })cpp", + R"cpp(int (*foo(char))(int); + void bar() { + int (*placeholder)(int) = foo('c'); (void)placeholder; + })cpp"}, + {R"cpp(typedef long NSInteger; + void varDecl() { + NSInteger a = 2 * 5; + NSInteger b = [[a * 7]] + 3; + })cpp", + R"cpp(typedef long NSInteger; + void varDecl() { + NSInteger a = 2 * 5; + long placeholder = a * 7; NSInteger b = placeholder + 3; + })cpp"}, + // Support ObjC property references (explicit property getter). + {R"cpp(__attribute__((objc_root_class)) + @interface Foo + @property int prop1; + @end + @implementation Foo + - (void)method { + int x = [[self.prop1]] + 1; + } + @end)cpp", + R"cpp(__attribute__((objc_root_class)) + @interface Foo + @property int prop1; + @end + @implementation Foo + - (void)method { + int placeholder = self.prop1; int x = placeholder + 1; + } + @end)cpp"}, + // Support ObjC property references (implicit property getter). 
+ {R"cpp(__attribute__((objc_root_class)) + @interface Foo + - (int)method1; + @end + @implementation Foo + - (void)method { + int x = [[self.method1]] + 1; + } + @end)cpp", + R"cpp(__attribute__((objc_root_class)) + @interface Foo + - (int)method1; + @end + @implementation Foo + - (void)method { + int placeholder = self.method1; int x = placeholder + 1; + } + @end)cpp"}, + }; + for (const auto &IO : InputOutputs) { + EXPECT_EQ(IO.second, apply(IO.first)) << IO.first; + } } } // namespace From e8860bee283855b2e1ce7eefb085a86e9ae3262b Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Thu, 12 May 2022 09:27:48 -0400 Subject: [PATCH 902/908] [AMDGPU] gfx11 Image instructions MC layer support for instructions in the MIMG encoding(Image instructions). Contributors: Carl Ritson Patch 13/N for upstreaming of AMDGPU gfx11 architecture. Depends on D125992 Reviewed By: rampitec, #amdgpu Differential Revision: https://reviews.llvm.org/D126463 --- .../Disassembler/AMDGPUDisassembler.cpp | 17 +- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 598 +++++++++++++----- llvm/lib/Target/AMDGPU/SIInstrFormats.td | 44 +- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1 + llvm/test/MC/AMDGPU/gfx11_asm_mimg.s | 308 +++++++++ llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s | 148 +++++ .../MC/Disassembler/AMDGPU/gfx11_mimg.txt | 304 +++++++++ 7 files changed, 1248 insertions(+), 172 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_mimg.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 4d5c6d1875860b..ab257f6006a3e1 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -18,6 +18,8 @@ #include "Disassembler/AMDGPUDisassembler.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include 
"SIDefines.h" +#include "SIRegisterInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm-c/DisassemblerTypes.h" @@ -593,8 +595,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = MCDisassembler::Fail; } else { for (unsigned i = 0; i < NSAArgs; ++i) { - MI.insert(MI.begin() + VAddr0Idx + 1 + i, - decodeOperand_VGPR_32(Bytes[i])); + const unsigned VAddrIdx = VAddr0Idx + 1 + i; + auto VAddrRCID = MCII->get(MI.getOpcode()).OpInfo[VAddrIdx].RegClass; + MI.insert(MI.begin() + VAddrIdx, + createRegOperand(VAddrRCID, Bytes[i])); } Bytes = Bytes.slice(4 * NSAWords); } @@ -741,7 +745,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { bool IsNSA = false; unsigned AddrSize = Info->VAddrDwords; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { + if (isGFX10Plus()) { unsigned DimIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); int A16Idx = @@ -753,7 +757,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AddrSize = AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI)); - IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA; + IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA || + Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA; if (!IsNSA) { if (AddrSize > 8) AddrSize = 16; @@ -804,9 +809,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { } } + // If not using NSA on GFX10+, widen address register to correct size. unsigned NewVAddr0 = AMDGPU::NoRegister; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA && - AddrSize != Info->VAddrDwords) { + if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) { unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg(); unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0); VAddr0 = (VAddrSub0 != 0) ? 
VAddrSub0 : VAddr0; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 6358547a983e34..273bb1c22c1322 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -14,6 +14,8 @@ // - MIMGEncGfx90a: encoding for gfx90a for atomics // - MIMGEncGfx10Default: gfx10 default (non-NSA) encoding // - MIMGEncGfx10NSA: gfx10 NSA encoding +// - MIMGEncGfx11Default: gfx11 default (non-NSA) encoding +// - MIMGEncGfx11NSA: gfx11 NSA encoding class MIMGEncoding; def MIMGEncGfx6 : MIMGEncoding; @@ -21,6 +23,8 @@ def MIMGEncGfx8 : MIMGEncoding; def MIMGEncGfx90a : MIMGEncoding; def MIMGEncGfx10Default : MIMGEncoding; def MIMGEncGfx10NSA : MIMGEncoding; +def MIMGEncGfx11Default : MIMGEncoding; +def MIMGEncGfx11NSA : MIMGEncoding; def MIMGEncoding : GenericEnum { let FilterClass = "MIMGEncoding"; @@ -90,11 +94,13 @@ def MIMG { int NOP = -1; } -class mimgopc { - field bits<8> BASE = base; // Opcode for all but atomics +class mimgopc { + field bits<8> GFX11 = gfx11; + field bits<8> GFX10M = gfx10m; // GFX10minus for all but atomics field bits<8> VI = vi; // VI is only used for atomic instructions field bits<8> SI = si; // SI is only used for atomic instructions - bit HAS_BASE = !ne(base, MIMG.NOP); + bit HAS_GFX11 = !ne(gfx11, MIMG.NOP); + bit HAS_GFX10M = !ne(gfx10m, MIMG.NOP); bit HAS_VI = !ne(vi, MIMG.NOP); bit HAS_SI = !ne(si, MIMG.NOP); } @@ -207,12 +213,16 @@ class MIMG MIMGEncoding MIMGEncoding; bits<8> VDataDwords; bits<8> VAddrDwords; + + // If NSA is used this counts number of operands VAddrDwords is split into. 
+ bits<8> VAddrOperands; } def MIMGInfoTable : GenericTable { let FilterClass = "MIMG"; let CppTypeName = "MIMGInfo"; - let Fields = ["Opcode", "BaseOpcode", "MIMGEncoding", "VDataDwords", "VAddrDwords"]; + let Fields = ["Opcode", "BaseOpcode", "MIMGEncoding", "VDataDwords", + "VAddrDwords", "VAddrOperands"]; string TypeOf_BaseOpcode = "MIMGBaseOpcode"; string TypeOf_MIMGEncoding = "MIMGEncoding"; @@ -227,11 +237,12 @@ def getMIMGInfo : SearchIndex { // This class used to use !foldl to memoize the AddrAsmNames list. // It turned out that that was much slower than using !filter. -class MIMGNSAHelper { +class MIMGNSAHelper addr_types=!listsplat(VGPR_32, num_addrs)> { list AddrAsmNames = !foreach(i, !filter(i, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], !lt(i, num_addrs)), "vaddr" # i); - dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames); + dag AddrIns = !dag(ins, addr_types, AddrAsmNames); string AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; int NSA = !if(!le(num_addrs, 1), ?, @@ -247,6 +258,7 @@ class MIMG_gfx6789 op, dag outs, string dns = ""> let AssemblerPredicate = isGFX6GFX7GFX8GFX9NotGFX90A; let MIMGEncoding = MIMGEncGfx6; + let VAddrOperands = 1; let d16 = !if(BaseOpcode.HasD16, ?, 0); } @@ -257,6 +269,7 @@ class MIMG_gfx90a op, dag outs, string dns = ""> let AssemblerPredicate = isGFX90APlus; let MIMGEncoding = MIMGEncGfx90a; + let VAddrOperands = 1; let d16 = !if(BaseOpcode.HasD16, ?, 0); } @@ -264,10 +277,11 @@ class MIMG_gfx90a op, dag outs, string dns = ""> // Base class of all non-NSA gfx10 MIMG instructions. class MIMG_gfx10 : MIMG, MIMGe_gfx10 { - let SubtargetPredicate = isGFX10Plus; - let AssemblerPredicate = isGFX10Plus; + let SubtargetPredicate = isGFX10Only; + let AssemblerPredicate = isGFX10Only; let MIMGEncoding = MIMGEncGfx10Default; + let VAddrOperands = 1; let d16 = !if(BaseOpcode.HasD16, ?, 0); let nsa = 0; @@ -277,10 +291,11 @@ class MIMG_gfx10 // Note that 1-dword addresses always use non-NSA variants. 
class MIMG_nsa_gfx10 : MIMG, MIMGe_gfx10 { - let SubtargetPredicate = isGFX10Plus; - let AssemblerPredicate = isGFX10Plus; + let SubtargetPredicate = isGFX10Only; + let AssemblerPredicate = isGFX10Only; let MIMGEncoding = MIMGEncGfx10NSA; + let VAddrOperands = num_addrs; MIMGNSAHelper nsah = MIMGNSAHelper; dag AddrIns = nsah.AddrIns; @@ -290,11 +305,45 @@ class MIMG_nsa_gfx10 let nsa = nsah.NSA; } +// Base class of all non-NSA gfx11 MIMG instructions. +class MIMG_gfx11 + : MIMG, MIMGe_gfx11 { + let SubtargetPredicate = isGFX11Plus; + let AssemblerPredicate = isGFX11Plus; + + let MIMGEncoding = MIMGEncGfx11Default; + let VAddrOperands = 1; + + let d16 = !if(BaseOpcode.HasD16, ?, 0); + let nsa = 0; +} + +// Base class for all NSA MIMG instructions. +// Note that 1-dword addresses always use non-NSA variants. +class MIMG_nsa_gfx11 addr_types=[]> + : MIMG, MIMGe_gfx11 { + let SubtargetPredicate = isGFX11Plus; + let AssemblerPredicate = isGFX11Plus; + + let MIMGEncoding = MIMGEncGfx11NSA; + let VAddrOperands = num_addrs; + + MIMGNSAHelper nsah = !if(!empty(addr_types), + MIMGNSAHelper, + MIMGNSAHelper); + dag AddrIns = nsah.AddrIns; + string AddrAsm = nsah.AddrAsm; + + let d16 = !if(BaseOpcode.HasD16, ?, 0); + let nsa = nsah.NSA; +} + class MIMG_NoSampler_Helper - : MIMG_gfx6789 { + : MIMG_gfx6789 { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -307,7 +356,7 @@ class MIMG_NoSampler_Helper_gfx90a - : MIMG_gfx90a .ret:$vdata), dns> { + : MIMG_gfx90a .ret:$vdata), dns> { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), @@ -319,7 +368,7 @@ class MIMG_NoSampler_Helper_gfx90a - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), @@ -331,7 +380,32 
@@ class MIMG_NoSampler_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con(AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_NoSampler_gfx11 + : MIMG_gfx11 { + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_NoSampler_nsa_gfx11 + : MIMG_nsa_gfx11 { let InOperandList = !con(AddrIns, (ins SReg_256:$srsrc, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, @@ -347,7 +421,7 @@ multiclass MIMG_NoSampler_Src_Helper { let ssamp = 0 in { let VAddrDwords = 1 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V1 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then @@ -356,30 +430,42 @@ multiclass MIMG_NoSampler_Src_Helper ; } + if op.HAS_GFX11 then { + def _V1_gfx11 : MIMG_NoSampler_gfx11; + } } let VAddrDwords = 2 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V2 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a ; def _V2_gfx10 : MIMG_NoSampler_gfx10; def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } + if op.HAS_GFX11 then { + def _V2_gfx11 : MIMG_NoSampler_gfx11; + def _V2_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; + } } let VAddrDwords = 3 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V3 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a ; def _V3_gfx10 : MIMG_NoSampler_gfx10; def _V3_nsa_gfx10 : 
MIMG_NoSampler_nsa_gfx10; } + if op.HAS_GFX11 then { + def _V3_gfx11 : MIMG_NoSampler_gfx11; + def _V3_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; + } } let VAddrDwords = 4 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V4 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a ; @@ -387,6 +473,11 @@ multiclass MIMG_NoSampler_Src_Helper ; } + if op.HAS_GFX11 then { + def _V4_gfx11 : MIMG_NoSampler_gfx11; + def _V4_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; + } } } } @@ -420,7 +511,7 @@ class MIMG_Store_Helper - : MIMG_gfx6789 { + : MIMG_gfx6789 { let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -433,7 +524,7 @@ class MIMG_Store_Helper_gfx90a - : MIMG_gfx90a { + : MIMG_gfx90a { let InOperandList = !con((ins getLdStRegisterOperand.ret:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, @@ -446,7 +537,7 @@ class MIMG_Store_Helper_gfx90a - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), @@ -458,7 +549,33 @@ class MIMG_Store_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con((ins DataRC:$vdata), + AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Store_gfx11 + : MIMG_gfx11 { + let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let 
AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Store_nsa_gfx11 + : MIMG_nsa_gfx11 { let InOperandList = !con((ins DataRC:$vdata), AddrIns, (ins SReg_256:$srsrc, DMask:$dmask, @@ -475,7 +592,7 @@ multiclass MIMG_Store_Addr_Helper ; let hasPostISelHook = 1 in @@ -484,31 +601,48 @@ multiclass MIMG_Store_Addr_Helper ; } + if op.HAS_GFX11 then { + def _V1_gfx11 : MIMG_Store_gfx11 ; + } } let VAddrDwords = 2 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V2 : MIMG_Store_Helper ; def _V2_gfx90a : MIMG_Store_Helper_gfx90a ; def _V2_gfx10 : MIMG_Store_gfx10 ; def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V2_gfx11 : MIMG_Store_gfx11 ; + def _V2_nsa_gfx11 : MIMG_Store_nsa_gfx11 ; + } } let VAddrDwords = 3 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V3 : MIMG_Store_Helper ; def _V3_gfx90a : MIMG_Store_Helper_gfx90a ; def _V3_gfx10 : MIMG_Store_gfx10 ; def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V3_gfx11 : MIMG_Store_gfx11 ; + def _V3_nsa_gfx11 : MIMG_Store_nsa_gfx11 ; + } } let VAddrDwords = 4 in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V4 : MIMG_Store_Helper ; def _V4_gfx90a : MIMG_Store_Helper_gfx90a ; def _V4_gfx10 : MIMG_Store_gfx10 ; def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V4_gfx11 : MIMG_Store_gfx11 ; + def _V4_nsa_gfx11 : MIMG_Store_nsa_gfx11 ; + } } } } @@ -583,7 +717,7 @@ class MIMG_Atomic_gfx90a - : MIMG_gfx10(op.BASE), (outs DataRC:$vdst), + : MIMG_gfx10(op.GFX10M), (outs DataRC:$vdst), !if(enableDisasm, "AMDGPU", "")> { let Constraints = "$vdst = $vdata"; let AsmMatchConverter = "cvtMIMGAtomic"; @@ -597,7 +731,37 @@ class MIMG_Atomic_gfx10 - : MIMG_nsa_gfx10(op.BASE), (outs DataRC:$vdst), num_addrs, + : MIMG_nsa_gfx10(op.GFX10M), (outs DataRC:$vdst), num_addrs, + !if(enableDisasm, "AMDGPU", "")> { + let Constraints = "$vdst = 
$vdata"; + let AsmMatchConverter = "cvtMIMGAtomic"; + + let InOperandList = !con((ins DataRC:$vdata), + AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe)); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"; +} + +class MIMG_Atomic_gfx11 + : MIMG_gfx11(op.GFX11), (outs DataRC:$vdst), + !if(enableDisasm, "AMDGPU", "")> { + let Constraints = "$vdst = $vdata"; + let AsmMatchConverter = "cvtMIMGAtomic"; + + let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe); + let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"; +} + +class MIMG_Atomic_nsa_gfx11 + : MIMG_nsa_gfx11(op.GFX11), (outs DataRC:$vdst), num_addrs, !if(enableDisasm, "AMDGPU", "")> { let Constraints = "$vdst = $vdata"; let AsmMatchConverter = "cvtMIMGAtomic"; @@ -626,9 +790,12 @@ multiclass MIMG_Atomic_Addr_Helper_m ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V1_gfx10 : MIMG_Atomic_gfx10 ; } + if op.HAS_GFX11 then { + def _V1_gfx11 : MIMG_Atomic_gfx11 ; + } } let VAddrDwords = 2 in { if op.HAS_SI then { @@ -638,10 +805,14 @@ multiclass MIMG_Atomic_Addr_Helper_m ; def _V2_gfx90a : MIMG_Atomic_gfx90a ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V2_gfx10 : MIMG_Atomic_gfx10 ; def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V2_gfx11 : MIMG_Atomic_gfx11 ; + def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 ; + } } let VAddrDwords = 3 in { if op.HAS_SI then { @@ -651,10 +822,14 @@ multiclass MIMG_Atomic_Addr_Helper_m ; def _V3_gfx90a : MIMG_Atomic_gfx90a ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V3_gfx10 : MIMG_Atomic_gfx10 ; def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V3_gfx11 : MIMG_Atomic_gfx11 ; + def _V3_nsa_gfx11 : 
MIMG_Atomic_nsa_gfx11 ; + } } let VAddrDwords = 4 in { if op.HAS_SI then { @@ -664,10 +839,14 @@ multiclass MIMG_Atomic_Addr_Helper_m ; def _V4_gfx90a : MIMG_Atomic_gfx90a ; } - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V4_gfx10 : MIMG_Atomic_gfx10 ; def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; } + if op.HAS_GFX11 then { + def _V4_gfx11 : MIMG_Atomic_gfx11 ; + def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 ; + } } } } @@ -693,7 +872,7 @@ multiclass MIMG_Atomic class MIMG_Sampler_Helper - : MIMG_gfx6789 { + : MIMG_gfx6789 { let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -704,7 +883,7 @@ class MIMG_Sampler_Helper - : MIMG_gfx90a.ret:$vdata), dns> { + : MIMG_gfx90a.ret:$vdata), dns> { let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), @@ -716,7 +895,7 @@ class MIMG_Sampler_gfx90a - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), @@ -729,7 +908,34 @@ class MIMG_Sampler_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con(AddrIns, + (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm" + #"$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Sampler_gfx11 + : MIMG_gfx11 { + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = 
opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm" + #"$cpol$r128$a16$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Sampler_nsa_gfx11 + : MIMG_nsa_gfx11 { let InOperandList = !con(AddrIns, (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, @@ -825,7 +1031,7 @@ multiclass MIMG_Sampler_Src_Helper { foreach addr = MIMG_Sampler_AddrSizes.MachineInstrs in { let VAddrDwords = addr.NumWords in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V # addr.NumWords : MIMG_Sampler_Helper ; @@ -837,16 +1043,26 @@ multiclass MIMG_Sampler_Src_Helper ; } + if op.HAS_GFX11 then { + def _V # addr.NumWords # _gfx11 + : MIMG_Sampler_gfx11 ; + } } } foreach addr = MIMG_Sampler_AddrSizes.NSAInstrs in { let VAddrDwords = addr.NumWords in { - if op.HAS_BASE then { + if op.HAS_GFX10M then { def _V # addr.NumWords # _nsa_gfx10 : MIMG_Sampler_nsa_gfx10; } + if !and(op.HAS_GFX11, !le(addr.NumWords, 5)) then { + def _V # addr.NumWords # _nsa_gfx11 + : MIMG_Sampler_nsa_gfx11; + } } } } @@ -913,10 +1129,17 @@ class MIMG_IntersectRay_Helper { // when we only need 9, 11 or 12 depending on A16 field and ptr size. 
RegisterClass RegClass = MIMGAddrSize.RegClass; int VAddrDwords = !srl(RegClass.Size, 5); + + int gfx11_nsa_addrs = !if(A16, 4, 5); + RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); + list gfx11_addr_types = + !if(A16, + [node_ptr_type, VGPR_32, VReg_96, VReg_96], + [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); } class MIMG_IntersectRay_gfx10 - : MIMG_gfx10 { + : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc), !if(A16, (ins GFX10A16:$a16), (ins))); @@ -926,7 +1149,27 @@ class MIMG_IntersectRay_gfx10 - : MIMG_nsa_gfx10 { + : MIMG_nsa_gfx10 { + let InOperandList = !con(nsah.AddrIns, + (ins SReg_128:$srsrc), + !if(A16, (ins GFX10A16:$a16), (ins))); + let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", ""); +} + +class MIMG_IntersectRay_gfx11 + : MIMG_gfx11 { + + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc), + !if(A16, (ins GFX10A16:$a16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(A16, "$a16", ""); + + let nsa = 0; +} + +class MIMG_IntersectRay_nsa_gfx11 addr_types> + : MIMG_nsa_gfx11 { let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc), !if(A16, (ins GFX10A16:$a16), (ins))); @@ -955,9 +1198,37 @@ multiclass MIMG_IntersectRay { def _sa_gfx10 : MIMG_IntersectRay_gfx10 { let VAddrDwords = info.VAddrDwords; } + def _sa_gfx11 : MIMG_IntersectRay_gfx11 { + let VAddrDwords = info.VAddrDwords; + } def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10 { let VAddrDwords = info.num_addrs; } + def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11 { + let VAddrDwords = info.num_addrs; + } + } +} + +multiclass MIMG_MSAA_Load { + def "" : MIMGBaseOpcode { + let HasD16 = 1; + let Gather4 = 1; /* for appropriate dmask handling */ + let MSAA = 1; + } + + let BaseOpcode = !cast(NAME), + Gather4 = 1, hasPostISelHook = 0, mayLoad = 1 in { + let VDataDwords = 2 in + defm _V2 : MIMG_NoSampler_Src_Helper; /* packed D16 */ + let VDataDwords = 3 in + defm _V3 : MIMG_NoSampler_Src_Helper; /* 
packed D16 + tfe */ + let VDataDwords = 4 in + defm _V4 : MIMG_NoSampler_Src_Helper; + let VDataDwords = 5 in + defm _V5 : MIMG_NoSampler_Src_Helper; } } @@ -966,138 +1237,141 @@ multiclass MIMG_IntersectRay { //===----------------------------------------------------------------------===// let OtherPredicates = [HasImageInsts] in { -defm IMAGE_LOAD : MIMG_NoSampler , "image_load", 1>; -defm IMAGE_LOAD_MIP : MIMG_NoSampler , "image_load_mip", 1, 1>; -defm IMAGE_LOAD_PCK : MIMG_NoSampler , "image_load_pck", 0>; -defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler , "image_load_pck_sgn", 0>; -defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler , "image_load_mip_pck", 0, 1>; -defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler , "image_load_mip_pck_sgn", 0, 1>; -defm IMAGE_STORE : MIMG_Store , "image_store", 1>; -defm IMAGE_STORE_MIP : MIMG_Store , "image_store_mip", 1, 1>; -defm IMAGE_STORE_PCK : MIMG_Store , "image_store_pck", 0>; -defm IMAGE_STORE_MIP_PCK : MIMG_Store , "image_store_mip_pck", 0, 1>; - -defm IMAGE_GET_RESINFO : MIMG_NoSampler , "image_get_resinfo", 0, 1, 1>; - -defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , "image_atomic_swap">; -defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", 1>; -defm IMAGE_ATOMIC_ADD : MIMG_Atomic , "image_atomic_add">; -defm IMAGE_ATOMIC_SUB : MIMG_Atomic , "image_atomic_sub">; -defm IMAGE_ATOMIC_RSUB : MIMG_Atomic , "image_atomic_rsub">; -defm IMAGE_ATOMIC_SMIN : MIMG_Atomic , "image_atomic_smin">; -defm IMAGE_ATOMIC_UMIN : MIMG_Atomic , "image_atomic_umin">; -defm IMAGE_ATOMIC_SMAX : MIMG_Atomic , "image_atomic_smax">; -defm IMAGE_ATOMIC_UMAX : MIMG_Atomic , "image_atomic_umax">; -defm IMAGE_ATOMIC_AND : MIMG_Atomic , "image_atomic_and">; -defm IMAGE_ATOMIC_OR : MIMG_Atomic , "image_atomic_or">; -defm IMAGE_ATOMIC_XOR : MIMG_Atomic , "image_atomic_xor">; -defm IMAGE_ATOMIC_INC : MIMG_Atomic , "image_atomic_inc">; -defm IMAGE_ATOMIC_DEC : MIMG_Atomic , "image_atomic_dec">; -defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic , "image_atomic_fcmpswap", 1, 1>; 
-defm IMAGE_ATOMIC_FMIN : MIMG_Atomic , "image_atomic_fmin", 0, 1>; -defm IMAGE_ATOMIC_FMAX : MIMG_Atomic , "image_atomic_fmax", 0, 1>; - -defm IMAGE_SAMPLE : MIMG_Sampler_WQM , AMDGPUSample>; +defm IMAGE_LOAD : MIMG_NoSampler , "image_load", 1>; +defm IMAGE_LOAD_MIP : MIMG_NoSampler , "image_load_mip", 1, 1>; +defm IMAGE_LOAD_PCK : MIMG_NoSampler , "image_load_pck", 0>; +defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler , "image_load_pck_sgn", 0>; +defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler , "image_load_mip_pck", 0, 1>; +defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler , "image_load_mip_pck_sgn", 0, 1>; +defm IMAGE_STORE : MIMG_Store , "image_store", 1>; +defm IMAGE_STORE_MIP : MIMG_Store , "image_store_mip", 1, 1>; +defm IMAGE_STORE_PCK : MIMG_Store , "image_store_pck", 0>; +defm IMAGE_STORE_MIP_PCK : MIMG_Store , "image_store_mip_pck", 0, 1>; + +defm IMAGE_GET_RESINFO : MIMG_NoSampler , "image_get_resinfo", 0, 1, 1>; + +defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , "image_atomic_swap">; +defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", 1>; +defm IMAGE_ATOMIC_ADD : MIMG_Atomic , "image_atomic_add">; +defm IMAGE_ATOMIC_SUB : MIMG_Atomic , "image_atomic_sub">; +defm IMAGE_ATOMIC_RSUB : MIMG_Atomic , "image_atomic_rsub">; +defm IMAGE_ATOMIC_SMIN : MIMG_Atomic , "image_atomic_smin">; +defm IMAGE_ATOMIC_UMIN : MIMG_Atomic , "image_atomic_umin">; +defm IMAGE_ATOMIC_SMAX : MIMG_Atomic , "image_atomic_smax">; +defm IMAGE_ATOMIC_UMAX : MIMG_Atomic , "image_atomic_umax">; +defm IMAGE_ATOMIC_AND : MIMG_Atomic , "image_atomic_and">; +defm IMAGE_ATOMIC_OR : MIMG_Atomic , "image_atomic_or">; +defm IMAGE_ATOMIC_XOR : MIMG_Atomic , "image_atomic_xor">; +defm IMAGE_ATOMIC_INC : MIMG_Atomic , "image_atomic_inc">; +defm IMAGE_ATOMIC_DEC : MIMG_Atomic , "image_atomic_dec">; +defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic , "image_atomic_fcmpswap", 1, 1>; +defm IMAGE_ATOMIC_FMIN : MIMG_Atomic , "image_atomic_fmin", 0, 1>; +defm IMAGE_ATOMIC_FMAX : MIMG_Atomic , "image_atomic_fmax", 0, 1>; + 
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM , AMDGPUSample>; let OtherPredicates = [HasExtendedImageInsts] in { -defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM , AMDGPUSample_cl>; -defm IMAGE_SAMPLE_D : MIMG_Sampler , AMDGPUSample_d>; -defm IMAGE_SAMPLE_D_CL : MIMG_Sampler , AMDGPUSample_d_cl>; -defm IMAGE_SAMPLE_L : MIMG_Sampler , AMDGPUSample_l>; -defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM , AMDGPUSample_b>; -defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM , AMDGPUSample_b_cl>; -defm IMAGE_SAMPLE_LZ : MIMG_Sampler , AMDGPUSample_lz>; -defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM , AMDGPUSample_c>; -defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM , AMDGPUSample_c_cl>; -defm IMAGE_SAMPLE_C_D : MIMG_Sampler , AMDGPUSample_c_d>; -defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler , AMDGPUSample_c_d_cl>; -defm IMAGE_SAMPLE_C_L : MIMG_Sampler , AMDGPUSample_c_l>; -defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM , AMDGPUSample_c_b>; -defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM , AMDGPUSample_c_b_cl>; -defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler , AMDGPUSample_c_lz>; -defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM , AMDGPUSample_o>; -defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM , AMDGPUSample_cl_o>; -defm IMAGE_SAMPLE_D_O : MIMG_Sampler , AMDGPUSample_d_o>; -defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler , AMDGPUSample_d_cl_o>; -defm IMAGE_SAMPLE_L_O : MIMG_Sampler , AMDGPUSample_l_o>; -defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM , AMDGPUSample_b_o>; -defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM , AMDGPUSample_b_cl_o>; -defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler , AMDGPUSample_lz_o>; -defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM , AMDGPUSample_c_o>; -defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM , AMDGPUSample_c_cl_o>; -defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler , AMDGPUSample_c_d_o>; -defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler , AMDGPUSample_c_d_cl_o>; -defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler , AMDGPUSample_c_l_o>; -defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM , AMDGPUSample_c_b_cl_o>; -defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM , AMDGPUSample_c_b_o>; -defm 
IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler , AMDGPUSample_c_lz_o>; -defm IMAGE_GATHER4 : MIMG_Gather_WQM , AMDGPUSample>; -defm IMAGE_GATHER4_CL : MIMG_Gather_WQM , AMDGPUSample_cl>; -defm IMAGE_GATHER4_L : MIMG_Gather , AMDGPUSample_l>; -defm IMAGE_GATHER4_B : MIMG_Gather_WQM , AMDGPUSample_b>; -defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM , AMDGPUSample_b_cl>; -defm IMAGE_GATHER4_LZ : MIMG_Gather , AMDGPUSample_lz>; -defm IMAGE_GATHER4_C : MIMG_Gather_WQM , AMDGPUSample_c>; -defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM , AMDGPUSample_c_cl>; -defm IMAGE_GATHER4_C_L : MIMG_Gather , AMDGPUSample_c_l>; -defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM , AMDGPUSample_c_b>; -defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM , AMDGPUSample_c_b_cl>; -defm IMAGE_GATHER4_C_LZ : MIMG_Gather , AMDGPUSample_c_lz>; -defm IMAGE_GATHER4_O : MIMG_Gather_WQM , AMDGPUSample_o>; -defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM , AMDGPUSample_cl_o>; -defm IMAGE_GATHER4_L_O : MIMG_Gather , AMDGPUSample_l_o>; -defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM , AMDGPUSample_b_o>; -defm IMAGE_GATHER4_B_CL_O : MIMG_Gather , AMDGPUSample_b_cl_o>; -defm IMAGE_GATHER4_LZ_O : MIMG_Gather , AMDGPUSample_lz_o>; -defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM , AMDGPUSample_c_o>; -defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_cl_o>; -defm IMAGE_GATHER4_C_L_O : MIMG_Gather , AMDGPUSample_c_l_o>; -defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM , AMDGPUSample_c_b_o>; -defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_b_cl_o>; -defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather , AMDGPUSample_c_lz_o>; -//defm IMAGE_GATHER4H : MIMG_Gather_WQM , ?>; - -defm IMAGE_GET_LOD : MIMG_Sampler , AMDGPUSample, 1, 0, 1, "image_get_lod">; - -defm IMAGE_SAMPLE_CD : MIMG_Sampler , AMDGPUSample_cd>; -defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler , AMDGPUSample_cd_cl>; -defm IMAGE_SAMPLE_C_CD : MIMG_Sampler , AMDGPUSample_c_cd>; -defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler , AMDGPUSample_c_cd_cl>; -defm IMAGE_SAMPLE_CD_O : MIMG_Sampler , AMDGPUSample_cd_o>; 
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler , AMDGPUSample_cd_cl_o>; -defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler , AMDGPUSample_c_cd_o>; -defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler , AMDGPUSample_c_cd_cl_o>; +defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM , AMDGPUSample_cl>; +defm IMAGE_SAMPLE_D : MIMG_Sampler , AMDGPUSample_d>; +defm IMAGE_SAMPLE_D_CL : MIMG_Sampler , AMDGPUSample_d_cl>; +defm IMAGE_SAMPLE_L : MIMG_Sampler , AMDGPUSample_l>; +defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM , AMDGPUSample_b>; +defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM , AMDGPUSample_b_cl>; +defm IMAGE_SAMPLE_LZ : MIMG_Sampler , AMDGPUSample_lz>; +defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM , AMDGPUSample_c>; +defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM , AMDGPUSample_c_cl>; +defm IMAGE_SAMPLE_C_D : MIMG_Sampler , AMDGPUSample_c_d>; +defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler , AMDGPUSample_c_d_cl>; +defm IMAGE_SAMPLE_C_L : MIMG_Sampler , AMDGPUSample_c_l>; +defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM , AMDGPUSample_c_b>; +defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM , AMDGPUSample_c_b_cl>; +defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler , AMDGPUSample_c_lz>; +defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM , AMDGPUSample_o>; +defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM , AMDGPUSample_cl_o>; +defm IMAGE_SAMPLE_D_O : MIMG_Sampler , AMDGPUSample_d_o>; +defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler , AMDGPUSample_d_cl_o>; +defm IMAGE_SAMPLE_L_O : MIMG_Sampler , AMDGPUSample_l_o>; +defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM , AMDGPUSample_b_o>; +defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM , AMDGPUSample_b_cl_o>; +defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler , AMDGPUSample_lz_o>; +defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM , AMDGPUSample_c_o>; +defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM , AMDGPUSample_c_cl_o>; +defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler , AMDGPUSample_c_d_o>; +defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler , AMDGPUSample_c_d_cl_o>; +defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler , AMDGPUSample_c_l_o>; +defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM , 
AMDGPUSample_c_b_cl_o>; +defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM , AMDGPUSample_c_b_o>; +defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler , AMDGPUSample_c_lz_o>; +defm IMAGE_GATHER4 : MIMG_Gather_WQM , AMDGPUSample>; +defm IMAGE_GATHER4_CL : MIMG_Gather_WQM , AMDGPUSample_cl>; +defm IMAGE_GATHER4_L : MIMG_Gather , AMDGPUSample_l>; +defm IMAGE_GATHER4_B : MIMG_Gather_WQM , AMDGPUSample_b>; +defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM , AMDGPUSample_b_cl>; +defm IMAGE_GATHER4_LZ : MIMG_Gather , AMDGPUSample_lz>; +defm IMAGE_GATHER4_C : MIMG_Gather_WQM , AMDGPUSample_c>; +defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM , AMDGPUSample_c_cl>; +defm IMAGE_GATHER4_C_L : MIMG_Gather , AMDGPUSample_c_l>; +defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM , AMDGPUSample_c_b>; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM , AMDGPUSample_c_b_cl>; +defm IMAGE_GATHER4_C_LZ : MIMG_Gather , AMDGPUSample_c_lz>; +defm IMAGE_GATHER4_O : MIMG_Gather_WQM , AMDGPUSample_o>; +defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM , AMDGPUSample_cl_o>; +defm IMAGE_GATHER4_L_O : MIMG_Gather , AMDGPUSample_l_o>; +defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM , AMDGPUSample_b_o>; +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather , AMDGPUSample_b_cl_o>; +defm IMAGE_GATHER4_LZ_O : MIMG_Gather , AMDGPUSample_lz_o>; +defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM , AMDGPUSample_c_o>; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_cl_o>; +defm IMAGE_GATHER4_C_L_O : MIMG_Gather , AMDGPUSample_c_l_o>; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM , AMDGPUSample_c_b_o>; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM , AMDGPUSample_c_b_cl_o>; +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather , AMDGPUSample_c_lz_o>; +//defm IMAGE_GATHER4H : MIMG_Gather_WQM , ?>; + +defm IMAGE_GET_LOD : MIMG_Sampler , AMDGPUSample, 1, 0, 1, "image_get_lod">; + +defm IMAGE_SAMPLE_CD : MIMG_Sampler , AMDGPUSample_cd>; +defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler , AMDGPUSample_cd_cl>; +defm IMAGE_SAMPLE_C_CD : MIMG_Sampler , AMDGPUSample_c_cd>; +defm IMAGE_SAMPLE_C_CD_CL : 
MIMG_Sampler , AMDGPUSample_c_cd_cl>; +defm IMAGE_SAMPLE_CD_O : MIMG_Sampler , AMDGPUSample_cd_o>; +defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler , AMDGPUSample_cd_cl_o>; +defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler , AMDGPUSample_c_cd_o>; +defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler , AMDGPUSample_c_cd_cl_o>; } // End OtherPredicates = [HasExtendedImageInsts] let OtherPredicates = [HasExtendedImageInsts,HasG16] in { -defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler , AMDGPUSample_d, 0, 1>; -defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler , AMDGPUSample_d_cl, 0, 1>; -defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler , AMDGPUSample_c_d, 0, 1>; -defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl, 0, 1>; -defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler , AMDGPUSample_d_o, 0, 1>; -defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_d_cl_o, 0, 1>; -defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_o, 0, 1>; -defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl_o, 0, 1>; -defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler , AMDGPUSample_cd, 0, 1>; -defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_cd_cl, 0, 1>; -defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler , AMDGPUSample_c_cd, 0, 1>; -defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl, 0, 1>; -defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler , AMDGPUSample_cd_o, 0, 1>; -defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_cd_cl_o, 0, 1>; -defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_o, 0, 1>; -defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl_o, 0, 1>; +defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler , AMDGPUSample_d, 0, 1>; +defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler , AMDGPUSample_d_cl, 0, 1>; +defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler , AMDGPUSample_c_d, 0, 1>; +defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl, 0, 1>; +defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler , AMDGPUSample_d_o, 0, 1>; +defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_d_cl_o, 0, 
1>; +defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_o, 0, 1>; +defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_d_cl_o, 0, 1>; +defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler , AMDGPUSample_cd, 0, 1>; +defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_cd_cl, 0, 1>; +defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler , AMDGPUSample_c_cd, 0, 1>; +defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl, 0, 1>; +defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler , AMDGPUSample_cd_o, 0, 1>; +defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_cd_cl_o, 0, 1>; +defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_o, 0, 1>; +defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler , AMDGPUSample_c_cd_cl_o, 0, 1>; } // End OtherPredicates = [HasExtendedImageInsts,HasG16] -//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>; -//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>; +//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", mimgopc<0x7e>>; +//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", mimgopc<0x7f>>; + +let SubtargetPredicate = isGFX10Only, OtherPredicates = [HasGFX10_BEncoding] in +defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler , "image_msaa_load", 1, 0, 0, 1>; let OtherPredicates = [HasGFX10_AEncoding] in -defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler , "image_msaa_load", 1, 0, 0, 1>; +defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load , "image_msaa_load">; let OtherPredicates = [HasGFX10_AEncoding] in { -defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 0>; -defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 1>; -defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 0>; -defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 1>; +defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 0>; +defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay, 
"image_bvh_intersect_ray", 0, 1>; +defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 0>; +defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 1>; } // End OtherPredicates = [HasGFX10_AEncoding] } // End let OtherPredicates = [HasImageInsts] diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 50239840c22517..8455b471be3a0b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -293,7 +293,7 @@ class VINTRPe op> : Enc32 { let Inst{31-26} = 0x32; // encoding } -class MIMGe : Enc64 { +class MIMGe_gfxpre11 : Enc64 { bits<10> vdata; bits<4> dmask; bits<1> unorm; @@ -318,7 +318,7 @@ class MIMGe : Enc64 { let Inst{63} = d16; } -class MIMGe_gfx6789 op> : MIMGe { +class MIMGe_gfx6789 op> : MIMGe_gfxpre11 { bits<8> vaddr; bits<1> da; @@ -330,7 +330,7 @@ class MIMGe_gfx6789 op> : MIMGe { let Inst{39-32} = vaddr; } -class MIMGe_gfx90a op> : MIMGe { +class MIMGe_gfx90a op> : MIMGe_gfxpre11 { bits<8> vaddr; bits<1> da; @@ -342,7 +342,7 @@ class MIMGe_gfx90a op> : MIMGe { let Inst{39-32} = vaddr; } -class MIMGe_gfx10 op> : MIMGe { +class MIMGe_gfx10 op> : MIMGe_gfxpre11 { bits<8> vaddr0; bits<3> dim; bits<2> nsa; @@ -358,6 +358,42 @@ class MIMGe_gfx10 op> : MIMGe { let Inst{62} = a16; } +class MIMGe_gfx11 op> : Enc64 { + bits<8> vdata; + bits<4> dmask; + bits<1> unorm; + bits<5> cpol; + bits<1> r128; + bits<1> tfe; + bits<1> lwe; + bits<7> srsrc; + bits<7> ssamp; + bit d16; + bits<1> a16; + bits<8> vaddr0; + bits<3> dim; + bits<1> nsa; + + let Inst{0} = nsa; + let Inst{4-2} = dim; + let Inst{7} = unorm; + let Inst{11-8} = dmask; + let Inst{12} = cpol{CPolBit.SLC}; + let Inst{13} = cpol{CPolBit.DLC}; + let Inst{14} = cpol{CPolBit.GLC}; + let Inst{15} = r128; + let Inst{16} = a16; + let Inst{17} = d16; + let Inst{25-18} = op; + let Inst{31-26} = 0x3c; + let Inst{39-32} = vaddr0; + let Inst{47-40} = vdata; + let 
Inst{52-48} = srsrc{6-2}; + let Inst{53} = tfe; + let Inst{54} = lwe; + let Inst{62-58} = ssamp{6-2}; +} + class EXPe : Enc64 { bits<4> en; bits<6> tgt; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 6287fada56333c..41c90ae890ddbe 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -398,6 +398,7 @@ struct MIMGInfo { uint8_t MIMGEncoding; uint8_t VDataDwords; uint8_t VAddrDwords; + uint8_t VAddrOperands; }; LLVM_READONLY diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s new file mode 100644 index 00000000000000..eee9e1a24fd1f8 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s @@ -0,0 +1,308 @@ +; RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm +; GFX11: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x80,0x0f,0x00,0xf0,0x00,0x00,0x00,0x00] + +image_load v[1:4], [v2, v3], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm +; GFX11: image_load v[1:4], [v2, v3], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x85,0x0f,0x00,0xf0,0x02,0x01,0x01,0x00,0x03,0x00,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_3D unorm +; GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x89,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm +; GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x8d,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm +; GFX11: image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: 
[0x91,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x00,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm +; GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x95,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm +; GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x99,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5, v6, v7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm +; GFX11: image_load v[0:3], [v4, v5, v6, v7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x9d,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x07,0x00] + +image_load v[0:1], v0, s[0:7] dmask:0x9 dim:1D +; GFX11: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x00,0xf0,0x00,0x00,0x00,0x00] + +image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D dlc +; GFX11: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D dlc ; encoding: [0x00,0x21,0x00,0xf0,0x00,0x00,0x00,0x00] + +image_load v255, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX11: image_load v255, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x00,0xf0,0x00,0xff,0x00,0x00] + +image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D slc +; GFX11: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x11,0x00,0xf0,0x00,0x00,0x00,0x00] + +image_load v0, v255, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16 +; GFX11: image_load v0, v255, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x06,0x02,0xf0,0xff,0x00,0x00,0x00] + +// FIXME: This test is incorrect because r128 assumes a 128-bit SRSRC. 
+image_load v0, v255, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D r128 +; GFX11: image_load v0, v255, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D r128 ; encoding: [0x00,0x81,0x00,0xf0,0xff,0x00,0x00,0x00] + +image_load v0, v[2:3], s[0:7] dmask:0x1 dim:2D +; GFX11: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:3D +; GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:CUBE +; GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:3], s[0:7] dmask:0x1 dim:1D_ARRAY +; GFX11: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:2D_ARRAY +; GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x14,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:2D_MSAA +; GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA ; encoding: [0x18,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:5], s[0:7] dmask:0x1 dim:2D_MSAA_ARRAY +; GFX11: image_load v0, v[2:5], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1c,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load_mip v[252:255], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11: image_load_mip v[252:255], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x04,0xf0,0x00,0xfc,0x00,0x00] + +image_load_mip v[253:255], [v255, v254], s[0:7] dmask:0xe dim:SQ_RSRC_IMG_1D +; GFX11: image_load_mip v[253:255], [v255, v254], s[0:7] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0e,0x04,0xf0,0xff,0xfd,0x00,0x00,0xfe,0x00,0x00,0x00] + +image_load_mip v[254:255], [v254, v255, v253], s[0:7] dmask:0xc dim:SQ_RSRC_IMG_2D +; 
GFX11: image_load_mip v[254:255], [v254, v255, v253], s[0:7] dmask:0xc dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x0c,0x04,0xf0,0xfe,0xfe,0x00,0x00,0xff,0xfd,0x00,0x00] + +image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_3D +; GFX11: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00] + +image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_CUBE +; GFX11: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00] + +image_load_mip v255, [v254, v255, v253], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_load_mip v255, [v254, v255, v253], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0x00,0x00] + +image_load_mip v255, [v254, v255, v253, v255], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX11: image_load_mip v255, [v254, v255, v253, v255], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xff,0x00] + +image_store v[0:3], [v254, v255, v253, v255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX11: image_store v[0:3], [v254, v255, v253, v255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x0f,0x18,0xf0,0xfe,0x00,0x18,0x00,0xff,0xfd,0xff,0x00] + +image_store v[0:3], v[254:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11: image_store v[0:3], v[254:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x0f,0x18,0xf0,0xfe,0x00,0x18,0x00] + +image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x0f,0x1c,0xf0,0xfd,0x00,0x18,0x00] + +image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D +; GFX11: image_get_resinfo 
v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc +; GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc +; GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc +; GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc +; GFX11: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00] + +image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc +; GFX11: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_umin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA glc +; GFX11: image_atomic_umin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA glc ; encoding: [0x19,0x41,0x3c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_smax v4, [v32, v1, v2, v3], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY glc +; GFX11: image_atomic_smax v4, [v32, v1, v2, v3], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY glc ; encoding: [0x1d,0x41,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x03,0x00] + +image_atomic_umax v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D glc +; GFX11: image_atomic_umax v4, [v32, 
v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D glc ; encoding: [0x05,0x41,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00] + +image_atomic_and v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX11: image_atomic_and v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x48,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_or v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX11: image_atomic_or v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x4c,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_xor v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX11: image_atomic_xor v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x50,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_inc v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX11: image_atomic_inc v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x54,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX11: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00] + +image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX11: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64] + +image_sample_cl v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX11: image_sample_cl v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x00,0x00] + +image_sample_cl v[64:66], [v32, v16, v15], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D +; GFX11: image_sample_cl v[64:66], [v32, v16, v15], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x00,0x00] + +image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D 
+; GFX11: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x14,0x00] + +image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +; GFX11: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x14,0x00] + +image_sample_cl v[64:66], [v32, v16, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_sample_cl v[64:66], [v32, v16, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x00,0x00] + +image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX11: image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x15,0x00] + +image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX11: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00] + +image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D +; GFX11: image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64] + +image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64] + +image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; 
encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00] + +image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00] + +image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x78,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00] + +image_sample_b_cl v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_sample_b_cl v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x08,0xf1,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00] + +image_sample_lz v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_sample_lz v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x7c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x00,0x00] + +image_sample_c v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +; GFX11: image_sample_c v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x07,0x80,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] + +image_sample_c_cl v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +; GFX11: image_sample_c_cl v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x07,0x0c,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] + +image_sample_c_l v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_c_l v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: 
[0x09,0x07,0x88,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] + +image_sample_c_b v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_c_b v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x8c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] + +image_sample_c_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_c_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa8,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] + +image_sample_c_lz v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_c_lz v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x90,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] + +image_sample_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x94,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] + +image_sample_cl_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_cl_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x18,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] + +image_sample_b_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_b_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa0,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] + +image_sample_l_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_l_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: 
[0x09,0x07,0x9c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] + +image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX11: image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa4,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] + +image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00] + +image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64] + +image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE +; GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64] + +image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX11: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00] + +image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D +; GFX11: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00] + +image_gather4_b_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX11: image_gather4_b_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x01,0x84,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] + +image_gather4_lz v[64:67], [v32, v0, v4], s[4:11], s[100:103] 
dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_lz v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xc8,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00] + +image_gather4_c v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_c v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xcc,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] + +image_gather4_c_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_c_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0x88,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] + +image_gather4_c_l v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_c_l v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0x8c,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] + +image_gather4_c_b v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_c_b v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0x90,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] + +image_gather4_c_lz v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_c_lz v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xd0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] + +image_gather4_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xd4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] + +image_gather4_lz_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D 
+; GFX11: image_gather4_lz_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xd8,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] + +image_gather4_c_lz_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX11: image_gather4_c_lz_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xdc,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] + +image_get_lod v64, v[32:33], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GFX11: image_get_lod v64, v[32:33], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x01,0xe0,0xf0,0x20,0x40,0x01,0x64] + +image_get_lod v[64:65], [v32, v0, v16], s[4:11], s[100:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX11: image_get_lod v[64:65], [v32, v0, v16], s[4:11], s[100:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x03,0xe0,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00] + +image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA +; GFX11: image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA ; encoding: [0x18,0x01,0x60,0xf0,0x05,0x01,0x02,0x00] + +image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA glc +; GFX11: image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA glc ; encoding: [0x18,0x42,0x60,0xf0,0x05,0x01,0x02,0x00] + +image_msaa_load v[1:2], v[1:3], s[8:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA d16 +; GFX11: image_msaa_load v[1:2], v[1:3], s[8:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA d16 ; encoding: [0x18,0x04,0x62,0xf0,0x01,0x01,0x02,0x00] + +image_msaa_load v[1:4], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX11: image_msaa_load v[1:4], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1c,0x02,0x60,0xf0,0x05,0x01,0x02,0x00] + +image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16 +; GFX11: image_msaa_load v[1:2], v[5:8], s[8:15] 
dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16 ; encoding: [0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00] + +image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00] + +image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] +; GFX11: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00] + +image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 +; GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00] + +image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] +; GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00] + +image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 +; GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00] + +image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] +; GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f] + +image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 +; GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00] + +image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15] +; GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f] + +image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16 +; GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], 
v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s new file mode 100644 index 00000000000000..e12d6b5600d0ec --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s @@ -0,0 +1,148 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefixes=NOGFX11 --implicit-check-not=error: %s + +image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_d v[64:66], [v32, v16, v8, v4, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_d_cl v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21, v48], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_d v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_d_cl v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17, v18], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_b_cl v[64:66], [v32, v16, v0, v2, v1, v5], s[4:11], s[100:103] 
dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_d_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_d_cl_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_d_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_d_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_l_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_b_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_sample_c_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+image_gather4_c_b_cl v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_gather4_cl_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_l_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_b_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_c_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_c_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_c_l_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_c_b_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_gather4_c_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7], s[4:11], s[100:103] 
dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_c_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_c_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_c_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_sample_c_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_atomic_fcmpswap v[4:5], v32, s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_1D glc +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_atomic_fmin v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_atomic_fmax v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +// 
NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid image_gather dmask: only one bit must be set + +image_msaa_load v5, v[1:3], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA d16 +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +image_bvh_intersect_ray v[4:6], v[0:15], s[4:7] +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh_intersect_ray v[4:7], v[0:15], s[4:7] a16 +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh64_intersect_ray v[4:6], v[0:15], s[4:7] +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh64_intersect_ray v[4:7], v[0:7], s[4:7] a16 +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49], v0], s[12:15] +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh_intersect_ray v[39:42], [v50, v46, v47, v[40:42]], s[12:15] a16 +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh64_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22]], s[12:15] a16 +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt new file mode 100644 index 00000000000000..d7a1cf1c6a79a8 --- /dev/null +++ 
b/llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt @@ -0,0 +1,304 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s + +# GFX11: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x80,0x0f,0x00,0xf0,0x00,0x00,0x00,0x00] +0x80,0x0f,0x00,0xf0,0x00,0x00,0x00,0x00 + +# GFX11: image_load v[1:4], [v2, v3], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x85,0x0f,0x00,0xf0,0x02,0x01,0x01,0x00,0x03,0x00,0x00,0x00] +0x85,0x0f,0x00,0xf0,0x02,0x01,0x01,0x00,0x03,0x00,0x00,0x00 + +# GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x89,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] +0x89,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00 + +# GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x8d,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] +0x8d,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00 + +# GFX11: image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x91,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x00,0x00,0x00] +0x91,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x00,0x00,0x00 + +# GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x95,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] +0x95,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00 + +# GFX11: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x99,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] +0x99,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00 + +# GFX11: image_load v[0:3], [v4, v5, v6, v7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x9d,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x07,0x00] +0x9d,0x0f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x07,0x00 + +# GFX11: image_load v[0:1], v0, s[0:7] dmask:0x9 
dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x00,0xf0,0x00,0x00,0x00,0x00] +0x00,0x09,0x00,0xf0,0x00,0x00,0x00,0x00 + +# GFX11: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D dlc ; encoding: [0x00,0x21,0x00,0xf0,0x00,0x00,0x00,0x00] +0x00,0x21,0x00,0xf0,0x00,0x00,0x00,0x00 + +# GFX11: image_load v255, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x00,0xf0,0x00,0xff,0x00,0x00] +0x00,0x41,0x00,0xf0,0x00,0xff,0x00,0x00 + +# GFX11: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x11,0x00,0xf0,0x00,0x00,0x00,0x00] +0x00,0x11,0x00,0xf0,0x00,0x00,0x00,0x00 + +# GFX11: image_load v0, v255, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x06,0x02,0xf0,0xff,0x00,0x00,0x00] +0x00,0x06,0x02,0xf0,0xff,0x00,0x00,0x00 + +# GFX11: image_load v0, v255, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D r128 ; encoding: [0x00,0x81,0x00,0xf0,0xff,0x00,0x00,0x00] +0x00,0x81,0x00,0xf0,0xff,0x00,0x00,0x00 + +# GFX11: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] +0x04,0x01,0x00,0xf0,0x02,0x00,0x00,0x00 + +# GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] +0x08,0x01,0x00,0xf0,0x02,0x00,0x00,0x00 + +# GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] +0x0c,0x01,0x00,0xf0,0x02,0x00,0x00,0x00 + +# GFX11: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x10,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] +0x10,0x01,0x00,0xf0,0x02,0x00,0x00,0x00 + +# GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x14,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] +0x14,0x01,0x00,0xf0,0x02,0x00,0x00,0x00 + +# GFX11: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA ; encoding: [0x18,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] +0x18,0x01,0x00,0xf0,0x02,0x00,0x00,0x00 + +# GFX11: image_load v0, 
v[2:5], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1c,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] +0x1c,0x01,0x00,0xf0,0x02,0x00,0x00,0x00 + +# GFX11: image_load_mip v[252:255], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x04,0xf0,0x00,0xfc,0x00,0x00] +0x00,0x0f,0x04,0xf0,0x00,0xfc,0x00,0x00 + +# GFX11: image_load_mip v[253:255], [v255, v254], s[0:7] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0e,0x04,0xf0,0xff,0xfd,0x00,0x00,0xfe,0x00,0x00,0x00] +0x01,0x0e,0x04,0xf0,0xff,0xfd,0x00,0x00,0xfe,0x00,0x00,0x00 + +# GFX11: image_load_mip v[254:255], [v254, v255, v253], s[0:7] dmask:0xc dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x0c,0x04,0xf0,0xfe,0xfe,0x00,0x00,0xff,0xfd,0x00,0x00] +0x05,0x0c,0x04,0xf0,0xfe,0xfe,0x00,0x00,0xff,0xfd,0x00,0x00 + +# GFX11: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00] +0x09,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00 + +# GFX11: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00] +0x0d,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00 + +# GFX11: image_load_mip v255, [v254, v255, v253], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0x00,0x00] +0x11,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0x00,0x00 + +# GFX11: image_load_mip v255, [v254, v255, v253, v255], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xff,0x00] +0x15,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xff,0x00 + +# GFX11: image_store v[0:3], [v254, v255, v253, v255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x0f,0x18,0xf0,0xfe,0x00,0x18,0x00,0xff,0xfd,0xff,0x00] +0x1d,0x0f,0x18,0xf0,0xfe,0x00,0x18,0x00,0xff,0xfd,0xff,0x00 + +# GFX11: image_store v[0:3], v[254:255], 
s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x0f,0x18,0xf0,0xfe,0x00,0x18,0x00] +0x04,0x0f,0x18,0xf0,0xfe,0x00,0x18,0x00 + +# GFX11: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x0f,0x1c,0xf0,0xfd,0x00,0x18,0x00] +0x04,0x0f,0x1c,0xf0,0xfd,0x00,0x18,0x00 + +# GFX11: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00] +0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00 + +# GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00] +0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00 + +# GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] +0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00 + +# GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] +0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00 + +# GFX11: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00] +0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00 + +# GFX11: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] +0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00 + +# GFX11: image_atomic_umin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA glc ; encoding: [0x19,0x41,0x3c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] +0x19,0x41,0x3c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00 + +# GFX11: image_atomic_smax v4, [v32, v1, v2, v3], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY glc ; encoding: 
[0x1d,0x41,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x03,0x00] +0x1d,0x41,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x03,0x00 + +# GFX11: image_atomic_umax v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D glc ; encoding: [0x05,0x41,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00] +0x05,0x41,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00 + +# GFX11: image_atomic_and v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x48,0xf0,0x20,0x04,0x18,0x00] +0x00,0x41,0x48,0xf0,0x20,0x04,0x18,0x00 + +# GFX11: image_atomic_or v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x4c,0xf0,0x20,0x04,0x18,0x00] +0x00,0x41,0x4c,0xf0,0x20,0x04,0x18,0x00 + +# GFX11: image_atomic_xor v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x50,0xf0,0x20,0x04,0x18,0x00] +0x00,0x41,0x50,0xf0,0x20,0x04,0x18,0x00 + +# GFX11: image_atomic_inc v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x54,0xf0,0x20,0x04,0x18,0x00] +0x00,0x41,0x54,0xf0,0x20,0x04,0x18,0x00 + +# GFX11: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00] +0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00 + +# GFX11: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64] +0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64 + +# GFX11: image_sample_cl v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x00,0x00] +0x01,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x00,0x00 + +# GFX11: image_sample_cl v[64:66], [v32, v16, v15], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x00,0x00] +0x05,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x00,0x00 + +# GFX11: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; 
encoding: [0x09,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x14,0x00] +0x09,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x14,0x00 + +# GFX11: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x14,0x00] +0x0d,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x0f,0x14,0x00 + +# GFX11: image_sample_cl v[64:66], [v32, v16, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x00,0x00] +0x11,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x00,0x00 + +# GFX11: image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x15,0x00] +0x15,0x07,0x00,0xf1,0x20,0x40,0x01,0x64,0x10,0x14,0x15,0x00 + +# GFX11: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00] +0x01,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00 + +# GFX11: image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64] +0x04,0x07,0x70,0xf0,0x20,0x40,0x01,0x64 + +# GFX11: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64] +0x08,0x07,0x70,0xf0,0x20,0x40,0x01,0x64 + +# GFX11: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00] +0x11,0x07,0x70,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00 + +# GFX11: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00] +0x11,0x07,0x74,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00 + +# GFX11: 
image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x78,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00] +0x11,0x07,0x78,0xf0,0x20,0x40,0x01,0x64,0x10,0x08,0x00,0x00 + +# GFX11: image_sample_b_cl v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x08,0xf1,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00] +0x11,0x07,0x08,0xf1,0x20,0x40,0x01,0x64,0x10,0x08,0x04,0x00 + +# GFX11: image_sample_lz v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x07,0x7c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x00,0x00] +0x11,0x07,0x7c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x00,0x00 + +# GFX11: image_sample_c v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x07,0x80,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] +0x0d,0x07,0x80,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00 + +# GFX11: image_sample_c_cl v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x07,0x0c,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] +0x0d,0x07,0x0c,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01 + +# GFX11: image_sample_c_l v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x88,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] +0x09,0x07,0x88,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01 + +# GFX11: image_sample_c_b v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x8c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] +0x09,0x07,0x8c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01 + +# GFX11: image_sample_c_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa8,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] +0x09,0x07,0xa8,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01 + +# GFX11: image_sample_c_lz v[64:66], 
[v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x90,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] +0x09,0x07,0x90,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00 + +# GFX11: image_sample_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x94,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] +0x09,0x07,0x94,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00 + +# GFX11: image_sample_cl_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x18,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] +0x09,0x07,0x18,0xf1,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01 + +# GFX11: image_sample_b_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa0,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] +0x09,0x07,0xa0,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01 + +# GFX11: image_sample_l_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0x9c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01] +0x09,0x07,0x9c,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x01 + +# GFX11: image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa4,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00] +0x09,0x07,0xa4,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00 + +# GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00] +0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00 + +# GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64] +0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64 + +# GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: 
[0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64] +0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64 + +# GFX11: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00] +0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00 + +# GFX11: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00] +0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00 + +# GFX11: image_gather4_b_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x01,0x84,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] +0x15,0x01,0x84,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06 + +# GFX11: image_gather4_lz v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xc8,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00] +0x09,0x01,0xc8,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00 + +# GFX11: image_gather4_c v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xcc,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] +0x09,0x01,0xcc,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00 + +# GFX11: image_gather4_c_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0x88,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] +0x09,0x01,0x88,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06 + +# GFX11: image_gather4_c_l v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0x8c,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] +0x09,0x01,0x8c,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06 + +# GFX11: image_gather4_c_b v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0x90,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] 
+0x09,0x01,0x90,0xf1,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06 + +# GFX11: image_gather4_c_lz v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xd0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] +0x09,0x01,0xd0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00 + +# GFX11: image_gather4_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xd4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] +0x09,0x01,0xd4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00 + +# GFX11: image_gather4_lz_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xd8,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00] +0x09,0x01,0xd8,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x00 + +# GFX11: image_gather4_c_lz_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x01,0xdc,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06] +0x09,0x01,0xdc,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x05,0x06 + +# GFX11: image_get_lod v64, v[32:33], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x01,0xe0,0xf0,0x20,0x40,0x01,0x64] +0x04,0x01,0xe0,0xf0,0x20,0x40,0x01,0x64 + +# GFX11: image_get_lod v[64:65], [v32, v0, v16], s[4:11], s[100:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x03,0xe0,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00] +0x15,0x03,0xe0,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00 + +# GFX11: image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA glc ; encoding: [0x18,0x42,0x60,0xf0,0x05,0x01,0x02,0x00] +0x18,0x42,0x60,0xf0,0x05,0x01,0x02,0x00 + +# GFX11: image_msaa_load v[1:2], v[1:3], s[8:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA d16 ; encoding: [0x18,0x04,0x62,0xf0,0x01,0x01,0x02,0x00] +0x18,0x04,0x62,0xf0,0x01,0x01,0x02,0x00 + +# GFX11: image_msaa_load v[1:4], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1c,0x02,0x60,0xf0,0x05,0x01,0x02,0x00] 
+0x1c,0x02,0x60,0xf0,0x05,0x01,0x02,0x00 + +# GFX11: image_msaa_load v[1:2], v[5:8], s[8:15] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY d16 ; encoding: [0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00] +0x1c,0x02,0x62,0xf0,0x05,0x01,0x02,0x00 + +# GFX11: image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00] +0x1d,0x01,0x60,0xf0,0xcc,0x0a,0x0a,0x00,0x0b,0x0e,0x13,0x00 + +# GFX11: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00] +0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00 + +# GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00] +0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00 + +# GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] ; encoding: [0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00] +0x80,0x8f,0x68,0xf0,0x09,0x04,0x01,0x00 + +# GFX11: image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 ; encoding: [0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00] +0x80,0x8f,0x69,0xf0,0x09,0x04,0x01,0x00 + +# GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f] +0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f + +# GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00] +0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00 + +# GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f] +0x81,0x8f,0x68,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f + +# GFX11: image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00] 
+0x81,0x8f,0x69,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00 From 10555a82df22bf67a9c30165e952b44969b46b6f Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Tue, 31 May 2022 08:21:10 -0700 Subject: [PATCH 903/908] [PS5] Tweak dllexport test Post-commit review pointed out that both PS4 and PS5 were using the same -std argument, better to use different ones just in case. --- clang/test/SemaCXX/dllexport.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/SemaCXX/dllexport.cpp b/clang/test/SemaCXX/dllexport.cpp index 4aa1427563a2e1..7d8bfdadbb839f 100644 --- a/clang/test/SemaCXX/dllexport.cpp +++ b/clang/test/SemaCXX/dllexport.cpp @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template %s // RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s -// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s +// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fdeclspec -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s // Helper structs to make templates more expressive. 
From 1257315b20d5f2487c4445c129b7437d5f2debfd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 31 May 2022 17:21:58 +0200 Subject: [PATCH 904/908] [Scalarizer] Regenerate test checks (NFC) --- .../Scalarizer/basic-inseltpoison.ll | 889 ++++++++++++------ llvm/test/Transforms/Scalarizer/basic.ll | 889 ++++++++++++------ 2 files changed, 1186 insertions(+), 592 deletions(-) diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll index 5eb04df3fe7776..7e95ca17d706d3 100644 --- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll +++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -6,57 +7,58 @@ declare <4 x float> @ext(<4 x float>) define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) { ; CHECK-LABEL: @f1( -; CHECK: entry: -; CHECK: %init.i0 = extractelement <4 x float> %init, i32 0 -; CHECK: %init.i1 = extractelement <4 x float> %init, i32 1 -; CHECK: %init.i2 = extractelement <4 x float> %init, i32 2 -; CHECK: %init.i3 = extractelement <4 x float> %init, i32 3 -; CHECK: br label %loop -; CHECK: loop: -; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ] -; CHECK: %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ] -; CHECK: %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ] -; CHECK: %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ] -; CHECK: %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ] -; CHECK: %nexti = sub i32 %i, 1 -; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i -; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float* -; CHECK: %val.i0 = load float, 
float* %ptr.i0, align 16 -; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1 -; CHECK: %val.i1 = load float, float* %ptr.i1, align 4 -; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2 -; CHECK: %val.i2 = load float, float* %ptr.i2, align 8 -; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3 -; CHECK: %val.i3 = load float, float* %ptr.i3, align 4 -; CHECK: %add.i0 = fadd float %val.i0, %val.i2 -; CHECK: %add.i1 = fadd float %val.i1, %val.i3 -; CHECK: %add.i2 = fadd float %acc.i0, %acc.i2 -; CHECK: %add.i3 = fadd float %acc.i1, %acc.i3 -; CHECK: %add.upto0 = insertelement <4 x float> poison, float %add.i0, i32 0 -; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1 -; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2 -; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3 -; CHECK: %call = call <4 x float> @ext(<4 x float> %add) -; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0 -; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.0 -; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1 -; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.0 -; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2 -; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.0 -; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3 -; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.0 -; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0 -; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0 -; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0 -; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0 -; CHECK: store float %sel.i0, float* %ptr.i0 -; CHECK: store float %sel.i1, float* %ptr.i1 -; CHECK: store float %sel.i2, float* %ptr.i2 -; CHECK: store float %sel.i3, float* %ptr.i3 -; CHECK: %test = icmp eq i32 %nexti, 0 -; CHECK: br i1 %test, label %loop, label %exit -; CHECK: exit: -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16 +; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4 +; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8 +; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]] +; CHECK-NEXT: [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]] +; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0 +; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], 
float [[ADD_I1]], i32 1 +; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2 +; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3 +; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]]) +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 +; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 +; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 +; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00 +; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00 +; CHECK-NEXT: store float [[SEL_I0]], float* [[PTR_I0]], align 16 +; CHECK-NEXT: store float [[SEL_I1]], float* [[PTR_I1]], align 4 +; CHECK-NEXT: store float [[SEL_I2]], float* [[PTR_I2]], align 8 +; CHECK-NEXT: store float [[SEL_I3]], float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 +; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -70,17 +72,17 @@ loop: %dval = bitcast <4 x float> %val to <2 x double> %dacc = bitcast <4 x float> %acc to <2 x double> %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc, - <2 x i32> + <2 x i32> %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc, - <2 x i32> + <2 x i32> %f1 = 
bitcast <2 x double> %shuffle1 to <4 x float> %f2 = bitcast <2 x double> %shuffle2 to <4 x float> %add = fadd <4 x float> %f1, %f2 %call = call <4 x float> @ext(<4 x float> %add) %cmp = fcmp ogt <4 x float> %call, - + %sel = select <4 x i1> %cmp, <4 x float> %call, - <4 x float> + <4 x float> store <4 x float> %sel, <4 x float> *%ptr %test = icmp eq i32 %nexti, 0 @@ -91,57 +93,58 @@ exit: } define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) { -; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) { -; CHECK: entry: -; CHECK: %init.i0 = extractelement <4 x i32> %init, i32 0 -; CHECK: %init.i1 = extractelement <4 x i32> %init, i32 1 -; CHECK: %init.i2 = extractelement <4 x i32> %init, i32 2 -; CHECK: %init.i3 = extractelement <4 x i32> %init, i32 3 -; CHECK: br label %loop -; CHECK: loop: -; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ] -; CHECK: %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ] -; CHECK: %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ] -; CHECK: %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ] -; CHECK: %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ] -; CHECK: %nexti = sub i32 %i, 1 -; CHECK: %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i -; CHECK: %ptr.i0 = bitcast <4 x i8>* %ptr to i8* -; CHECK: %val.i0 = load i8, i8* %ptr.i0, align 4 -; CHECK: %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1 -; CHECK: %val.i1 = load i8, i8* %ptr.i1, align 1 -; CHECK: %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2 -; CHECK: %val.i2 = load i8, i8* %ptr.i2, align 2 -; CHECK: %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3 -; CHECK: %val.i3 = load i8, i8* %ptr.i3, align 1 -; CHECK: %ext.i0 = sext i8 %val.i0 to i32 -; CHECK: %ext.i1 = sext i8 %val.i1 to i32 -; CHECK: %ext.i2 = sext i8 %val.i2 to i32 -; CHECK: %ext.i3 = sext i8 %val.i3 to i32 -; CHECK: %add.i0 = add i32 %ext.i0, %acc.i0 -; CHECK: %add.i1 = add i32 %ext.i1, %acc.i1 -; CHECK: %add.i2 = add i32 %ext.i2, %acc.i2 -; 
CHECK: %add.i3 = add i32 %ext.i3, %acc.i3 -; CHECK: %cmp.i0 = icmp slt i32 %add.i0, -10 -; CHECK: %cmp.i1 = icmp slt i32 %add.i1, -11 -; CHECK: %cmp.i2 = icmp slt i32 %add.i2, -12 -; CHECK: %cmp.i3 = icmp slt i32 %add.i3, -13 -; CHECK: %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i -; CHECK: %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i -; CHECK: %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i -; CHECK: %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i -; CHECK: %trunc.i0 = trunc i32 %sel.i0 to i8 -; CHECK: %trunc.i1 = trunc i32 %sel.i1 to i8 -; CHECK: %trunc.i2 = trunc i32 %sel.i2 to i8 -; CHECK: %trunc.i3 = trunc i32 %sel.i3 to i8 -; CHECK: store i8 %trunc.i0, i8* %ptr.i0, align 4 -; CHECK: store i8 %trunc.i1, i8* %ptr.i1, align 1 -; CHECK: store i8 %trunc.i2, i8* %ptr.i2, align 2 -; CHECK: store i8 %trunc.i3, i8* %ptr.i3, align 1 -; CHECK: %test = icmp eq i32 %nexti, 0 -; CHECK: br i1 %test, label %loop, label %exit -; CHECK: exit: -; CHECK: ret void +; CHECK-LABEL: @f2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, <4 x i8>* [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: 
[[PTR_I0:%.*]] = bitcast <4 x i8>* [[PTR]] to i8* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, i8* [[PTR_I0]], align 4 +; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, i8* [[PTR_I1]], align 1 +; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i8, i8* [[PTR_I2]], align 2 +; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i8, i8* [[PTR_I3]], align 1 +; CHECK-NEXT: [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32 +; CHECK-NEXT: [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32 +; CHECK-NEXT: [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32 +; CHECK-NEXT: [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]] +; CHECK-NEXT: [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10 +; CHECK-NEXT: [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11 +; CHECK-NEXT: [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12 +; CHECK-NEXT: [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13 +; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]] +; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]] +; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]] +; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]] +; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8 +; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8 +; CHECK-NEXT: [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8 +; CHECK-NEXT: [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8 +; CHECK-NEXT: store i8 [[TRUNC_I0]], i8* [[PTR_I0]], align 4 +; CHECK-NEXT: store i8 [[TRUNC_I1]], i8* [[PTR_I1]], align 1 +; CHECK-NEXT: store i8 [[TRUNC_I2]], i8* [[PTR_I2]], align 2 +; 
CHECK-NEXT: store i8 [[TRUNC_I3]], i8* [[PTR_I3]], align 1 +; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 +; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -157,7 +160,7 @@ loop: %cmp = icmp slt <4 x i32> %add, %single = insertelement <4 x i32> poison, i32 %i, i32 0 %limit = shufflevector <4 x i32> %single, <4 x i32> poison, - <4 x i32> zeroinitializer + <4 x i32> zeroinitializer %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit %trunc = trunc <4 x i32> %sel to <4 x i8> store <4 x i8> %trunc, <4 x i8> *%ptr @@ -172,15 +175,28 @@ exit: ; Check that !tbaa information is preserved. define void @f3(<4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f3( -; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]] -; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]] -; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]] -; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]] -; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]] -; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]] -; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]] -; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]] -; CHECK: ret void +; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32* +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1 +; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2 +; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr 
i32, i32* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: ret void +; %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1 %add = add <4 x i32> %val, %val store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2 @@ -190,15 +206,28 @@ define void @f3(<4 x i32> *%src, <4 x i32> *%dst) { ; Check that !tbaa.struct information is preserved. 
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f4( -; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]] -; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]] -; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]] -; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]] -; CHECK: ret void +; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32* +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1 +; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2 +; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa.struct !5 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa.struct !5 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa.struct !5 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa.struct !5 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa.struct !5 +; CHECK-NEXT: store i32 [[ADD_I1]], i32* 
[[DST_I1]], align 4, !tbaa.struct !5 +; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa.struct !5 +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa.struct !5 +; CHECK-NEXT: ret void +; %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5 %add = add <4 x i32> %val, %val store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5 @@ -208,15 +237,38 @@ define void @f4(<4 x i32> *%src, <4 x i32> *%dst) { ; Check that llvm.access.group information is preserved. define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f5( -; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.access.group ![[TAG:[0-9]*]] -; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.access.group ![[TAG]] -; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.access.group ![[TAG]] -; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.access.group ![[TAG]] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[SRC:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[THIS_SRC_I0:%.*]] = bitcast <4 x i32>* [[THIS_SRC]] to i32* +; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 1 +; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 2 +; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 3 +; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[DST:%.*]], i32 [[INDEX]] +; CHECK-NEXT: 
[[THIS_DST_I0:%.*]] = bitcast <4 x i32>* [[THIS_DST]] to i32* +; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 1 +; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 2 +; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 3 +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[THIS_SRC_I0]], align 16, !llvm.access.group !6 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[THIS_SRC_I1]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[THIS_SRC_I2]], align 8, !llvm.access.group !6 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[THIS_SRC_I3]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[THIS_DST_I0]], align 16, !llvm.access.group !6 +; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[THIS_DST_I1]], align 4, !llvm.access.group !6 +; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[THIS_DST_I2]], align 8, !llvm.access.group !6 +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[THIS_DST_I3]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: end: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -238,29 +290,50 @@ end: ; Check that fpmath information is preserved. 
define <4 x float> @f6(<4 x float> %x) { ; CHECK-LABEL: @f6( -; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0 -; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]] -; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1 -; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]] -; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2 -; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]] -; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3 -; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]] -; CHECK: %res.upto0 = insertelement <4 x float> poison, float %res.i0, i32 0 -; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1 -; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2 -; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3 -; CHECK: ret <4 x float> %res +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9 +; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2 +; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9 +; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3 +; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2 +; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3 +; CHECK-NEXT: ret <4 x float> [[RES]] +; %res = fadd <4 x float> %x, , - !fpmath !4 + !fpmath !4 
ret <4 x float> %res } ; Check that random metadata isn't kept. define void @f7(<4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f7( -; CHECK-NOT: !foo -; CHECK: ret void +; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32* +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1 +; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2 +; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16 +; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4 +; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8 +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4 +; CHECK-NEXT: ret void +; %val = load <4 x i32> , <4 x i32> *%src, !foo !5 %add = add <4 x i32> %val, %val store <4 x i32> %add, <4 x i32> *%dst, !foo !5 @@ -269,26 +342,27 @@ define void @f7(<4 x i32> *%src, <4 x i32> *%dst) { ; Test GEP with vectors. 
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0, - float *%other) { ; CHECK-LABEL: @f8( -; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float** -; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3 -; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0 -; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2 -; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3 -; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1 -; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3 -; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100 -; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1 -; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100 -; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3 -; CHECK: store float* %val.i0, float** %dest.i0, align 32 -; CHECK: store float* %val.i1, float** %dest.i1, align 8 -; CHECK: store float* %val.i2, float** %dest.i2, align 16 -; CHECK: store float* %val.i3, float** %dest.i3, align 8 -; CHECK: ret void +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float** +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3 +; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x float*> [[PTR0:%.*]], i32 0 +; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x float*> [[PTR0]], i32 2 +; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x float*> [[PTR0]], i32 3 +; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1 +; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3 +; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, float* [[PTR0_I0]], i32 100 +; CHECK-NEXT: 
[[VAL_I1:%.*]] = getelementptr float, float* [[OTHER:%.*]], i32 [[I0_I1]] +; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, float* [[PTR0_I2]], i32 100 +; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, float* [[PTR0_I3]], i32 [[I0_I3]] +; CHECK-NEXT: store float* [[VAL_I0]], float** [[DEST_I0]], align 32 +; CHECK-NEXT: store float* [[VAL_I1]], float** [[DEST_I1]], align 8 +; CHECK-NEXT: store float* [[VAL_I2]], float** [[DEST_I2]], align 16 +; CHECK-NEXT: store float* [[VAL_I3]], float** [[DEST_I3]], align 8 +; CHECK-NEXT: ret void +; + float *%other) { %i1 = insertelement <4 x i32> %i0, i32 100, i32 0 %i2 = insertelement <4 x i32> %i1, i32 100, i32 2 %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1 @@ -299,24 +373,25 @@ define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0, ; Test the handling of unaligned loads. define void @f9(<4 x float> *%dest, <4 x float> *%src) { -; CHECK: @f9( -; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float* -; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3 -; CHECK: %src.i0 = bitcast <4 x float>* %src to float* -; CHECK: %val.i0 = load float, float* %src.i0, align 4 -; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1 -; CHECK: %val.i1 = load float, float* %src.i1, align 4 -; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2 -; CHECK: %val.i2 = load float, float* %src.i2, align 4 -; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3 -; CHECK: %val.i3 = load float, float* %src.i3, align 4 -; CHECK: store float %val.i0, float* %dest.i0, align 8 -; CHECK: store float %val.i1, float* %dest.i1, align 4 -; CHECK: store float %val.i2, float* %dest.i2, align 8 -; CHECK: store float %val.i3, float* %dest.i3, align 4 -; CHECK: ret void +; CHECK-LABEL: @f9( +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to 
float* +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 4 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 4 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 4 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 4 +; CHECK-NEXT: store float [[VAL_I0]], float* [[DEST_I0]], align 8 +; CHECK-NEXT: store float [[VAL_I1]], float* [[DEST_I1]], align 4 +; CHECK-NEXT: store float [[VAL_I2]], float* [[DEST_I2]], align 8 +; CHECK-NEXT: store float [[VAL_I3]], float* [[DEST_I3]], align 4 +; CHECK-NEXT: ret void +; %val = load <4 x float> , <4 x float> *%src, align 4 store <4 x float> %val, <4 x float> *%dest, align 8 ret void @@ -324,24 +399,25 @@ define void @f9(<4 x float> *%dest, <4 x float> *%src) { ; ...and again with subelement alignment. 
define void @f10(<4 x float> *%dest, <4 x float> *%src) { -; CHECK: @f10( -; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float* -; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3 -; CHECK: %src.i0 = bitcast <4 x float>* %src to float* -; CHECK: %val.i0 = load float, float* %src.i0, align 1 -; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1 -; CHECK: %val.i1 = load float, float* %src.i1, align 1 -; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2 -; CHECK: %val.i2 = load float, float* %src.i2, align 1 -; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3 -; CHECK: %val.i3 = load float, float* %src.i3, align 1 -; CHECK: store float %val.i0, float* %dest.i0, align 2 -; CHECK: store float %val.i1, float* %dest.i1, align 2 -; CHECK: store float %val.i2, float* %dest.i2, align 2 -; CHECK: store float %val.i3, float* %dest.i3, align 2 -; CHECK: ret void +; CHECK-LABEL: @f10( +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float* +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 1 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 1 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 1 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 1 +; CHECK-NEXT: store float 
[[VAL_I0]], float* [[DEST_I0]], align 2 +; CHECK-NEXT: store float [[VAL_I1]], float* [[DEST_I1]], align 2 +; CHECK-NEXT: store float [[VAL_I2]], float* [[DEST_I2]], align 2 +; CHECK-NEXT: store float [[VAL_I3]], float* [[DEST_I3]], align 2 +; CHECK-NEXT: ret void +; %val = load <4 x float> , <4 x float> *%src, align 1 store <4 x float> %val, <4 x float> *%dest, align 2 ret void @@ -349,11 +425,141 @@ define void @f10(<4 x float> *%dest, <4 x float> *%src) { ; Test that sub-byte loads aren't scalarized. define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) { -; CHECK: @f11( -; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0 -; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1 -; CHECK: store <32 x i1> %and, <32 x i1>* %dest -; CHECK: ret void +; CHECK-LABEL: @f11( +; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, <32 x i1>* [[SRC0:%.*]], i32 1 +; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, <32 x i1>* [[SRC0]], align 4 +; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0 +; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1 +; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2 +; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3 +; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4 +; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5 +; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6 +; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7 +; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8 +; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9 +; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10 +; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11 +; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12 +; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13 +; CHECK-NEXT: 
[[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14 +; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15 +; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16 +; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17 +; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18 +; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19 +; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20 +; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21 +; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22 +; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23 +; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24 +; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25 +; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26 +; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27 +; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28 +; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29 +; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30 +; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31 +; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, <32 x i1>* [[SRC1]], align 4 +; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0 +; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]] +; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1 +; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]] +; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2 +; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]] +; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3 +; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]] +; CHECK-NEXT: 
[[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4 +; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]] +; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5 +; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]] +; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6 +; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]] +; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7 +; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]] +; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8 +; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]] +; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9 +; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]] +; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10 +; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]] +; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11 +; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]] +; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12 +; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]] +; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13 +; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]] +; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14 +; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]] +; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15 +; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]] +; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16 +; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]] +; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17 +; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]] +; CHECK-NEXT: [[VAL1_I18:%.*]] = 
extractelement <32 x i1> [[VAL1]], i32 18 +; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]] +; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19 +; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]] +; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20 +; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]] +; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21 +; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]] +; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22 +; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]] +; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23 +; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]] +; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24 +; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]] +; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25 +; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]] +; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26 +; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]] +; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27 +; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]] +; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28 +; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]] +; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29 +; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]] +; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30 +; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]] +; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31 +; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]] +; CHECK-NEXT: 
[[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0 +; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1 +; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2 +; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3 +; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4 +; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5 +; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6 +; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7 +; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8 +; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9 +; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10 +; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11 +; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12 +; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13 +; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14 +; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15 +; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16 +; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17 +; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18 +; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19 +; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 
[[AND_I20]], i32 20 +; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21 +; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22 +; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23 +; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24 +; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25 +; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26 +; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27 +; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28 +; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29 +; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30 +; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31 +; CHECK-NEXT: store <32 x i1> [[AND]], <32 x i1>* [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1 %val0 = load <32 x i1> , <32 x i1> *%src0 %val1 = load <32 x i1> , <32 x i1> *%src1 @@ -364,32 +570,33 @@ define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) { ; Test vector GEPs with more than one index. 
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i, - float *%other) { ; CHECK-LABEL: @f13( -; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float** -; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3 -; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0 -; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0 -; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0 -; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1 -; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1 -; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1 -; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2 -; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2 -; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2 -; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3 -; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3 -; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3 -; CHECK: store float* %val.i0, float** %dest.i0, align 32 -; CHECK: store float* %val.i1, float** %dest.i1, align 8 -; CHECK: store float* %val.i2, float** %dest.i2, align 16 -; CHECK: store float* %val.i3, float** %dest.i3, align 8 -; CHECK: ret void +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float** +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x [4 x float]*> [[PTR:%.*]], i32 0 +; CHECK-NEXT: 
[[VAL_I0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I0]], i32 0, i32 [[I_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1 +; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I1]], i32 1, i32 [[I_I1]] +; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2 +; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I2]], i32 2, i32 [[I_I2]] +; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3 +; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I3]], i32 3, i32 [[I_I3]] +; CHECK-NEXT: store float* [[VAL_I0]], float** [[DEST_I0]], align 32 +; CHECK-NEXT: store float* [[VAL_I1]], float** [[DEST_I1]], align 8 +; CHECK-NEXT: store float* [[VAL_I2]], float** [[DEST_I2]], align 16 +; CHECK-NEXT: store float* [[VAL_I3]], float** [[DEST_I3]], align 8 +; CHECK-NEXT: ret void +; + float *%other) { %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr, - <4 x i32> , - <4 x i32> %i + <4 x i32> , + <4 x i32> %i store <4 x float *> %val, <4 x float *> *%dest ret void } @@ -397,16 +604,41 @@ define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i, ; Test combinations of vector and non-vector PHIs. 
define <4 x float> @f14(<4 x float> %acc, i32 %count) { ; CHECK-LABEL: @f14( -; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ] -; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ] -; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ] -; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ] -; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ] -; CHECK: %this_acc.upto0 = insertelement <4 x float> poison, float %this_acc.i0, i32 0 -; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1 -; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2 -; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3 -; CHECK: ret <4 x float> %next_acc +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0 +; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1 +; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2 +; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0 +; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float 
[[THIS_ACC_I1]], i32 1 +; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2 +; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3 +; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]]) +; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0 +; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]] +; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1 +; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]] +; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2 +; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]] +; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3 +; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]] +; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0 +; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1 +; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2 +; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3 +; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]] +; entry: br label %loop @@ -426,40 +658,50 @@ exit: ; Test unary operator scalarization. 
define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) { ; CHECK-LABEL: @f15( -; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i -; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float* -; CHECK: %val.i0 = load float, float* %ptr.i0, align 16 -; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1 -; CHECK: %val.i1 = load float, float* %ptr.i1, align 4 -; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2 -; CHECK: %val.i2 = load float, float* %ptr.i2, align 8 -; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3 -; CHECK: %val.i3 = load float, float* %ptr.i3, align 4 -; CHECK: %neg.i0 = fneg float %val.i0 -; CHECK: %neg.i1 = fneg float %val.i1 -; CHECK: %neg.i2 = fneg float %val.i2 -; CHECK: %neg.i3 = fneg float %val.i3 -; CHECK: %neg.upto0 = insertelement <4 x float> poison, float %neg.i0, i32 0 -; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1 -; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2 -; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3 -; CHECK: %call = call <4 x float> @ext(<4 x float> %neg) -; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0 -; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00 -; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1 -; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00 -; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2 -; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00 -; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3 -; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00 -; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00 -; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00 -; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00 -; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00 -; CHECK: store float %sel.i0, float* %ptr.i0, align 16 -; 
CHECK: store float %sel.i1, float* %ptr.i1, align 4 -; CHECK: store float %sel.i2, float* %ptr.i2, align 8 -; CHECK: store float %sel.i3, float* %ptr.i3, align 4 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16 +; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4 +; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8 +; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[NEG_I0:%.*]] = fneg float [[VAL_I0]] +; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]] +; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]] +; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]] +; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0 +; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1 +; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2 +; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3 +; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]]) +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp 
ogt float [[CALL_I1]], 2.000000e+00 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 +; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 +; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 +; CHECK-NEXT: [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00 +; CHECK-NEXT: [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00 +; CHECK-NEXT: store float [[SEL_I0]], float* [[PTR_I0]], align 16 +; CHECK-NEXT: store float [[SEL_I1]], float* [[PTR_I1]], align 4 +; CHECK-NEXT: store float [[SEL_I2]], float* [[PTR_I2]], align 8 +; CHECK-NEXT: store float [[SEL_I3]], float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 +; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -488,52 +730,106 @@ exit: ; Check that IR flags are preserved. 
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f16( -; CHECK: %res.i0 = add nuw nsw i32 -; CHECK: %res.i1 = add nuw nsw i32 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[RES]] +; %res = add nuw nsw <2 x i32> %i, %j ret <2 x i32> %res } define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f17( -; CHECK: %res.i0 = sdiv exact i32 -; CHECK: %res.i1 = sdiv exact i32 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[RES]] +; %res = sdiv exact <2 x i32> %i, %j ret <2 x i32> %res } define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f18( -; CHECK: %res.i0 = fadd fast float -; CHECK: %res.i1 = fadd fast float +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = 
fadd fast float [[X_I0]], [[Y_I0]] +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = fadd fast <2 x float> %x, %y ret <2 x float> %res } define <2 x float> @f19(<2 x float> %x) { ; CHECK-LABEL: @f19( -; CHECK: %res.i0 = fneg fast float -; CHECK: %res.i1 = fneg fast float +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]] +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = fneg fast <2 x float> %x ret <2 x float> %res } define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f20( -; CHECK: %res.i0 = fcmp fast ogt float -; CHECK: %res.i1 = fcmp fast ogt float +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]] +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i1> 
[[RES]] +; %res = fcmp fast ogt <2 x float> %x, %y ret <2 x i1> %res } declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) define <2 x float> @f21(<2 x float> %x) { ; CHECK-LABEL: @f21( -; CHECK: %res.i0 = call fast float @llvm.sqrt.f32 -; CHECK: %res.i1 = call fast float @llvm.sqrt.f32 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]]) +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]]) +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) ret <2 x float> %res } declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; CHECK-LABEL: @f22( -; CHECK: %res.i0 = call fast float @llvm.fma.f32 -; CHECK: %res.i1 = call fast float @llvm.fma.f32 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]]) +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float 
[[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) ret <2 x float> %res } @@ -541,10 +837,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; See https://reviews.llvm.org/D83101#2133062 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { ; CHECK-LABEL: @f23_crash( -; CHECK: %v0 = extractelement <2 x i32> %srcvec, i32 0 -; CHECK: %t1.upto0 = insertelement <2 x i32> poison, i32 %v0, i32 0 -; CHECK: %t1 = insertelement <2 x i32> %t1.upto0, i32 %v1, i32 1 -; CHECK: ret <2 x i32> %t1 +; CHECK-NEXT: [[V0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0 +; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[V0]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[T1]] +; %v0 = extractelement <2 x i32> %srcvec, i32 0 %t0 = insertelement <2 x i32> poison, i32 %v0, i32 0 %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1 diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll index 23eef9ec927dee..9fb6db4de0b76f 100644 --- a/llvm/test/Transforms/Scalarizer/basic.ll +++ b/llvm/test/Transforms/Scalarizer/basic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -6,57 +7,58 @@ declare <4 x float> @ext(<4 x float>) define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) { ; CHECK-LABEL: @f1( -; CHECK: entry: -; CHECK: %init.i0 = extractelement <4 x float> %init, i32 0 -; CHECK: %init.i1 = extractelement <4 x float> %init, i32 1 -; CHECK: %init.i2 = extractelement <4 x float> %init, i32 2 -; CHECK: 
%init.i3 = extractelement <4 x float> %init, i32 3 -; CHECK: br label %loop -; CHECK: loop: -; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ] -; CHECK: %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ] -; CHECK: %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ] -; CHECK: %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ] -; CHECK: %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ] -; CHECK: %nexti = sub i32 %i, 1 -; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i -; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float* -; CHECK: %val.i0 = load float, float* %ptr.i0, align 16 -; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1 -; CHECK: %val.i1 = load float, float* %ptr.i1, align 4 -; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2 -; CHECK: %val.i2 = load float, float* %ptr.i2, align 8 -; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3 -; CHECK: %val.i3 = load float, float* %ptr.i3, align 4 -; CHECK: %add.i0 = fadd float %val.i0, %val.i2 -; CHECK: %add.i1 = fadd float %val.i1, %val.i3 -; CHECK: %add.i2 = fadd float %acc.i0, %acc.i2 -; CHECK: %add.i3 = fadd float %acc.i1, %acc.i3 -; CHECK: %add.upto0 = insertelement <4 x float> poison, float %add.i0, i32 0 -; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1 -; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2 -; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3 -; CHECK: %call = call <4 x float> @ext(<4 x float> %add) -; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0 -; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.0 -; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1 -; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.0 -; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2 -; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.0 -; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3 -; CHECK: %cmp.i3 = fcmp ogt 
float %call.i3, 4.0 -; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0 -; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0 -; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0 -; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0 -; CHECK: store float %sel.i0, float* %ptr.i0 -; CHECK: store float %sel.i1, float* %ptr.i1 -; CHECK: store float %sel.i2, float* %ptr.i2 -; CHECK: store float %sel.i3, float* %ptr.i3 -; CHECK: %test = icmp eq i32 %nexti, 0 -; CHECK: br i1 %test, label %loop, label %exit -; CHECK: exit: -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16 +; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4 +; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] 
= load float, float* [[PTR_I2]], align 8 +; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]] +; CHECK-NEXT: [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]] +; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0 +; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1 +; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2 +; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3 +; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]]) +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 +; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 +; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 +; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00 +; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00 +; CHECK-NEXT: store float [[SEL_I0]], float* [[PTR_I0]], align 16 +; CHECK-NEXT: store float [[SEL_I1]], float* [[PTR_I1]], align 4 +; 
CHECK-NEXT: store float [[SEL_I2]], float* [[PTR_I2]], align 8 +; CHECK-NEXT: store float [[SEL_I3]], float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 +; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -70,17 +72,17 @@ loop: %dval = bitcast <4 x float> %val to <2 x double> %dacc = bitcast <4 x float> %acc to <2 x double> %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc, - <2 x i32> + <2 x i32> %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc, - <2 x i32> + <2 x i32> %f1 = bitcast <2 x double> %shuffle1 to <4 x float> %f2 = bitcast <2 x double> %shuffle2 to <4 x float> %add = fadd <4 x float> %f1, %f2 %call = call <4 x float> @ext(<4 x float> %add) %cmp = fcmp ogt <4 x float> %call, - + %sel = select <4 x i1> %cmp, <4 x float> %call, - <4 x float> + <4 x float> store <4 x float> %sel, <4 x float> *%ptr %test = icmp eq i32 %nexti, 0 @@ -91,57 +93,58 @@ exit: } define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) { -; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) { -; CHECK: entry: -; CHECK: %init.i0 = extractelement <4 x i32> %init, i32 0 -; CHECK: %init.i1 = extractelement <4 x i32> %init, i32 1 -; CHECK: %init.i2 = extractelement <4 x i32> %init, i32 2 -; CHECK: %init.i3 = extractelement <4 x i32> %init, i32 3 -; CHECK: br label %loop -; CHECK: loop: -; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ] -; CHECK: %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ] -; CHECK: %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ] -; CHECK: %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ] -; CHECK: %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ] -; CHECK: %nexti = sub i32 %i, 1 -; CHECK: %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i -; CHECK: %ptr.i0 = bitcast <4 x i8>* %ptr to i8* -; CHECK: %val.i0 = load i8, i8* %ptr.i0, align 4 -; 
CHECK: %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1 -; CHECK: %val.i1 = load i8, i8* %ptr.i1, align 1 -; CHECK: %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2 -; CHECK: %val.i2 = load i8, i8* %ptr.i2, align 2 -; CHECK: %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3 -; CHECK: %val.i3 = load i8, i8* %ptr.i3, align 1 -; CHECK: %ext.i0 = sext i8 %val.i0 to i32 -; CHECK: %ext.i1 = sext i8 %val.i1 to i32 -; CHECK: %ext.i2 = sext i8 %val.i2 to i32 -; CHECK: %ext.i3 = sext i8 %val.i3 to i32 -; CHECK: %add.i0 = add i32 %ext.i0, %acc.i0 -; CHECK: %add.i1 = add i32 %ext.i1, %acc.i1 -; CHECK: %add.i2 = add i32 %ext.i2, %acc.i2 -; CHECK: %add.i3 = add i32 %ext.i3, %acc.i3 -; CHECK: %cmp.i0 = icmp slt i32 %add.i0, -10 -; CHECK: %cmp.i1 = icmp slt i32 %add.i1, -11 -; CHECK: %cmp.i2 = icmp slt i32 %add.i2, -12 -; CHECK: %cmp.i3 = icmp slt i32 %add.i3, -13 -; CHECK: %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i -; CHECK: %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i -; CHECK: %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i -; CHECK: %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i -; CHECK: %trunc.i0 = trunc i32 %sel.i0 to i8 -; CHECK: %trunc.i1 = trunc i32 %sel.i1 to i8 -; CHECK: %trunc.i2 = trunc i32 %sel.i2 to i8 -; CHECK: %trunc.i3 = trunc i32 %sel.i3 to i8 -; CHECK: store i8 %trunc.i0, i8* %ptr.i0, align 4 -; CHECK: store i8 %trunc.i1, i8* %ptr.i1, align 1 -; CHECK: store i8 %trunc.i2, i8* %ptr.i2, align 2 -; CHECK: store i8 %trunc.i3, i8* %ptr.i3, align 1 -; CHECK: %test = icmp eq i32 %nexti, 0 -; CHECK: br i1 %test, label %loop, label %exit -; CHECK: exit: -; CHECK: ret void +; CHECK-LABEL: @f2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; 
CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, <4 x i8>* [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x i8>* [[PTR]] to i8* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, i8* [[PTR_I0]], align 4 +; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, i8* [[PTR_I1]], align 1 +; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i8, i8* [[PTR_I2]], align 2 +; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i8, i8* [[PTR_I3]], align 1 +; CHECK-NEXT: [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32 +; CHECK-NEXT: [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32 +; CHECK-NEXT: [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32 +; CHECK-NEXT: [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]] +; CHECK-NEXT: [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10 +; CHECK-NEXT: [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11 +; CHECK-NEXT: [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12 +; CHECK-NEXT: [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13 +; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]] +; CHECK-NEXT: [[SEL_I1]] = select i1 
[[CMP_I1]], i32 [[ADD_I1]], i32 [[I]] +; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]] +; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]] +; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8 +; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8 +; CHECK-NEXT: [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8 +; CHECK-NEXT: [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8 +; CHECK-NEXT: store i8 [[TRUNC_I0]], i8* [[PTR_I0]], align 4 +; CHECK-NEXT: store i8 [[TRUNC_I1]], i8* [[PTR_I1]], align 1 +; CHECK-NEXT: store i8 [[TRUNC_I2]], i8* [[PTR_I2]], align 2 +; CHECK-NEXT: store i8 [[TRUNC_I3]], i8* [[PTR_I3]], align 1 +; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 +; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -157,7 +160,7 @@ loop: %cmp = icmp slt <4 x i32> %add, %single = insertelement <4 x i32> undef, i32 %i, i32 0 %limit = shufflevector <4 x i32> %single, <4 x i32> undef, - <4 x i32> zeroinitializer + <4 x i32> zeroinitializer %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit %trunc = trunc <4 x i32> %sel to <4 x i8> store <4 x i8> %trunc, <4 x i8> *%ptr @@ -172,15 +175,28 @@ exit: ; Check that !tbaa information is preserved. 
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f3( -; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]] -; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]] -; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]] -; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]] -; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]] -; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]] -; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]] -; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]] -; CHECK: ret void +; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32* +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1 +; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2 +; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa [[TBAA3]] +; 
CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: ret void +; %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1 %add = add <4 x i32> %val, %val store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2 @@ -190,15 +206,28 @@ define void @f3(<4 x i32> *%src, <4 x i32> *%dst) { ; Check that !tbaa.struct information is preserved. define void @f4(<4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f4( -; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]] -; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]] -; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]] -; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]] -; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]] -; CHECK: ret void +; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32* +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1 +; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2 +; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa.struct !5 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa.struct !5 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa.struct !5 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3 +; CHECK-NEXT: 
[[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa.struct !5 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa.struct !5 +; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa.struct !5 +; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa.struct !5 +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa.struct !5 +; CHECK-NEXT: ret void +; %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5 %add = add <4 x i32> %val, %val store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5 @@ -208,15 +237,38 @@ define void @f4(<4 x i32> *%src, <4 x i32> *%dst) { ; Check that llvm.access.group information is preserved. define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f5( -; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.access.group ![[TAG:[0-9]*]] -; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.access.group ![[TAG]] -; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.access.group ![[TAG]] -; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.access.group ![[TAG]] -; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.access.group ![[TAG]] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[SRC:%.*]], i32 
[[INDEX]] +; CHECK-NEXT: [[THIS_SRC_I0:%.*]] = bitcast <4 x i32>* [[THIS_SRC]] to i32* +; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 1 +; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 2 +; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 3 +; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[DST:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[THIS_DST_I0:%.*]] = bitcast <4 x i32>* [[THIS_DST]] to i32* +; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 1 +; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 2 +; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 3 +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[THIS_SRC_I0]], align 16, !llvm.access.group !6 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[THIS_SRC_I1]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[THIS_SRC_I2]], align 8, !llvm.access.group !6 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[THIS_SRC_I3]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[THIS_DST_I0]], align 16, !llvm.access.group !6 +; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[THIS_DST_I1]], align 4, !llvm.access.group !6 +; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[THIS_DST_I2]], align 8, !llvm.access.group !6 +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[THIS_DST_I3]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: 
end: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -238,29 +290,50 @@ end: ; Check that fpmath information is preserved. define <4 x float> @f6(<4 x float> %x) { ; CHECK-LABEL: @f6( -; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0 -; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]] -; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1 -; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]] -; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2 -; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]] -; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3 -; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]] -; CHECK: %res.upto0 = insertelement <4 x float> poison, float %res.i0, i32 0 -; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1 -; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2 -; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3 -; CHECK: ret <4 x float> %res +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9 +; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2 +; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9 +; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3 +; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2 +; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> 
[[RES_UPTO2]], float [[RES_I3]], i32 3 +; CHECK-NEXT: ret <4 x float> [[RES]] +; %res = fadd <4 x float> %x, , - !fpmath !4 + !fpmath !4 ret <4 x float> %res } ; Check that random metadata isn't kept. define void @f7(<4 x i32> *%src, <4 x i32> *%dst) { ; CHECK-LABEL: @f7( -; CHECK-NOT: !foo -; CHECK: ret void +; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32* +; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1 +; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2 +; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4 +; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]] +; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]] +; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]] +; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]] +; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16 +; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4 +; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8 +; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4 +; CHECK-NEXT: ret void +; %val = load <4 x i32> , <4 x i32> *%src, !foo !5 %add = add <4 x i32> %val, %val store <4 x i32> %add, <4 x i32> *%dst, !foo !5 @@ -269,26 +342,27 @@ define void @f7(<4 x i32> *%src, <4 x i32> *%dst) { ; Test GEP with vectors. 
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0, - float *%other) { ; CHECK-LABEL: @f8( -; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float** -; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3 -; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0 -; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2 -; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3 -; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1 -; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3 -; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100 -; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1 -; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100 -; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3 -; CHECK: store float* %val.i0, float** %dest.i0, align 32 -; CHECK: store float* %val.i1, float** %dest.i1, align 8 -; CHECK: store float* %val.i2, float** %dest.i2, align 16 -; CHECK: store float* %val.i3, float** %dest.i3, align 8 -; CHECK: ret void +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float** +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3 +; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x float*> [[PTR0:%.*]], i32 0 +; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x float*> [[PTR0]], i32 2 +; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x float*> [[PTR0]], i32 3 +; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1 +; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3 +; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, float* [[PTR0_I0]], i32 100 +; CHECK-NEXT: 
[[VAL_I1:%.*]] = getelementptr float, float* [[OTHER:%.*]], i32 [[I0_I1]] +; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, float* [[PTR0_I2]], i32 100 +; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, float* [[PTR0_I3]], i32 [[I0_I3]] +; CHECK-NEXT: store float* [[VAL_I0]], float** [[DEST_I0]], align 32 +; CHECK-NEXT: store float* [[VAL_I1]], float** [[DEST_I1]], align 8 +; CHECK-NEXT: store float* [[VAL_I2]], float** [[DEST_I2]], align 16 +; CHECK-NEXT: store float* [[VAL_I3]], float** [[DEST_I3]], align 8 +; CHECK-NEXT: ret void +; + float *%other) { %i1 = insertelement <4 x i32> %i0, i32 100, i32 0 %i2 = insertelement <4 x i32> %i1, i32 100, i32 2 %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1 @@ -299,24 +373,25 @@ define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0, ; Test the handling of unaligned loads. define void @f9(<4 x float> *%dest, <4 x float> *%src) { -; CHECK: @f9( -; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float* -; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3 -; CHECK: %src.i0 = bitcast <4 x float>* %src to float* -; CHECK: %val.i0 = load float, float* %src.i0, align 4 -; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1 -; CHECK: %val.i1 = load float, float* %src.i1, align 4 -; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2 -; CHECK: %val.i2 = load float, float* %src.i2, align 4 -; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3 -; CHECK: %val.i3 = load float, float* %src.i3, align 4 -; CHECK: store float %val.i0, float* %dest.i0, align 8 -; CHECK: store float %val.i1, float* %dest.i1, align 4 -; CHECK: store float %val.i2, float* %dest.i2, align 8 -; CHECK: store float %val.i3, float* %dest.i3, align 4 -; CHECK: ret void +; CHECK-LABEL: @f9( +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to 
float* +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 4 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 4 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 4 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 4 +; CHECK-NEXT: store float [[VAL_I0]], float* [[DEST_I0]], align 8 +; CHECK-NEXT: store float [[VAL_I1]], float* [[DEST_I1]], align 4 +; CHECK-NEXT: store float [[VAL_I2]], float* [[DEST_I2]], align 8 +; CHECK-NEXT: store float [[VAL_I3]], float* [[DEST_I3]], align 4 +; CHECK-NEXT: ret void +; %val = load <4 x float> , <4 x float> *%src, align 4 store <4 x float> %val, <4 x float> *%dest, align 8 ret void @@ -324,24 +399,25 @@ define void @f9(<4 x float> *%dest, <4 x float> *%src) { ; ...and again with subelement alignment. 
define void @f10(<4 x float> *%dest, <4 x float> *%src) { -; CHECK: @f10( -; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float* -; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3 -; CHECK: %src.i0 = bitcast <4 x float>* %src to float* -; CHECK: %val.i0 = load float, float* %src.i0, align 1 -; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1 -; CHECK: %val.i1 = load float, float* %src.i1, align 1 -; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2 -; CHECK: %val.i2 = load float, float* %src.i2, align 1 -; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3 -; CHECK: %val.i3 = load float, float* %src.i3, align 1 -; CHECK: store float %val.i0, float* %dest.i0, align 2 -; CHECK: store float %val.i1, float* %dest.i1, align 2 -; CHECK: store float %val.i2, float* %dest.i2, align 2 -; CHECK: store float %val.i3, float* %dest.i3, align 2 -; CHECK: ret void +; CHECK-LABEL: @f10( +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float* +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3 +; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 1 +; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 1 +; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 1 +; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 1 +; CHECK-NEXT: store float 
[[VAL_I0]], float* [[DEST_I0]], align 2 +; CHECK-NEXT: store float [[VAL_I1]], float* [[DEST_I1]], align 2 +; CHECK-NEXT: store float [[VAL_I2]], float* [[DEST_I2]], align 2 +; CHECK-NEXT: store float [[VAL_I3]], float* [[DEST_I3]], align 2 +; CHECK-NEXT: ret void +; %val = load <4 x float> , <4 x float> *%src, align 1 store <4 x float> %val, <4 x float> *%dest, align 2 ret void @@ -349,11 +425,141 @@ define void @f10(<4 x float> *%dest, <4 x float> *%src) { ; Test that sub-byte loads aren't scalarized. define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) { -; CHECK: @f11( -; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0 -; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1 -; CHECK: store <32 x i1> %and, <32 x i1>* %dest -; CHECK: ret void +; CHECK-LABEL: @f11( +; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, <32 x i1>* [[SRC0:%.*]], i32 1 +; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, <32 x i1>* [[SRC0]], align 4 +; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0 +; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1 +; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2 +; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3 +; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4 +; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5 +; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6 +; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7 +; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8 +; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9 +; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10 +; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11 +; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12 +; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13 +; CHECK-NEXT: 
[[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14 +; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15 +; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16 +; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17 +; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18 +; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19 +; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20 +; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21 +; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22 +; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23 +; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24 +; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25 +; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26 +; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27 +; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28 +; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29 +; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30 +; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31 +; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, <32 x i1>* [[SRC1]], align 4 +; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0 +; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]] +; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1 +; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]] +; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2 +; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]] +; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3 +; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]] +; CHECK-NEXT: 
[[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4 +; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]] +; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5 +; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]] +; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6 +; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]] +; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7 +; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]] +; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8 +; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]] +; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9 +; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]] +; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10 +; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]] +; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11 +; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]] +; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12 +; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]] +; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13 +; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]] +; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14 +; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]] +; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15 +; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]] +; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16 +; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]] +; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17 +; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]] +; CHECK-NEXT: [[VAL1_I18:%.*]] = 
extractelement <32 x i1> [[VAL1]], i32 18 +; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]] +; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19 +; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]] +; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20 +; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]] +; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21 +; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]] +; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22 +; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]] +; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23 +; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]] +; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24 +; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]] +; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25 +; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]] +; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26 +; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]] +; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27 +; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]] +; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28 +; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]] +; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29 +; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]] +; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30 +; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]] +; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31 +; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]] +; CHECK-NEXT: 
[[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0 +; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1 +; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2 +; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3 +; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4 +; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5 +; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6 +; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7 +; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8 +; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9 +; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10 +; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11 +; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12 +; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13 +; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14 +; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15 +; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16 +; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17 +; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18 +; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19 +; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 
[[AND_I20]], i32 20 +; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21 +; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22 +; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23 +; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24 +; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25 +; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26 +; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27 +; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28 +; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29 +; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30 +; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31 +; CHECK-NEXT: store <32 x i1> [[AND]], <32 x i1>* [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1 %val0 = load <32 x i1> , <32 x i1> *%src0 %val1 = load <32 x i1> , <32 x i1> *%src1 @@ -364,32 +570,33 @@ define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) { ; Test vector GEPs with more than one index. 
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i, - float *%other) { ; CHECK-LABEL: @f13( -; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float** -; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1 -; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2 -; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3 -; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0 -; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0 -; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0 -; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1 -; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1 -; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1 -; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2 -; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2 -; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2 -; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3 -; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3 -; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3 -; CHECK: store float* %val.i0, float** %dest.i0, align 32 -; CHECK: store float* %val.i1, float** %dest.i1, align 8 -; CHECK: store float* %val.i2, float** %dest.i2, align 16 -; CHECK: store float* %val.i3, float** %dest.i3, align 8 -; CHECK: ret void +; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float** +; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1 +; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2 +; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x [4 x float]*> [[PTR:%.*]], i32 0 +; CHECK-NEXT: 
[[VAL_I0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I0]], i32 0, i32 [[I_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1 +; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I1]], i32 1, i32 [[I_I1]] +; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2 +; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I2]], i32 2, i32 [[I_I2]] +; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3 +; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I3]], i32 3, i32 [[I_I3]] +; CHECK-NEXT: store float* [[VAL_I0]], float** [[DEST_I0]], align 32 +; CHECK-NEXT: store float* [[VAL_I1]], float** [[DEST_I1]], align 8 +; CHECK-NEXT: store float* [[VAL_I2]], float** [[DEST_I2]], align 16 +; CHECK-NEXT: store float* [[VAL_I3]], float** [[DEST_I3]], align 8 +; CHECK-NEXT: ret void +; + float *%other) { %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr, - <4 x i32> , - <4 x i32> %i + <4 x i32> , + <4 x i32> %i store <4 x float *> %val, <4 x float *> *%dest ret void } @@ -397,16 +604,41 @@ define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i, ; Test combinations of vector and non-vector PHIs. 
define <4 x float> @f14(<4 x float> %acc, i32 %count) { ; CHECK-LABEL: @f14( -; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ] -; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ] -; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ] -; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ] -; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ] -; CHECK: %this_acc.upto0 = insertelement <4 x float> poison, float %this_acc.i0, i32 0 -; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1 -; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2 -; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3 -; CHECK: ret <4 x float> %next_acc +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0 +; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1 +; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2 +; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0 +; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float 
[[THIS_ACC_I1]], i32 1 +; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2 +; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3 +; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]]) +; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0 +; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]] +; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1 +; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]] +; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2 +; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]] +; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3 +; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]] +; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0 +; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1 +; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2 +; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3 +; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]] +; entry: br label %loop @@ -426,40 +658,50 @@ exit: ; Test unary operator scalarization. 
define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) { ; CHECK-LABEL: @f15( -; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i -; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float* -; CHECK: %val.i0 = load float, float* %ptr.i0, align 16 -; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1 -; CHECK: %val.i1 = load float, float* %ptr.i1, align 4 -; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2 -; CHECK: %val.i2 = load float, float* %ptr.i2, align 8 -; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3 -; CHECK: %val.i3 = load float, float* %ptr.i3, align 4 -; CHECK: %neg.i0 = fneg float %val.i0 -; CHECK: %neg.i1 = fneg float %val.i1 -; CHECK: %neg.i2 = fneg float %val.i2 -; CHECK: %neg.i3 = fneg float %val.i3 -; CHECK: %neg.upto0 = insertelement <4 x float> poison, float %neg.i0, i32 0 -; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1 -; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2 -; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3 -; CHECK: %call = call <4 x float> @ext(<4 x float> %neg) -; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0 -; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00 -; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1 -; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00 -; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2 -; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00 -; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3 -; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00 -; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00 -; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00 -; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00 -; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00 -; CHECK: store float %sel.i0, float* %ptr.i0, align 16 -; 
CHECK: store float %sel.i1, float* %ptr.i1, align 4 -; CHECK: store float %sel.i2, float* %ptr.i2, align 8 -; CHECK: store float %sel.i3, float* %ptr.i3, align 4 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]] +; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float* +; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16 +; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4 +; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2 +; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8 +; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3 +; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[NEG_I0:%.*]] = fneg float [[VAL_I0]] +; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]] +; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]] +; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]] +; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0 +; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1 +; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2 +; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3 +; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]]) +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp 
ogt float [[CALL_I1]], 2.000000e+00 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 +; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 +; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 +; CHECK-NEXT: [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00 +; CHECK-NEXT: [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00 +; CHECK-NEXT: store float [[SEL_I0]], float* [[PTR_I0]], align 16 +; CHECK-NEXT: store float [[SEL_I1]], float* [[PTR_I1]], align 4 +; CHECK-NEXT: store float [[SEL_I2]], float* [[PTR_I2]], align 8 +; CHECK-NEXT: store float [[SEL_I3]], float* [[PTR_I3]], align 4 +; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 +; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -488,52 +730,106 @@ exit: ; Check that IR flags are preserved. 
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f16( -; CHECK: %res.i0 = add nuw nsw i32 -; CHECK: %res.i1 = add nuw nsw i32 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[RES]] +; %res = add nuw nsw <2 x i32> %i, %j ret <2 x i32> %res } define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f17( -; CHECK: %res.i0 = sdiv exact i32 -; CHECK: %res.i1 = sdiv exact i32 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[RES]] +; %res = sdiv exact <2 x i32> %i, %j ret <2 x i32> %res } define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f18( -; CHECK: %res.i0 = fadd fast float -; CHECK: %res.i1 = fadd fast float +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = 
fadd fast float [[X_I0]], [[Y_I0]] +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = fadd fast <2 x float> %x, %y ret <2 x float> %res } define <2 x float> @f19(<2 x float> %x) { ; CHECK-LABEL: @f19( -; CHECK: %res.i0 = fneg fast float -; CHECK: %res.i1 = fneg fast float +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]] +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = fneg fast <2 x float> %x ret <2 x float> %res } define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f20( -; CHECK: %res.i0 = fcmp fast ogt float -; CHECK: %res.i1 = fcmp fast ogt float +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]] +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i1> 
[[RES]] +; %res = fcmp fast ogt <2 x float> %x, %y ret <2 x i1> %res } declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) define <2 x float> @f21(<2 x float> %x) { ; CHECK-LABEL: @f21( -; CHECK: %res.i0 = call fast float @llvm.sqrt.f32 -; CHECK: %res.i1 = call fast float @llvm.sqrt.f32 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]]) +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]]) +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) ret <2 x float> %res } declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; CHECK-LABEL: @f22( -; CHECK: %res.i0 = call fast float @llvm.fma.f32 -; CHECK: %res.i1 = call fast float @llvm.fma.f32 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]]) +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float 
[[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES]] +; %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) ret <2 x float> %res } @@ -541,10 +837,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; See https://reviews.llvm.org/D83101#2133062 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { ; CHECK-LABEL: @f23_crash( -; CHECK: %v0 = extractelement <2 x i32> %srcvec, i32 0 -; CHECK: %t1.upto0 = insertelement <2 x i32> poison, i32 %v0, i32 0 -; CHECK: %t1 = insertelement <2 x i32> %t1.upto0, i32 %v1, i32 1 -; CHECK: ret <2 x i32> %t1 +; CHECK-NEXT: [[V0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0 +; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[V0]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[T1]] +; %v0 = extractelement <2 x i32> %srcvec, i32 0 %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0 %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1 From f3d8335383672df3b3b950a082c2b11a80cbdb9d Mon Sep 17 00:00:00 2001 From: PeixinQiao Date: Tue, 31 May 2022 23:26:00 +0800 Subject: [PATCH 905/908] [flang] Support BIND(C) variable scope check As Fortran 2018 C819, a variable with the BIND attribute shall be declared in the specification part of a module. Add the support for this check. 
Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D126653 --- flang/lib/Semantics/check-declarations.cpp | 15 +++++++--- flang/test/Semantics/resolve113.f90 | 34 ++++++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 flang/test/Semantics/resolve113.f90 diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 25f4b29b45cbca..6b559545e448fb 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -104,7 +104,7 @@ class CheckHelper { } } bool IsResultOkToDiffer(const FunctionResult &); - void CheckBindCName(const Symbol &); + void CheckBindC(const Symbol &); // Check functions for defined I/O procedures void CheckDefinedIoProc( const Symbol &, const GenericDetails &, GenericKind::DefinedIo); @@ -236,7 +236,7 @@ void CheckHelper::Check(const Symbol &symbol) { if (symbol.attrs().test(Attr::VOLATILE)) { CheckVolatile(symbol, derived); } - CheckBindCName(symbol); + CheckBindC(symbol); if (isDone) { return; // following checks do not apply } @@ -1869,8 +1869,15 @@ static const std::string *DefinesBindCName(const Symbol &symbol) { } } -// Check that BIND(C) names are distinct -void CheckHelper::CheckBindCName(const Symbol &symbol) { +// Check that BIND(C) names are distinct and BIND(C) variable declared in module +void CheckHelper::CheckBindC(const Symbol &symbol) { + if (!symbol.attrs().test(Attr::BIND_C)) { + return; + } + if (symbol.has() && !symbol.owner().IsModule()) { + messages_.Say(symbol.name(), + "A variable with BIND(C) attribute may only appear in the specification part of a module"_err_en_US); + } if (const std::string * name{DefinesBindCName(symbol)}) { auto pair{bindC_.emplace(*name, symbol)}; if (!pair.second) { diff --git a/flang/test/Semantics/resolve113.f90 b/flang/test/Semantics/resolve113.f90 new file mode 100644 index 00000000000000..9755e280cfeb08 --- /dev/null +++ 
b/flang/test/Semantics/resolve113.f90 @@ -0,0 +1,34 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +module m +interface + module subroutine dump() + end subroutine +end interface + integer, bind(c, name="a") :: x1 + integer, bind(c) :: x2 +end + +subroutine sub() + !ERROR: A variable with BIND(C) attribute may only appear in the specification part of a module + integer, bind(c, name="b") :: x3 + !ERROR: A variable with BIND(C) attribute may only appear in the specification part of a module + integer, bind(c) :: x4 +end + +program main + !ERROR: A variable with BIND(C) attribute may only appear in the specification part of a module + integer, bind(c, name="c") :: x5 + !ERROR: A variable with BIND(C) attribute may only appear in the specification part of a module + integer, bind(c) :: x6 +end + +submodule(m) m2 + !ERROR: A variable with BIND(C) attribute may only appear in the specification part of a module + integer, bind(c, name="d") :: x7 + !ERROR: A variable with BIND(C) attribute may only appear in the specification part of a module + integer, bind(c) :: x8 +contains + module procedure dump + end procedure +end From c797952d4f012275b2e23f5ffcab1f39eacd184d Mon Sep 17 00:00:00 2001 From: David Goldman Date: Tue, 31 May 2022 11:32:23 -0400 Subject: [PATCH 906/908] [clangd] Minor fixes to ExtractVariableTests missed in D124486 --- .../unittests/tweaks/ExtractVariableTests.cpp | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp index bd06f318b2a991..70ecc202d0b288 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp @@ -305,22 +305,7 @@ TEST_F(ExtractVariableTest, Test) { EXPECT_EQ(IO.second, apply(IO.first)) << IO.first; } - ExtraArgs = {"-xobjective-c"}; - EXPECT_UNAVAILABLE(R"cpp( - 
__attribute__((objc_root_class)) - @interface Foo - - (void)setMethod1:(int)a; - - (int)method1; - @property int prop1; - @end - @implementation Foo - - (void)method { - [[self.method1]] = 1; - [[self.method1]] += 1; - [[self.prop1]] = 1; - [[self.prop1]] += 1; - } - @end)cpp"); + ExtraArgs = {"-xc"}; InputOutputs = { // Function Pointers {R"cpp(struct Handlers { @@ -345,6 +330,7 @@ TEST_F(ExtractVariableTest, Test) { void bar() { int (*placeholder)(int) = foo('c'); (void)placeholder; })cpp"}, + // Arithmetic on typedef types yields plain integer types {R"cpp(typedef long NSInteger; void varDecl() { NSInteger a = 2 * 5; @@ -355,6 +341,28 @@ TEST_F(ExtractVariableTest, Test) { NSInteger a = 2 * 5; long placeholder = a * 7; NSInteger b = placeholder + 3; })cpp"}, + }; + for (const auto &IO : InputOutputs) { + EXPECT_EQ(IO.second, apply(IO.first)) << IO.first; + } + + ExtraArgs = {"-xobjective-c"}; + EXPECT_UNAVAILABLE(R"cpp( + __attribute__((objc_root_class)) + @interface Foo + - (void)setMethod1:(int)a; + - (int)method1; + @property int prop1; + @end + @implementation Foo + - (void)method { + [[self.method1]] = 1; + [[self.method1]] += 1; + [[self.prop1]] = 1; + [[self.prop1]] += 1; + } + @end)cpp"); + InputOutputs = { // Support ObjC property references (explicit property getter). {R"cpp(__attribute__((objc_root_class)) @interface Foo From e22b02d9b4f8bb968628ac7cf2d9a42bf13e2898 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 31 May 2022 11:52:44 -0400 Subject: [PATCH 907/908] [Clang][Docs] Document the clang-offload-packager better Summary: This patch adds more in-depth documentation to the clang-offload-packacker's binary format. This format is used to create fat binaries and link them. 
--- clang/docs/ClangOffloadPackager.rst | 135 +++++++++++++++++++++++++--- 1 file changed, 122 insertions(+), 13 deletions(-) diff --git a/clang/docs/ClangOffloadPackager.rst b/clang/docs/ClangOffloadPackager.rst index 5d15cc4c5caf6e..2211f3bd600f3c 100644 --- a/clang/docs/ClangOffloadPackager.rst +++ b/clang/docs/ClangOffloadPackager.rst @@ -16,21 +16,130 @@ together. The image format is a small header wrapping around a string map. This tool creates bundled binaries so that they can be embedded into the host to create a fat-binary. -An embedded binary is marked by the ``0x10FF10AD`` magic bytes, followed by a +Binary Format +============= + +The binary format is marked by the ``0x10FF10AD`` magic bytes, followed by a version. Each created binary contains its own magic bytes. This allows us to locate all the embedded offloading sections even after they may have been merged -by the linker, such as when using relocatable linking. The format used is -primarily a binary serialization of the following struct. - -.. code-block:: c++ - - struct OffloadingImage { - uint16_t TheImageKind; - uint16_t TheOffloadKind; - uint32_t Flags; - StringMap StringData; - MemoryBufferRef Image; - }; +by the linker, such as when using relocatable linking. Conceptually, this binary +format is a serialization of a string map and an image buffer. The binary header +is described in the following :ref:`table<table-binary_header>`. + +.. 
table:: Offloading Binary Header + :name: table-binary_header + + +----------+--------------+----------------------------------------------------+ + | Type | Identifier | Description | + +==========+==============+====================================================+ + | uint8_t | magic | The magic bytes for the binary format (0x10FF10AD) | + +----------+--------------+----------------------------------------------------+ + | uint32_t | version | Version of this format (currently version 1) | + +----------+--------------+----------------------------------------------------+ + | uint64_t | size | Size of this binary in bytes | + +----------+--------------+----------------------------------------------------+ + | uint64_t | entry offset | Absolute offset of the offload entries in bytes | + +----------+--------------+----------------------------------------------------+ + | uint64_t | entry size | Size of the offload entries in bytes | + +----------+--------------+----------------------------------------------------+ + +Once identified through the magic bytes, we use the size field to take a slice +of the binary blob containing the information for a single offloading image. We +can then use the offset field to find the actual offloading entries containing +the image and metadata. The offload entry contains information about the device +image. It contains the fields shown in the following +:ref:`table<table-binary_entry>`. + +.. table:: Offloading Entry Table + :name: table-binary_entry + + +----------+---------------+----------------------------------------------------+ + | Type | Identifier | Description | + +==========+===============+====================================================+ + | uint16_t | image kind | The kind of the device image (e.g. bc, cubin) | + +----------+---------------+----------------------------------------------------+ + | uint16_t | offload kind | The producer of the image (e.g. 
openmp, cuda) | + +----------+---------------+----------------------------------------------------+ + | uint32_t | flags | Generic flags for the image | + +----------+---------------+----------------------------------------------------+ + | uint64_t | string offset | Absolute offset of the string metadata table | + +----------+---------------+----------------------------------------------------+ + | uint64_t | num strings | Number of string entries in the table | + +----------+---------------+----------------------------------------------------+ + | uint64_t | image offset | Absolute offset of the device image in bytes | + +----------+---------------+----------------------------------------------------+ + | uint64_t | image size | Size of the device image in bytes | + +----------+---------------+----------------------------------------------------+ + +This table contains the offsets of the string table and the device image itself +along with some other integer information. The image kind lets us easily +identify the type of image stored here without needing to inspect the binary. +The offloading kind is used to determine which registration code or linking +semantics are necessary for this image. These are stored as enumerations with +the following values for the :ref:`offload kind<table-offload_kind>` and the +:ref:`image kind<table-image_kind>`. + +.. 
table:: Image Kind + :name: table-image_kind + + +---------------+-------+---------------------------------------+ + | Name | Value | Description | + +===============+=======+=======================================+ + | IMG_None | 0x00 | No image information provided | + +---------------+-------+---------------------------------------+ + | IMG_Object | 0x01 | The image is a generic object file | + +---------------+-------+---------------------------------------+ + | IMG_Bitcode | 0x02 | The image is an LLVM-IR bitcode file | + +---------------+-------+---------------------------------------+ + | IMG_Cubin | 0x03 | The image is a CUDA object file | + +---------------+-------+---------------------------------------+ + | IMG_Fatbinary | 0x04 | The image is a CUDA fatbinary file | + +---------------+-------+---------------------------------------+ + | IMG_PTX | 0x05 | The image is a CUDA PTX file | + +---------------+-------+---------------------------------------+ + +.. table:: Offload Kind + :name: table-offload_kind + + +------------+-------+---------------------------------------+ + | Name | Value | Description | + +============+=======+=======================================+ + | OFK_None | 0x00 | No offloading information provided | + +------------+-------+---------------------------------------+ + | OFK_OpenMP | 0x01 | The producer was OpenMP offloading | + +------------+-------+---------------------------------------+ + | OFK_CUDA | 0x02 | The producer was CUDA | + +------------+-------+---------------------------------------+ + | OFK_HIP | 0x03 | The producer was HIP | + +------------+-------+---------------------------------------+ + +The flags are used to signify certain conditions, such as the presence of +debugging information or whether or not LTO was used. The string entry table is +used to generically contain any arbitrary key-value pair. This is stored as an +array of the :ref:`string entry<table-binary_string>` format. + +.. 
table:: Offloading String Entry + :name: table-binary_string + + +----------+--------------+-------------------------------------------------------+ + | Type | Identifier | Description | + +==========+==============+=======================================================+ + | uint64_t | key offset | Absolute byte offset of the key in the string table | + +----------+--------------+-------------------------------------------------------+ + | uint64_t | value offset | Absolute byte offset of the value in the string table | + +----------+--------------+-------------------------------------------------------+ + +The string entries simply provide offsets to a key and value pair in the +binary image's string table. The string table is simply a collection of null +terminated strings with defined offsets in the image. The string entry allows us +to create a key-value pair from this string table. This is used for passing +arbitrary arguments to the image, such as the triple and architecture. + +All of these structures are combined to form a single binary blob, the order +does not matter because of the use of absolute offsets. This makes it easier to +extend in the future. As mentioned previously, multiple offloading images are +bundled together by simply concatenating them in this format. Because we have +the magic bytes and size of each image, we can extract them as-needed. Usage ===== From 850dbff708f1e371722306363231a3816179ad7b Mon Sep 17 00:00:00 2001 From: lorenzo chelini Date: Mon, 30 May 2022 12:19:10 +0200 Subject: [PATCH 908/908] [MLIR][Math] Improve docs (NFC) Remove boilerplate examples and add a text at the dialect level to describe what kind of operands the operations accept (i.e., scalar, tensor or vector). Left a shorter sentence describing the input operands for each operation as this redundancy is convenient when browsing the documentation using the website. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D126648 --- mlir/include/mlir/Dialect/Math/IR/MathBase.td | 16 +- mlir/include/mlir/Dialect/Math/IR/MathOps.td | 259 +++++------------- 2 files changed, 91 insertions(+), 184 deletions(-) diff --git a/mlir/include/mlir/Dialect/Math/IR/MathBase.td b/mlir/include/mlir/Dialect/Math/IR/MathBase.td index 8df9e2ce6d1360..85c8c2baff6524 100644 --- a/mlir/include/mlir/Dialect/Math/IR/MathBase.td +++ b/mlir/include/mlir/Dialect/Math/IR/MathBase.td @@ -13,7 +13,21 @@ def Math_Dialect : Dialect { let cppNamespace = "::mlir::math"; let description = [{ The math dialect is intended to hold mathematical operations on integer and - floating type beyond simple arithmetics. + floating types beyond simple arithmetics. Each operation works on scalar, vector + or tensor type. On vector and tensor type operations apply elementwise unless + explicitly specified otherwise. As an example, the floating point absolute value + can be expressed as: + + ```mlir + // Scalar absolute value. + %a = math.abs %b : f64 + + // Vector elementwise absolute value. + %f = math.abs %g : vector<4xf32> + + // Tensor elementwise absolute value. + %x = math.abs %y : tensor<4x?xf8> + ``` }]; let hasConstantMaterializer = 1; let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed; diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td index 221af3f6a5f2ca..1378135cb354e8 100644 --- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td +++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td @@ -21,9 +21,9 @@ class Math_Op traits = []> : DeclareOpInterfaceMethods] # ElementwiseMappable.traits>; -// Base class for unary math operations on integer types. Require a operand and -// result of the same type. This type can be an integer type, or vector or tensor -// thereof. +// Base class for unary math operations on integer types. Require an operand +// and result of the same type. 
This type can be an integer type, vector or +// tensor thereof. class Math_IntegerUnaryOp traits = []> : Math_Op { let arguments = (ins SignlessIntegerLike:$operand); @@ -32,9 +32,9 @@ class Math_IntegerUnaryOp traits = []> : let assemblyFormat = "$operand attr-dict `:` type($result)"; } -// Base class for unary math operations on floating point types. Require a +// Base class for unary math operations on floating point types. Require an // operand and result of the same type. This type can be a floating point type, -// or vector or tensor thereof. +// vector or tensor thereof. class Math_FloatUnaryOp traits = []> : Math_Op { let arguments = (ins FloatLike:$operand); @@ -45,7 +45,7 @@ class Math_FloatUnaryOp traits = []> : // Base class for binary math operations on floating point types. Require two // operands and one result of the same type. This type can be a floating point -// type, or a vector or tensor thereof. +// type, vector or tensor thereof. class Math_FloatBinaryOp traits = []> : Math_Op { let arguments = (ins FloatLike:$lhs, FloatLike:$rhs); @@ -55,8 +55,8 @@ class Math_FloatBinaryOp traits = []> : } // Base class for floating point ternary operations. Require three operands and -// one result of the same type. This type can be a floating point type, or a -// vector or tensor thereof. +// one result of the same type. This type can be a floating point type, vector +// or tensor thereof. class Math_FloatTernaryOp traits = []> : Math_Op { let arguments = (ins FloatLike:$a, FloatLike:$b, FloatLike:$c); @@ -72,21 +72,15 @@ class Math_FloatTernaryOp traits = []> : def Math_AbsOp : Math_FloatUnaryOp<"abs"> { let summary = "floating point absolute-value operation"; let description = [{ - The `abs` operation computes the absolute value. It takes one operand and - returns one result of the same type. This type may be a float scalar type, - a vector whose element type is float, or a tensor of floats. + The `abs` operation computes the absolute value. 
It takes one operand of + floating point type (i.e., scalar, tensor or vector) and returns one result of + the same type. Example: ```mlir // Scalar absolute value. %a = math.abs %b : f64 - - // SIMD vector element-wise absolute value. - %f = math.abs %g : vector<4xf32> - - // Tensor element-wise absolute value. - %x = math.abs %y : tensor<4x?xf8> ``` }]; let hasFolder = 1; @@ -106,21 +100,14 @@ def Math_AtanOp : Math_FloatUnaryOp<"atan">{ ``` The `atan` operation computes the arcus tangent of a given value. It takes - one operand and returns one result of the same type. This type may be a - float scalar type, a vector whose element type is float, or a tensor of - floats. It has no standard attributes. + one operand of floating point type (i.e., scalar, tensor or vector) and returns + one result of the same type. It has no standard attributes. Example: ```mlir // Arcus tangent of scalar value. %a = math.atan %b : f64 - - // SIMD vector element-wise arcus tangent. - %f = math.atan %g : vector<4xf32> - - // Tensor element-wise arcus tangent. - %x = math.atan %y : tensor<4x?xf8> ``` }]; } @@ -139,9 +126,8 @@ def Math_Atan2Op : Math_FloatBinaryOp<"atan2">{ ``` The `atan2` operation takes two operands and returns one result, all of - which must be of the same type. This type may be a floating point scalar - type, a vector whose element type is a floating point type, or a floating - point tensor. + which must be of the same type. The operands must be of floating point type + (i.e., scalar, tensor or vector). The 2-argument arcus tangent `atan2(y, x)` returns the angle in the Euclidian plane between the positive x-axis and the ray through the point @@ -155,12 +141,6 @@ def Math_Atan2Op : Math_FloatBinaryOp<"atan2">{ ```mlir // Scalar variant. %a = math.atan2 %b, %c : f32 - - // SIMD vector variant. - %f = math.atan2 %g, %h : vector<4xf32> - - // Tensor variant. 
- %x = math.atan2 %y, %z : tensor<4x?xf32> ``` }]; } @@ -179,21 +159,14 @@ def Math_CeilOp : Math_FloatUnaryOp<"ceil"> { ``` The `ceil` operation computes the ceiling of a given value. It takes one - operand and returns one result of the same type. This type may be a float - scalar type, a vector whose element type is float, or a tensor of floats. - It has no standard attributes. + operand of floating point type (i.e., scalar, tensor or vector) and returns one + result of the same type. It has no standard attributes. Example: ```mlir // Scalar ceiling value. %a = math.ceil %b : f64 - - // SIMD vector element-wise ceiling value. - %f = math.ceil %g : vector<4xf32> - - // Tensor element-wise ceiling value. - %x = math.ceil %y : tensor<4x?xf8> ``` }]; let hasFolder = 1; @@ -213,22 +186,15 @@ def Math_CopySignOp : Math_FloatBinaryOp<"copysign"> { ``` The `copysign` returns a value with the magnitude of the first operand and - the sign of the second operand. It takes two operands and returns one - result of the same type. This type may be a float scalar type, a vector - whose element type is float, or a tensor of floats. It has no standard - attributes. + the sign of the second operand. It takes two operands and returns one result of + the same type. The operands must be of floating point type (i.e., scalar, + tensor or vector). It has no standard attributes. Example: ```mlir // Scalar copysign value. %a = math.copysign %b, %c : f64 - - // SIMD vector element-wise copysign value. - %f = math.copysign %g, %h : vector<4xf32> - - // Tensor element-wise copysign value. - %x = math.copysign %y, %z : tensor<4x?xf8> ``` }]; let hasFolder = 1; @@ -248,21 +214,14 @@ def Math_CosOp : Math_FloatUnaryOp<"cos"> { ``` The `cos` operation computes the cosine of a given value. It takes one - operand and returns one result of the same type. This type may be a float - scalar type, a vector whose element type is float, or a tensor of floats. - It has no standard attributes. 
+ operand of floating point type (i.e., scalar, tensor or vector) and returns one + result of the same type. It has no standard attributes. Example: ```mlir // Scalar cosine value. %a = math.cos %b : f64 - - // SIMD vector element-wise cosine value. - %f = math.cos %g : vector<4xf32> - - // Tensor element-wise cosine value. - %x = math.cos %y : tensor<4x?xf8> ``` }]; } @@ -281,21 +240,14 @@ def Math_SinOp : Math_FloatUnaryOp<"sin"> { ``` The `sin` operation computes the sine of a given value. It takes one - operand and returns one result of the same type. This type may be a float - scalar type, a vector whose element type is float, or a tensor of floats. - It has no standard attributes. + operand of floating point type (i.e., scalar, tensor or vector) and returns one + result of the same type. It has no standard attributes. Example: ```mlir // Scalar sine value. %a = math.sin %b : f64 - - // SIMD vector element-wise sine value. - %f = math.sin %g : vector<4xf32> - - // Tensor element-wise sine value. - %x = math.sin %y : tensor<4x?xf8> ``` }]; } @@ -308,18 +260,13 @@ def Math_CountLeadingZerosOp : Math_IntegerUnaryOp<"ctlz"> { let summary = "counts the leading zeros an integer value"; let description = [{ The `ctlz` operation computes the number of leading zeros of an integer value. + It operates on scalar, tensor or vector. Example: ```mlir // Scalar ctlz function value. %a = math.ctlz %b : i32 - - // SIMD vector element-wise ctlz function value. - %f = math.ctlz %g : vector<4xi16> - - // Tensor element-wise ctlz function value. - %x = math.ctlz %y : tensor<4x?xi8> ``` }]; let hasFolder = 1; @@ -333,18 +280,13 @@ def Math_CountTrailingZerosOp : Math_IntegerUnaryOp<"cttz"> { let summary = "counts the trailing zeros an integer value"; let description = [{ The `cttz` operation computes the number of trailing zeros of an integer value. + It operates on scalar, tensor or vector. Example: ```mlir // Scalar cttz function value. 
%a = math.cttz %b : i32 - - // SIMD vector element-wise cttz function value. - %f = math.cttz %g : vector<4xi16> - - // Tensor element-wise cttz function value. - %x = math.cttz %y : tensor<4x?xi8> ``` }]; let hasFolder = 1; @@ -358,18 +300,13 @@ def Math_CtPopOp : Math_IntegerUnaryOp<"ctpop"> { let summary = "counts the number of set bits of an integer value"; let description = [{ The `ctpop` operation computes the number of set bits of an integer value. + It operates on scalar, tensor or vector. Example: ```mlir // Scalar ctpop function value. %a = math.ctpop %b : i32 - - // SIMD vector element-wise ctpop function value. - %f = math.ctpop %g : vector<4xi16> - - // Tensor element-wise ctpop function value. - %x = math.ctpop %y : tensor<4x?xi8> ``` }]; let hasFolder = 1; @@ -388,22 +325,15 @@ def Math_ErfOp : Math_FloatUnaryOp<"erf"> { operation ::= ssa-id `=` `math.erf` ssa-use `:` type ``` - The `erf` operation computes the error function. It takes one operand - and returns one result of the same type. This type may be a float scalar - type, a vector whose element type is float, or a tensor of floats. It has - no standard attributes. + The `erf` operation computes the error function. It takes one operand of + floating point type (i.e., scalar, tensor or vector) and returns one result of + the same type. It has no standard attributes. Example: ```mlir // Scalar error function value. %a = math.erf %b : f64 - - // SIMD vector element-wise error function value. - %f = math.erf %g : vector<4xf32> - - // Tensor element-wise error function value. - %x = math.erf %y : tensor<4x?xf8> ``` }]; } @@ -422,21 +352,15 @@ def Math_ExpOp : Math_FloatUnaryOp<"exp"> { operation ::= ssa-id `=` `math.exp` ssa-use `:` type ``` - The `exp` operation takes one operand and returns one result of the same - type. This type may be a float scalar type, a vector whose element type is - float, or a tensor of floats. It has no standard attributes. 
+ The `exp` operation takes one operand of floating point type (i.e., scalar, + tensor or vector) and returns one result of the same type. It has no standard + attributes. Example: ```mlir // Scalar natural exponential. %a = math.exp %b : f64 - - // SIMD vector element-wise natural exponential. - %f = math.exp %g : vector<4xf32> - - // Tensor element-wise natural exponential. - %x = math.exp %y : tensor<4x?xf8> ``` }]; } @@ -455,21 +379,15 @@ def Math_Exp2Op : Math_FloatUnaryOp<"exp2"> { operation ::= ssa-id `=` `math.exp2` ssa-use `:` type ``` - The `exp` operation takes one operand and returns one result of the same - type. This type may be a float scalar type, a vector whose element type is - float, or a tensor of floats. It has no standard attributes. + The `exp2` operation takes one operand of floating point type (i.e., scalar, + tensor or vector) and returns one result of the same type. It has no standard + attributes. Example: ```mlir // Scalar natural exponential. %a = math.exp2 %b : f64 - - // SIMD vector element-wise natural exponential. - %f = math.exp2 %g : vector<4xf32> - - // Tensor element-wise natural exponential. - %x = math.exp2 %y : tensor<4x?xf8> ``` }]; } @@ -489,21 +407,15 @@ def Math_ExpM1Op : Math_FloatUnaryOp<"expm1"> { expm1(x) := exp(x) - 1 - The `expm1` operation takes one operand and returns one result of the same - type. This type may be a float scalar type, a vector whose element type is - float, or a tensor of floats. It has no standard attributes. + The `expm1` operation takes one operand of floating point type (i.e., + scalar, tensor or vector) and returns one result of the same type. It has no + standard attributes. Example: ```mlir // Scalar natural exponential minus 1. %a = math.expm1 %b : f64 - - // SIMD vector element-wise natural exponential minus 1. - %f = math.expm1 %g : vector<4xf32> - - // Tensor element-wise natural exponential minus 1. 
- %x = math.expm1 %y : tensor<4x?xf8> ``` }]; } @@ -522,21 +434,14 @@ def Math_FloorOp : Math_FloatUnaryOp<"floor"> { ``` The `floor` operation computes the floor of a given value. It takes one - operand and returns one result of the same type. This type may be a float - scalar type, a vector whose element type is float, or a tensor of floats. - It has no standard attributes. + operand of floating point type (i.e., scalar, tensor or vector) and returns one + result of the same type. It has no standard attributes. Example: ```mlir // Scalar floor value. - %a = math.floor %b : f64 - - // SIMD vector element-wise floor value. - %f = math.floor %g : vector<4xf32> - - // Tensor element-wise floor value. - %x = math.floor %y : tensor<4x?xf8> + %a = math.floor %b : f64 ``` }]; } @@ -555,21 +460,14 @@ def Math_FmaOp : Math_FloatTernaryOp<"fma"> { ``` The `fma` operation takes three operands and returns one result, each of - these is required to be the same type. This type may be a floating point - scalar type, a vector whose element type is a floating point type, or a - floating point tensor. + these is required to be the same type. Operands must be of floating point type + (i.e., scalar, tensor or vector). Example: ```mlir // Scalar fused multiply-add: d = a*b + c %d = math.fma %a, %b, %c : f64 - - // SIMD vector fused multiply-add, e.g. for Intel SSE. - %i = math.fma %f, %g, %h : vector<4xf32> - - // Tensor fused multiply-add. - %w = math.fma %x, %y, %z : tensor<4x?xbf16> ``` The semantics of the operation correspond to those of the `llvm.fma` @@ -587,12 +485,14 @@ def Math_LogOp : Math_FloatUnaryOp<"log"> { let summary = "base-e logarithm of the specified value"; let description = [{ - Computes the base-e logarithm of the given value. It takes one operand and - returns one result of the same type. + Computes the base-e logarithm of the given value. It takes one operand of + floating point type (i.e., scalar, tensor or vector) and returns one result of + the same type. 
Example: ```mlir + // Scalar log operation. %y = math.log %x : f64 ``` }]; @@ -606,12 +506,14 @@ def Math_Log10Op : Math_FloatUnaryOp<"log10"> { let summary = "base-10 logarithm of the specified value"; let description = [{ - Computes the base-10 logarithm of the given value. It takes one operand and - returns one result of the same type. + Computes the base-10 logarithm of the given value. It takes one operand of + floating point type (i.e., scalar, tensor or vector) and returns one result of + the same type. Example: ```mlir + // Scalar log10 operation. %y = math.log10 %x : f64 ``` }]; @@ -626,13 +528,15 @@ def Math_Log1pOp : Math_FloatUnaryOp<"log1p"> { let description = [{ Computes the base-e logarithm of one plus the given value. It takes one - operand and returns one result of the same type. + operand of floating point type (i.e., scalar, tensor or vector) and returns one + result of the same type. log1p(x) := log(1 + x) Example: ```mlir + // Scalar log1p operation. %y = math.log1p %x : f64 ``` }]; @@ -646,12 +550,14 @@ def Math_Log2Op : Math_FloatUnaryOp<"log2"> { let summary = "base-2 logarithm of the specified value"; let description = [{ - Computes the base-2 logarithm of the given value. It takes one operand and - returns one result of the same type. + Computes the base-2 logarithm of the given value. It takes one operand of + floating point type (i.e., scalar, tensor or vector) and returns one result of + the same type. Example: ```mlir + // Scalar log2 operation. %y = math.log2 %x : f64 ``` }]; @@ -671,22 +577,15 @@ def Math_PowFOp : Math_FloatBinaryOp<"powf"> { operation ::= ssa-id `=` `math.powf` ssa-use `,` ssa-use `:` type ``` - The `powf` operation takes two operands and returns one result, each of - these is required to be the same type. This type may be a floating point - scalar type, a vector whose element type is a floating point type, or a - floating point tensor. 
+ The `powf` operation takes two operands of floating point type (i.e., + scalar, tensor or vector) and returns one result of the same type. Operands + must have the same type. Example: ```mlir // Scalar exponentiation. %a = math.powf %b, %c : f64 - - // SIMD pointwise vector exponentiation - %f = math.powf %g, %h : vector<4xf32> - - // Tensor pointwise exponentiation. - %x = math.powf %y, %z : tensor<4x?xbf16> ``` }]; let hasFolder = 1; @@ -700,9 +599,15 @@ def Math_RsqrtOp : Math_FloatUnaryOp<"rsqrt"> { let summary = "reciprocal of sqrt (1 / sqrt of the specified value)"; let description = [{ The `rsqrt` operation computes the reciprocal of the square root. It takes - one operand and returns one result of the same type. This type may be a - float scalar type, a vector whose element type is float, or a tensor of - floats. It has no standard attributes. + one operand of floating point type (i.e., scalar, tensor or vector) and returns + one result of the same type. It has no standard attributes. + + Example: + + ```mlir + // Scalar reciprocal square root value. + %a = math.rsqrt %b : f64 + ``` }]; } @@ -713,20 +618,15 @@ def Math_RsqrtOp : Math_FloatUnaryOp<"rsqrt"> { def Math_SqrtOp : Math_FloatUnaryOp<"sqrt"> { let summary = "sqrt of the specified value"; let description = [{ - The `sqrt` operation computes the square root. It takes one operand and - returns one result of the same type. This type may be a float scalar type, a - vector whose element type is float, or a tensor of floats. It has no standard - attributes. + The `sqrt` operation computes the square root. It takes one operand of + floating point type (i.e., scalar, tensor or vector) and returns one result of + the same type. It has no standard attributes. Example: ```mlir // Scalar square root value. %a = math.sqrt %b : f64 - // SIMD vector element-wise square root value. - %f = math.sqrt %g : vector<4xf32> - // Tensor element-wise square root value. 
- %x = math.sqrt %y : tensor<4x?xf32> ``` }]; let hasFolder = 1; @@ -740,21 +640,14 @@ def Math_TanhOp : Math_FloatUnaryOp<"tanh"> { let summary = "hyperbolic tangent of the specified value"; let description = [{ The `tanh` operation computes the hyperbolic tangent. It takes one operand - and returns one result of the same type. This type may be a float scalar - type, a vector whose element type is float, or a tensor of floats. It has - no standard attributes. + of floating point type (i.e., scalar, tensor or vector) and returns one + result of the same type. It has no standard attributes. Example: ```mlir // Scalar hyperbolic tangent value. %a = math.tanh %b : f64 - - // SIMD vector element-wise hyperbolic tangent value. - %f = math.tanh %g : vector<4xf32> - - // Tensor element-wise hyperbolic tangent value. - %x = math.tanh %y : tensor<4x?xf8> ``` }]; }